Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/cts/CTSvars.py.in b/cts/CTSvars.py.in
index af2fcc41..cb59119f 100644
--- a/cts/CTSvars.py.in
+++ b/cts/CTSvars.py.in
@@ -1,3 +1,3 @@
-class CTSvars:
+class CTSvars(object):
CTS_home="@prefix@/share/pacemaker/tests/cts"
INITDIR="/etc/init.d"
diff --git a/cts/corolab.py b/cts/corolab.py
index 8f1ebb5e..98564419 100755
--- a/cts/corolab.py
+++ b/cts/corolab.py
@@ -1,335 +1,337 @@
#!/usr/bin/python
'''CTS: Cluster Testing System: Lab environment module
'''
+from __future__ import print_function
+
__copyright__='''
Copyright (c) 2010 Red Hat, Inc.
'''
# All rights reserved.
#
# Author: Angus Salkeld <asalkeld@redhat.com>
#
# This software licensed under BSD license, the text of which follows:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# - Neither the name of the MontaVista Software, Inc. nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
import sys
from cts.CTSscenarios import *
from cts.logging import *
from corotests import CoroTestList
from corosync import *
sys.path.append("/usr/share/pacemaker/tests/cts") # So that things work from the source directory
try:
from CTSlab import *
except ImportError:
sys.stderr.write("abort: couldn't find CTSLab in [%s]\n" %
' '.join(sys.path))
sys.stderr.write("(check your install and PYTHONPATH)\n")
sys.exit(-1)
tests = None
cm = None
old_handler = None
DefaultFacility = "daemon"
def usage(arg):
- print "Illegal argument " + arg
- print "usage: " + sys.argv[0] +" [options] number-of-iterations"
- print "\nCommon options: "
- print "\t [--at-boot (1|0)], does the cluster software start at boot time"
- print "\t [--nodes 'node list'], list of cluster nodes separated by whitespace"
- print "\t [--limit-nodes max], only use the first 'max' cluster nodes supplied with --nodes"
- print "\t [--logfile path], where should the test software look for logs from cluster nodes"
- print "\t [--rrp-bindaddr addr], extra interface used for rrp, provide the bindaddr"
- print "\t [--outputfile path], optional location for the test software to write logs to"
- print "\t [--syslog-facility name], which syslog facility should the test software log to"
- print "\t [--choose testcase-name], run only the named test"
- print "\t [--list-tests], list the valid tests"
- print "\t [--benchmark], add the timing information"
- print "\t "
- print "Additional (less common) options: "
- print "\t [--trunc (truncate logfile before starting)]"
- print "\t [--xmit-loss lost-rate(0.0-1.0)]"
- print "\t [--recv-loss lost-rate(0.0-1.0)]"
- print "\t [--standby (1 | 0 | yes | no)]"
- print "\t [--fencing (1 | 0 | yes | no)]"
- print "\t [--once], run all valid tests once"
- print "\t [--no-loop-tests], dont run looping/time-based tests"
- print "\t [--no-unsafe-tests], dont run tests that are unsafe for use with ocfs2/drbd"
- print "\t [--valgrind-tests], include tests using valgrind"
- print "\t [--experimental-tests], include experimental tests"
- print "\t [--oprofile 'node list'], list of cluster nodes to run oprofile on]"
- print "\t [--qarsh] Use the QARSH backdoor to access nodes instead of SSH"
- print "\t [--seed random_seed]"
- print "\t [--set option=value]"
+ print("Illegal argument " + arg)
+ print("usage: " + sys.argv[0] +" [options] number-of-iterations")
+ print("\nCommon options: ")
+ print("\t [--at-boot (1|0)], does the cluster software start at boot time")
+ print("\t [--nodes 'node list'], list of cluster nodes separated by whitespace")
+ print("\t [--limit-nodes max], only use the first 'max' cluster nodes supplied with --nodes")
+ print("\t [--logfile path], where should the test software look for logs from cluster nodes")
+ print("\t [--rrp-bindaddr addr], extra interface used for rrp, provide the bindaddr")
+ print("\t [--outputfile path], optional location for the test software to write logs to")
+ print("\t [--syslog-facility name], which syslog facility should the test software log to")
+ print("\t [--choose testcase-name], run only the named test")
+ print("\t [--list-tests], list the valid tests")
+ print("\t [--benchmark], add the timing information")
+ print("\t ")
+ print("Additional (less common) options: ")
+ print("\t [--trunc (truncate logfile before starting)]")
+ print("\t [--xmit-loss lost-rate(0.0-1.0)]")
+ print("\t [--recv-loss lost-rate(0.0-1.0)]")
+ print("\t [--standby (1 | 0 | yes | no)]")
+ print("\t [--fencing (1 | 0 | yes | no)]")
+ print("\t [--once], run all valid tests once")
+ print("\t [--no-loop-tests], dont run looping/time-based tests")
+ print("\t [--no-unsafe-tests], dont run tests that are unsafe for use with ocfs2/drbd")
+ print("\t [--valgrind-tests], include tests using valgrind")
+ print("\t [--experimental-tests], include experimental tests")
+ print("\t [--oprofile 'node list'], list of cluster nodes to run oprofile on]")
+ print("\t [--qarsh] Use the QARSH backdoor to access nodes instead of SSH")
+ print("\t [--seed random_seed]")
+ print("\t [--set option=value]")
sys.exit(1)
class CoroLabEnvironment(CtsLab):
def __init__(self):
CtsLab.__init__(self)
# Get a random seed for the random number generator.
self["DoStonith"] = 0
self["DoStandby"] = 0
self["DoFencing"] = 0
self["XmitLoss"] = "0.0"
self["RecvLoss"] = "0.0"
self["IPBase"] = "127.0.0.10"
self["ClobberCIB"] = 0
self["CIBfilename"] = None
self["CIBResource"] = 0
self["DoBSC"] = 0
self["use_logd"] = 0
self["oprofile"] = []
self["RrpBindAddr"] = None
self["warn-inactive"] = 0
self["ListTests"] = 0
self["benchmark"] = 0
self["logrestartcmd"] = "systemctl restart rsyslog.service 2>&1 > /dev/null"
self["syslogd"] ="rsyslog"
self["Schema"] = "corosync 2.0"
self["Stack"] = "corosync"
self['CMclass'] = corosync_needle
self["stonith-type"] = "external/ssh"
self["stonith-params"] = "hostlist=all,livedangerously=yes"
self["at-boot"] = 0 # Does the cluster software start automatically when the node boot
self["logger"] = ([StdErrLog(self, 'corosync.log')])
self["loop-minutes"] = 60
self["valgrind-prefix"] = None
self["valgrind-procs"] = "corosync"
self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
self["experimental-tests"] = 0
self["valgrind-tests"] = 0
self["unsafe-tests"] = 0
self["loop-tests"] = 0
self["all-once"] = 0
self["LogWatcher"] = "remote"
self["SyslogFacility"] = DefaultFacility
self["stats"] = 0
self.log = LogFactory().log
#
# Main entry into the test system.
#
if __name__ == '__main__':
Environment = CoroLabEnvironment()
NumIter = 0
Version = 1
LimitNodes = 0
TestCase = None
TruncateLog = 0
ListTests = 0
HaveSeed = 0
node_list = ''
#
# The values of the rest of the parameters are now properly derived from
# the configuration files.
#
# Set the signal handler
signal.signal(15, sig_handler)
signal.signal(10, sig_handler)
# Process arguments...
skipthis=None
args=sys.argv[1:]
for i in range(0, len(args)):
if skipthis:
skipthis=None
continue
elif args[i] == "-l" or args[i] == "--limit-nodes":
skipthis=1
LimitNodes = int(args[i+1])
elif args[i] == "-L" or args[i] == "--logfile":
skipthis=1
Environment["LogFileName"] = args[i+1]
elif args[i] == "--outputfile":
skipthis=1
Environment["OutputFile"] = args[i+1]
elif args[i] == "--rrp-bindaddr":
skipthis=1
Environment["RrpBindAddr"] = args[i+1]
elif args[i] == "--oprofile":
skipthis=1
Environment["oprofile"] = args[i+1].split(' ')
elif args[i] == "--trunc":
Environment["TruncateLog"]=1
elif args[i] == "--list-tests":
Environment["ListTests"]=1
elif args[i] == "--benchmark":
Environment["benchmark"]=1
elif args[i] == "--qarsh":
Environment.rsh.enable_qarsh()
elif args[i] == "--fencing":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
Environment["DoFencing"] = 1
elif args[i+1] == "0" or args[i+1] == "no":
Environment["DoFencing"] = 0
else:
usage(args[i+1])
elif args[i] == "--xmit-loss":
try:
float(args[i+1])
except ValueError:
print ("--xmit-loss parameter should be float")
usage(args[i+1])
skipthis=1
Environment["XmitLoss"] = args[i+1]
elif args[i] == "--recv-loss":
try:
float(args[i+1])
except ValueError:
print ("--recv-loss parameter should be float")
usage(args[i+1])
skipthis=1
Environment["RecvLoss"] = args[i+1]
elif args[i] == "--choose":
skipthis=1
TestCase = args[i+1]
elif args[i] == "--nodes":
skipthis=1
node_list = args[i+1].split(' ')
elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
Environment["at-boot"] = 1
elif args[i+1] == "0" or args[i+1] == "no":
Environment["at-boot"] = 0
else:
usage(args[i+1])
elif args[i] == "--set":
skipthis=1
(name, value) = args[i+1].split('=')
Environment[name] = value
elif args[i] == "--once":
skipthis=1
Environment["all-once"]=1
elif args[i] == "--stack":
skipthis=1
Environment["Stack"] = args[i+1]
else:
try:
NumIter=int(args[i])
except ValueError:
usage(args[i])
if Environment["OutputFile"]:
Environment["logger"].append(FileLog(Environment, Environment["OutputFile"]))
if len(node_list) < 1:
- print "No nodes specified!"
+ print("No nodes specified!")
sys.exit(1)
if LimitNodes > 0:
if len(node_list) > LimitNodes:
print("Limiting the number of nodes configured=%d (max=%d)"
%(len(node_list), LimitNodes))
while len(node_list) > LimitNodes:
node_list.pop(len(node_list)-1)
Environment["nodes"] = node_list
# Create the Cluster Manager object
cm = Environment['CMclass'](Environment)
Audits = CoroAuditList(cm)
if Environment["ListTests"] == 1 :
Tests = CoroTestList(cm, Audits)
Environment.log("Total %d tests"%len(Tests))
for test in Tests :
Environment.log(str(test.name));
sys.exit(0)
if TruncateLog:
Environment.log("Truncating %s" % LogFile)
lf = open(LogFile, "w");
if lf != None:
lf.truncate(0)
lf.close()
if TestCase != None:
for test in CoroTestList(cm, Audits):
if test.name == TestCase:
Tests.append(test)
if Tests == []:
usage("--choose: No applicable/valid tests chosen")
else:
Tests = CoroTestList(cm, Audits)
# Scenario selection
if Environment["DoBSC"]:
scenario = RandomTests(cm, [ BasicSanityCheck(Environment) ], Audits, Tests)
elif Environment["all-once"] or NumIter == 0:
NumIter = len(Tests)
scenario = AllOnce(
cm, [ BootCluster(Environment), TestAgentComponent(Environment), PacketLoss(Environment) ], Audits, Tests)
else:
scenario = RandomTests(
cm, [ BootCluster(Environment), TestAgentComponent(Environment), PacketLoss(Environment) ], Audits, Tests)
Environment.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ")
Environment.log("Stack: %s" % Environment["Stack"])
Environment.log("Schema: %s" % Environment["Schema"])
Environment.log("Scenario: %s" % scenario.__doc__)
Environment.log("Random Seed: %s" % Environment["RandSeed"])
Environment.log("System log files: %s" % Environment["LogFileName"])
Environment.dump()
rc = Environment.run(scenario, NumIter)
sys.exit(rc)
diff --git a/cts/corosync.py b/cts/corosync.py
index 21898511..4c077417 100644
--- a/cts/corosync.py
+++ b/cts/corosync.py
@@ -1,673 +1,679 @@
'''CTS: Cluster Testing System: corosync...
'''
__copyright__='''
Copyright (c) 2010 Red Hat, Inc.
'''
# All rights reserved.
#
# Author: Angus Salkeld <asalkeld@redhat.com>
#
# This software licensed under BSD license, the text of which follows:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# - Neither the name of the MontaVista Software, Inc. nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
import os
import sys
import time
import socket
import shutil
import string
import augeas
from cts.CTS import ClusterManager
from cts.CTSscenarios import ScenarioComponent
from cts.remote import RemoteExec
from cts.remote import RemoteFactory
from cts.logging import *
from cts.CTSvars import CTSvars
from cts.CTSaudits import ClusterAudit
from cts.CTSaudits import LogAudit
###################################################################
class CoroConfig(object):
def __init__(self, corobase=None):
self.base = "/files/etc/corosync/corosync.conf/"
self.new_root = "/tmp/aug-root/"
if corobase == None:
self.corobase = os.getcwd() + "/.."
else:
self.corobase = corobase
example = self.corobase + "/conf/corosync.conf.example"
if os.path.isdir(self.new_root):
shutil.rmtree (self.new_root)
os.makedirs (self.new_root + "/etc/corosync")
shutil.copy (example, self.new_root + "/etc/corosync/corosync.conf")
self.aug = augeas.Augeas (root=self.new_root,
loadpath=self.corobase + "/conf/lenses")
self.original = {}
# store the original values (of totem), so we can restore them in
# apply_default_config()
totem = self.aug.match('/files/etc/corosync/corosync.conf/totem/*')
for c in totem:
# /files/etc/corosync/corosync.conf/
short_name = c[len(self.base):]
self.original[short_name] = self.aug.get(c)
interface = self.aug.match('/files/etc/corosync/corosync.conf/totem/interface/*')
for c in interface:
short_name = c[len(self.base):]
self.original[short_name] = self.aug.get(c)
def get (self, name):
return self.aug.get (self.base + name)
def set (self, name, value):
token = self.aug.set (self.base + name, str(value))
def save (self):
self.aug.save()
def get_filename(self):
return self.new_root + "/etc/corosync/corosync.conf"
###################################################################
class corosync_needle(ClusterManager):
'''
bla
'''
def __init__(self, Environment, randseed=None):
ClusterManager.__init__(self, Environment, randseed)
self.update({
"Name" : "corosync(needle)",
"StartCmd" : "service corosync start",
"StopCmd" : "service corosync stop",
"RereadCmd" : "service corosync reload",
"StatusCmd" : "service corosync status",
"DeadTime" : 30,
"StartTime" : 15, # Max time to start up
"StableTime" : 10,
"BreakCommCmd" : "/usr/share/corosync/tests/net_breaker.sh BreakCommCmd %s",
"FixCommCmd" : "/usr/share/corosync/tests/net_breaker.sh FixCommCmd %s",
"QuorumCmd" : "corosync-quorumtool -s",
"Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting.*",
"Pat:They_stopped" : "%s.*Member left:.*%s.*",
"Pat:They_dead" : "corosync:.*Node %s is now: lost",
"Pat:Local_starting" : "%s.*Initializing transport",
"Pat:Local_started" : "%s.*Initializing transport",
"Pat:Master_started" : "%s.*Completed service synchronization, ready to provide service.",
"Pat:Slave_started" : "%s.*Completed service synchronization, ready to provide service.",
"Pat:ChildKilled" : "%s corosync.*Child process %s terminated with signal 9",
"Pat:ChildRespawn" : "%s corosync.*Respawning failed child process: %s",
"Pat:ChildExit" : "Child process .* exited",
"Pat:DC_IDLE" : ".*A new membership.*was formed.",
# Bad news Regexes. Should never occur.
"BadRegexes" : (
r"ERROR:",
r"CRIT:",
r"Shutting down\.",
r"Forcing shutdown\.",
r"core dump",
r"Could not bind AF_UNIX",
r"Too many open files",
r"Address already in use",
),
"LogFileName" : Environment["LogFileName"],
})
self.start_cpg = True
self.cpg_agent = {}
self.sam_agent = {}
self.votequorum_agent = {}
self.config = CoroConfig ()
self.node_to_ip = {}
self.new_config = {}
self.applied_config = {}
for n in self.Env["nodes"]:
ip = socket.gethostbyname(n)
ips = ip.split('.')
ips[3] = '0'
ip_mask = '.'.join(ips)
self.new_config['totem/interface/bindnetaddr'] = str(ip_mask)
return
def apply_default_config(self):
for c in self.applied_config:
if 'bindnetaddr' in c:
continue
- elif not self.config.original.has_key(c):
+ elif c not in self.config.original:
# new config option (non default)
pass
elif self.applied_config[c] is not self.config.original[c]:
# reset to the original
self.new_config[c] = self.config.original[c]
if len(self.new_config) > 0:
self.debug('applying default config')
self.stopall()
def apply_new_config(self, need_all_up=True):
if len(self.new_config) > 0:
self.debug('applying new config')
self.stopall()
if need_all_up:
self.startall()
def install_all_config(self):
tmp1 = {}
sorted_keys = sorted(self.new_config.keys())
for c in sorted_keys:
self.log('configuring: ' + c + ' = '+ str(self.new_config[c]))
self.config.set (c, self.new_config[c])
self.applied_config[c] = self.new_config[c]
tmp1[c] = self.new_config[c]
for c in tmp1:
del self.new_config[c]
self.config.save()
src_file = self.config.get_filename()
for node in self.Env["nodes"]:
self.rsh.cp(src_file, "%s:%s" % (node, "/etc/corosync/"))
def install_config(self, node):
# install gets new_config and installs it, then moves the
# config to applied_config
if len(self.new_config) > 0:
self.install_all_config()
def key_for_node(self, node):
- if not self.node_to_ip.has_key(node):
+ if node not in self.node_to_ip:
self.node_to_ip[node] = socket.gethostbyname (node)
return self.node_to_ip[node]
def StartaCM(self, node, verbose=False):
- if not self.ShouldBeStatus.has_key(node):
+ if node not in self.ShouldBeStatus:
self.ShouldBeStatus[node] = "down"
if self.ShouldBeStatus[node] != "down":
return 1
self.debug('starting corosync on : ' + node)
ret = ClusterManager.StartaCM(self, node)
if self.start_cpg:
- if self.cpg_agent.has_key(node):
+ if node in self.cpg_agent:
self.cpg_agent[node].restart()
else:
self.cpg_agent[node] = CpgTestAgent(node, self.Env)
self.cpg_agent[node].start()
- if self.sam_agent.has_key(node):
+ if node in self.sam_agent:
self.sam_agent[node].restart()
# votequorum agent started as needed.
- if self.applied_config.has_key('quorum/provider'):
+ if 'quorum/provider' in self.applied_config:
if self.applied_config['quorum/provider'] is 'corosync_votequorum':
- if self.votequorum_agent.has_key(node):
+ if node in self.votequorum_agent:
self.votequorum_agent[node].restart()
else:
self.votequorum_agent[node] = VoteQuorumTestAgent(node, self.Env)
self.votequorum_agent[node].start()
return ret
def StopaCM(self, node, verbose=False):
if self.ShouldBeStatus[node] != "up":
return 1
self.debug('stoping corosync on : ' + node)
- if self.cpg_agent.has_key(node):
+ if node in self.cpg_agent:
self.cpg_agent[node].stop()
- if self.sam_agent.has_key(node):
+ if node in self.sam_agent:
self.sam_agent[node].stop()
- if self.votequorum_agent.has_key(node):
+ if node in self.votequorum_agent:
self.votequorum_agent[node].stop()
return ClusterManager.StopaCM(self, node)
def test_node_CM(self, node):
# 2 - up and stable
# 1 - unstable
# 0 - down
(rc, lines) = self.rsh(node, self["StatusCmd"], stdout=2)
out = str(lines)
if 'systemd' in out:
if 'running' in out:
ret = 2
else:
ret = 0
else:
is_stopped = string.find(out, 'stopped')
is_dead = string.find(out, 'dead')
ret = (is_dead is -1 and is_stopped is -1)
try:
if ret:
ret = 2
if self.ShouldBeStatus[node] == "down":
self.log(
"Node status for %s is %s but we think it should be %s"
% (node, "up", self.ShouldBeStatus[node]))
else:
if self.ShouldBeStatus[node] == "up":
self.log(
"Node status for %s is %s but we think it should be %s"
% (node, "down", self.ShouldBeStatus[node]))
except KeyError: pass
if ret: self.ShouldBeStatus[node] = "up"
else: self.ShouldBeStatus[node] = "down"
return ret
def StataCM(self, node):
'''Report the status of corosync on a given node'''
if self.test_node_CM(node) > 0:
return 1
else:
return None
def RereadCM(self, node):
self.log('reloading corosync on : ' + node)
return ClusterManager.RereadCM(self, node)
def find_partitions(self):
ccm_partitions = []
return ccm_partitions
def prepare(self):
'''Finish the Initialization process. Prepare to test...'''
self.partitions_expected = 1
for node in self.Env["nodes"]:
self.ShouldBeStatus[node] = ""
self.unisolate_node(node)
self.StataCM(node)
def HasQuorum(self, node_list):
# If we are auditing a partition, then one side will
# have quorum and the other not.
# So the caller needs to tell us which we are checking
# If no value for node_list is specified... assume all nodes
if not node_list:
node_list = self.Env["nodes"]
for node in node_list:
if self.ShouldBeStatus[node] == "up":
(quorum, qout) = self.rsh(node, self["QuorumCmd"], stdout=2)
if quorum == 1:
return 1
elif quorum == 0:
return 0
else:
self.log("WARN: Unexpected quorum test result from %s : %d" % (node, quorum))
return 0
def Components(self):
return None
class ShmLeakAudit(ClusterAudit):
def __init__(self, cm):
self.CM = cm
def name(self):
return "ShmLeakAudit"
def is_applicable(self):
return 1
def __call__(self):
rc = 1
for node in self.CM.Env["nodes"]:
(res, lines) = self.CM.rsh(node, "/usr/share/corosync/tests/shm_leak_audit.sh", None)
for line in lines:
self.CM.log("%s leaked %s" % (node, line))
rc = 0
return rc
###################################################################
class TestAgentComponent(ScenarioComponent):
def __init__(self, Env):
self.Env = Env
def IsApplicable(self):
'''Return TRUE if the current ScenarioComponent is applicable
in the given LabEnvironment given to the constructor.
'''
return True
def SetUp(self, CM):
'''Set up the given ScenarioComponent'''
self.CM = CM
for node in self.Env["nodes"]:
if not CM.StataCM(node):
raise RuntimeError ("corosync not up")
if self.CM.start_cpg:
- if self.CM.cpg_agent.has_key(node):
+ if node in self.CM.cpg_agent:
self.CM.cpg_agent[node].clean_start()
else:
self.CM.cpg_agent[node] = CpgTestAgent(node, CM.Env)
self.CM.cpg_agent[node].start()
- if self.CM.sam_agent.has_key(node):
+ if node in self.CM.sam_agent:
self.CM.sam_agent[node].clean_start()
else:
self.CM.sam_agent[node] = SamTestAgent(node, CM.Env)
self.CM.sam_agent[node].start()
# votequorum agent started as needed.
- if self.CM.applied_config.has_key('quorum/provider'):
+ if 'quorum/provider' in self.CM.applied_config:
if CM.applied_config['quorum/provider'] is 'corosync_votequorum':
self.CM.votequorum_agent[node] = VoteQuorumTestAgent(node, CM.Env)
self.CM.votequorum_agent[node].start()
return 1
def TearDown(self, CM):
'''Tear down (undo) the given ScenarioComponent'''
self.CM = CM
for node in self.Env["nodes"]:
- if self.CM.cpg_agent.has_key(node):
+ if node in self.CM.cpg_agent:
self.CM.cpg_agent[node].stop()
self.CM.sam_agent[node].stop()
- if self.CM.votequorum_agent.has_key(node):
+ if node in self.CM.votequorum_agent:
self.CM.votequorum_agent[node].stop()
###################################################################
class TestAgent(object):
def __init__(self, binary, node, port, Env=None):
self.node = node
self.node_address = None
self.port = port
self.sock = None
self.binary = binary
self.started = False
resh = RemoteFactory.rsh
self.rsh = RemoteExec(resh)
- self.func_name = None
+ self.__name__ = None
self.used = False
self.env = Env
self.send_recv = False
def restart(self):
LogFactory().debug('%s:%s restarting' % (self.node, self.binary))
self.stop()
self.start()
def clean_start(self):
if self.used or not self.status():
LogFactory().debug('%s:%s cleaning' % (self.node, self.binary))
self.stop()
self.start()
def status(self):
if not self.started:
return False
try:
self.send_internal(["are_you_ok_dude"])
self.read()
self.started = True
return True
- except RuntimeError, msg:
+ except RuntimeError as msg:
self.started = False
return False
def start(self):
'''Set up the given ScenarioComponent'''
if self.status():
return
LogFactory().debug('%s:%s starting ' % (self.node, self.binary))
self.rsh(self.node, self.binary, synchronous=False)
self.sock = socket.socket (socket.AF_INET, socket.SOCK_STREAM)
ip = socket.gethostbyname(self.node)
is_connected = False
retries = 0
while not is_connected:
try:
retries = retries + 1
self.sock.connect ((ip, self.port))
is_connected = True
- except socket.error, msg:
+ except socket.error as msg:
if retries > 10:
LogFactory().log("%s:%s Tried connecting %d times. %s" % (self.node, self.binary, retries, str(msg)))
if retries > 30:
raise RuntimeError("%s:%s can't connect" % (self.node, self.binary))
time.sleep(1)
self.started = True
self.used = False
def stop(self):
'''Tear down (undo) the given ScenarioComponent'''
LogFactory().debug('%s:%s stopping' % (self.binary, self.node))
self.rsh(self.node, "killall " + self.binary + " 2>/dev/null")
if self.sock:
self.sock.close ()
del self.sock
self.sock = None
while self.getpid() != '':
time.sleep(1)
self.started = False
def kill(self):
'''Tear down (undo) the given ScenarioComponent'''
LogFactory().log('%s:%s killing' % (self.node, self.binary))
self.rsh(self.node, "killall -9 " + self.binary + " 2>/dev/null")
self.started = False
def getpid(self):
return self.rsh(self.node, 'pidof ' + self.binary, 1)
def send_internal(self, args):
real_msg = str (len (args))
for a in args:
a_str = str(a)
real_msg += ":" + str (len (a_str)) + ":" + a_str
real_msg += ";"
+ if sys.version_info > (3,):
+ real_msg = real_msg.encode("utf8")
try:
return self.sock.send (real_msg)
- except socket.error, msg:
+ except socket.error as msg:
LogFactory().log("send(%s): %s; error: %s" % (self.node, real_msg, msg))
return 0
def send (self, args):
if not self.started:
self.start()
sent = self.send_internal(args)
if sent == 0:
raise RuntimeError ("socket connection broken")
self.used = True
def __getattribute__(self,name):
try:
return object.__getattribute__(self, name)
except:
- self.func_name = name
+ self.__name__ = name
if self.send_recv:
return self.send_recv_dynamic
else:
return self.send_dynamic
def send_recv_dynamic (self, *args):
self.send_dynamic (args)
try:
res = self.read ()
- except RuntimeError, msg:
+ except RuntimeError as msg:
res = None
- LogFactory().log("send_recv_dynamic: %s(); error: %s" % (self.func_name, msg))
+ LogFactory().log("send_recv_dynamic: %s(); error: %s" % (self.__name__, msg))
return res
def send_dynamic (self, *args):
if not self.started:
raise RuntimeError ("agent not started")
# number of args+func
- real_msg = str (len (args) + 1) + ":" + str(len(self.func_name)) + ":" + self.func_name
+ real_msg = str (len (args) + 1) + ":" + str(len(self.__name__)) + ":" + self.__name__
for a in args:
a_str = str(a)
real_msg += ":" + str (len (a_str)) + ":" + a_str
real_msg += ";"
sent = 0
+ if sys.version_info > (3,):
+ real_msg = bytes(real_msg, encoding = "utf8")
try:
sent = self.sock.send (real_msg)
- except socket.error, msg:
+ except socket.error as msg:
LogFactory().log("send_dynamic(%s): %s; error: %s" % (self.node, real_msg, msg))
if sent == 0:
raise RuntimeError ("socket connection broken")
self.used = True
def read (self):
try:
msg = self.sock.recv (4096)
- except socket.error, msg:
+ except socket.error as msg:
raise RuntimeError(msg)
+ if sys.version_info > (3,):
+ msg = msg.decode("utf8")
if msg == '':
raise RuntimeError("socket connection broken")
return msg
-class CpgConfigEvent:
+class CpgConfigEvent(object):
def __init__(self, msg):
info = msg.split(',')
self.group_name = info[0]
self.node_id = info[1]
self.node = None
self.pid = info[2]
if "left" in info[3]:
self.is_member = False
else:
self.is_member = True
def __str__ (self):
str = self.group_name + "," + self.node_id + "," + self.pid + ","
if self.is_member:
return str + "joined"
else:
return str + "left"
###################################################################
class CpgTestAgent(TestAgent):
def __init__(self, node, Env=None):
TestAgent.__init__(self, "cpg_test_agent", node, 9034, Env)
self.nodeid = None
def start(self):
if not self.status():
TestAgent.start(self)
self.cpg_initialize()
self.used = False
def cpg_local_get(self):
if self.nodeid == None:
self.send (["cpg_local_get"])
self.nodeid = self.read ()
return self.nodeid
def record_config_events(self, truncate=True):
if truncate:
self.send (["record_config_events", "truncate"])
else:
self.send (["record_config_events", "append"])
return self.read ()
def read_config_event(self):
self.send (["read_config_event"])
msg = self.read ()
if "None" in msg:
return None
else:
return CpgConfigEvent(msg)
def read_messages(self, atmost):
self.send (["read_messages", atmost])
msg = self.read ()
if "None" in msg:
return None
else:
return msg
def context_test(self):
self.send (["context_test"])
return self.read ()
###################################################################
class SamTestAgent(TestAgent):
def __init__(self, node, Env=None):
TestAgent.__init__(self, "sam_test_agent", node, 9036, Env)
self.nodeid = None
self.send_recv = True
###################################################################
class VoteQuorumTestAgent(TestAgent):
def __init__(self, node, Env=None):
TestAgent.__init__(self, "votequorum_test_agent", node, 9037, Env)
self.nodeid = None
self.send_recv = True
AllAuditClasses = []
AllAuditClasses.append(LogAudit)
AllAuditClasses.append(ShmLeakAudit)
def CoroAuditList(cm):
result = []
for auditclass in AllAuditClasses:
a = auditclass(cm)
if a.is_applicable():
result.append(a)
return result
diff --git a/cts/corotests.py b/cts/corotests.py
index 5778100a..6a983fe4 100644
--- a/cts/corotests.py
+++ b/cts/corotests.py
@@ -1,1615 +1,1624 @@
+from __future__ import division
+from __future__ import print_function
+
__copyright__='''
Copyright (c) 2010 Red Hat, Inc.
'''
# All rights reserved.
#
# Author: Angus Salkeld <asalkeld@redhat.com>
#
# This software licensed under BSD license, the text of which follows:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# - Neither the name of the MontaVista Software, Inc. nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
import random
import socket
-from UserDict import UserDict
+import sys
+if sys.version_info < (3,):
+ from UserDict import UserDict
+else:
+ from collections import UserDict
from cts.CTStests import *
from corosync import CpgTestAgent
###################################################################
class CoroTest(CTSTest):
    '''
    Base class for corosync tests: makes sure the test's configuration
    is applied before the run and the default configuration is restored
    afterwards.
    '''
    def __init__(self, cm):
        CTSTest.__init__(self, cm)
        self.start = StartTest(cm)
        self.stop = StopTest(cm)
        self.config = {}
        self.config['logging/logger_subsys[1]/subsys'] = 'MAIN'
        self.config['logging/logger_subsys[1]/debug'] = 'on'
        self.need_all_up = True
        self.CM.start_cpg = True
        self.cpg_name = 'cts_group'

    def setup(self, node):
        '''Distribute the authkey, push this test's config and bring every
        node into the up/down state the test requires.'''
        ret = CTSTest.setup(self, node)

        # setup the authkey: generate once, pull it locally, fan it out
        localauthkey = '/tmp/authkey'
        if not os.path.exists(localauthkey):
            self.CM.rsh(node, 'corosync-keygen -l')
            self.CM.rsh.cp("%s:%s" % (node, "/etc/corosync/authkey"), localauthkey)

        for n in self.CM.Env["nodes"]:
            # was "n is not node": identity is not a reliable way to
            # compare node-name strings; use value inequality
            if n != node:
                # copy key onto other nodes
                self.CM.rsh.cp(localauthkey, "%s:%s" % (n, "/etc/corosync/authkey"))

        # copy over any new config
        for c in self.config:
            self.CM.new_config[c] = self.config[c]

        # apply the config
        self.CM.apply_new_config(self.need_all_up)

        # start/stop all corosyncs'
        for n in self.CM.Env["nodes"]:
            if self.need_all_up and not self.CM.StataCM(n):
                self.incr("started")
                self.start(n)
            if self.need_all_up and self.CM.start_cpg:
                self.CM.cpg_agent[n].clean_start()
                self.CM.cpg_agent[n].cpg_join(self.cpg_name)
                self.CM.cpg_agent[n].cfg_initialize()
            if not self.need_all_up and self.CM.StataCM(n):
                self.incr("stopped")
                self.stop(n)
        return ret

    def config_valid(self, config):
        # Subclasses override to veto configurations they cannot run under.
        return True

    def teardown(self, node):
        self.CM.apply_default_config()
        return CTSTest.teardown(self, node)
###################################################################
class CpgContextTest(CoroTest):
    """Verify that the cpg agent's context get/set round-trip works."""

    def __init__(self, cm):
        self.name = "CpgContextTest"
        CoroTest.__init__(self, cm)
        self.CM.start_cpg = True

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.cpg_agent[node].context_test()
        if 'OK' not in outcome:
            return self.failure('context_test failed')
        return self.success()
###################################################################
class CpgConfigChangeBase(CoroTest):
    '''
    join a cpg group on each node, and test that the following
    causes a leave event:
    - a call to cpg_leave()
    - app exit
    - node leave
    - node leave (with large token timeout)
    '''

    def setup(self, node):
        '''Pick a "wobbly" node to disturb and a "listener" node that
        records the resulting cpg configuration-change events.'''
        ret = CoroTest.setup(self, node)

        self.listener = None
        self.wobbly = None
        for n in self.CM.Env["nodes"]:
            if self.wobbly is None:
                self.wobbly = n
            elif self.listener is None:
                self.listener = n

        if self.wobbly in self.CM.cpg_agent:
            self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
        if self.listener in self.CM.cpg_agent:
            self.CM.cpg_agent[self.listener].record_config_events(truncate=True)

        return ret

    def wait_for_config_change(self):
        '''Poll the listener until it reports the wobbly node leaving the
        group; fail after 15 minutes without a matching event.'''
        found = False
        max_timeout = 60 * 15
        waited = 0
        printit = 0
        self.CM.log("Waiting for config change on " + self.listener)
        while not found:
            try:
                event = self.CM.cpg_agent[self.listener].read_config_event()
            except:  # deliberately broad: any agent error means the link died
                return self.failure('connection to test cpg_agent failed.')
            # was "not event == None" / "event == None": compare to None
            # with identity, not equality
            if event is not None:
                self.CM.debug("RECEIVED: " + str(event))
            if event is None:
                if waited >= max_timeout:
                    return self.failure("timedout(" + str(waited) + " sec) == no event!")
                else:
                    time.sleep(1)
                    waited = waited + 1
                    printit = printit + 1
                    # was "printit is 60": identity comparison of ints is
                    # an implementation detail; use equality
                    if printit == 60:
                        print('waited ' + str(waited) + ' seconds')
                        printit = 0
            elif str(event.node_id) in str(self.wobbly_id) and not event.is_member:
                self.CM.log("Got the config change in " + str(waited) + " seconds")
                found = True
            else:
                self.CM.debug("No match")
                self.CM.debug("wobbly nodeid:" + str(self.wobbly_id))
                self.CM.debug("event nodeid:" + str(event.node_id))
                self.CM.debug("event.is_member:" + str(event.is_member))
        if found:
            return self.success()
###################################################################
class CpgCfgChgOnGroupLeave(CpgConfigChangeBase):
    """Leave the cpg group voluntarily and expect a config-change event."""

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnGroupLeave"

    def failure_action(self):
        # The disturbance: an orderly cpg_leave() from the wobbly node.
        self.CM.log("calling cpg_leave() on " + self.wobbly)
        self.CM.cpg_agent[self.wobbly].cpg_leave(self.cpg_name)

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()
###################################################################
class CpgCfgChgOnNodeLeave(CpgConfigChangeBase):
    """Stop corosync on one node and expect a config-change event."""

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnNodeLeave"

    def failure_action(self):
        # The disturbance: a clean corosync shutdown on the wobbly node.
        self.CM.log("stopping corosync on " + self.wobbly)
        self.stop(self.wobbly)

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()
###################################################################
class CpgCfgChgOnLowestNodeJoin(CTSTest):
    '''
    1) stop all nodes
    2) start all but the node with the smallest ip address
    3) start recording events
    4) start the last node
    '''
    def __init__(self, cm):
        CTSTest.__init__(self, cm)
        self.name = "CpgCfgChgOnLowestNodeJoin"
        self.start = StartTest(cm)
        self.stop = StopTest(cm)
        self.config = {}
        self.need_all_up = False
        # This class derives from CTSTest (not CoroTest), so cpg_name was
        # never initialized before being used in setup()/__call__; use the
        # same group name as every other test.
        self.cpg_name = 'cts_group'

    def config_valid(self, config):
        return True

    def lowest_ip_set(self):
        '''Record the node considered "lowest" (first in the node list).'''
        self.lowest = None
        for n in self.CM.Env["nodes"]:
            if self.lowest is None:
                self.lowest = n
        self.CM.log("lowest node is " + self.lowest)

    def setup(self, node):
        '''Stop everything, install the config, start all nodes except the
        lowest one, and arm a log watch for the sync events.'''
        # stop all nodes
        for n in self.CM.Env["nodes"]:
            self.CM.StopaCM(n)
        self.lowest_ip_set()

        # copy over any new config
        for c in self.config:
            self.CM.new_config[c] = self.config[c]

        # install the config
        self.CM.install_all_config()

        # start all but lowest
        self.listener = None
        for n in self.CM.Env["nodes"]:
            # was "n is not self.lowest": compare node names by value,
            # not object identity
            if n != self.lowest:
                if self.listener is None:
                    self.listener = n
                self.incr("started")
                self.CM.log("starting " + n)
                self.start(n)
                self.CM.cpg_agent[n].clean_start()
                self.CM.cpg_agent[n].cpg_join(self.cpg_name)

        # start recording events
        pats = []
        pats.append("%s .*sync: node joined.*" % self.listener)
        pats.append("%s .*sync: activate correctly.*" % self.listener)
        self.sync_log = self.create_watch(pats, 60)
        self.sync_log.setwatch()

        self.CM.log("setup done")
        return CTSTest.setup(self, node)

    def __call__(self, node):
        '''Start the lowest node last and verify the sync messages appear.'''
        self.incr("calls")
        self.start(self.lowest)
        self.CM.cpg_agent[self.lowest].clean_start()
        self.CM.cpg_agent[self.lowest].cpg_join(self.cpg_name)
        self.wobbly_id = self.CM.cpg_agent[self.lowest].cpg_local_get()

        self.CM.log("waiting for sync events")
        if not self.sync_log.lookforall():
            return self.failure("Patterns not found: " + repr(self.sync_log.unmatched))
        else:
            return self.success()
###################################################################
class CpgCfgChgOnExecCrash(CpgConfigChangeBase):
    """Hard-kill the corosync executive and expect a config-change event."""

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnExecCrash"

    def failure_action(self):
        # Simulate an executive crash: SIGKILL plus stale-state cleanup.
        self.CM.log("sending KILL to corosync on " + self.wobbly)
        self.CM.rsh(self.wobbly, "killall -9 corosync")
        self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
        self.CM.rsh(self.wobbly, "rm -f /dev/shm/qb-corosync-blackbox*")
        self.CM.ShouldBeStatus[self.wobbly] = "down"

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()
###################################################################
class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase):
    """Isolate a node from the network and expect a config-change event."""

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnNodeIsolate"

    def config_valid(self, config):
        # Isolation does not play well with redundant-ring configs.
        return 'totem/rrp_mode' not in config

    def failure_action(self):
        self.CM.log("isolating node " + self.wobbly)
        self.CM.isolate_node(self.wobbly)

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()

    def teardown(self, node):
        # Always restore connectivity before the generic teardown.
        self.CM.unisolate_node(self.wobbly)
        return CpgConfigChangeBase.teardown(self, node)
###################################################################
class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
    '''Repeatedly isolate, kill and restart a node, then expect the
    listener to see it leave the cpg group.'''

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnNodeRestart"
        self.CM.start_cpg = False

    def config_valid(self, config):
        '''Reject secauth and redundant-ring configurations.'''
        if 'totem/secauth' in config:
            # was "is 'on'": identity comparison with a string literal only
            # works by interning accident; compare by value
            if config['totem/secauth'] == 'on':
                return False
            else:
                return True
        if 'totem/rrp_mode' in config:
            return False
        else:
            return True

    def failure_action(self):
        '''Two isolate/kill/restart rounds; the second one with cpg agents
        running so the leave event is recorded.'''
        self.CM.log("2: isolating node " + self.wobbly)
        self.CM.isolate_node(self.wobbly)
        self.CM.log("3: Killing corosync on " + self.wobbly)
        self.CM.rsh(self.wobbly, "killall -9 corosync")
        self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
        self.CM.ShouldBeStatus[self.wobbly] = "down"
        self.CM.log("4: unisolating node " + self.wobbly)
        self.CM.unisolate_node(self.wobbly)
        self.CM.log("5: starting corosync on " + self.wobbly)
        self.CM.StartaCM(self.wobbly)
        time.sleep(5)

        self.CM.log("6: starting cpg on all nodes")
        self.CM.start_cpg = True
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n] = CpgTestAgent(n, self.CM.Env)
            self.CM.cpg_agent[n].start()
            self.CM.cpg_agent[n].cpg_join(self.cpg_name)

        self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
        self.CM.cpg_agent[self.listener].record_config_events(truncate=True)

        self.CM.log("7: isolating node " + self.wobbly)
        self.CM.isolate_node(self.wobbly)
        self.CM.log("8: Killing corosync on " + self.wobbly)
        self.CM.rsh(self.wobbly, "killall -9 corosync")
        self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
        self.CM.ShouldBeStatus[self.wobbly] = "down"
        self.CM.log("9: unisolating node " + self.wobbly)
        self.CM.unisolate_node(self.wobbly)
        self.CM.log("10: starting corosync on " + self.wobbly)
        self.CM.StartaCM(self.wobbly)

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()

    def teardown(self, node):
        self.CM.unisolate_node(self.wobbly)
        return CpgConfigChangeBase.teardown(self, node)
###################################################################
class CpgMsgOrderBase(CoroTest):
    '''Common machinery for total-order tests: every node joins the cpg
    group and records what it receives; afterwards each node must have seen
    the same messages in the same order.'''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.num_msgs_per_node = 0
        self.total_num_msgs = 0

    def setup(self, node):
        '''Join the group on every node and start recording messages.'''
        ret = CoroTest.setup(self, node)
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].clean_start()
            self.CM.cpg_agent[n].cpg_join(self.cpg_name)
            self.CM.cpg_agent[n].record_messages()
        time.sleep(1)
        return ret

    def cpg_msg_blaster(self):
        '''Send num_msgs_per_node messages from every node.'''
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)

    def wait_and_validate_order(self):
        '''Collect the expected message count from each node (up to 360s),
        then verify contents and ordering agree across all nodes.'''
        msgs = {}

        self.total_num_msgs = 0
        for n in self.CM.Env["nodes"]:
            self.total_num_msgs = self.total_num_msgs + self.num_msgs_per_node

        for n in self.CM.Env["nodes"]:
            msgs[n] = []
            waited = 0

            while len(msgs[n]) < self.total_num_msgs and waited < 360:
                try:
                    msg = self.CM.cpg_agent[n].read_messages(50)
                except:  # deliberately broad: any agent error means the link died
                    return self.failure('connection to test cpg_agent failed.')

                # was "not msg == None" / "msg == None": use identity for None
                if msg is not None:
                    # split on ';' and drop the empty entries the trailing
                    # separator produces (was a quadratic remove('') loop)
                    msgs[n].extend([m for m in msg.split(";") if m != ''])
                else:
                    time.sleep(2)
                    waited = waited + 2

            if len(msgs[n]) < self.total_num_msgs:
                return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n])))

        fail = False
        error_message = ''
        for i in range(0, self.total_num_msgs):
            first = None
            for n in self.CM.Env["nodes"]:
                # first test for errors
                params = msgs[n][i].split(":")
                if 'OK' not in params[3]:
                    fail = True
                    error_message = 'error: ' + params[3] + ' in received message'
                    self.CM.log(str(params))
                # then look for out of order messages
                if first is None:
                    first = n
                else:
                    if not msgs[first][i] == msgs[n][i]:
                        # message order not the same!
                        fail = True
                        error_message = 'message out of order'
                        self.CM.log(msgs[first][i] + " != " + msgs[n][i])

        if fail:
            return self.failure(error_message)
        else:
            return self.success()
###################################################################
class CpgMsgOrderBasic(CpgMsgOrderBase):
    '''
    each sends & logs lots of messages
    '''
    def __init__(self, cm):
        CpgMsgOrderBase.__init__(self, cm)
        self.name = "CpgMsgOrderBasic"
        self.num_msgs_per_node = 9000

    def __call__(self, node):
        self.incr("calls")
        # blast from every node, then check total order held
        self.cpg_msg_blaster()
        return self.wait_and_validate_order()
###################################################################
class CpgMsgOrderZcb(CpgMsgOrderBase):
    '''
    each sends & logs lots of messages
    '''
    def __init__(self, cm):
        CpgMsgOrderBase.__init__(self, cm)
        self.name = "CpgMsgOrderZcb"
        self.num_msgs_per_node = 9000

    def __call__(self, node):
        self.incr("calls")
        # same as the basic test but via the zero-copy-buffer send path
        for member in self.CM.Env["nodes"]:
            self.CM.cpg_agent[member].msg_blaster_zcb(self.num_msgs_per_node)
        return self.wait_and_validate_order()
###################################################################
class MemLeakObject(CoroTest):
    '''
    run mem_leak_test.sh -1
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "MemLeakObject"

    def __call__(self, node):
        '''Run the per-object leak checker; any non-zero result is the
        number of leaked kB and means failure.'''
        self.incr("calls")
        mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1")
        # was "mem_leaked is 0": identity comparison of ints relies on
        # CPython's small-int cache; compare by value
        if mem_leaked == 0:
            return self.success()
        else:
            return self.failure(str(mem_leaked) + 'kB memory leaked.')
###################################################################
class MemLeakSession(CoroTest):
    '''
    run mem_leak_test.sh -2
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "MemLeakSession"

    def __call__(self, node):
        '''Run the per-session leak checker; any non-zero result is the
        number of leaked kB and means failure.'''
        self.incr("calls")
        mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2")
        # was "mem_leaked is 0": identity comparison of ints relies on
        # CPython's small-int cache; compare by value
        if mem_leaked == 0:
            return self.success()
        else:
            return self.failure(str(mem_leaked) + 'kB memory leaked.')
###################################################################
class CMapDispatchDeadlock(CoroTest):
    '''
    run cmap-dispatch-deadlock.sh
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "CMapDispatchDeadlock"

    def __call__(self, node):
        '''Run the deadlock reproducer script; non-zero exit means a
        deadlock was detected.'''
        self.incr("calls")
        result = self.CM.rsh(node, "/usr/share/corosync/tests/cmap-dispatch-deadlock.sh")
        # was "result is 0": identity comparison of ints relies on
        # CPython's small-int cache; compare by value
        if result == 0:
            return self.success()
        else:
            return self.failure('Deadlock detected')
###################################################################
class SamTest1(CoroTest):
    """Run SAM agent test1 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest1"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test1()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest2(CoroTest):
    """Run SAM agent test2 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest2"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test2()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest4(CoroTest):
    """Run SAM agent test4 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest4"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test4()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest5(CoroTest):
    """Run SAM agent test5 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest5"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test5()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest6(CoroTest):
    """Run SAM agent test6 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest6"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test6()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest8(CoroTest):
    """Run SAM agent test8 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest8"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test8()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest9(CoroTest):
    """Run SAM agent test9 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest9"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test9()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
class QuorumState(object):
    """Snapshot of the votequorum state as seen from one node.

    Attributes (set by refresh()): node_votes, expected_votes,
    highest_expected, total_votes, quorum, quorate.
    """
    def __init__(self, cm, node):
        # node: name of the node whose votequorum agent we query
        self.node = node
        self.CM = cm
        self.CM.votequorum_agent[self.node].init()

    def refresh(self):
        """Re-read votequorum info from the agent and update attributes.

        The agent returns a colon-separated string:
        node_votes:expected_votes:highest_expected:total_votes:quorum
        """
        info = self.CM.votequorum_agent[self.node].votequorum_getinfo()
        assert(info != 'FAIL')
        assert(info != 'NOT_SUPPORTED')
        #self.CM.log('refresh: ' + info)
        # agent may return nothing; keep the previous snapshot in that case
        if info is None:
            return
        params = info.split(':')
        self.node_votes = int(params[0])
        self.expected_votes = int(params[1])
        self.highest_expected = int(params[2])
        self.total_votes = int(params[3])
        self.quorum = int(params[4])
        # quorate is queried separately from the quorum service
        self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate()
        assert(self.quorate != 'FAIL')
        assert(self.quorate != 'NOT_SUPPORTED')
        #self.CM.log('quorate: ' + str(self.quorate))
###################################################################
class VoteQuorumBase(CoroTest):
    """Common setup for votequorum tests: designate a listener node and
    reject configurations these tests cannot run under."""

    def setup(self, node):
        ret = CoroTest.setup(self, node)
        # listener = first node in the environment list
        self.listener = None
        for candidate in self.CM.Env["nodes"]:
            if self.listener is None:
                self.listener = candidate
        return ret

    def config_valid(self, config):
        # No redundant ring and no pre-set quorum provider allowed.
        if 'totem/rrp_mode' in config or 'quorum/provider' in config:
            return False
        return True
###################################################################
class VoteQuorumGoDown(VoteQuorumBase):
    '''Start with all nodes up, stop them one by one (keeping the
    listener), and confirm quorum is lost once votes < expected votes.'''

    def __init__(self, cm):
        VoteQuorumBase.__init__(self, cm)
        self.name = "VoteQuorumGoDown"
        self.victims = []
        self.expected = len(self.CM.Env["nodes"])
        self.config['quorum/provider'] = 'corosync_votequorum'
        self.config['quorum/expected_votes'] = self.expected
        #self.CM.log('set expected to %d' % (self.expected))

    def __call__(self, node):
        self.incr("calls")
        self.victims = []
        pats = []
        pats.append("%s .*VQ notification quorate: 0" % self.listener)
        pats.append("%s .*NQ notification quorate: 0" % self.listener)
        quorum = self.create_watch(pats, 30)
        quorum.setwatch()

        state = QuorumState(self.CM, self.listener)
        state.refresh()
        for n in self.CM.Env["nodes"]:
            # was "n is self.listener": compare node names by value
            if n == self.listener:
                continue

            self.victims.append(n)
            self.CM.StopaCM(n)

            #if not self.wait_for_quorum_change():
            #    return self.failure(self.error_message)
            nodes_alive = len(self.CM.Env["nodes"]) - len(self.victims)
            state.refresh()
            #self.expected = self.expected - 1
            if state.node_votes != 1:
                self.failure('unexpected number of node_votes')
            if state.expected_votes != self.expected:
                self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
                self.failure('unexpected number of expected_votes')
            if state.total_votes != nodes_alive:
                self.failure('unexpected number of total votes:%d, nodes_alive:%d' % (state.total_votes, nodes_alive))
            # quorum threshold = majority of (nodes + 2) votes;
            # renamed from "min", which shadowed the builtin
            expected_quorum = int((len(self.CM.Env["nodes"]) + 2) / 2)
            if expected_quorum != state.quorum:
                self.failure('we should have %d (not %d) as quorum' % (expected_quorum, state.quorum))
            if nodes_alive < state.quorum:
                if state.quorate == 1:
                    self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
            else:
                if state.quorate == 0:
                    self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))

        if not quorum.lookforall():
            self.CM.log("Patterns not found: " + repr(quorum.unmatched))
            return self.failure('quorm event not found')

        return self.success()
###################################################################
class VoteQuorumGoUp(VoteQuorumBase):
    '''Start with all nodes down, bring them up one by one, and confirm
    quorum is gained once votes >= expected votes.'''

    def __init__(self, cm):
        VoteQuorumBase.__init__(self, cm)
        self.name = "VoteQuorumGoUp"
        self.need_all_up = False
        self.expected = len(self.CM.Env["nodes"])
        self.config['quorum/provider'] = 'corosync_votequorum'
        self.config['quorum/expected_votes'] = self.expected
        #self.CM.log('set expected to %d' % (self.expected))

    def __call__(self, node):
        self.incr("calls")
        pats = []
        pats.append("%s .*VQ notification quorate: 1" % self.listener)
        pats.append("%s .*NQ notification quorate: 1" % self.listener)
        quorum = self.create_watch(pats, 30)
        quorum.setwatch()

        self.CM.StartaCM(self.listener)
        nodes_alive = 1
        state = QuorumState(self.CM, self.listener)
        state.refresh()
        for n in self.CM.Env["nodes"]:
            # was "n is self.listener": compare node names by value
            if n == self.listener:
                continue

            #if not self.wait_for_quorum_change():
            #    return self.failure(self.error_message)
            if state.node_votes != 1:
                self.failure('unexpected number of node_votes')
            if state.expected_votes != self.expected:
                self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
                self.failure('unexpected number of expected_votes')
            if state.total_votes != nodes_alive:
                self.failure('unexpected number of total votes')
            # With "from __future__ import division" the original plain "/"
            # produced a float, so the comparison against the integer
            # state.quorum could never match; use floor division (this also
            # mirrors the int() fix already applied in VoteQuorumGoDown).
            # Renamed from "min", which shadowed the builtin.
            expected_quorum = (len(self.CM.Env["nodes"]) + 2) // 2
            if expected_quorum != state.quorum:
                self.failure('we should have %d (not %d) as quorum' % (expected_quorum, state.quorum))
            if nodes_alive < state.quorum:
                if state.quorate == 1:
                    self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
            else:
                if state.quorate == 0:
                    self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))

            self.CM.StartaCM(n)
            nodes_alive = nodes_alive + 1
            state.refresh()

        if not quorum.lookforall():
            self.CM.log("Patterns not found: " + repr(quorum.unmatched))
            return self.failure('quorm event not found')

        return self.success()
###################################################################
class VoteQuorumWaitForAll(VoteQuorumBase):
    '''With wait_for_all enabled: start from all-down, bring nodes up one
    by one, and confirm quorum only arrives once every node has voted.'''

    def __init__(self, cm):
        VoteQuorumBase.__init__(self, cm)
        self.name = "VoteQuorumWaitForAll"
        self.need_all_up = False
        self.expected = len(self.CM.Env["nodes"])
        self.config['quorum/provider'] = 'corosync_votequorum'
        self.config['quorum/expected_votes'] = self.expected
        self.config['quorum/wait_for_all'] = '1'

    def __call__(self, node):
        self.incr("calls")
        pats = []
        pats.append("%s .*VQ notification quorate: 1" % self.listener)
        pats.append("%s .*NQ notification quorate: 1" % self.listener)
        quorum = self.create_watch(pats, 30)
        quorum.setwatch()

        # make absolutely sure all nodes are stopped
        for n in self.CM.Env["nodes"]:
            self.CM.StopaCM(n)

        # start the listener
        self.CM.StartaCM(self.listener)
        nodes_alive = 1
        state = QuorumState(self.CM, self.listener)
        state.refresh()

        for n in self.CM.Env["nodes"]:
            # was "n is self.listener": compare node names by value
            if n == self.listener:
                continue

            self.CM.StartaCM(n)
            nodes_alive = nodes_alive + 1
            state.refresh()

            if state.node_votes != 1:
                self.failure('unexpected number of node_votes')
            if state.expected_votes != self.expected:
                self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
                self.failure('unexpected number of expected_votes')
            if state.total_votes != nodes_alive:
                self.failure('unexpected number of total votes')

            # quorate must only flip to 1 once ALL nodes are up
            if nodes_alive < len(self.CM.Env["nodes"]):
                if state.quorate == 1:
                    self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate,
                        len(self.CM.Env["nodes"]), nodes_alive))
            else:
                if state.quorate == 0:
                    self.failure('we should have quorum(%d) %d <= %d' % (state.quorate,
                        len(self.CM.Env["nodes"]), nodes_alive))

        if not quorum.lookforall():
            self.CM.log("Patterns not found: " + repr(quorum.unmatched))
            return self.failure('quorm event not found')

        return self.success()
###################################################################
class VoteQuorumContextTest(CoroTest):
    """Verify the votequorum agent's context get/set round-trip works."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "VoteQuorumContextTest"
        self.expected = len(self.CM.Env["nodes"])
        self.config['quorum/provider'] = 'corosync_votequorum'
        self.config['quorum/expected_votes'] = self.expected

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.votequorum_agent[node].context_test()
        if 'OK' not in outcome:
            return self.failure('context_test failed')
        return self.success()
###################################################################
class GenSimulStart(CoroTest):
    '''Start all the nodes ~ simultaneously'''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenSimulStart"
        self.need_all_up = False
        self.stopall = SimulStopLite(cm)
        self.startall = SimulStartLite(cm)

    def __call__(self, dummy):
        '''Perform the 'SimulStart' test.'''
        self.incr("calls")
        # The "node" argument is ignored: first take every node down...
        if not self.stopall(None):
            return self.failure("Setup failed")
        # (clear_all_caches was removed from the cluster manager, so no
        # cache reset happens between the stop and the start)
        # ...then bring them all back up at once.
        if not self.startall(None):
            return self.failure("Startall failed")
        return self.success()
###################################################################
class GenSimulStop(CoroTest):
    '''Stop all the nodes ~ simultaneously'''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenSimulStop"
        self.startall = SimulStartLite(cm)
        self.stopall = SimulStopLite(cm)
        self.need_all_up = True

    def __call__(self, dummy):
        '''Perform the 'GenSimulStop' test.'''
        self.incr("calls")
        # The "node" argument is ignored: ensure every node is up first...
        if not self.startall(None):
            return self.failure("Setup failed")
        # ...then take them all down at once.
        if not self.stopall(None):
            return self.failure("Stopall failed")
        return self.success()
class GenFlipTest(CoroTest):
    """Run the generic FlipTest under the corosync config harness."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenFlipTest"
        self.test = FlipTest(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped FlipTest."""
        self.incr("calls")
        return self.test(dummy)
class GenRestartTest(CoroTest):
    """Run the generic RestartTest under the corosync config harness."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenRestartTest"
        self.test = RestartTest(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped RestartTest."""
        self.incr("calls")
        return self.test(dummy)
class GenStartOnebyOne(CoroTest):
    """Run a one-by-one start test under the corosync config harness.

    NOTE(review): despite the "Start" name this wraps RestartOnebyOne,
    exactly like GenRestartOnebyOne — confirm that is intentional.
    """

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenStartOnebyOne"
        self.test = RestartOnebyOne(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped test."""
        self.incr("calls")
        return self.test(dummy)
class GenStopOnebyOne(CoroTest):
    """Run the generic StopOnebyOne test under the corosync config harness."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenStopOnebyOne"
        self.test = StopOnebyOne(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped StopOnebyOne."""
        self.incr("calls")
        return self.test(dummy)
class GenRestartOnebyOne(CoroTest):
    """Run the generic RestartOnebyOne test under the corosync config harness."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenRestartOnebyOne"
        self.test = RestartOnebyOne(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped RestartOnebyOne."""
        self.incr("calls")
        return self.test(dummy)
###################################################################
class GenStopAllBeekhof(CoroTest):
    '''Stop all the nodes ~ simultaneously via cfg_shutdown while cpg
    traffic is in flight, then wait (up to 15 min) for every node to
    actually go down.'''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenStopAllBeekhof"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'CFG'
        self.config['logging/logger_subsys[2]/debug'] = 'on'

    def __call__(self, node):
        '''Perform the 'GenStopAllBeekhof' test.'''
        self.incr("calls")
        stopping = int(time.time())

        # generate load, then ask every node to shut itself down
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].pcmk_test()
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].msg_blaster(1000)
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].cfg_shutdown()
            self.CM.ShouldBeStatus[n] = "down"

        waited = 0
        max_wait = 60 * 15
        still_up = list(self.CM.Env["nodes"])

        while len(still_up) > 0:
            waited = int(time.time()) - stopping
            self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited))
            if waited > max_wait:
                break
            time.sleep(3)
            for v in self.CM.Env["nodes"]:
                if v in still_up:
                    # was ShouldBeStatus[n]: "n" was the stale loop variable
                    # from the shutdown loop above, so only the last node's
                    # status was (re)written; mark the node being checked
                    self.CM.ShouldBeStatus[v] = "down"
                    if not self.CM.StataCM(v):
                        still_up.remove(v)

        waited = int(time.time()) - stopping
        if waited > max_wait:
            return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up)))

        self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited))
        return self.success()
###################################################################
class NoWDConfig(CoroTest):
    '''Assertion: no config == no watchdog
    Setup: no config, kmod inserted
    1] make sure watchdog is not enabled
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "NoWDConfig"
        self.need_all_up = False

    def config_valid(self, config):
        # Only valid when no watchdog resources are configured at all.
        return 'resources' not in config

    def __call__(self, node):
        '''Restart corosync and expect the "no resources" log message.'''
        self.incr("calls")
        self.CM.StopaCM(node)
        watch = self.create_watch(["%s .*no resources configured." % node], 60)
        watch.setwatch()
        self.CM.StartaCM(node)
        if watch.lookforall():
            return self.success()
        return self.failure("Patterns not found: " + repr(watch.unmatched))
###################################################################
class WDConfigNoWd(CoroTest):
    '''Assertion: watchdog config but no watchdog kmod will emit a log
    Setup: config watchdog, but no kmod
    1] look in the log for warning that there is no kmod
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "WDConfigNoWd"
        self.need_all_up = False

    def __call__(self, node):
        '''Remove the softdog module, restart, and expect the warning.'''
        self.incr("calls")
        self.CM.StopaCM(node)
        self.CM.rsh(node, 'rmmod softdog')
        watch = self.create_watch(["%s .*No Watchdog, try modprobe.*" % node], 60)
        watch.setwatch()
        self.CM.StartaCM(node)
        if watch.lookforall():
            return self.success()
        return self.failure("Patterns not found: " + repr(watch.unmatched))
###################################################################
class NoWDOnCorosyncStop(CoroTest):
    '''Configure WD then /etc/init.d/corosync stop
    must stay up for > 60 secs
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "NoWDOnCorosyncStop"
        self.need_all_up = False

    def __call__(self, node):
        '''Stop corosync cleanly; seeing the "unexpected close" message
        means the watchdog was NOT shut down properly — that is a failure.'''
        self.incr("calls")
        self.CM.StopaCM(node)
        self.CM.rsh(node, 'modprobe softdog')
        self.CM.StartaCM(node)
        watch = self.create_watch(
            ["%s .*Unexpected close, not stopping watchdog.*" % node], 60)
        watch.setwatch()
        self.CM.StopaCM(node)
        if watch.lookforall():
            return self.failure("Should have closed the WD better: " + repr(watch.matched))
        return self.success()
###################################################################
class WDOnForkBomb(CoroTest):
    '''Configure memory resource
    run memory leaker / forkbomb
    confirm watchdog action
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "WDOnForkBomb"
        self.need_all_up = False
        # watchdog subsystem debug logging plus a memory resource whose
        # recovery action is "watchdog" — exceeding 80% memory for a poll
        # period should trigger a watchdog reboot
        self.config['logging/logger_subsys[2]/subsys'] = 'WD'
        self.config['logging/logger_subsys[2]/debug'] = 'on'
        self.config['resources/system/memory_used/recovery'] = 'watchdog'
        self.config['resources/system/memory_used/max'] = '80'
        self.config['resources/system/memory_used/poll_period'] = '800'

    def __call__(self, node):
        '''Perform the test. '''
        self.incr("calls")
        # get the uptime (seconds since boot, before the fork bomb)
        up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
        self.CM.StopaCM(node)
        self.CM.rsh(node, 'modprobe softdog')
        self.CM.StartaCM(node)
        # launch a shell fork bomb in the background; memory exhaustion
        # should make the watchdog reboot the node
        self.CM.rsh(node, ':(){ :|:& };:', synchronous=0)
        self.CM.log("wait for it to watchdog")
        time.sleep(60 * 5)

        # wait until the node answers pings again after the reboot
        ping_able = False
        while not ping_able:
            if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
                ping_able = True
                self.CM.log("can ping 10 in 10secs.")
            else:
                self.CM.log("not yet responding to pings.")
        self.CM.ShouldBeStatus[node] = "down"
        # wait for the node to come back up
        self.CM.log("waiting for node to come back up.")
        if self.CM.ns.WaitForNodeToComeUp(node):
            # uptime after < uptime before proves the node rebooted
            up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
            if int(up_after) < int(up_before):
                return self.success()
            else:
                return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
        else:
            return self.failure("node didn't seem to come back up")
###################################################################
class SamWdIntegration1(CoroTest):
    '''Set up SAM healthchecking, kill the test agent, and confirm the
    watchdog reports every monitored pid as a failed resource.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamWdIntegration1"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'WD'
        self.config['logging/logger_subsys[2]/debug'] = 'on'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        self.CM.sam_agent[node].setup_hc()
        agent_pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
        # expect one "resource failed" log message per monitored pid
        patterns = ['%s .*resource "%s" failed!' % (node, pid) for pid in agent_pids]
        watch = self.create_watch(patterns, 60)
        watch.setwatch()
        self.CM.sam_agent[node].kill()
        if not watch.look():
            return self.failure("Patterns not found: " + repr(watch.regexes))
        return self.success()
###################################################################
class SamWdIntegration2(CoroTest):
    '''Set up SAM healthchecking, call sam_stop(), and confirm each
    resource transitions to "stopped" with no watchdog action.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamWdIntegration2"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'WD'
        self.config['logging/logger_subsys[2]/debug'] = 'on'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        self.CM.sam_agent[node].setup_hc()
        agent_pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
        # per-pid patterns: failures must NOT appear, stop transitions must
        forbidden = ['%s .*resource "%s" failed!' % (node, pid)
                     for pid in agent_pids]
        expected = ['%s .*Fsm:%s event "config_changed", state "running" --> "stopped"' % (node, pid)
                    for pid in agent_pids]
        expected_w = self.create_watch(expected, 10)
        forbidden_w = self.create_watch(forbidden, 10)
        expected_w.setwatch()
        forbidden_w.setwatch()
        time.sleep(2)
        self.CM.sam_agent[node].sam_stop()
        seen_expected = expected_w.look()
        seen_forbidden = forbidden_w.look()
        if seen_forbidden:
            return self.failure("Patterns found: " + repr(seen_forbidden))
        if not seen_expected:
            return self.failure("Patterns NOT found: " + repr(expected_w.regexes))
        return self.success()
###################################################################
class WdDeleteResource(CoroTest):
    '''Configure a monitored resource, start corosync, delete the
    resource object from cmap, and confirm it is reported as deleted
    without any watchdog action.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "WdDeleteResource"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'MON'
        self.config['logging/logger_subsys[2]/debug'] = 'on'
        self.config['logging/logger_subsys[3]/subsys'] = 'WD'
        self.config['logging/logger_subsys[3]/debug'] = 'on'
        self.config['resources/system/memory_used/recovery'] = 'watchdog'
        self.config['resources/system/memory_used/max'] = '80'
        self.config['resources/system/memory_used/poll_period'] = '800'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        # the failure message must NOT appear; the deletion message must
        forbidden = ['%s .*resource "memory_used" failed!' % node]
        expected = ['%s .*resource "memory_used" deleted from cmap!' % node]
        expected_w = self.create_watch(expected, 10)
        forbidden_w = self.create_watch(forbidden, 10)
        expected_w.setwatch()
        forbidden_w.setwatch()
        time.sleep(2)
        self.CM.rsh(node, 'corosync-cmapctl -D resources.system.memory_used')
        seen_expected = expected_w.look()
        seen_forbidden = forbidden_w.look()
        if seen_forbidden:
            return self.failure("Patterns found: " + repr(seen_forbidden))
        if not seen_expected:
            return self.failure("Patterns NOT found: " + repr(expected_w.regexes))
        return self.success()
###################################################################
class ResourcePollAdjust(CoroTest):
    '''Configure a monitored resource, start corosync, repeatedly change
    its poll_period at random, and confirm neither a resource failure
    nor a poll_period error is logged.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "ResourcePollAdjust"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'MON'
        self.config['logging/logger_subsys[2]/debug'] = 'on'
        self.config['logging/logger_subsys[3]/subsys'] = 'WD'
        self.config['logging/logger_subsys[3]/debug'] = 'on'
        self.config['resources/system/memory_used/recovery'] = 'none'
        self.config['resources/system/memory_used/max'] = '80'
        self.config['resources/system/memory_used/poll_period'] = '800'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        bad_pats = []
        bad_pats.append('%s .*resource "memory_used" failed!' % node)
        bad_pats.append('%s .*Could NOT use poll_period.*' % node)
        watch = self.create_watch(bad_pats, 10)
        watch.setwatch()
        for _ in range(50):
            # pick a random period in [500, 5000) ms
            poll_period = int(random.random() * 5000)
            if poll_period < 500:
                poll_period = 500
            self.CM.log("setting poll_period to: %d" % poll_period)
            self.CM.rsh(node, 'corosync-cmapctl -s resources.system.memory_used.poll_period str %d' % poll_period)
            # sleep two poll periods, at least one second
            sleep_time = poll_period * 2 / 1000
            if sleep_time < 1:
                sleep_time = 1
            time.sleep(sleep_time)
        matched = watch.look()
        if matched:
            return self.failure("Patterns found: " + repr(matched))
        return self.success()
###################################################################
class RebootOnHighMem(CoroTest):
    '''Configure a memory resource with reboot recovery, drive memory
    usage over the limit with memhog, and confirm the node reboots —
    detected by its uptime resetting after it comes back.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "RebootOnHighMem"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'WD'
        self.config['logging/logger_subsys[2]/debug'] = 'on'
        self.config['resources/system/memory_used/recovery'] = 'reboot'
        self.config['resources/system/memory_used/max'] = '80'
        self.config['resources/system/memory_used/poll_period'] = '800'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        # record uptime before the forced reboot
        up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
        # lower the configured max to just above current usage so that
        # memhog trips the threshold quickly
        cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
        mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
        mem_new_max = int(mem_current_str) + 5
        self.CM.log("current mem usage: %s, new max:%d" % (mem_current_str, mem_new_max))
        cmd = 'corosync-cmapctl -s resources.system.memory_used.max str ' + str(mem_new_max)
        self.CM.rsh(node, cmd)
        # hog memory in the background and wait for the reboot
        self.CM.rsh(node, 'memhog -r10000 200m', synchronous=0)
        self.CM.log("wait for it to reboot")
        time.sleep(60 * 3)
        cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
        mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
        self.CM.log("current mem usage: %s" % (mem_current_str))
        # poll until the node answers pings again
        while True:
            if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
                self.CM.log("can ping 10 in 10secs.")
                break
            self.CM.log("not yet responding to pings.")
        self.CM.ShouldBeStatus[node] = "down"
        # wait for the node to come back up
        self.CM.log("waiting for node to come back up.")
        if not self.CM.ns.WaitForNodeToComeUp(node):
            return self.failure("node didn't seem to come back up")
        up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
        if int(up_after) < int(up_before):
            return self.success()
        return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" % (up_before, up_after))
# Tests instantiated once per generated configuration.
GenTestClasses = [
    GenSimulStart,
    GenSimulStop,
    GenFlipTest,
    GenRestartTest,
    GenStartOnebyOne,
    GenStopOnebyOne,
    GenRestartOnebyOne,
    GenStopAllBeekhof,
    CpgMsgOrderBasic,
    CpgMsgOrderZcb,
    CpgCfgChgOnExecCrash,
    CpgCfgChgOnGroupLeave,
    CpgCfgChgOnNodeLeave,
    CpgCfgChgOnNodeIsolate,
    #CpgCfgChgOnNodeRestart,
    # quorum tests
    VoteQuorumGoDown,
    VoteQuorumGoUp,
    VoteQuorumWaitForAll,
    # FIXME need log messages in sync
    #CpgCfgChgOnLowestNodeJoin,
]

# Tests instantiated once against the default configuration.
AllTestClasses = [
    CpgContextTest,
    SamTest1,
    SamTest2,
    SamTest4,
    SamTest5,
    SamTest6,
    SamTest8,
    SamTest9,
    SamWdIntegration1,
    SamWdIntegration2,
    NoWDConfig,
    WDConfigNoWd,
    NoWDOnCorosyncStop,
    #WDOnForkBomb,
    WdDeleteResource,
    #RebootOnHighMem,
    ResourcePollAdjust,
    MemLeakObject,
    MemLeakSession,
    #CMapDispatchDeadlock,
    # quorum tests
    VoteQuorumContextTest,
]
class ConfigContainer(UserDict):
    '''A named dictionary of corosync configuration overrides.'''

    def __init__(self, name):
        # remember this config set's name, then initialise the dict part
        self.name = name
        UserDict.__init__(self)
def CoroTestList(cm, audits):
    '''Build the list of bound test instances to run against this cluster.

    Every class in AllTestClasses is instantiated once; every class in
    GenTestClasses is instantiated once per generated configuration
    (the config's name is appended to the test name, and the config's
    key/value overrides are copied into the test).  Only applicable
    tests — and, for generated ones, only valid test/config
    combinations — are included.  Each bound test receives *audits*.

    Returns the combined list of bound test objects.
    '''
    result = []
    configs = []

    # One-off tests run against the default configuration only.
    for testclass in AllTestClasses:
        bound_test = testclass(cm)
        if bound_test.is_applicable():
            bound_test.Audits = audits
            result.append(bound_test)

    # Baseline logging/uidgid settings.
    default = ConfigContainer('default')
    default['logging/fileline'] = 'on'
    default['logging/function_name'] = 'off'
    default['logging/logfile_priority'] = 'info'
    default['logging/syslog_priority'] = 'info'
    default['logging/syslog_facility'] = 'daemon'
    default['uidgid/uid'] = '0'
    default['uidgid/gid'] = '0'
    configs.append(default)

    # Very long (5 minute) token timeout, consensus just above 1.2x token.
    a = ConfigContainer('none_5min')
    a['totem/token'] = (5 * 60 * 1000)
    a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1
    configs.append(a)

    # Pacemaker-style totem timings.
    b = ConfigContainer('pcmk_basic')
    b['totem/token'] = 5000
    b['totem/token_retransmits_before_loss_const'] = 10
    b['totem/join'] = 1000
    b['totem/consensus'] = 7500
    configs.append(b)

    # Pacemaker timings with NSS encryption enabled.
    c = ConfigContainer('pcmk_sec_nss')
    c['totem/secauth'] = 'on'
    c['totem/crypto_type'] = 'nss'
    c['totem/token'] = 5000
    c['totem/token_retransmits_before_loss_const'] = 10
    c['totem/join'] = 1000
    c['totem/consensus'] = 7500
    configs.append(c)

    # NSS encryption with default timings.
    d = ConfigContainer('sec_nss')
    d['totem/secauth'] = 'on'
    d['totem/crypto_type'] = 'nss'
    configs.append(d)

    # Redundant-ring configs need a second bind address from the user.
    if not cm.Env["RrpBindAddr"] is None:
        g = ConfigContainer('rrp_passive')
        g['totem/rrp_mode'] = 'passive'
        g['totem/interface[2]/ringnumber'] = '1'
        g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
        g['totem/interface[2]/mcastaddr'] = '226.94.1.2'
        g['totem/interface[2]/mcastport'] = '5405'
        configs.append(g)

        h = ConfigContainer('rrp_active')
        h['totem/rrp_mode'] = 'active'
        h['totem/interface[2]/ringnumber'] = '1'
        h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
        h['totem/interface[2]/mcastaddr'] = '226.94.1.2'
        h['totem/interface[2]/mcastport'] = '5405'
        configs.append(h)
    else:
        print('Not including rrp tests. Use --rrp-binaddr to enable them.')

    # Generated tests: one instance per valid (test, config) pair.
    for cfg in configs:
        for testclass in GenTestClasses:
            bound_test = testclass(cm)
            if bound_test.is_applicable() and bound_test.config_valid(cfg):
                bound_test.Audits = audits
                for key in list(cfg.keys()):
                    bound_test.config[key] = cfg[key]
                bound_test.name = bound_test.name + '_' + cfg.name
                result.append(bound_test)
    return result

File Metadata

Mime Type
text/x-diff
Expires
Wed, Jun 25, 3:02 AM (15 h, 20 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1951940
Default Alt Text
(94 KB)

Event Timeline