Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/cts/CTSvars.py.in b/cts/CTSvars.py.in
index af2fcc41..cb59119f 100644
--- a/cts/CTSvars.py.in
+++ b/cts/CTSvars.py.in
@@ -1,3 +1,3 @@
-class CTSvars:
+class CTSvars(object):
CTS_home="@prefix@/share/pacemaker/tests/cts"
INITDIR="/etc/init.d"
diff --git a/cts/corolab.py b/cts/corolab.py
index 8f1ebb5e..98564419 100755
--- a/cts/corolab.py
+++ b/cts/corolab.py
@@ -1,335 +1,337 @@
#!/usr/bin/python
'''CTS: Cluster Testing System: Lab environment module
'''
+from __future__ import print_function
+
__copyright__='''
Copyright (c) 2010 Red Hat, Inc.
'''
# All rights reserved.
#
# Author: Angus Salkeld <asalkeld@redhat.com>
#
# This software licensed under BSD license, the text of which follows:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# - Neither the name of the MontaVista Software, Inc. nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
import sys
from cts.CTSscenarios import *
from cts.logging import *
from corotests import CoroTestList
from corosync import *
sys.path.append("/usr/share/pacemaker/tests/cts") # So that things work from the source directory
try:
from CTSlab import *
except ImportError:
sys.stderr.write("abort: couldn't find CTSLab in [%s]\n" %
' '.join(sys.path))
sys.stderr.write("(check your install and PYTHONPATH)\n")
sys.exit(-1)
tests = None
cm = None
old_handler = None
DefaultFacility = "daemon"
def usage(arg):
- print "Illegal argument " + arg
- print "usage: " + sys.argv[0] +" [options] number-of-iterations"
- print "\nCommon options: "
- print "\t [--at-boot (1|0)], does the cluster software start at boot time"
- print "\t [--nodes 'node list'], list of cluster nodes separated by whitespace"
- print "\t [--limit-nodes max], only use the first 'max' cluster nodes supplied with --nodes"
- print "\t [--logfile path], where should the test software look for logs from cluster nodes"
- print "\t [--rrp-bindaddr addr], extra interface used for rrp, provide the bindaddr"
- print "\t [--outputfile path], optional location for the test software to write logs to"
- print "\t [--syslog-facility name], which syslog facility should the test software log to"
- print "\t [--choose testcase-name], run only the named test"
- print "\t [--list-tests], list the valid tests"
- print "\t [--benchmark], add the timing information"
- print "\t "
- print "Additional (less common) options: "
- print "\t [--trunc (truncate logfile before starting)]"
- print "\t [--xmit-loss lost-rate(0.0-1.0)]"
- print "\t [--recv-loss lost-rate(0.0-1.0)]"
- print "\t [--standby (1 | 0 | yes | no)]"
- print "\t [--fencing (1 | 0 | yes | no)]"
- print "\t [--once], run all valid tests once"
- print "\t [--no-loop-tests], dont run looping/time-based tests"
- print "\t [--no-unsafe-tests], dont run tests that are unsafe for use with ocfs2/drbd"
- print "\t [--valgrind-tests], include tests using valgrind"
- print "\t [--experimental-tests], include experimental tests"
- print "\t [--oprofile 'node list'], list of cluster nodes to run oprofile on]"
- print "\t [--qarsh] Use the QARSH backdoor to access nodes instead of SSH"
- print "\t [--seed random_seed]"
- print "\t [--set option=value]"
+ print("Illegal argument " + arg)
+ print("usage: " + sys.argv[0] +" [options] number-of-iterations")
+ print("\nCommon options: ")
+ print("\t [--at-boot (1|0)], does the cluster software start at boot time")
+ print("\t [--nodes 'node list'], list of cluster nodes separated by whitespace")
+ print("\t [--limit-nodes max], only use the first 'max' cluster nodes supplied with --nodes")
+ print("\t [--logfile path], where should the test software look for logs from cluster nodes")
+ print("\t [--rrp-bindaddr addr], extra interface used for rrp, provide the bindaddr")
+ print("\t [--outputfile path], optional location for the test software to write logs to")
+ print("\t [--syslog-facility name], which syslog facility should the test software log to")
+ print("\t [--choose testcase-name], run only the named test")
+ print("\t [--list-tests], list the valid tests")
+ print("\t [--benchmark], add the timing information")
+ print("\t ")
+ print("Additional (less common) options: ")
+ print("\t [--trunc (truncate logfile before starting)]")
+ print("\t [--xmit-loss lost-rate(0.0-1.0)]")
+ print("\t [--recv-loss lost-rate(0.0-1.0)]")
+ print("\t [--standby (1 | 0 | yes | no)]")
+ print("\t [--fencing (1 | 0 | yes | no)]")
+ print("\t [--once], run all valid tests once")
+ print("\t [--no-loop-tests], dont run looping/time-based tests")
+ print("\t [--no-unsafe-tests], dont run tests that are unsafe for use with ocfs2/drbd")
+ print("\t [--valgrind-tests], include tests using valgrind")
+ print("\t [--experimental-tests], include experimental tests")
+ print("\t [--oprofile 'node list'], list of cluster nodes to run oprofile on]")
+ print("\t [--qarsh] Use the QARSH backdoor to access nodes instead of SSH")
+ print("\t [--seed random_seed]")
+ print("\t [--set option=value]")
sys.exit(1)
class CoroLabEnvironment(CtsLab):
def __init__(self):
CtsLab.__init__(self)
# Get a random seed for the random number generator.
self["DoStonith"] = 0
self["DoStandby"] = 0
self["DoFencing"] = 0
self["XmitLoss"] = "0.0"
self["RecvLoss"] = "0.0"
self["IPBase"] = "127.0.0.10"
self["ClobberCIB"] = 0
self["CIBfilename"] = None
self["CIBResource"] = 0
self["DoBSC"] = 0
self["use_logd"] = 0
self["oprofile"] = []
self["RrpBindAddr"] = None
self["warn-inactive"] = 0
self["ListTests"] = 0
self["benchmark"] = 0
self["logrestartcmd"] = "systemctl restart rsyslog.service 2>&1 > /dev/null"
self["syslogd"] ="rsyslog"
self["Schema"] = "corosync 2.0"
self["Stack"] = "corosync"
self['CMclass'] = corosync_needle
self["stonith-type"] = "external/ssh"
self["stonith-params"] = "hostlist=all,livedangerously=yes"
self["at-boot"] = 0 # Does the cluster software start automatically when the node boot
self["logger"] = ([StdErrLog(self, 'corosync.log')])
self["loop-minutes"] = 60
self["valgrind-prefix"] = None
self["valgrind-procs"] = "corosync"
self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
self["experimental-tests"] = 0
self["valgrind-tests"] = 0
self["unsafe-tests"] = 0
self["loop-tests"] = 0
self["all-once"] = 0
self["LogWatcher"] = "remote"
self["SyslogFacility"] = DefaultFacility
self["stats"] = 0
self.log = LogFactory().log
#
# Main entry into the test system.
#
if __name__ == '__main__':
Environment = CoroLabEnvironment()
NumIter = 0
Version = 1
LimitNodes = 0
TestCase = None
TruncateLog = 0
ListTests = 0
HaveSeed = 0
node_list = ''
#
# The values of the rest of the parameters are now properly derived from
# the configuration files.
#
# Set the signal handler
signal.signal(15, sig_handler)
signal.signal(10, sig_handler)
# Process arguments...
skipthis=None
args=sys.argv[1:]
for i in range(0, len(args)):
if skipthis:
skipthis=None
continue
elif args[i] == "-l" or args[i] == "--limit-nodes":
skipthis=1
LimitNodes = int(args[i+1])
elif args[i] == "-L" or args[i] == "--logfile":
skipthis=1
Environment["LogFileName"] = args[i+1]
elif args[i] == "--outputfile":
skipthis=1
Environment["OutputFile"] = args[i+1]
elif args[i] == "--rrp-bindaddr":
skipthis=1
Environment["RrpBindAddr"] = args[i+1]
elif args[i] == "--oprofile":
skipthis=1
Environment["oprofile"] = args[i+1].split(' ')
elif args[i] == "--trunc":
Environment["TruncateLog"]=1
elif args[i] == "--list-tests":
Environment["ListTests"]=1
elif args[i] == "--benchmark":
Environment["benchmark"]=1
elif args[i] == "--qarsh":
Environment.rsh.enable_qarsh()
elif args[i] == "--fencing":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
Environment["DoFencing"] = 1
elif args[i+1] == "0" or args[i+1] == "no":
Environment["DoFencing"] = 0
else:
usage(args[i+1])
elif args[i] == "--xmit-loss":
try:
float(args[i+1])
except ValueError:
print ("--xmit-loss parameter should be float")
usage(args[i+1])
skipthis=1
Environment["XmitLoss"] = args[i+1]
elif args[i] == "--recv-loss":
try:
float(args[i+1])
except ValueError:
print ("--recv-loss parameter should be float")
usage(args[i+1])
skipthis=1
Environment["RecvLoss"] = args[i+1]
elif args[i] == "--choose":
skipthis=1
TestCase = args[i+1]
elif args[i] == "--nodes":
skipthis=1
node_list = args[i+1].split(' ')
elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
Environment["at-boot"] = 1
elif args[i+1] == "0" or args[i+1] == "no":
Environment["at-boot"] = 0
else:
usage(args[i+1])
elif args[i] == "--set":
skipthis=1
(name, value) = args[i+1].split('=')
Environment[name] = value
elif args[i] == "--once":
skipthis=1
Environment["all-once"]=1
elif args[i] == "--stack":
skipthis=1
Environment["Stack"] = args[i+1]
else:
try:
NumIter=int(args[i])
except ValueError:
usage(args[i])
if Environment["OutputFile"]:
Environment["logger"].append(FileLog(Environment, Environment["OutputFile"]))
if len(node_list) < 1:
- print "No nodes specified!"
+ print("No nodes specified!")
sys.exit(1)
if LimitNodes > 0:
if len(node_list) > LimitNodes:
print("Limiting the number of nodes configured=%d (max=%d)"
%(len(node_list), LimitNodes))
while len(node_list) > LimitNodes:
node_list.pop(len(node_list)-1)
Environment["nodes"] = node_list
# Create the Cluster Manager object
cm = Environment['CMclass'](Environment)
Audits = CoroAuditList(cm)
if Environment["ListTests"] == 1 :
Tests = CoroTestList(cm, Audits)
Environment.log("Total %d tests"%len(Tests))
for test in Tests :
Environment.log(str(test.name));
sys.exit(0)
if TruncateLog:
Environment.log("Truncating %s" % LogFile)
lf = open(LogFile, "w");
if lf != None:
lf.truncate(0)
lf.close()
if TestCase != None:
for test in CoroTestList(cm, Audits):
if test.name == TestCase:
Tests.append(test)
if Tests == []:
usage("--choose: No applicable/valid tests chosen")
else:
Tests = CoroTestList(cm, Audits)
# Scenario selection
if Environment["DoBSC"]:
scenario = RandomTests(cm, [ BasicSanityCheck(Environment) ], Audits, Tests)
elif Environment["all-once"] or NumIter == 0:
NumIter = len(Tests)
scenario = AllOnce(
cm, [ BootCluster(Environment), TestAgentComponent(Environment), PacketLoss(Environment) ], Audits, Tests)
else:
scenario = RandomTests(
cm, [ BootCluster(Environment), TestAgentComponent(Environment), PacketLoss(Environment) ], Audits, Tests)
Environment.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ")
Environment.log("Stack: %s" % Environment["Stack"])
Environment.log("Schema: %s" % Environment["Schema"])
Environment.log("Scenario: %s" % scenario.__doc__)
Environment.log("Random Seed: %s" % Environment["RandSeed"])
Environment.log("System log files: %s" % Environment["LogFileName"])
Environment.dump()
rc = Environment.run(scenario, NumIter)
sys.exit(rc)
diff --git a/cts/corosync.py b/cts/corosync.py
index 21898511..4c077417 100644
--- a/cts/corosync.py
+++ b/cts/corosync.py
@@ -1,673 +1,679 @@
'''CTS: Cluster Testing System: corosync...
'''
__copyright__='''
Copyright (c) 2010 Red Hat, Inc.
'''
# All rights reserved.
#
# Author: Angus Salkeld <asalkeld@redhat.com>
#
# This software licensed under BSD license, the text of which follows:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# - Neither the name of the MontaVista Software, Inc. nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
import os
import sys
import time
import socket
import shutil
import string
import augeas
from cts.CTS import ClusterManager
from cts.CTSscenarios import ScenarioComponent
from cts.remote import RemoteExec
from cts.remote import RemoteFactory
from cts.logging import *
from cts.CTSvars import CTSvars
from cts.CTSaudits import ClusterAudit
from cts.CTSaudits import LogAudit
###################################################################
class CoroConfig(object):
def __init__(self, corobase=None):
self.base = "/files/etc/corosync/corosync.conf/"
self.new_root = "/tmp/aug-root/"
if corobase == None:
self.corobase = os.getcwd() + "/.."
else:
self.corobase = corobase
example = self.corobase + "/conf/corosync.conf.example"
if os.path.isdir(self.new_root):
shutil.rmtree (self.new_root)
os.makedirs (self.new_root + "/etc/corosync")
shutil.copy (example, self.new_root + "/etc/corosync/corosync.conf")
self.aug = augeas.Augeas (root=self.new_root,
loadpath=self.corobase + "/conf/lenses")
self.original = {}
# store the original values (of totem), so we can restore them in
# apply_default_config()
totem = self.aug.match('/files/etc/corosync/corosync.conf/totem/*')
for c in totem:
# /files/etc/corosync/corosync.conf/
short_name = c[len(self.base):]
self.original[short_name] = self.aug.get(c)
interface = self.aug.match('/files/etc/corosync/corosync.conf/totem/interface/*')
for c in interface:
short_name = c[len(self.base):]
self.original[short_name] = self.aug.get(c)
def get (self, name):
return self.aug.get (self.base + name)
def set (self, name, value):
token = self.aug.set (self.base + name, str(value))
def save (self):
self.aug.save()
def get_filename(self):
return self.new_root + "/etc/corosync/corosync.conf"
###################################################################
class corosync_needle(ClusterManager):
'''
bla
'''
def __init__(self, Environment, randseed=None):
ClusterManager.__init__(self, Environment, randseed)
self.update({
"Name" : "corosync(needle)",
"StartCmd" : "service corosync start",
"StopCmd" : "service corosync stop",
"RereadCmd" : "service corosync reload",
"StatusCmd" : "service corosync status",
"DeadTime" : 30,
"StartTime" : 15, # Max time to start up
"StableTime" : 10,
"BreakCommCmd" : "/usr/share/corosync/tests/net_breaker.sh BreakCommCmd %s",
"FixCommCmd" : "/usr/share/corosync/tests/net_breaker.sh FixCommCmd %s",
"QuorumCmd" : "corosync-quorumtool -s",
"Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting.*",
"Pat:They_stopped" : "%s.*Member left:.*%s.*",
"Pat:They_dead" : "corosync:.*Node %s is now: lost",
"Pat:Local_starting" : "%s.*Initializing transport",
"Pat:Local_started" : "%s.*Initializing transport",
"Pat:Master_started" : "%s.*Completed service synchronization, ready to provide service.",
"Pat:Slave_started" : "%s.*Completed service synchronization, ready to provide service.",
"Pat:ChildKilled" : "%s corosync.*Child process %s terminated with signal 9",
"Pat:ChildRespawn" : "%s corosync.*Respawning failed child process: %s",
"Pat:ChildExit" : "Child process .* exited",
"Pat:DC_IDLE" : ".*A new membership.*was formed.",
# Bad news Regexes. Should never occur.
"BadRegexes" : (
r"ERROR:",
r"CRIT:",
r"Shutting down\.",
r"Forcing shutdown\.",
r"core dump",
r"Could not bind AF_UNIX",
r"Too many open files",
r"Address already in use",
),
"LogFileName" : Environment["LogFileName"],
})
self.start_cpg = True
self.cpg_agent = {}
self.sam_agent = {}
self.votequorum_agent = {}
self.config = CoroConfig ()
self.node_to_ip = {}
self.new_config = {}
self.applied_config = {}
for n in self.Env["nodes"]:
ip = socket.gethostbyname(n)
ips = ip.split('.')
ips[3] = '0'
ip_mask = '.'.join(ips)
self.new_config['totem/interface/bindnetaddr'] = str(ip_mask)
return
def apply_default_config(self):
for c in self.applied_config:
if 'bindnetaddr' in c:
continue
- elif not self.config.original.has_key(c):
+ elif c not in self.config.original:
# new config option (non default)
pass
elif self.applied_config[c] is not self.config.original[c]:
# reset to the original
self.new_config[c] = self.config.original[c]
if len(self.new_config) > 0:
self.debug('applying default config')
self.stopall()
def apply_new_config(self, need_all_up=True):
if len(self.new_config) > 0:
self.debug('applying new config')
self.stopall()
if need_all_up:
self.startall()
def install_all_config(self):
tmp1 = {}
sorted_keys = sorted(self.new_config.keys())
for c in sorted_keys:
self.log('configuring: ' + c + ' = '+ str(self.new_config[c]))
self.config.set (c, self.new_config[c])
self.applied_config[c] = self.new_config[c]
tmp1[c] = self.new_config[c]
for c in tmp1:
del self.new_config[c]
self.config.save()
src_file = self.config.get_filename()
for node in self.Env["nodes"]:
self.rsh.cp(src_file, "%s:%s" % (node, "/etc/corosync/"))
def install_config(self, node):
# install gets new_config and installs it, then moves the
# config to applied_config
if len(self.new_config) > 0:
self.install_all_config()
def key_for_node(self, node):
- if not self.node_to_ip.has_key(node):
+ if node not in self.node_to_ip:
self.node_to_ip[node] = socket.gethostbyname (node)
return self.node_to_ip[node]
def StartaCM(self, node, verbose=False):
- if not self.ShouldBeStatus.has_key(node):
+ if node not in self.ShouldBeStatus:
self.ShouldBeStatus[node] = "down"
if self.ShouldBeStatus[node] != "down":
return 1
self.debug('starting corosync on : ' + node)
ret = ClusterManager.StartaCM(self, node)
if self.start_cpg:
- if self.cpg_agent.has_key(node):
+ if node in self.cpg_agent:
self.cpg_agent[node].restart()
else:
self.cpg_agent[node] = CpgTestAgent(node, self.Env)
self.cpg_agent[node].start()
- if self.sam_agent.has_key(node):
+ if node in self.sam_agent:
self.sam_agent[node].restart()
# votequorum agent started as needed.
- if self.applied_config.has_key('quorum/provider'):
+ if 'quorum/provider' in self.applied_config:
if self.applied_config['quorum/provider'] is 'corosync_votequorum':
- if self.votequorum_agent.has_key(node):
+ if node in self.votequorum_agent:
self.votequorum_agent[node].restart()
else:
self.votequorum_agent[node] = VoteQuorumTestAgent(node, self.Env)
self.votequorum_agent[node].start()
return ret
def StopaCM(self, node, verbose=False):
if self.ShouldBeStatus[node] != "up":
return 1
self.debug('stoping corosync on : ' + node)
- if self.cpg_agent.has_key(node):
+ if node in self.cpg_agent:
self.cpg_agent[node].stop()
- if self.sam_agent.has_key(node):
+ if node in self.sam_agent:
self.sam_agent[node].stop()
- if self.votequorum_agent.has_key(node):
+ if node in self.votequorum_agent:
self.votequorum_agent[node].stop()
return ClusterManager.StopaCM(self, node)
def test_node_CM(self, node):
# 2 - up and stable
# 1 - unstable
# 0 - down
(rc, lines) = self.rsh(node, self["StatusCmd"], stdout=2)
out = str(lines)
if 'systemd' in out:
if 'running' in out:
ret = 2
else:
ret = 0
else:
is_stopped = string.find(out, 'stopped')
is_dead = string.find(out, 'dead')
ret = (is_dead is -1 and is_stopped is -1)
try:
if ret:
ret = 2
if self.ShouldBeStatus[node] == "down":
self.log(
"Node status for %s is %s but we think it should be %s"
% (node, "up", self.ShouldBeStatus[node]))
else:
if self.ShouldBeStatus[node] == "up":
self.log(
"Node status for %s is %s but we think it should be %s"
% (node, "down", self.ShouldBeStatus[node]))
except KeyError: pass
if ret: self.ShouldBeStatus[node] = "up"
else: self.ShouldBeStatus[node] = "down"
return ret
def StataCM(self, node):
'''Report the status of corosync on a given node'''
if self.test_node_CM(node) > 0:
return 1
else:
return None
def RereadCM(self, node):
self.log('reloading corosync on : ' + node)
return ClusterManager.RereadCM(self, node)
def find_partitions(self):
ccm_partitions = []
return ccm_partitions
def prepare(self):
'''Finish the Initialization process. Prepare to test...'''
self.partitions_expected = 1
for node in self.Env["nodes"]:
self.ShouldBeStatus[node] = ""
self.unisolate_node(node)
self.StataCM(node)
def HasQuorum(self, node_list):
# If we are auditing a partition, then one side will
# have quorum and the other not.
# So the caller needs to tell us which we are checking
# If no value for node_list is specified... assume all nodes
if not node_list:
node_list = self.Env["nodes"]
for node in node_list:
if self.ShouldBeStatus[node] == "up":
(quorum, qout) = self.rsh(node, self["QuorumCmd"], stdout=2)
if quorum == 1:
return 1
elif quorum == 0:
return 0
else:
self.log("WARN: Unexpected quorum test result from %s : %d" % (node, quorum))
return 0
def Components(self):
return None
class ShmLeakAudit(ClusterAudit):
def __init__(self, cm):
self.CM = cm
def name(self):
return "ShmLeakAudit"
def is_applicable(self):
return 1
def __call__(self):
rc = 1
for node in self.CM.Env["nodes"]:
(res, lines) = self.CM.rsh(node, "/usr/share/corosync/tests/shm_leak_audit.sh", None)
for line in lines:
self.CM.log("%s leaked %s" % (node, line))
rc = 0
return rc
###################################################################
class TestAgentComponent(ScenarioComponent):
def __init__(self, Env):
self.Env = Env
def IsApplicable(self):
'''Return TRUE if the current ScenarioComponent is applicable
in the given LabEnvironment given to the constructor.
'''
return True
def SetUp(self, CM):
'''Set up the given ScenarioComponent'''
self.CM = CM
for node in self.Env["nodes"]:
if not CM.StataCM(node):
raise RuntimeError ("corosync not up")
if self.CM.start_cpg:
- if self.CM.cpg_agent.has_key(node):
+ if node in self.CM.cpg_agent:
self.CM.cpg_agent[node].clean_start()
else:
self.CM.cpg_agent[node] = CpgTestAgent(node, CM.Env)
self.CM.cpg_agent[node].start()
- if self.CM.sam_agent.has_key(node):
+ if node in self.CM.sam_agent:
self.CM.sam_agent[node].clean_start()
else:
self.CM.sam_agent[node] = SamTestAgent(node, CM.Env)
self.CM.sam_agent[node].start()
# votequorum agent started as needed.
- if self.CM.applied_config.has_key('quorum/provider'):
+ if 'quorum/provider' in self.CM.applied_config:
if CM.applied_config['quorum/provider'] is 'corosync_votequorum':
self.CM.votequorum_agent[node] = VoteQuorumTestAgent(node, CM.Env)
self.CM.votequorum_agent[node].start()
return 1
def TearDown(self, CM):
'''Tear down (undo) the given ScenarioComponent'''
self.CM = CM
for node in self.Env["nodes"]:
- if self.CM.cpg_agent.has_key(node):
+ if node in self.CM.cpg_agent:
self.CM.cpg_agent[node].stop()
self.CM.sam_agent[node].stop()
- if self.CM.votequorum_agent.has_key(node):
+ if node in self.CM.votequorum_agent:
self.CM.votequorum_agent[node].stop()
###################################################################
class TestAgent(object):
def __init__(self, binary, node, port, Env=None):
self.node = node
self.node_address = None
self.port = port
self.sock = None
self.binary = binary
self.started = False
resh = RemoteFactory.rsh
self.rsh = RemoteExec(resh)
- self.func_name = None
+ self.__name__ = None
self.used = False
self.env = Env
self.send_recv = False
def restart(self):
LogFactory().debug('%s:%s restarting' % (self.node, self.binary))
self.stop()
self.start()
def clean_start(self):
if self.used or not self.status():
LogFactory().debug('%s:%s cleaning' % (self.node, self.binary))
self.stop()
self.start()
def status(self):
if not self.started:
return False
try:
self.send_internal(["are_you_ok_dude"])
self.read()
self.started = True
return True
- except RuntimeError, msg:
+ except RuntimeError as msg:
self.started = False
return False
def start(self):
'''Set up the given ScenarioComponent'''
if self.status():
return
LogFactory().debug('%s:%s starting ' % (self.node, self.binary))
self.rsh(self.node, self.binary, synchronous=False)
self.sock = socket.socket (socket.AF_INET, socket.SOCK_STREAM)
ip = socket.gethostbyname(self.node)
is_connected = False
retries = 0
while not is_connected:
try:
retries = retries + 1
self.sock.connect ((ip, self.port))
is_connected = True
- except socket.error, msg:
+ except socket.error as msg:
if retries > 10:
LogFactory().log("%s:%s Tried connecting %d times. %s" % (self.node, self.binary, retries, str(msg)))
if retries > 30:
raise RuntimeError("%s:%s can't connect" % (self.node, self.binary))
time.sleep(1)
self.started = True
self.used = False
def stop(self):
'''Tear down (undo) the given ScenarioComponent'''
LogFactory().debug('%s:%s stopping' % (self.binary, self.node))
self.rsh(self.node, "killall " + self.binary + " 2>/dev/null")
if self.sock:
self.sock.close ()
del self.sock
self.sock = None
while self.getpid() != '':
time.sleep(1)
self.started = False
def kill(self):
'''Tear down (undo) the given ScenarioComponent'''
LogFactory().log('%s:%s killing' % (self.node, self.binary))
self.rsh(self.node, "killall -9 " + self.binary + " 2>/dev/null")
self.started = False
def getpid(self):
return self.rsh(self.node, 'pidof ' + self.binary, 1)
def send_internal(self, args):
real_msg = str (len (args))
for a in args:
a_str = str(a)
real_msg += ":" + str (len (a_str)) + ":" + a_str
real_msg += ";"
+ if sys.version_info > (3,):
+ real_msg = real_msg.encode("utf8")
try:
return self.sock.send (real_msg)
- except socket.error, msg:
+ except socket.error as msg:
LogFactory().log("send(%s): %s; error: %s" % (self.node, real_msg, msg))
return 0
def send (self, args):
if not self.started:
self.start()
sent = self.send_internal(args)
if sent == 0:
raise RuntimeError ("socket connection broken")
self.used = True
def __getattribute__(self,name):
try:
return object.__getattribute__(self, name)
except:
- self.func_name = name
+ self.__name__ = name
if self.send_recv:
return self.send_recv_dynamic
else:
return self.send_dynamic
def send_recv_dynamic (self, *args):
self.send_dynamic (args)
try:
res = self.read ()
- except RuntimeError, msg:
+ except RuntimeError as msg:
res = None
- LogFactory().log("send_recv_dynamic: %s(); error: %s" % (self.func_name, msg))
+ LogFactory().log("send_recv_dynamic: %s(); error: %s" % (self.__name__, msg))
return res
def send_dynamic (self, *args):
if not self.started:
raise RuntimeError ("agent not started")
# number of args+func
- real_msg = str (len (args) + 1) + ":" + str(len(self.func_name)) + ":" + self.func_name
+ real_msg = str (len (args) + 1) + ":" + str(len(self.__name__)) + ":" + self.__name__
for a in args:
a_str = str(a)
real_msg += ":" + str (len (a_str)) + ":" + a_str
real_msg += ";"
sent = 0
+ if sys.version_info > (3,):
+ real_msg = bytes(real_msg, encoding = "utf8")
try:
sent = self.sock.send (real_msg)
- except socket.error, msg:
+ except socket.error as msg:
LogFactory().log("send_dynamic(%s): %s; error: %s" % (self.node, real_msg, msg))
if sent == 0:
raise RuntimeError ("socket connection broken")
self.used = True
def read (self):
try:
msg = self.sock.recv (4096)
- except socket.error, msg:
+ except socket.error as msg:
raise RuntimeError(msg)
+ if sys.version_info > (3,):
+ msg = msg.decode("utf8")
if msg == '':
raise RuntimeError("socket connection broken")
return msg
-class CpgConfigEvent:
+class CpgConfigEvent(object):
def __init__(self, msg):
info = msg.split(',')
self.group_name = info[0]
self.node_id = info[1]
self.node = None
self.pid = info[2]
if "left" in info[3]:
self.is_member = False
else:
self.is_member = True
def __str__ (self):
str = self.group_name + "," + self.node_id + "," + self.pid + ","
if self.is_member:
return str + "joined"
else:
return str + "left"
###################################################################
class CpgTestAgent(TestAgent):
def __init__(self, node, Env=None):
TestAgent.__init__(self, "cpg_test_agent", node, 9034, Env)
self.nodeid = None
def start(self):
if not self.status():
TestAgent.start(self)
self.cpg_initialize()
self.used = False
def cpg_local_get(self):
if self.nodeid == None:
self.send (["cpg_local_get"])
self.nodeid = self.read ()
return self.nodeid
def record_config_events(self, truncate=True):
if truncate:
self.send (["record_config_events", "truncate"])
else:
self.send (["record_config_events", "append"])
return self.read ()
def read_config_event(self):
self.send (["read_config_event"])
msg = self.read ()
if "None" in msg:
return None
else:
return CpgConfigEvent(msg)
def read_messages(self, atmost):
self.send (["read_messages", atmost])
msg = self.read ()
if "None" in msg:
return None
else:
return msg
def context_test(self):
self.send (["context_test"])
return self.read ()
###################################################################
class SamTestAgent(TestAgent):
def __init__(self, node, Env=None):
TestAgent.__init__(self, "sam_test_agent", node, 9036, Env)
self.nodeid = None
self.send_recv = True
###################################################################
class VoteQuorumTestAgent(TestAgent):
def __init__(self, node, Env=None):
TestAgent.__init__(self, "votequorum_test_agent", node, 9037, Env)
self.nodeid = None
self.send_recv = True
AllAuditClasses = []
AllAuditClasses.append(LogAudit)
AllAuditClasses.append(ShmLeakAudit)
def CoroAuditList(cm):
result = []
for auditclass in AllAuditClasses:
a = auditclass(cm)
if a.is_applicable():
result.append(a)
return result
diff --git a/cts/corotests.py b/cts/corotests.py
index 5778100a..6a983fe4 100644
--- a/cts/corotests.py
+++ b/cts/corotests.py
@@ -1,1615 +1,1624 @@
+from __future__ import division
+from __future__ import print_function
+
__copyright__='''
Copyright (c) 2010 Red Hat, Inc.
'''
# All rights reserved.
#
# Author: Angus Salkeld <asalkeld@redhat.com>
#
# This software licensed under BSD license, the text of which follows:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# - Neither the name of the MontaVista Software, Inc. nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
import random
import socket
-from UserDict import UserDict
+import sys
+if sys.version_info < (3,):
+ from UserDict import UserDict
+else:
+ from collections import UserDict
from cts.CTStests import *
from corosync import CpgTestAgent
###################################################################
class CoroTest(CTSTest):
    '''
    Base class for corosync tests: makes sure the test's configuration
    is applied before the run and the default configuration is restored
    afterwards.
    '''
    def __init__(self, cm):
        CTSTest.__init__(self, cm)
        self.start = StartTest(cm)
        self.stop = StopTest(cm)
        self.config = {}
        self.config['logging/logger_subsys[1]/subsys'] = 'MAIN'
        self.config['logging/logger_subsys[1]/debug'] = 'on'
        self.need_all_up = True
        self.CM.start_cpg = True
        self.cpg_name = 'cts_group'

    def setup(self, node):
        '''Distribute the authkey, push this test's config and bring every
        node into the up/down state the test requires.'''
        ret = CTSTest.setup(self, node)

        # setup the authkey: generate once, pull it locally, fan it out
        localauthkey = '/tmp/authkey'
        if not os.path.exists(localauthkey):
            self.CM.rsh(node, 'corosync-keygen -l')
            self.CM.rsh.cp("%s:%s" % (node, "/etc/corosync/authkey"), localauthkey)

        for n in self.CM.Env["nodes"]:
            # was "n is not node": identity is not a reliable way to
            # compare node-name strings; use value inequality
            if n != node:
                # copy key onto other nodes
                self.CM.rsh.cp(localauthkey, "%s:%s" % (n, "/etc/corosync/authkey"))

        # copy over any new config
        for c in self.config:
            self.CM.new_config[c] = self.config[c]

        # apply the config
        self.CM.apply_new_config(self.need_all_up)

        # start/stop all corosyncs'
        for n in self.CM.Env["nodes"]:
            if self.need_all_up and not self.CM.StataCM(n):
                self.incr("started")
                self.start(n)
            if self.need_all_up and self.CM.start_cpg:
                self.CM.cpg_agent[n].clean_start()
                self.CM.cpg_agent[n].cpg_join(self.cpg_name)
                self.CM.cpg_agent[n].cfg_initialize()
            if not self.need_all_up and self.CM.StataCM(n):
                self.incr("stopped")
                self.stop(n)
        return ret

    def config_valid(self, config):
        # Subclasses override to veto configurations they cannot run under.
        return True

    def teardown(self, node):
        self.CM.apply_default_config()
        return CTSTest.teardown(self, node)
###################################################################
class CpgContextTest(CoroTest):
    """Verify that the cpg agent's context get/set round-trip works."""

    def __init__(self, cm):
        self.name = "CpgContextTest"
        CoroTest.__init__(self, cm)
        self.CM.start_cpg = True

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.cpg_agent[node].context_test()
        if 'OK' not in outcome:
            return self.failure('context_test failed')
        return self.success()
###################################################################
class CpgConfigChangeBase(CoroTest):
    '''
    join a cpg group on each node, and test that the following
    causes a leave event:
    - a call to cpg_leave()
    - app exit
    - node leave
    - node leave (with large token timeout)
    '''

    def setup(self, node):
        '''Pick a "wobbly" node to disturb and a "listener" node that
        records the resulting cpg configuration-change events.'''
        ret = CoroTest.setup(self, node)

        self.listener = None
        self.wobbly = None
        for n in self.CM.Env["nodes"]:
            if self.wobbly is None:
                self.wobbly = n
            elif self.listener is None:
                self.listener = n

        if self.wobbly in self.CM.cpg_agent:
            self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
        if self.listener in self.CM.cpg_agent:
            self.CM.cpg_agent[self.listener].record_config_events(truncate=True)

        return ret

    def wait_for_config_change(self):
        '''Poll the listener until it reports the wobbly node leaving the
        group; fail after 15 minutes without a matching event.'''
        found = False
        max_timeout = 60 * 15
        waited = 0
        printit = 0
        self.CM.log("Waiting for config change on " + self.listener)
        while not found:
            try:
                event = self.CM.cpg_agent[self.listener].read_config_event()
            except:  # deliberately broad: any agent error means the link died
                return self.failure('connection to test cpg_agent failed.')
            # was "not event == None" / "event == None": compare to None
            # with identity, not equality
            if event is not None:
                self.CM.debug("RECEIVED: " + str(event))
            if event is None:
                if waited >= max_timeout:
                    return self.failure("timedout(" + str(waited) + " sec) == no event!")
                else:
                    time.sleep(1)
                    waited = waited + 1
                    printit = printit + 1
                    # was "printit is 60": identity comparison of ints is
                    # an implementation detail; use equality
                    if printit == 60:
                        print('waited ' + str(waited) + ' seconds')
                        printit = 0
            elif str(event.node_id) in str(self.wobbly_id) and not event.is_member:
                self.CM.log("Got the config change in " + str(waited) + " seconds")
                found = True
            else:
                self.CM.debug("No match")
                self.CM.debug("wobbly nodeid:" + str(self.wobbly_id))
                self.CM.debug("event nodeid:" + str(event.node_id))
                self.CM.debug("event.is_member:" + str(event.is_member))
        if found:
            return self.success()
###################################################################
class CpgCfgChgOnGroupLeave(CpgConfigChangeBase):
    """Leave the cpg group voluntarily and expect a config-change event."""

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnGroupLeave"

    def failure_action(self):
        # The disturbance: an orderly cpg_leave() from the wobbly node.
        self.CM.log("calling cpg_leave() on " + self.wobbly)
        self.CM.cpg_agent[self.wobbly].cpg_leave(self.cpg_name)

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()
###################################################################
class CpgCfgChgOnNodeLeave(CpgConfigChangeBase):
    """Stop corosync on one node and expect a config-change event."""

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnNodeLeave"

    def failure_action(self):
        # The disturbance: a clean corosync shutdown on the wobbly node.
        self.CM.log("stopping corosync on " + self.wobbly)
        self.stop(self.wobbly)

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()
###################################################################
class CpgCfgChgOnLowestNodeJoin(CTSTest):
    '''
    1) stop all nodes
    2) start all but the node with the smallest ip address
    3) start recording events
    4) start the last node
    '''
    def __init__(self, cm):
        CTSTest.__init__(self, cm)
        self.name = "CpgCfgChgOnLowestNodeJoin"
        self.start = StartTest(cm)
        self.stop = StopTest(cm)
        self.config = {}
        self.need_all_up = False
        # This class derives from CTSTest (not CoroTest), so cpg_name was
        # never initialized before being used in setup()/__call__; use the
        # same group name as every other test.
        self.cpg_name = 'cts_group'

    def config_valid(self, config):
        return True

    def lowest_ip_set(self):
        '''Record the node considered "lowest" (first in the node list).'''
        self.lowest = None
        for n in self.CM.Env["nodes"]:
            if self.lowest is None:
                self.lowest = n
        self.CM.log("lowest node is " + self.lowest)

    def setup(self, node):
        '''Stop everything, install the config, start all nodes except the
        lowest one, and arm a log watch for the sync events.'''
        # stop all nodes
        for n in self.CM.Env["nodes"]:
            self.CM.StopaCM(n)
        self.lowest_ip_set()

        # copy over any new config
        for c in self.config:
            self.CM.new_config[c] = self.config[c]

        # install the config
        self.CM.install_all_config()

        # start all but lowest
        self.listener = None
        for n in self.CM.Env["nodes"]:
            # was "n is not self.lowest": compare node names by value,
            # not object identity
            if n != self.lowest:
                if self.listener is None:
                    self.listener = n
                self.incr("started")
                self.CM.log("starting " + n)
                self.start(n)
                self.CM.cpg_agent[n].clean_start()
                self.CM.cpg_agent[n].cpg_join(self.cpg_name)

        # start recording events
        pats = []
        pats.append("%s .*sync: node joined.*" % self.listener)
        pats.append("%s .*sync: activate correctly.*" % self.listener)
        self.sync_log = self.create_watch(pats, 60)
        self.sync_log.setwatch()

        self.CM.log("setup done")
        return CTSTest.setup(self, node)

    def __call__(self, node):
        '''Start the lowest node last and verify the sync messages appear.'''
        self.incr("calls")
        self.start(self.lowest)
        self.CM.cpg_agent[self.lowest].clean_start()
        self.CM.cpg_agent[self.lowest].cpg_join(self.cpg_name)
        self.wobbly_id = self.CM.cpg_agent[self.lowest].cpg_local_get()

        self.CM.log("waiting for sync events")
        if not self.sync_log.lookforall():
            return self.failure("Patterns not found: " + repr(self.sync_log.unmatched))
        else:
            return self.success()
###################################################################
class CpgCfgChgOnExecCrash(CpgConfigChangeBase):
    """Hard-kill the corosync executive and expect a config-change event."""

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnExecCrash"

    def failure_action(self):
        # Simulate an executive crash: SIGKILL plus stale-state cleanup.
        self.CM.log("sending KILL to corosync on " + self.wobbly)
        self.CM.rsh(self.wobbly, "killall -9 corosync")
        self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
        self.CM.rsh(self.wobbly, "rm -f /dev/shm/qb-corosync-blackbox*")
        self.CM.ShouldBeStatus[self.wobbly] = "down"

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()
###################################################################
class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase):
    """Isolate a node from the network and expect a config-change event."""

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnNodeIsolate"

    def config_valid(self, config):
        # Isolation does not play well with redundant-ring configs.
        return 'totem/rrp_mode' not in config

    def failure_action(self):
        self.CM.log("isolating node " + self.wobbly)
        self.CM.isolate_node(self.wobbly)

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()

    def teardown(self, node):
        # Always restore connectivity before the generic teardown.
        self.CM.unisolate_node(self.wobbly)
        return CpgConfigChangeBase.teardown(self, node)
###################################################################
class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
    '''Repeatedly isolate, kill and restart a node, then expect the
    listener to see it leave the cpg group.'''

    def __init__(self, cm):
        CpgConfigChangeBase.__init__(self, cm)
        self.name = "CpgCfgChgOnNodeRestart"
        self.CM.start_cpg = False

    def config_valid(self, config):
        '''Reject secauth and redundant-ring configurations.'''
        if 'totem/secauth' in config:
            # was "is 'on'": identity comparison with a string literal only
            # works by interning accident; compare by value
            if config['totem/secauth'] == 'on':
                return False
            else:
                return True
        if 'totem/rrp_mode' in config:
            return False
        else:
            return True

    def failure_action(self):
        '''Two isolate/kill/restart rounds; the second one with cpg agents
        running so the leave event is recorded.'''
        self.CM.log("2: isolating node " + self.wobbly)
        self.CM.isolate_node(self.wobbly)
        self.CM.log("3: Killing corosync on " + self.wobbly)
        self.CM.rsh(self.wobbly, "killall -9 corosync")
        self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
        self.CM.ShouldBeStatus[self.wobbly] = "down"
        self.CM.log("4: unisolating node " + self.wobbly)
        self.CM.unisolate_node(self.wobbly)
        self.CM.log("5: starting corosync on " + self.wobbly)
        self.CM.StartaCM(self.wobbly)
        time.sleep(5)

        self.CM.log("6: starting cpg on all nodes")
        self.CM.start_cpg = True
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n] = CpgTestAgent(n, self.CM.Env)
            self.CM.cpg_agent[n].start()
            self.CM.cpg_agent[n].cpg_join(self.cpg_name)

        self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
        self.CM.cpg_agent[self.listener].record_config_events(truncate=True)

        self.CM.log("7: isolating node " + self.wobbly)
        self.CM.isolate_node(self.wobbly)
        self.CM.log("8: Killing corosync on " + self.wobbly)
        self.CM.rsh(self.wobbly, "killall -9 corosync")
        self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
        self.CM.ShouldBeStatus[self.wobbly] = "down"
        self.CM.log("9: unisolating node " + self.wobbly)
        self.CM.unisolate_node(self.wobbly)
        self.CM.log("10: starting corosync on " + self.wobbly)
        self.CM.StartaCM(self.wobbly)

    def __call__(self, node):
        self.incr("calls")
        self.failure_action()
        return self.wait_for_config_change()

    def teardown(self, node):
        self.CM.unisolate_node(self.wobbly)
        return CpgConfigChangeBase.teardown(self, node)
###################################################################
class CpgMsgOrderBase(CoroTest):
    '''Common machinery for total-order tests: every node joins the cpg
    group and records what it receives; afterwards each node must have seen
    the same messages in the same order.'''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.num_msgs_per_node = 0
        self.total_num_msgs = 0

    def setup(self, node):
        '''Join the group on every node and start recording messages.'''
        ret = CoroTest.setup(self, node)
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].clean_start()
            self.CM.cpg_agent[n].cpg_join(self.cpg_name)
            self.CM.cpg_agent[n].record_messages()
        time.sleep(1)
        return ret

    def cpg_msg_blaster(self):
        '''Send num_msgs_per_node messages from every node.'''
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)

    def wait_and_validate_order(self):
        '''Collect the expected message count from each node (up to 360s),
        then verify contents and ordering agree across all nodes.'''
        msgs = {}

        self.total_num_msgs = 0
        for n in self.CM.Env["nodes"]:
            self.total_num_msgs = self.total_num_msgs + self.num_msgs_per_node

        for n in self.CM.Env["nodes"]:
            msgs[n] = []
            waited = 0

            while len(msgs[n]) < self.total_num_msgs and waited < 360:
                try:
                    msg = self.CM.cpg_agent[n].read_messages(50)
                except:  # deliberately broad: any agent error means the link died
                    return self.failure('connection to test cpg_agent failed.')

                # was "not msg == None" / "msg == None": use identity for None
                if msg is not None:
                    # split on ';' and drop the empty entries the trailing
                    # separator produces (was a quadratic remove('') loop)
                    msgs[n].extend([m for m in msg.split(";") if m != ''])
                else:
                    time.sleep(2)
                    waited = waited + 2

            if len(msgs[n]) < self.total_num_msgs:
                return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n])))

        fail = False
        error_message = ''
        for i in range(0, self.total_num_msgs):
            first = None
            for n in self.CM.Env["nodes"]:
                # first test for errors
                params = msgs[n][i].split(":")
                if 'OK' not in params[3]:
                    fail = True
                    error_message = 'error: ' + params[3] + ' in received message'
                    self.CM.log(str(params))
                # then look for out of order messages
                if first is None:
                    first = n
                else:
                    if not msgs[first][i] == msgs[n][i]:
                        # message order not the same!
                        fail = True
                        error_message = 'message out of order'
                        self.CM.log(msgs[first][i] + " != " + msgs[n][i])

        if fail:
            return self.failure(error_message)
        else:
            return self.success()
###################################################################
class CpgMsgOrderBasic(CpgMsgOrderBase):
    '''
    each sends & logs lots of messages
    '''
    def __init__(self, cm):
        CpgMsgOrderBase.__init__(self, cm)
        self.name = "CpgMsgOrderBasic"
        self.num_msgs_per_node = 9000

    def __call__(self, node):
        self.incr("calls")
        # blast from every node, then check total order held
        self.cpg_msg_blaster()
        return self.wait_and_validate_order()
###################################################################
class CpgMsgOrderZcb(CpgMsgOrderBase):
    '''
    each sends & logs lots of messages
    '''
    def __init__(self, cm):
        CpgMsgOrderBase.__init__(self, cm)
        self.name = "CpgMsgOrderZcb"
        self.num_msgs_per_node = 9000

    def __call__(self, node):
        self.incr("calls")
        # same as the basic test but via the zero-copy-buffer send path
        for member in self.CM.Env["nodes"]:
            self.CM.cpg_agent[member].msg_blaster_zcb(self.num_msgs_per_node)
        return self.wait_and_validate_order()
###################################################################
class MemLeakObject(CoroTest):
    '''
    run mem_leak_test.sh -1
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "MemLeakObject"

    def __call__(self, node):
        '''Run the per-object leak checker; any non-zero result is the
        number of leaked kB and means failure.'''
        self.incr("calls")
        mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1")
        # was "mem_leaked is 0": identity comparison of ints relies on
        # CPython's small-int cache; compare by value
        if mem_leaked == 0:
            return self.success()
        else:
            return self.failure(str(mem_leaked) + 'kB memory leaked.')
###################################################################
class MemLeakSession(CoroTest):
    '''
    run mem_leak_test.sh -2
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "MemLeakSession"

    def __call__(self, node):
        '''Run the per-session leak checker; any non-zero result is the
        number of leaked kB and means failure.'''
        self.incr("calls")
        mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2")
        # was "mem_leaked is 0": identity comparison of ints relies on
        # CPython's small-int cache; compare by value
        if mem_leaked == 0:
            return self.success()
        else:
            return self.failure(str(mem_leaked) + 'kB memory leaked.')
###################################################################
class CMapDispatchDeadlock(CoroTest):
    '''
    run cmap-dispatch-deadlock.sh
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "CMapDispatchDeadlock"

    def __call__(self, node):
        '''Run the deadlock reproducer script; non-zero exit means a
        deadlock was detected.'''
        self.incr("calls")
        result = self.CM.rsh(node, "/usr/share/corosync/tests/cmap-dispatch-deadlock.sh")
        # was "result is 0": identity comparison of ints relies on
        # CPython's small-int cache; compare by value
        if result == 0:
            return self.success()
        else:
            return self.failure('Deadlock detected')
###################################################################
class SamTest1(CoroTest):
    """Run SAM agent test1 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest1"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test1()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest2(CoroTest):
    """Run SAM agent test2 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest2"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test2()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest4(CoroTest):
    """Run SAM agent test4 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest4"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test4()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest5(CoroTest):
    """Run SAM agent test5 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest5"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test5()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest6(CoroTest):
    """Run SAM agent test6 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest6"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test6()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest8(CoroTest):
    """Run SAM agent test8 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest8"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test8()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
###################################################################
class SamTest9(CoroTest):
    """Run SAM agent test9 on the target node and report the outcome."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamTest9"

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.sam_agent[node].test9()
        if 'OK' not in outcome:
            return self.failure(self.name + ' failed')
        return self.success()
class QuorumState(object):
    """Snapshot of the votequorum state as seen from one node.

    Attributes (set by refresh()): node_votes, expected_votes,
    highest_expected, total_votes, quorum, quorate.
    """
    def __init__(self, cm, node):
        # node: name of the node whose votequorum agent we query
        self.node = node
        self.CM = cm
        self.CM.votequorum_agent[self.node].init()

    def refresh(self):
        """Re-read votequorum info from the agent and update attributes.

        The agent returns a colon-separated string:
        node_votes:expected_votes:highest_expected:total_votes:quorum
        """
        info = self.CM.votequorum_agent[self.node].votequorum_getinfo()
        assert(info != 'FAIL')
        assert(info != 'NOT_SUPPORTED')
        #self.CM.log('refresh: ' + info)
        # agent may return nothing; keep the previous snapshot in that case
        if info is None:
            return
        params = info.split(':')
        self.node_votes = int(params[0])
        self.expected_votes = int(params[1])
        self.highest_expected = int(params[2])
        self.total_votes = int(params[3])
        self.quorum = int(params[4])
        # quorate is queried separately from the quorum service
        self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate()
        assert(self.quorate != 'FAIL')
        assert(self.quorate != 'NOT_SUPPORTED')
        #self.CM.log('quorate: ' + str(self.quorate))
###################################################################
class VoteQuorumBase(CoroTest):
    """Common setup for votequorum tests: designate a listener node and
    reject configurations these tests cannot run under."""

    def setup(self, node):
        ret = CoroTest.setup(self, node)
        # listener = first node in the environment list
        self.listener = None
        for candidate in self.CM.Env["nodes"]:
            if self.listener is None:
                self.listener = candidate
        return ret

    def config_valid(self, config):
        # No redundant ring and no pre-set quorum provider allowed.
        if 'totem/rrp_mode' in config or 'quorum/provider' in config:
            return False
        return True
###################################################################
class VoteQuorumGoDown(VoteQuorumBase):
    '''Start with all nodes up, stop them one by one (keeping the
    listener), and confirm quorum is lost once votes < expected votes.'''

    def __init__(self, cm):
        VoteQuorumBase.__init__(self, cm)
        self.name = "VoteQuorumGoDown"
        self.victims = []
        self.expected = len(self.CM.Env["nodes"])
        self.config['quorum/provider'] = 'corosync_votequorum'
        self.config['quorum/expected_votes'] = self.expected
        #self.CM.log('set expected to %d' % (self.expected))

    def __call__(self, node):
        self.incr("calls")
        self.victims = []
        pats = []
        pats.append("%s .*VQ notification quorate: 0" % self.listener)
        pats.append("%s .*NQ notification quorate: 0" % self.listener)
        quorum = self.create_watch(pats, 30)
        quorum.setwatch()

        state = QuorumState(self.CM, self.listener)
        state.refresh()
        for n in self.CM.Env["nodes"]:
            # was "n is self.listener": compare node names by value
            if n == self.listener:
                continue

            self.victims.append(n)
            self.CM.StopaCM(n)

            #if not self.wait_for_quorum_change():
            #    return self.failure(self.error_message)
            nodes_alive = len(self.CM.Env["nodes"]) - len(self.victims)
            state.refresh()
            #self.expected = self.expected - 1
            if state.node_votes != 1:
                self.failure('unexpected number of node_votes')
            if state.expected_votes != self.expected:
                self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
                self.failure('unexpected number of expected_votes')
            if state.total_votes != nodes_alive:
                self.failure('unexpected number of total votes:%d, nodes_alive:%d' % (state.total_votes, nodes_alive))
            # quorum threshold = majority of (nodes + 2) votes;
            # renamed from "min", which shadowed the builtin
            expected_quorum = int((len(self.CM.Env["nodes"]) + 2) / 2)
            if expected_quorum != state.quorum:
                self.failure('we should have %d (not %d) as quorum' % (expected_quorum, state.quorum))
            if nodes_alive < state.quorum:
                if state.quorate == 1:
                    self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
            else:
                if state.quorate == 0:
                    self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))

        if not quorum.lookforall():
            self.CM.log("Patterns not found: " + repr(quorum.unmatched))
            return self.failure('quorm event not found')

        return self.success()
###################################################################
class VoteQuorumGoUp(VoteQuorumBase):
    '''Start with all nodes down, bring them up one by one, and confirm
    quorum is gained once votes >= expected votes.'''

    def __init__(self, cm):
        VoteQuorumBase.__init__(self, cm)
        self.name = "VoteQuorumGoUp"
        self.need_all_up = False
        self.expected = len(self.CM.Env["nodes"])
        self.config['quorum/provider'] = 'corosync_votequorum'
        self.config['quorum/expected_votes'] = self.expected
        #self.CM.log('set expected to %d' % (self.expected))

    def __call__(self, node):
        self.incr("calls")
        pats = []
        pats.append("%s .*VQ notification quorate: 1" % self.listener)
        pats.append("%s .*NQ notification quorate: 1" % self.listener)
        quorum = self.create_watch(pats, 30)
        quorum.setwatch()

        self.CM.StartaCM(self.listener)
        nodes_alive = 1
        state = QuorumState(self.CM, self.listener)
        state.refresh()
        for n in self.CM.Env["nodes"]:
            # was "n is self.listener": compare node names by value
            if n == self.listener:
                continue

            #if not self.wait_for_quorum_change():
            #    return self.failure(self.error_message)
            if state.node_votes != 1:
                self.failure('unexpected number of node_votes')
            if state.expected_votes != self.expected:
                self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
                self.failure('unexpected number of expected_votes')
            if state.total_votes != nodes_alive:
                self.failure('unexpected number of total votes')
            # With "from __future__ import division" the original plain "/"
            # produced a float, so the comparison against the integer
            # state.quorum could never match; use floor division (this also
            # mirrors the int() fix already applied in VoteQuorumGoDown).
            # Renamed from "min", which shadowed the builtin.
            expected_quorum = (len(self.CM.Env["nodes"]) + 2) // 2
            if expected_quorum != state.quorum:
                self.failure('we should have %d (not %d) as quorum' % (expected_quorum, state.quorum))
            if nodes_alive < state.quorum:
                if state.quorate == 1:
                    self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
            else:
                if state.quorate == 0:
                    self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))

            self.CM.StartaCM(n)
            nodes_alive = nodes_alive + 1
            state.refresh()

        if not quorum.lookforall():
            self.CM.log("Patterns not found: " + repr(quorum.unmatched))
            return self.failure('quorm event not found')

        return self.success()
###################################################################
class VoteQuorumWaitForAll(VoteQuorumBase):
    '''With wait_for_all enabled: start from all-down, bring nodes up one
    by one, and confirm quorum only arrives once every node has voted.'''

    def __init__(self, cm):
        VoteQuorumBase.__init__(self, cm)
        self.name = "VoteQuorumWaitForAll"
        self.need_all_up = False
        self.expected = len(self.CM.Env["nodes"])
        self.config['quorum/provider'] = 'corosync_votequorum'
        self.config['quorum/expected_votes'] = self.expected
        self.config['quorum/wait_for_all'] = '1'

    def __call__(self, node):
        self.incr("calls")
        pats = []
        pats.append("%s .*VQ notification quorate: 1" % self.listener)
        pats.append("%s .*NQ notification quorate: 1" % self.listener)
        quorum = self.create_watch(pats, 30)
        quorum.setwatch()

        # make absolutely sure all nodes are stopped
        for n in self.CM.Env["nodes"]:
            self.CM.StopaCM(n)

        # start the listener
        self.CM.StartaCM(self.listener)
        nodes_alive = 1
        state = QuorumState(self.CM, self.listener)
        state.refresh()

        for n in self.CM.Env["nodes"]:
            # was "n is self.listener": compare node names by value
            if n == self.listener:
                continue

            self.CM.StartaCM(n)
            nodes_alive = nodes_alive + 1
            state.refresh()

            if state.node_votes != 1:
                self.failure('unexpected number of node_votes')
            if state.expected_votes != self.expected:
                self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
                self.failure('unexpected number of expected_votes')
            if state.total_votes != nodes_alive:
                self.failure('unexpected number of total votes')

            # quorate must only flip to 1 once ALL nodes are up
            if nodes_alive < len(self.CM.Env["nodes"]):
                if state.quorate == 1:
                    self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate,
                        len(self.CM.Env["nodes"]), nodes_alive))
            else:
                if state.quorate == 0:
                    self.failure('we should have quorum(%d) %d <= %d' % (state.quorate,
                        len(self.CM.Env["nodes"]), nodes_alive))

        if not quorum.lookforall():
            self.CM.log("Patterns not found: " + repr(quorum.unmatched))
            return self.failure('quorm event not found')

        return self.success()
###################################################################
class VoteQuorumContextTest(CoroTest):
    """Verify the votequorum agent's context get/set round-trip works."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "VoteQuorumContextTest"
        self.expected = len(self.CM.Env["nodes"])
        self.config['quorum/provider'] = 'corosync_votequorum'
        self.config['quorum/expected_votes'] = self.expected

    def __call__(self, node):
        self.incr("calls")
        outcome = self.CM.votequorum_agent[node].context_test()
        if 'OK' not in outcome:
            return self.failure('context_test failed')
        return self.success()
###################################################################
class GenSimulStart(CoroTest):
    '''Start all the nodes ~ simultaneously'''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenSimulStart"
        self.need_all_up = False
        self.stopall = SimulStopLite(cm)
        self.startall = SimulStartLite(cm)

    def __call__(self, dummy):
        '''Perform the 'SimulStart' test.'''
        self.incr("calls")
        # The "node" argument is ignored: first take every node down...
        if not self.stopall(None):
            return self.failure("Setup failed")
        # (clear_all_caches was removed from the cluster manager, so no
        # cache reset happens between the stop and the start)
        # ...then bring them all back up at once.
        if not self.startall(None):
            return self.failure("Startall failed")
        return self.success()
###################################################################
class GenSimulStop(CoroTest):
    '''Stop all the nodes ~ simultaneously'''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenSimulStop"
        self.startall = SimulStartLite(cm)
        self.stopall = SimulStopLite(cm)
        self.need_all_up = True

    def __call__(self, dummy):
        '''Perform the 'GenSimulStop' test.'''
        self.incr("calls")
        # The "node" argument is ignored: ensure every node is up first...
        if not self.startall(None):
            return self.failure("Setup failed")
        # ...then take them all down at once.
        if not self.stopall(None):
            return self.failure("Stopall failed")
        return self.success()
class GenFlipTest(CoroTest):
    """Run the generic FlipTest under the corosync config harness."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenFlipTest"
        self.test = FlipTest(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped FlipTest."""
        self.incr("calls")
        return self.test(dummy)
class GenRestartTest(CoroTest):
    """Run the generic RestartTest under the corosync config harness."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenRestartTest"
        self.test = RestartTest(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped RestartTest."""
        self.incr("calls")
        return self.test(dummy)
class GenStartOnebyOne(CoroTest):
    """Run a one-by-one start test under the corosync config harness.

    NOTE(review): despite the "Start" name this wraps RestartOnebyOne,
    exactly like GenRestartOnebyOne — confirm that is intentional.
    """

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenStartOnebyOne"
        self.test = RestartOnebyOne(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped test."""
        self.incr("calls")
        return self.test(dummy)
class GenStopOnebyOne(CoroTest):
    """Run the generic StopOnebyOne test under the corosync config harness."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenStopOnebyOne"
        self.test = StopOnebyOne(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped StopOnebyOne."""
        self.incr("calls")
        return self.test(dummy)
class GenRestartOnebyOne(CoroTest):
    """Run the generic RestartOnebyOne test under the corosync config harness."""

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenRestartOnebyOne"
        self.test = RestartOnebyOne(cm)

    def __call__(self, dummy):
        """Delegate the actual work to the wrapped RestartOnebyOne."""
        self.incr("calls")
        return self.test(dummy)
###################################################################
class GenStopAllBeekhof(CoroTest):
    '''Stop all the nodes ~ simultaneously via cfg_shutdown while cpg
    traffic is in flight, then wait (up to 15 min) for every node to
    actually go down.'''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "GenStopAllBeekhof"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'CFG'
        self.config['logging/logger_subsys[2]/debug'] = 'on'

    def __call__(self, node):
        '''Perform the 'GenStopAllBeekhof' test.'''
        self.incr("calls")
        stopping = int(time.time())

        # generate load, then ask every node to shut itself down
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].pcmk_test()
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].msg_blaster(1000)
        for n in self.CM.Env["nodes"]:
            self.CM.cpg_agent[n].cfg_shutdown()
            self.CM.ShouldBeStatus[n] = "down"

        waited = 0
        max_wait = 60 * 15
        still_up = list(self.CM.Env["nodes"])

        while len(still_up) > 0:
            waited = int(time.time()) - stopping
            self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited))
            if waited > max_wait:
                break
            time.sleep(3)
            for v in self.CM.Env["nodes"]:
                if v in still_up:
                    # was ShouldBeStatus[n]: "n" was the stale loop variable
                    # from the shutdown loop above, so only the last node's
                    # status was (re)written; mark the node being checked
                    self.CM.ShouldBeStatus[v] = "down"
                    if not self.CM.StataCM(v):
                        still_up.remove(v)

        waited = int(time.time()) - stopping
        if waited > max_wait:
            return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up)))

        self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited))
        return self.success()
###################################################################
class NoWDConfig(CoroTest):
    '''Assertion: no config == no watchdog
    Setup: no config, kmod inserted
    1] make sure watchdog is not enabled
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "NoWDConfig"
        self.need_all_up = False

    def config_valid(self, config):
        # Only valid when no watchdog resources are configured at all.
        return 'resources' not in config

    def __call__(self, node):
        '''Restart corosync and expect the "no resources" log message.'''
        self.incr("calls")
        self.CM.StopaCM(node)
        watch = self.create_watch(["%s .*no resources configured." % node], 60)
        watch.setwatch()
        self.CM.StartaCM(node)
        if watch.lookforall():
            return self.success()
        return self.failure("Patterns not found: " + repr(watch.unmatched))
###################################################################
class WDConfigNoWd(CoroTest):
    '''Assertion: watchdog config but no watchdog kmod will emit a log
    Setup: config watchdog, but no kmod
    1] look in the log for warning that there is no kmod
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "WDConfigNoWd"
        self.need_all_up = False

    def __call__(self, node):
        '''Remove the softdog module, restart, and expect the warning.'''
        self.incr("calls")
        self.CM.StopaCM(node)
        self.CM.rsh(node, 'rmmod softdog')
        watch = self.create_watch(["%s .*No Watchdog, try modprobe.*" % node], 60)
        watch.setwatch()
        self.CM.StartaCM(node)
        if watch.lookforall():
            return self.success()
        return self.failure("Patterns not found: " + repr(watch.unmatched))
###################################################################
class NoWDOnCorosyncStop(CoroTest):
    '''Configure WD then /etc/init.d/corosync stop
    must stay up for > 60 secs
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "NoWDOnCorosyncStop"
        self.need_all_up = False

    def __call__(self, node):
        '''Stop corosync cleanly; seeing the "unexpected close" message
        means the watchdog was NOT shut down properly — that is a failure.'''
        self.incr("calls")
        self.CM.StopaCM(node)
        self.CM.rsh(node, 'modprobe softdog')
        self.CM.StartaCM(node)
        watch = self.create_watch(
            ["%s .*Unexpected close, not stopping watchdog.*" % node], 60)
        watch.setwatch()
        self.CM.StopaCM(node)
        if watch.lookforall():
            return self.failure("Should have closed the WD better: " + repr(watch.matched))
        return self.success()
###################################################################
class WDOnForkBomb(CoroTest):
    '''Configure memory resource
    run memory leaker / forkbomb
    confirm watchdog action
    '''
    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "WDOnForkBomb"
        self.need_all_up = False
        # watchdog subsystem debug logging plus a memory resource whose
        # recovery action is "watchdog" — exceeding 80% memory for a poll
        # period should trigger a watchdog reboot
        self.config['logging/logger_subsys[2]/subsys'] = 'WD'
        self.config['logging/logger_subsys[2]/debug'] = 'on'
        self.config['resources/system/memory_used/recovery'] = 'watchdog'
        self.config['resources/system/memory_used/max'] = '80'
        self.config['resources/system/memory_used/poll_period'] = '800'

    def __call__(self, node):
        '''Perform the test. '''
        self.incr("calls")
        # get the uptime (seconds since boot, before the fork bomb)
        up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
        self.CM.StopaCM(node)
        self.CM.rsh(node, 'modprobe softdog')
        self.CM.StartaCM(node)
        # launch a shell fork bomb in the background; memory exhaustion
        # should make the watchdog reboot the node
        self.CM.rsh(node, ':(){ :|:& };:', synchronous=0)
        self.CM.log("wait for it to watchdog")
        time.sleep(60 * 5)

        # wait until the node answers pings again after the reboot
        ping_able = False
        while not ping_able:
            if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
                ping_able = True
                self.CM.log("can ping 10 in 10secs.")
            else:
                self.CM.log("not yet responding to pings.")
        self.CM.ShouldBeStatus[node] = "down"
        # wait for the node to come back up
        self.CM.log("waiting for node to come back up.")
        if self.CM.ns.WaitForNodeToComeUp(node):
            # uptime after < uptime before proves the node rebooted
            up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
            if int(up_after) < int(up_before):
                return self.success()
            else:
                return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
        else:
            return self.failure("node didn't seem to come back up")
###################################################################
class SamWdIntegration1(CoroTest):
    '''Set up SAM healthchecking, kill the test agent, and confirm the
    watchdog reports every monitored pid as a failed resource.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamWdIntegration1"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'WD'
        self.config['logging/logger_subsys[2]/debug'] = 'on'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        self.CM.sam_agent[node].setup_hc()
        agent_pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
        # expect one "resource failed" log message per monitored pid
        patterns = ['%s .*resource "%s" failed!' % (node, pid) for pid in agent_pids]
        watch = self.create_watch(patterns, 60)
        watch.setwatch()
        self.CM.sam_agent[node].kill()
        if not watch.look():
            return self.failure("Patterns not found: " + repr(watch.regexes))
        return self.success()
###################################################################
class SamWdIntegration2(CoroTest):
    '''Set up SAM healthchecking, call sam_stop(), and confirm each
    resource transitions to "stopped" with no watchdog action.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "SamWdIntegration2"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'WD'
        self.config['logging/logger_subsys[2]/debug'] = 'on'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        self.CM.sam_agent[node].setup_hc()
        agent_pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
        # per-pid patterns: failures must NOT appear, stop transitions must
        forbidden = ['%s .*resource "%s" failed!' % (node, pid)
                     for pid in agent_pids]
        expected = ['%s .*Fsm:%s event "config_changed", state "running" --> "stopped"' % (node, pid)
                    for pid in agent_pids]
        expected_w = self.create_watch(expected, 10)
        forbidden_w = self.create_watch(forbidden, 10)
        expected_w.setwatch()
        forbidden_w.setwatch()
        time.sleep(2)
        self.CM.sam_agent[node].sam_stop()
        seen_expected = expected_w.look()
        seen_forbidden = forbidden_w.look()
        if seen_forbidden:
            return self.failure("Patterns found: " + repr(seen_forbidden))
        if not seen_expected:
            return self.failure("Patterns NOT found: " + repr(expected_w.regexes))
        return self.success()
###################################################################
class WdDeleteResource(CoroTest):
    '''Configure a monitored resource, start corosync, delete the
    resource object from cmap, and confirm it is reported as deleted
    without any watchdog action.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "WdDeleteResource"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'MON'
        self.config['logging/logger_subsys[2]/debug'] = 'on'
        self.config['logging/logger_subsys[3]/subsys'] = 'WD'
        self.config['logging/logger_subsys[3]/debug'] = 'on'
        self.config['resources/system/memory_used/recovery'] = 'watchdog'
        self.config['resources/system/memory_used/max'] = '80'
        self.config['resources/system/memory_used/poll_period'] = '800'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        # the failure message must NOT appear; the deletion message must
        forbidden = ['%s .*resource "memory_used" failed!' % node]
        expected = ['%s .*resource "memory_used" deleted from cmap!' % node]
        expected_w = self.create_watch(expected, 10)
        forbidden_w = self.create_watch(forbidden, 10)
        expected_w.setwatch()
        forbidden_w.setwatch()
        time.sleep(2)
        self.CM.rsh(node, 'corosync-cmapctl -D resources.system.memory_used')
        seen_expected = expected_w.look()
        seen_forbidden = forbidden_w.look()
        if seen_forbidden:
            return self.failure("Patterns found: " + repr(seen_forbidden))
        if not seen_expected:
            return self.failure("Patterns NOT found: " + repr(expected_w.regexes))
        return self.success()
###################################################################
class ResourcePollAdjust(CoroTest):
    '''Configure a monitored resource, start corosync, repeatedly change
    its poll_period at random, and confirm neither a resource failure
    nor a poll_period error is logged.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "ResourcePollAdjust"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'MON'
        self.config['logging/logger_subsys[2]/debug'] = 'on'
        self.config['logging/logger_subsys[3]/subsys'] = 'WD'
        self.config['logging/logger_subsys[3]/debug'] = 'on'
        self.config['resources/system/memory_used/recovery'] = 'none'
        self.config['resources/system/memory_used/max'] = '80'
        self.config['resources/system/memory_used/poll_period'] = '800'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        bad_pats = []
        bad_pats.append('%s .*resource "memory_used" failed!' % node)
        bad_pats.append('%s .*Could NOT use poll_period.*' % node)
        watch = self.create_watch(bad_pats, 10)
        watch.setwatch()
        for _ in range(50):
            # pick a random period in [500, 5000) ms
            poll_period = int(random.random() * 5000)
            if poll_period < 500:
                poll_period = 500
            self.CM.log("setting poll_period to: %d" % poll_period)
            self.CM.rsh(node, 'corosync-cmapctl -s resources.system.memory_used.poll_period str %d' % poll_period)
            # sleep two poll periods, at least one second
            sleep_time = poll_period * 2 / 1000
            if sleep_time < 1:
                sleep_time = 1
            time.sleep(sleep_time)
        matched = watch.look()
        if matched:
            return self.failure("Patterns found: " + repr(matched))
        return self.success()
###################################################################
class RebootOnHighMem(CoroTest):
    '''Configure a memory resource with reboot recovery, drive memory
    usage over the limit with memhog, and confirm the node reboots —
    detected by its uptime resetting after it comes back.
    '''

    def __init__(self, cm):
        CoroTest.__init__(self, cm)
        self.name = "RebootOnHighMem"
        self.need_all_up = True
        self.config['logging/logger_subsys[2]/subsys'] = 'WD'
        self.config['logging/logger_subsys[2]/debug'] = 'on'
        self.config['resources/system/memory_used/recovery'] = 'reboot'
        self.config['resources/system/memory_used/max'] = '80'
        self.config['resources/system/memory_used/poll_period'] = '800'

    def __call__(self, node):
        '''Perform the test.'''
        self.incr("calls")
        # record uptime before the forced reboot
        up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
        # lower the configured max to just above current usage so that
        # memhog trips the threshold quickly
        cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
        mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
        mem_new_max = int(mem_current_str) + 5
        self.CM.log("current mem usage: %s, new max:%d" % (mem_current_str, mem_new_max))
        cmd = 'corosync-cmapctl -s resources.system.memory_used.max str ' + str(mem_new_max)
        self.CM.rsh(node, cmd)
        # hog memory in the background and wait for the reboot
        self.CM.rsh(node, 'memhog -r10000 200m', synchronous=0)
        self.CM.log("wait for it to reboot")
        time.sleep(60 * 3)
        cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
        mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
        self.CM.log("current mem usage: %s" % (mem_current_str))
        # poll until the node answers pings again
        while True:
            if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
                self.CM.log("can ping 10 in 10secs.")
                break
            self.CM.log("not yet responding to pings.")
        self.CM.ShouldBeStatus[node] = "down"
        # wait for the node to come back up
        self.CM.log("waiting for node to come back up.")
        if not self.CM.ns.WaitForNodeToComeUp(node):
            return self.failure("node didn't seem to come back up")
        up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
        if int(up_after) < int(up_before):
            return self.success()
        return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" % (up_before, up_after))
# Tests instantiated once per generated configuration.
GenTestClasses = [
    GenSimulStart,
    GenSimulStop,
    GenFlipTest,
    GenRestartTest,
    GenStartOnebyOne,
    GenStopOnebyOne,
    GenRestartOnebyOne,
    GenStopAllBeekhof,
    CpgMsgOrderBasic,
    CpgMsgOrderZcb,
    CpgCfgChgOnExecCrash,
    CpgCfgChgOnGroupLeave,
    CpgCfgChgOnNodeLeave,
    CpgCfgChgOnNodeIsolate,
    #CpgCfgChgOnNodeRestart,
    # quorum tests
    VoteQuorumGoDown,
    VoteQuorumGoUp,
    VoteQuorumWaitForAll,
    # FIXME need log messages in sync
    #CpgCfgChgOnLowestNodeJoin,
]

# Tests instantiated once against the default configuration.
AllTestClasses = [
    CpgContextTest,
    SamTest1,
    SamTest2,
    SamTest4,
    SamTest5,
    SamTest6,
    SamTest8,
    SamTest9,
    SamWdIntegration1,
    SamWdIntegration2,
    NoWDConfig,
    WDConfigNoWd,
    NoWDOnCorosyncStop,
    #WDOnForkBomb,
    WdDeleteResource,
    #RebootOnHighMem,
    ResourcePollAdjust,
    MemLeakObject,
    MemLeakSession,
    #CMapDispatchDeadlock,
    # quorum tests
    VoteQuorumContextTest,
]
class ConfigContainer(UserDict):
    '''A named dictionary of corosync configuration overrides.'''

    def __init__(self, name):
        # remember this config set's name, then initialise the dict part
        self.name = name
        UserDict.__init__(self)
def CoroTestList(cm, audits):
    '''Build the list of bound test instances to run against this cluster.

    Every class in AllTestClasses is instantiated once; every class in
    GenTestClasses is instantiated once per generated configuration
    (the config's name is appended to the test name, and the config's
    key/value overrides are copied into the test).  Only applicable
    tests — and, for generated ones, only valid test/config
    combinations — are included.  Each bound test receives *audits*.

    Returns the combined list of bound test objects.
    '''
    result = []
    configs = []

    # One-off tests run against the default configuration only.
    for testclass in AllTestClasses:
        bound_test = testclass(cm)
        if bound_test.is_applicable():
            bound_test.Audits = audits
            result.append(bound_test)

    # Baseline logging/uidgid settings.
    default = ConfigContainer('default')
    default['logging/fileline'] = 'on'
    default['logging/function_name'] = 'off'
    default['logging/logfile_priority'] = 'info'
    default['logging/syslog_priority'] = 'info'
    default['logging/syslog_facility'] = 'daemon'
    default['uidgid/uid'] = '0'
    default['uidgid/gid'] = '0'
    configs.append(default)

    # Very long (5 minute) token timeout, consensus just above 1.2x token.
    a = ConfigContainer('none_5min')
    a['totem/token'] = (5 * 60 * 1000)
    a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1
    configs.append(a)

    # Pacemaker-style totem timings.
    b = ConfigContainer('pcmk_basic')
    b['totem/token'] = 5000
    b['totem/token_retransmits_before_loss_const'] = 10
    b['totem/join'] = 1000
    b['totem/consensus'] = 7500
    configs.append(b)

    # Pacemaker timings with NSS encryption enabled.
    c = ConfigContainer('pcmk_sec_nss')
    c['totem/secauth'] = 'on'
    c['totem/crypto_type'] = 'nss'
    c['totem/token'] = 5000
    c['totem/token_retransmits_before_loss_const'] = 10
    c['totem/join'] = 1000
    c['totem/consensus'] = 7500
    configs.append(c)

    # NSS encryption with default timings.
    d = ConfigContainer('sec_nss')
    d['totem/secauth'] = 'on'
    d['totem/crypto_type'] = 'nss'
    configs.append(d)

    # Redundant-ring configs need a second bind address from the user.
    if not cm.Env["RrpBindAddr"] is None:
        g = ConfigContainer('rrp_passive')
        g['totem/rrp_mode'] = 'passive'
        g['totem/interface[2]/ringnumber'] = '1'
        g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
        g['totem/interface[2]/mcastaddr'] = '226.94.1.2'
        g['totem/interface[2]/mcastport'] = '5405'
        configs.append(g)

        h = ConfigContainer('rrp_active')
        h['totem/rrp_mode'] = 'active'
        h['totem/interface[2]/ringnumber'] = '1'
        h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
        h['totem/interface[2]/mcastaddr'] = '226.94.1.2'
        h['totem/interface[2]/mcastport'] = '5405'
        configs.append(h)
    else:
        print('Not including rrp tests. Use --rrp-binaddr to enable them.')

    # Generated tests: one instance per valid (test, config) pair.
    for cfg in configs:
        for testclass in GenTestClasses:
            bound_test = testclass(cm)
            if bound_test.is_applicable() and bound_test.config_valid(cfg):
                bound_test.Audits = audits
                for key in list(cfg.keys()):
                    bound_test.config[key] = cfg[key]
                bound_test.name = bound_test.name + '_' + cfg.name
                result.append(bound_test)
    return result

File Metadata

Mime Type
text/x-diff
Expires
Wed, Jun 25, 3:02 AM (15 h, 20 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1951940
Default Alt Text
(94 KB)

Event Timeline