
diff --git a/cts/CM_LinuxHAv2.py.in b/cts/CM_LinuxHAv2.py.in
index 852d4d2134..483441f850 100755
--- a/cts/CM_LinuxHAv2.py.in
+++ b/cts/CM_LinuxHAv2.py.in
@@ -1,545 +1,603 @@
#!@PYTHON@
'''CTS: Cluster Testing System: LinuxHA v2 dependent modules...
'''
__copyright__='''
Author: Huang Zhen <zhenhltc@cn.ibm.com>
Copyright (C) 2004 International Business Machines
Additional Audits, Revised Start action, Default Configuration:
Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import os,sys,CTS,CTSaudits,CTStests
from CTS import *
from CM_hb import HeartbeatCM
from xml.dom.minidom import *
+# make the ExpatError reference in StandbyStatus() explicit
+import xml.parsers.expat
from CTSaudits import ClusterAudit
from CTStests import *
#######################################################################
#
# LinuxHA v2 dependent modules
#
#######################################################################
class LinuxHAv2(HeartbeatCM):
'''
The linux-ha version 2 cluster manager class.
It implements the things we need to talk to and manipulate
linux-ha version 2 clusters
'''
def __init__(self, Environment, randseed=None):
HeartbeatCM.__init__(self, Environment, randseed=randseed)
self.update({
"Name" : "linux-ha-v2",
"DeadTime" : 300,
"StartTime" : 300, # Max time to start up
"StableTime" : 30,
"StartCmd" : "@libdir@/heartbeat/ha_logd -d >/dev/null 2>&1; @libdir@/heartbeat/heartbeat >/dev/null 2>&1",
"StopCmd" : "@libdir@/heartbeat/heartbeat -k",
"ElectionCmd" : "@libdir@/heartbeat/crmadmin -E %s",
"StatusCmd" : "@libdir@/heartbeat/crmadmin -S %s 2>/dev/null",
"EpocheCmd" : "@libdir@/heartbeat/ccm_tool -e",
"QuorumCmd" : "@libdir@/heartbeat/ccm_tool -q",
"ParitionCmd" : "@libdir@/heartbeat/ccm_tool -p",
"IsRscRunning" : "@libdir@/heartbeat/lrmadmin -E %s monitor 0 0 EVERYTIME 2>/dev/null|grep return",
"ExecuteRscOp" : "@libdir@/heartbeat/lrmadmin -E %s %s 0 0 EVERYTIME 2>/dev/null",
"CIBfile" : "%s:@HA_VARLIBDIR@/heartbeat/crm/cib.xml",
"TmpDir" : "/tmp",
"BreakCommCmd2" : "/usr/lib/heartbeat/TestHeartbeatComm break-communication %s>/dev/null 2>&1",
"IsIPAddrRscRunning" : "",
+ "StandbyOnCmd" : "@libdir@/heartbeat/crmadmin -s %s",
+ "StandbyOffCmd" : "@libdir@/heartbeat/crmadmin -a %s",
+ "UUIDQueryCmd" : "@libdir@/heartbeat/crmadmin -N",
+ "CIBQueryCmd" : "@libdir@/heartbeat/cibadmin -Ql -h %s",
+
# Patterns to look for in the log files for various occasions...
"Pat:DC_IDLE" : "crmd:.*State transition.*-> S_IDLE",
# This won't work if we have multiple partitions
# Use: "Pat:They_started" : "%s crmd:.*State transition.*-> S_NOT_DC",
"Pat:They_started" : "Updating node state to member for %s",
"Pat:We_started" : "%s crmd:.*State transition.*-> S_IDLE",
"Pat:We_stopped" : "%s heartbeat.*Heartbeat shutdown complete",
"Pat:They_stopped" : "%s crmd:.*LOST:.* %s ",
"Pat:All_stopped" : "%s .*heartbeat.*Heartbeat shutdown complete",
"Pat:They_dead" : "node %s.*: is dead",
"Pat:TransitionComplete" : "Transition status: Complete: complete",
# Bad news Regexes. Should never occur.
"BadRegexes" : (
r"Shutting down\.",
r"Forcing shutdown\.",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r", exiting\.",
r"WARN.*Ignoring HA message.*vote.*not in our membership list",
r"pengine:.*Attempting recovery of resource",
r"pengine:.*Handling failed ",
r"tengine:.*is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"ERROR:",
r"CRIT:",
),
})
del self["Standby"]
self.check_transitions = 0
self.check_elections = 0
self.CIBsync = {}
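# The XML fragments below are stitched together into default_cts_cib,
# the configuration installed when ClobberCIB is enabled and no
# CIBfilename is supplied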
cib_prefix='''
<cib cib_feature_revision="1" num_updates="1" have_quorum="false" epoche="1">
<configuration>
<crm_config>'''
cib_options='''
<nvpair id="transition_idle_timeout" name="transition_idle_timeout" value="120s"/>
<nvpair id="require_quorum" name="require_quorum" value="true"/>
<nvpair id="symmetric_cluster" name="symetric_cluster" value="true"/>
<nvpair id="suppress_cib_writes" name="suppress_cib_writes" value="true"/>
<nvpair id="no_quorum_policy" name="no_quorum_policy" value="stop"/>'''
cib_glue_1='''
</crm_config>
<nodes/>
<resources>'''
cib_glue_2='''
</resources>
<constraints>'''
cib_suffix='''
</constraints>
</configuration>
<status/>
</cib>
'''
resources=''' '''
constraints=''' '''
cib_fencing = ""
if self.Env["CIBResource"] == 1:
self.log("Enabling DC resource")
resources='''
<resource id="DcIPaddr" class="ocf" type="IPaddr" provider="heartbeat" is_managed="1">
<operations>
<op id="1" name="monitor" interval="5s" timeout="20s"/>
</operations>
<instance_attributes>
<attributes>
<nvpair id="1" name="ip" value="%s"/>
</attributes>
</instance_attributes>
</resource>''' % self.Env["IPBase"]
# DcIPaddr can't run anywhere but the DC
constraints='''
<rsc_location id="run_DcIPaddr" rsc="DcIPaddr">
<rule id="cant_run_DcIPaddr" score="-INFINITY" boolean_op="and">
<expression attribute="#is_dc" operation="eq" value="false"/>
</rule>
</rsc_location>'''
fields = string.split(self.Env["IPBase"], '.')
for node in self.Env["nodes"]:
# These resources prefer to run on the node with the same name
fields[3] = str(int(fields[3])+1)
ip = string.join(fields, '.')
node_resource=("""
<resource id="%s" class="ocf" type="IPaddr" provider="heartbeat" is_managed="1">
<operations>
<op id="1" name="monitor" interval="5s" timeout="20s"/>
</operations>
<instance_attributes>
<attributes>
<nvpair id="1" name="ip" value="%s"/>
</attributes>
</instance_attributes>
</resource>""" %("rsc_"+node, ip))
resources = resources + node_resource
node_constraint=("""
<rsc_location id="run_%s" rsc="%s">
<rule id="pref_run_%s" score="100" boolean_op="and">
<expression attribute="#uname" operation="eq" value="%s"/>
</rule>
</rsc_location>""" % ("rsc_"+node, "rsc_"+node, "rsc_"+node, node))
constraints = constraints + node_constraint
# always add the fencing resource so that we test incarnations
nodelist = ""
for node in self.Env["nodes"]:
nodelist += node + " "
stonith_resource=("""
<incarnation id="DoFencing">
<instance_attributes>
<attributes>
<nvpair id="1" name="incarnation_max" value="%d"/>
<nvpair id="2" name="incarnation_node_max" value="1"/>
</attributes>
</instance_attributes>
<resource id="child_DoFencing" class="stonith" type="ssh">
<operations>
<op id="1" name="monitor" interval="5s" timeout="20s"/>
</operations>
<instance_attributes>
<attributes>
<nvpair id="1" name="hostlist" value="%s"/>
</attributes>
</instance_attributes>
</resource>
</incarnation>""" %(len(self.Env["nodes"]), nodelist))
resources = resources + stonith_resource
if self.Env["DoFencing"] == 1:
cib_options=cib_options + '''
<nvpair id="stonith_enabled" name="stonith_enabled" value="true"/>'''
self.default_cts_cib=cib_prefix + cib_options + cib_glue_1 + \
resources + cib_glue_2 + constraints + cib_suffix
self.debug(self.default_cts_cib)
def errorstoignore(self):
# At some point implement a more elegant solution that
# also produces a report at the end
'''Return the list of known, very noisy errors that should be ignored'''
if 1:
return [
"crmadmin:"
]
return []
def install_config(self, node):
if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1:
self.CIBsync[node] = 1
if self.Env["CIBfilename"] == None:
self.debug("Installing Generated CIB on node %s" %(node))
os.system("rm -f /tmp/cts.default.cib")
os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib")
if 0!=self.rsh.cp("/tmp/cts.default.cib",
"root@" + (self["CIBfile"]%node)):
raise ValueError("Can not scp file to %s "%node)
os.system("rm -f /tmp/cts.default.cib")
else:
self.debug("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node))
if 0!=self.rsh.cp(self.Env["CIBfilename"],
"root@" + (self["CIBfile"]%node)):
raise ValueError("Can not scp file to %s "%node)
def prepare(self):
'''Finish the Initialization process. Prepare to test...'''
for node in self.Env["nodes"]:
self.ShouldBeStatus[node] = ""
self.StataCM(node)
def test_node_CM(self, node):
'''Report the status of the cluster manager on a given node'''
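# Returns 0 if the CM is down, 1 if it is up but not yet stable,
# and 2 if it is up and stable (S_NOT_DC or S_IDLE)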
watchpats = [ ]
watchpats.append("Current state: (S_IDLE|S_NOT_DC)")
watchpats.append(self["Pat:They_started"]%node)
idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats)
idle_watch.setwatch()
out=self.rsh.readaline(node, self["StatusCmd"]%node)
ret= (string.find(out, 'ok') != -1)
self.debug("Node %s status: %s" %(node, out))
if not ret:
if self.ShouldBeStatus[node] == self["up"]:
self.log(
"Node status for %s is %s but we think it should be %s"
%(node, self["down"], self.ShouldBeStatus[node]))
self.ShouldBeStatus[node]=self["down"]
return 0
if self.ShouldBeStatus[node] == self["down"]:
self.log(
"Node status for %s is %s but we think it should be %s: %s"
%(node, self["up"], self.ShouldBeStatus[node], out))
self.ShouldBeStatus[node]=self["up"]
# check the output first - because syslog-ng loses messages
if string.find(out, 'S_NOT_DC') != -1:
# Up and stable
return 2
if string.find(out, 'S_IDLE') != -1:
# Up and stable
return 2
# fall back to syslog-ng and wait
if not idle_watch.look():
# just up
self.debug("Warn: Node %s is unstable: %s" %(node, out))
return 1
# Up and stable
return 2
# Is the node up or down?
def StataCM(self, node):
'''Report the status of the cluster manager on a given node'''
if self.test_node_CM(node) > 0:
return 1
return None
# Being up and being stable is not the same question...
def node_stable(self, node):
'''Report the status of the cluster manager on a given node'''
if self.test_node_CM(node) == 2:
return 1
self.log("Warn: Node %s not stable" %(node))
return None
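# The whole cluster is considered stable once the DC reaches S_IDLE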
def cluster_stable(self, timeout=None):
watchpats = [ ]
watchpats.append("Current state: S_IDLE")
watchpats.append(self["Pat:DC_IDLE"])
if timeout == None:
timeout = self["DeadTime"]
idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats, timeout)
idle_watch.setwatch()
any_up = 0
for node in self.Env["nodes"]:
# have each node dump its current state
if self.ShouldBeStatus[node] == self["up"]:
self.rsh.readaline(node, (self["StatusCmd"] %node) )
any_up = 1
if any_up == 0 or idle_watch.look():
return 1
self.log("Warn: Cluster Master not IDLE")
return None
def is_node_dc(self, node, status_line=None):
rc = 0
if not status_line:
status_line = self.rsh.readaline(node, self["StatusCmd"]%node)
if not status_line:
rc = 0
elif string.find(status_line, 'S_IDLE') != -1:
rc = 1
elif string.find(status_line, 'S_INTEGRATION') != -1:
rc = 1
elif string.find(status_line, 'S_FINALIZE_JOIN') != -1:
rc = 1
elif string.find(status_line, 'S_POLICY_ENGINE') != -1:
rc = 1
elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1:
rc = 1
if rc == 1:
self.debug("%s _is_ the DC" % node)
return rc
def isolate_node(self, node, allowlist):
'''isolate the communication between the nodes'''
rc = self.rsh(node, self["BreakCommCmd2"]%allowlist)
if rc == 0:
return 1
else:
self.log("Could not break the communication from node: %s",node)
return None
def Configuration(self):
if self.Env["ClobberCIB"] == 1:
if self.Env["CIBfilename"] == None:
os.system("rm -f /tmp/cts.default.cib")
os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib")
cib=parse("/tmp/cts.default.cib")
# os.system("rm -f /tmp/cts.default.cib")
else:
cib=parse(self.Env["CIBfilename"])
else:
local_cib = "%s/cts_cib_%s.xml"%(self["TmpDir"],str(os.getpid()))
if 0!=self.rsh.cp("root@"+self["CIBfile"]%self.Env["nodes"][0],local_cib):
raise ValueError("Can not copy file to %s, maybe permission denied"%self["TmpDir"])
cib=parse(local_cib)
os.remove(local_cib)
return cib.getElementsByTagName('configuration')[0]
def Resources(self):
ResourceList = []
# Read the resources defined in the CIB
configuration = self.Configuration()
resources = configuration.getElementsByTagName('resources')[0]
rscs = configuration.getElementsByTagName('resource')
for rsc in rscs:
if rsc in resources.childNodes:
ResourceList.append(HAResource(self,rsc))
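# Expand each incarnation (cloned resource) into incarnation_max
# individual HAResource instances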
incs = configuration.getElementsByTagName('incarnation')
for inc in incs:
max = 0
inc_name = inc.getAttribute("id")
instance_attributes = inc.getElementsByTagName('instance_attributes')[0]
attributes = instance_attributes.getElementsByTagName('attributes')[0]
nvpairs = attributes.getElementsByTagName('nvpair')
for nvpair in nvpairs:
if nvpair.getAttribute("name") == "incarnation_max":
max = int(nvpair.getAttribute("value"))
inc_rsc = inc.getElementsByTagName('resource')[0]
for i in range(0,max):
rsc = HAResource(self,inc_rsc)
rsc.inc_no = i
rsc.inc_name = inc_name
rsc.inc_max = max
rsc.rid = inc_name+":"+rsc.rid + ":%d"%i
rsc.Instance = rsc.rid
ResourceList.append(rsc)
return ResourceList
def Dependancies(self):
DependancyList = []
# Read the dependencies (rsc_to_rsc constraints) defined in the CIB
configuration=self.Configuration()
constraints=configuration.getElementsByTagName('constraints')[0]
rsc_to_rscs=configuration.getElementsByTagName('rsc_to_rsc')
for node in rsc_to_rscs:
dependancy = {}
dependancy["id"]=node.getAttribute('id')
dependancy["from"]=node.getAttribute('from')
dependancy["to"]=node.getAttribute('to')
dependancy["type"]=node.getAttribute('type')
dependancy["strength"]=node.getAttribute('strength')
DependancyList.append(dependancy)
return DependancyList
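# Collect the distinct CCM membership partitions reported by the nodes
# that are up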
def find_partitions(self):
ccm_partitions = []
for node in self.Env["nodes"]:
if self.ShouldBeStatus[node] == self["up"]:
partition = self.rsh.readaline(node, self["ParitionCmd"])
if not partition:
self.log("no partition details for %s" %node)
elif len(partition) > 2:
partition = partition[:-1]
# record each distinct partition string only once
if partition not in ccm_partitions:
ccm_partitions.append(partition)
else:
self.log("bad partition details for %s" %node)
return ccm_partitions
def HasQuorum(self, node_list):
# If we are auditing a partition, then one side will
# have quorum and the other not.
# So the caller needs to tell us which we are checking
# If no value for node_list is specified... assume all nodes
if not node_list:
node_list = self.Env["nodes"]
for node in node_list:
if self.ShouldBeStatus[node] == self["up"]:
quorum = self.rsh.readaline(node, self["QuorumCmd"])
return string.find(quorum,"1") != -1
return 0
def Components(self):
complist = [Process("lrmd",self),Process("crmd",self)]
if self.Env["DoFencing"] == 1 :
complist.append(Process("stonithd",self))
complist.append(Process("heartbeat",self))
return complist
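+
+ # Map a node's uname to its cluster UUID by parsing "crmadmin -N" output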
+ def NodeUUID(self, node):
+ lines = self.rsh.readlines(node, self["UUIDQueryCmd"])
+ for line in lines:
+ m = re.search(r'%s.+\((.+)\)' % node, line)
+ if m:
+ return m.group(1)
+ return ""
+
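+ # Report whether a node is in standby by reading its CIB and looking for
+ # a "standby" nvpair set to "on" under the matching <node> entry;
+ # returns "" if the CIB could not be read or parsed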
+ def StandbyStatus(self, node):
+ check_cib_cmd = self["CIBQueryCmd"] % node
+
+ lines = self.rsh.readlines(node, check_cib_cmd)
+ if not lines:
+ return ""
+
+ cib_data = "".join(lines)
+ try:
+ cib = parseString(cib_data)
+ except xml.parsers.expat.ExpatError:
+ return ""
+
+ standby_status = "off"
+ nodes = cib.getElementsByTagName('node')
+
+ for ha_node in nodes:
+ if ha_node.getAttribute("uname") == node:
+ nvpairs = ha_node.getElementsByTagName('nvpair')
+ for nvpair in nvpairs:
+ if nvpair.getAttribute('name') == 'standby':
+ if nvpair.getAttribute('value') == 'on':
+ standby_status = "on"
+ break
+
+ return standby_status
+
+ # status == "on" : Enter Standby mode
+ # status == "off": Enter Active mode
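+ # The change is requested with crmadmin using the node's UUID; the exit
+ # code is not checked, so callers should re-query StandbyStatus() if they
+ # need confirmation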
+ def SetStandbyMode(self, node, status):
+ current_status = self.StandbyStatus(node)
+ if current_status == status:
+ return True
+
+ if status == "on":
+ cmd = self["StandbyOnCmd"] % self.NodeUUID(node)
+ elif status == "off":
+ cmd = self["StandbyOffCmd"] % self.NodeUUID(node)
+ else:
+ return False
+
+ ret = self.rsh(node, cmd)
+
+ return True
+
class HAResource(Resource):
def __init__(self, cm, node):
'''
Get information from xml node
'''
self.rid = str(node.getAttribute('id'))
self.rclass = str(node.getAttribute('class'))
self.rtype = str(node.getAttribute('type'))
self.inc_name = None
self.inc_no = -1
self.inc_max = -1
self.rparameters = {}
list = node.getElementsByTagName('instance_attributes')
if len(list) > 0:
attributes = list[0]
list = attributes.getElementsByTagName('attributes')
if len(list) > 0:
parameters = list[0]
nvpairs = parameters.getElementsByTagName('nvpair')
for nvpair in nvpairs:
name=nvpair.getAttribute('name')
value=nvpair.getAttribute('value')
self.rparameters[name]=value
Resource.__init__(self, cm, self.rtype, self.rid)
def IsRunningOn(self, nodename):
'''
This member function returns true if our resource is running
on the given node in the cluster.
We call the status operation for the resource script.
'''
out=self.CM.rsh.readaline(nodename, self.CM["IsRscRunning"]%self.rid)
return re.search("0",out)
def RunningNodes(self):
ResourceNodes = []
for node in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == self.CM["up"]:
if self.IsRunningOn(node):
ResourceNodes.append(node)
return ResourceNodes
def _ResourceOperation(self, operation, nodename):
'''
Execute an operation on the resource
'''
self.CM.rsh.readaline(nodename, self.CM["ExecuteRscOp"]%(self.rid,operation))
return self.CM.rsh.lastrc == 0
def Start(self, nodename):
'''
This member function starts or activates the resource.
'''
return self._ResourceOperation("start", nodename)
def Stop(self, nodename):
'''
This member function stops or deactivates the resource.
'''
return self._ResourceOperation("stop", nodename)
def IsWorkingCorrectly(self, nodename):
return self._ResourceOperation("monitor", nodename)
#######################################################################
#
# A little test code...
#
# Which you are advised to completely ignore...
#
#######################################################################
if __name__ == '__main__':
pass
