
diff --git a/cts/CM_LinuxHAv2.py.in b/cts/CM_LinuxHAv2.py.in
index 25ad1c2e75..ed2df4635e 100755
--- a/cts/CM_LinuxHAv2.py.in
+++ b/cts/CM_LinuxHAv2.py.in
@@ -1,479 +1,480 @@
#!@PYTHON@
'''CTS: Cluster Testing System: LinuxHA v2 dependent modules...
'''
__copyright__='''
Author: Huang Zhen <zhenhltc@cn.ibm.com>
Copyright (C) 2004 International Business Machines
Additional Audits, Revised Start action, Default Configuration:
Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import os,sys,CTS,CTSaudits,CTStests
from CTS import *
from CM_hb import HeartbeatCM
from xml.dom.minidom import *
from CTSaudits import ClusterAudit
from CTStests import *
#######################################################################
#
# LinuxHA v2 dependent modules
#
#######################################################################
class LinuxHAv2(HeartbeatCM):
'''
The linux-ha version 2 cluster manager class.
It implements the things we need to talk to and manipulate
linux-ha version 2 clusters
'''
def __init__(self, Environment, randseed=None):
HeartbeatCM.__init__(self, Environment, randseed=randseed)
self.update({
"Name" : "linux-ha-v2",
"DeadTime" : 600,
"StableTime" : 10,
"StartCmd" : "@libdir@/heartbeat/ha_logd -d >/dev/null 2>&1; @libdir@/heartbeat/heartbeat >/dev/null 2>&1",
"StopCmd" : "@libdir@/heartbeat/heartbeat -k",
"StatusCmd" : "@libdir@/heartbeat/crmadmin -S %s 2>/dev/null",
"EpocheCmd" : "@libdir@/heartbeat/ccm_tool -e",
"QuorumCmd" : "@libdir@/heartbeat/ccm_tool -q",
"ParitionCmd" : "@libdir@/heartbeat/ccm_tool -p",
"IsRscRunning" : "@libdir@/heartbeat/lrmadmin -E %s monitor 0 0 EVERYTIME 2>/dev/null|grep return",
"ExecuteRscOp" : "@libdir@/heartbeat/lrmadmin -E %s %s 0 0 EVERYTIME 2>/dev/null",
"CIBfile" : "%s:@HA_VARLIBDIR@/heartbeat/crm/cib.xml",
"TmpDir" : "/tmp",
"BreakCommCmd2" : "/usr/lib/heartbeat/TestHeartbeatComm break-communication %s>/dev/null 2>&1",
"IsIPAddrRscRunning" : "",
# Patterns to look for in the log files for various occasions...
"Pat:DC_IDLE" : "crmd:.*State transition.*-> S_IDLE",
"Pat:We_started" : "%s crmd:.*State transition.*-> (S_NOT_DC|S_IDLE)",
"Pat:They_started" : "%s crmd:.*State transition.*-> (S_NOT_DC|S_IDLE)",
"Pat:They_stopped" : "%s heartbeat.*Heartbeat shutdown complete",
"Pat:They_dead" : "node (%s).*: is dead",
"Pat:We_stopped" : ("%s heartbeat.*Heartbeat shutdown complete" %(self.OurNode)),
"Pat:All_stopped" : "%s heartbeat.*Heartbeat shutdown complete",
# Bad news Regexes. Should never occur.
"BadRegexes" : (
r"Shutting down\.",
r"Forcing shutdown\.",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r", exiting\.",
r"ERROR:",
r"CRIT:",
),
})
del self["Standby"]
self.CIBsync = {}
cib_prefix='''
<cib cib_feature_revision="1" num_updates="1" have_quorum="false" epoche="1">
<configuration>
<crm_config>'''
cib_options='''
<nvpair id="require_quorum" name="require_quorum" value="true"/>
<nvpair id="symmetric_cluster" name="symetric_cluster" value="true"/>
<nvpair id="suppress_cib_writes" name="suppress_cib_writes" value="true"/>'''
cib_glue_1='''
</crm_config>
<nodes/>
<resources>'''
cib_glue_2='''
</resources>
<constraints>'''
cib_suffix='''
</constraints>
</configuration>
<status/>
</cib>
'''
resources=''' '''
constraints=''' '''
cib_fencing = ""
if self.Env["CIBResource"] == 1:
self.log("Enabling DC resource")
resources='''
<resource id="DcIPaddr" class="ocf" type="IPaddr" provider="heartbeat">
<operations>
<op id="1" name="monitor" interval="5s" timeout="3s"/>
</operations>
<instance_attributes>
<attributes>
<nvpair name="ip" value="127.0.0.10"/>
</attributes>
</instance_attributes>
</resource>'''
# DcIPaddr can't run anywhere but the DC
constraints='''
<rsc_location id="run_DcIPaddr" rsc="DcIPaddr">
<rule id="cant_run_DcIPaddr" score="-INFINITY" boolean_op="and">
<expression attribute="is_dc" operation="eq" value="false"/>
</rule>
</rsc_location>'''
ip_num=21
for node in self.Env["nodes"]:
# These resources prefer to run on the node with the same name
node_resource=("""
<resource id="%s" class="ocf" type="IPaddr" provider="heartbeat">
<operations>
<op id="1" name="monitor" interval="5s" timeout="3s"/>
</operations>
<instance_attributes>
<attributes>
<nvpair name="ip" value="127.0.0.%d"/>
</attributes>
</instance_attributes>
- </resource>""" %(node, ip_num))
+ </resource>""" %("rsc_"+node, ip_num))
ip_num=ip_num+1
resources = resources + node_resource
node_constraint=("""
<rsc_location id="run_%s" rsc="%s">
<rule id="pref_run_%s" score="100" boolean_op="and">
<expression attribute="uname" operation="eq" value="%s"/>
</rule>
- </rsc_location>""" % (node, node, node, node))
+ </rsc_location>""" % ("rsc_"+node, "rsc_"+node, "rsc_"+node, node))
constraints = constraints + node_constraint
if self.Env["DoFencing"] == 1 :
cib_options=cib_options + '''
<nvpair id="stonith_enabled" name="stonith_enabled" value="true"/>'''
nodelist = ""
for node in self.Env["nodes"]:
nodelist += node + " "
stonith_resource=("""
<incarnation id="DoFencing" incarnation_max="%d" incarnation_node_max="1">
<instance_attributes>
<attributes>
<nvpair name="incarnation_max" value="%d"/>
<nvpair name="incarnation_node_max" value="%d"/>
</attributes>
</instance_attributes>
<resource id="child_DoFencing" class="stonith" type="ssh">
<instance_attributes>
<attributes>
<nvpair name="hostlist" value="%s"/>
</attributes>
</instance_attributes>
</resource>
</incarnation>""" %(len(nodelist), len(nodelist), nodelist))
resources = resources + stonith_resource
self.default_cts_cib=cib_prefix + cib_options + cib_glue_1 + \
resources + cib_glue_2 + constraints + cib_suffix
self.debug(self.default_cts_cib)
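#######################################################################
# Sketch (illustrative, not part of the original CTS code): the default
# CIB above is assembled by plain string concatenation of XML fragments.
# A minimal self-contained example of the same technique, with
# hypothetical fragment contents, checked via xml.dom.minidom:
def _example_build_cib():
    from xml.dom.minidom import parseString
    prefix = '<cib><configuration><resources>'
    resource = '<resource id="example_rsc" class="ocf" type="IPaddr"/>'
    suffix = '</resources></configuration></cib>'
    # Concatenation is only safe if every fragment is well-formed XML.
    doc = parseString(prefix + resource + suffix)
    return doc.getElementsByTagName('resource')[0].getAttribute('id')
#######################################################################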
def errorstoignore(self):
# At some point implement a more elegant solution that
# also produces a report at the end
'''Return a list of errors which are known, very noisy, and should be ignored'''
if 0:
return [
"Currently no errors to ignore"
]
return []
def StataCM(self, node):
'''Report the status of the cluster manager on a given node'''
out=self.rsh.readaline(node, self["StatusCmd"]%node)
ret= (string.find(out, 'ok') != -1)
try:
if ret:
if self.ShouldBeStatus[node] != self["up"]:
self.log(
"Node status for %s is %s but we think it should be %s"
% (node, self["up"], self.ShouldBeStatus[node]))
self.log("Expected: %s. Actual: %s"
% (self.ShouldBeStatus[node], out))
else:
if self.ShouldBeStatus[node] != self["down"]:
self.log(
"Node status for %s is %s but we think it should be %s"
% (node, self["down"], self.ShouldBeStatus[node]))
except KeyError: pass
if ret: self.ShouldBeStatus[node]=self["up"]
else: self.ShouldBeStatus[node]=self["down"]
return ret
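#######################################################################
# Sketch (illustrative only): StataCM decides "up" with a bare substring
# search for 'ok' in the crmadmin output.  The same check in isolation,
# fed a hypothetical status line:
def _example_status_is_up(status_line):
    # str.find() returns -1 when the substring is absent.
    return status_line.find('ok') != -1
# _example_status_is_up("Status of crmd@node1: ok") -> True.  Note the
# probe is coarse: a stray 'ok' in an error message would also match.
#######################################################################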
def StartaCM(self, node):
'''Start up the cluster manager on a given node'''
localBadNewsPats = []
patterns = []
patterns.append(self["Pat:We_started"]%node)
# only search for this pattern if there is another node out there
# that should be the DC
if self.any_running() == 1:
patterns.append(self["Pat:DC_IDLE"])
localBadNewsPats.append("input=I_ELECTION ")
localBadNewsPats.append("input=I_ELECTION_DC ")
if self.CIBsync.has_key(node):
self.debug("CIB already installed on node %s" %node)
elif self.Env["ClobberCIB"] == 1:
self.CIBsync[node] = 1
self.debug("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node))
if self.Env["CIBfilename"] == None:
os.system("rm -f /tmp/cts.default.cib")
os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib")
if 0!=self.rsh.cp("/tmp/cts.default.cib",
"root@" + (self["CIBfile"]%node)):
raise ValueError("Can not scp file to %s "%node)
os.system("rm -f /tmp/cts.default.cib")
else:
if 0!=self.rsh.cp(self.Env["CIBfilename"],
"root@" + (self["CIBfile"]%node)):
raise ValueError("Can not scp file to %s "%node)
localBadNews = CTS.LogWatcher(self["LogFileName"], localBadNewsPats, 0)
localBadNews.setwatch()
watch = CTS.LogWatcher(self["LogFileName"], patterns, 180)
self.debug("Starting %s on node %s" %(self["Name"], node))
watch.setwatch()
self.rsh(node, self["StartCmd"])
self.ShouldBeStatus[node]=self["up"]
if watch.lookforall():
match=localBadNews.look()
while match:
if not re.search("CTS:", match):
self.log(match)
self.Env.RandomTests.incr("BadNews")
match=localBadNews.look()
return 1
# the watch() failed... lets check to see if the start _really_ failed
for regex in watch.unmatched:
self.log ("Warn: Startup pattern not found: %s" %(regex))
match=localBadNews.look()
while match:
if not re.search("CTS:", match):
self.log("Warn: %s" %match)
self.Env.RandomTests.incr("BadNews")
match=localBadNews.look()
out = self.rsh.readaline(node, (self["StatusCmd"] %node) )
if string.find(out, 'ok') == -1:
# yep, it _really_ failed
self.ShouldBeStatus[node]=self["down"]
return None
ret=(string.find(out, 'S_NOT_DC') != -1)
if ret:
# actually we joined the cluster just fine
self.log ("%s on %s joined the cluster" %(self["Name"], node))
return 1
ret= (string.find(out, 'S_IDLE') != -1)
if ret:
# actually we joined the cluster just fine
self.log ("%s on %s joined the cluster as DC" %(self["Name"], node))
return 1
self.log ("%s on %s started but unstable: %s"
%(self["Name"], node, out))
# self.ShouldBeStatus[node]=self["down"]
return None
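#######################################################################
# Sketch (this is not the CTS.LogWatcher API): StartaCM pairs a blocking
# watcher for the required startup patterns with a zero-timeout watcher
# that is drained afterwards for "bad news".  A minimal in-memory
# analogue of that two-watcher scan:
import re as _re
def _example_scan(lines, must_match, bad_news):
    unmatched = [p for p in must_match
                 if not any(_re.search(p, l) for l in lines)]
    bad = [l for l in lines
           if any(_re.search(p, l) for p in bad_news)]
    # Startup succeeded only if every required pattern appeared; bad-news
    # hits are reported either way, mirroring the incr("BadNews") counter.
    return (len(unmatched) == 0, unmatched, bad)
#######################################################################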
def StopaCM(self, node):
'''Stop the cluster manager on a given node'''
self.debug("Stopping %s on node %s" %(self["Name"], node))
rc=self.rsh(node, self["StopCmd"])
if rc == 0:
self.ShouldBeStatus[node]=self["down"]
return 1
still_running = 0
if self.rsh(node, "killall -INT crmd") == 0:
still_running = 1
self.log("%s is still running on node %s" %(self["name"], node))
if self.rsh(node, "killall -INT heartbeat") == 0:
still_running = 1
self.log("Heartbeat is still running on node %s" %node)
if still_running == 0:
self.log ("Warn: %s failed, yet nothing is running on node %s" %(self["StopCmd"], self["Name"], node))
return 1
return None
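#######################################################################
# Note (illustrative sketch): StopaCM uses the exit status of
# "killall -INT" both to interrupt a straggler and to detect that one was
# present.  A gentler existence probe is signal 0, which delivers nothing:
import os as _os
def _example_process_running(name):
    # killall -0 exits 0 only when at least one matching process exists.
    return _os.system("killall -0 %s >/dev/null 2>&1" % name) == 0
#######################################################################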
def isolate_node(self, node, allowlist):
'''isolate the communication between the nodes'''
rc = self.rsh(node, self["BreakCommCmd2"]%allowlist)
if rc == 0:
return 1
else:
self.log("Could not break the communication from node: %s",node)
return None
def IsDC(self, node):
rc = 0
status_line = self.rsh.readaline(node, self["StatusCmd"]%node)
if string.find(status_line, 'S_IDLE') != -1:
rc = 1
elif string.find(status_line, 'S_INTEGRATION') != -1:
rc = 1
elif string.find(status_line, 'S_FINALIZE_JOIN') != -1:
rc = 1
elif string.find(status_line, 'S_POLICY_ENGINE') != -1:
rc = 1
elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1:
rc = 1
return rc
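#######################################################################
# Sketch: the chained elifs above test membership in the set of crmd FSM
# states that only the DC enters.  The same check as a containment test
# over a tuple (state names taken from the code above):
_DC_STATES = ('S_IDLE', 'S_INTEGRATION', 'S_FINALIZE_JOIN',
              'S_POLICY_ENGINE', 'S_TRANSITION_ENGINE')
def _example_is_dc(status_line):
    return any(state in status_line for state in _DC_STATES)
#######################################################################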
def Configuration(self):
if self.Env["ClobberCIB"] == 1:
if self.Env["CIBfilename"] == None:
os.system("rm -f /tmp/cts.default.cib")
os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib")
- cib=parse("/tmp/cts.default.cib")
+ cib=parse("/tmp/cts.default.cib")
# os.system("rm -f /tmp/cts.default.cib")
else:
- cib=parse(self.Env["CIBfilename"])
+ cib=parse(self.Env["CIBfilename"])
else:
- local_cib = "%s/cts_cib_%s.xml"%(self["TmpDir"],str(os.getpid()))
- if 0!=self.rsh.cp("root@"+self["CIBfile"]%self.Env["nodes"][0],local_cib):
- raise ValueError("Can not copy file to %s, maybe permission denied"%self["TmpDir"])
- cib=parse(local_cib)
+ local_cib = "%s/cts_cib_%s.xml"%(self["TmpDir"],str(os.getpid()))
+ if 0!=self.rsh.cp("root@"+self["CIBfile"]%self.Env["nodes"][0],local_cib):
+ raise ValueError("Can not copy file to %s, maybe permission denied"%self["TmpDir"])
+ cib=parse(local_cib)
+ os.remove(local_cib)
return cib.getElementsByTagName('configuration')[0]
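#######################################################################
# Note (illustrative sketch): writing the CIB with os.system("echo '...'
# > file") breaks as soon as the XML contains a single quote.  Plain file
# I/O sidesteps the shell entirely; the path below is illustrative:
def _example_write_cib(xml_text, path="/tmp/cts.default.cib"):
    f = open(path, "w")
    try:
        f.write(xml_text)
    finally:
        f.close()
#######################################################################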
def Resources(self):
ResourceList = []
#read resources in cib
configuration=self.Configuration()
resources=configuration.getElementsByTagName('resources')[0]
rscs=configuration.getElementsByTagName('resource')
for rsc in rscs:
ResourceList.append(HAResource(self,rsc))
return ResourceList
def Dependancies(self):
DependancyList = []
#read dependencies in cib
configuration=self.Configuration()
constraints=configuration.getElementsByTagName('constraints')[0]
rsc_to_rscs=configuration.getElementsByTagName('rsc_to_rsc')
for node in rsc_to_rscs:
dependancy = {}
dependancy["id"]=node.getAttribute('id')
dependancy["from"]=node.getAttribute('from')
dependancy["to"]=node.getAttribute('to')
dependancy["type"]=node.getAttribute('type')
dependancy["strength"]=node.getAttribute('strength')
DependancyList.append(dependancy)
return DependancyList
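#######################################################################
# Sketch (purely illustrative): Dependancies() copies a fixed attribute
# list off each <rsc_to_rsc> element by hand.  A generic minidom helper
# doing the same:
def _example_attrs(element, names=('id', 'from', 'to', 'type', 'strength')):
    # getAttribute returns "" for attributes the element does not carry.
    return dict((name, element.getAttribute(name)) for name in names)
#######################################################################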
def any_running(self):
for node in self.Env["nodes"]:
if self.ShouldBeStatus[node] == self["up"]:
return 1
return 0
class HAResource(Resource):
def __init__(self, cm, node):
'''
Get information from xml node
'''
self.rid = str(node.getAttribute('id'))
self.rclass = str(node.getAttribute('class'))
self.rtype = str(node.getAttribute('type'))
self.rparameters = {}
- list = node.getElementsByTagName('instance_attributes')
- if len(list) > 0:
- attributes = list[0]
- list = attributes.getElementsByTagName('attributes')
- if len(list) > 0:
- parameters = list[0]
- nvpairs = parameters.getElementsByTagName('nvpair')
+ list = node.getElementsByTagName('instance_attributes')
+ if len(list) > 0:
+ attributes = list[0]
+ list = attributes.getElementsByTagName('attributes')
+ if len(list) > 0:
+ parameters = list[0]
+ nvpairs = parameters.getElementsByTagName('nvpair')
for nvpair in nvpairs:
name=nvpair.getAttribute('name')
value=nvpair.getAttribute('value')
self.rparameters[name]=value
Resource.__init__(self, cm, self.rtype, self.rid)
def IsRunningOn(self, nodename):
'''
This member function returns true if our resource is running
on the given node in the cluster.
It asks the LRM to run a monitor operation for the resource.
'''
out=self.CM.rsh.readaline(nodename, self.CM["IsRscRunning"]%self.rid)
return re.search("0",out)
def RunningNodes(self):
ResourceNodes = []
for node in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == self.CM["up"]:
if self.IsRunningOn(node):
ResourceNodes.append(node)
return ResourceNodes
def _ResourceOperation(self, operation, nodename):
'''
Execute an operation on the resource
'''
self.CM.rsh.readaline(nodename, self.CM["ExecuteRscOp"]%(self.rid,operation))
return self.CM.rsh.lastrc == 0
def Start(self, nodename):
'''
This member function starts or activates the resource.
'''
return self._ResourceOperation("start", nodename)
def Stop(self, nodename):
'''
This member function stops or deactivates the resource.
'''
return self._ResourceOperation("stop", nodename)
def IsWorkingCorrectly(self, nodename):
return self._ResourceOperation("monitor", nodename)
#######################################################################
#
# A little test code...
#
# Which you are advised to completely ignore...
#
#######################################################################
if __name__ == '__main__':
pass
