Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F3687632
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
22 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/cts/CM_LinuxHAv2.py.in b/cts/CM_LinuxHAv2.py.in
index 852d4d2134..483441f850 100755
--- a/cts/CM_LinuxHAv2.py.in
+++ b/cts/CM_LinuxHAv2.py.in
@@ -1,545 +1,603 @@
#!@PYTHON@
'''CTS: Cluster Testing System: LinuxHA v2 dependent modules...
'''
__copyright__='''
Author: Huang Zhen <zhenhltc@cn.ibm.com>
Copyright (C) 2004 International Business Machines
Additional Audits, Revised Start action, Default Configuration:
Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import os,sys,CTS,CTSaudits,CTStests
from CTS import *
from CM_hb import HeartbeatCM
from xml.dom.minidom import *
from CTSaudits import ClusterAudit
from CTStests import *
#######################################################################
#
# LinuxHA v2 dependent modules
#
#######################################################################
class LinuxHAv2(HeartbeatCM):
    '''
    The linux-ha version 2 cluster manager class.
    It implements the things we need to talk to and manipulate
    linux-ha version 2 clusters
    '''
def __init__(self, Environment, randseed=None):
    '''Set up command templates, log patterns and (optionally) generate
    the default CTS test CIB.

    Environment: the CTS environment dictionary (nodes, IPBase, flags).
    randseed:    forwarded to HeartbeatCM for reproducible test runs.
    '''
    HeartbeatCM.__init__(self, Environment, randseed=randseed)

    # Command lines used to drive/inspect the cluster, plus the syslog
    # patterns CTS watches for.  @...@ tokens are substituted by configure.
    self.update({
        "Name" : "linux-ha-v2",
        "DeadTime" : 300,
        "StartTime" : 300,     # Max time to start up
        "StableTime" : 30,
        "StartCmd" : "@libdir@/heartbeat/ha_logd -d >/dev/null 2>&1; @libdir@/heartbeat/heartbeat >/dev/null 2>&1",
        "StopCmd" : "@libdir@/heartbeat/heartbeat -k",
        "ElectionCmd" : "@libdir@/heartbeat/crmadmin -E %s",
        "StatusCmd" : "@libdir@/heartbeat/crmadmin -S %s 2>/dev/null",
        "EpocheCmd" : "@libdir@/heartbeat/ccm_tool -e",
        "QuorumCmd" : "@libdir@/heartbeat/ccm_tool -q",
        "ParitionCmd" : "@libdir@/heartbeat/ccm_tool -p",
        "IsRscRunning" : "@libdir@/heartbeat/lrmadmin -E %s monitor 0 0 EVERYTIME 2>/dev/null|grep return",
        "ExecuteRscOp" : "@libdir@/heartbeat/lrmadmin -E %s %s 0 0 EVERYTIME 2>/dev/null",
        "CIBfile" : "%s:@HA_VARLIBDIR@/heartbeat/crm/cib.xml",
        "TmpDir" : "/tmp",
        "BreakCommCmd2" : "/usr/lib/heartbeat/TestHeartbeatComm break-communication %s>/dev/null 2>&1",
        "IsIPAddrRscRunning" : "",
        "StandbyOnCmd" : "@libdir@/heartbeat/crmadmin -s %s",
        "StandbyOffCmd" : "@libdir@/heartbeat/crmadmin -a %s",
        "UUIDQueryCmd" : "@libdir@/heartbeat/crmadmin -N",
        "CIBQueryCmd" : "@libdir@/heartbeat/cibadmin -Ql -h %s",

        # Patterns to look for in the log files for various occasions...
        "Pat:DC_IDLE" : "crmd:.*State transition.*-> S_IDLE",

        # This wont work if we have multiple partitions
        # Use: "Pat:They_started" : "%s crmd:.*State transition.*-> S_NOT_DC",
        "Pat:They_started" : "Updating node state to member for %s",
        "Pat:We_started" : "%s crmd:.*State transition.*-> S_IDLE",
        "Pat:We_stopped" : "%s heartbeat.*Heartbeat shutdown complete",
        "Pat:They_stopped" : "%s crmd:.*LOST:.* %s ",
        "Pat:All_stopped" : "%s .*heartbeat.*Heartbeat shutdown complete",
        "Pat:They_dead" : "node %s.*: is dead",
        "Pat:TransitionComplete" : "Transition status: Complete: complete",

        # Bad news Regexes.  Should never occur.
        "BadRegexes" : (
            r"Shutting down\.",
            r"Forcing shutdown\.",
            r"Timer I_TERMINATE just popped",
            r"input=I_ERROR",
            r"input=I_FAIL",
            r"input=I_INTEGRATED cause=C_TIMER_POPPED",
            r"input=I_FINALIZED cause=C_TIMER_POPPED",
            r"input=I_ERROR",
            r", exiting\.",
            r"WARN.*Ignoring HA message.*vote.*not in our membership list",
            r"pengine:.*Attempting recovery of resource",
            r"pengine:.*Handling failed ",
            r"tengine:.*is taking more than 2x its timeout",
            r"Confirm not received from",
            r"Welcome reply not received from",
            r"ERROR:",
            r"CRIT:",
        ),
    })

    # v2 drives standby through crmadmin (StandbyOnCmd/StandbyOffCmd,
    # see SetStandbyMode) rather than the template inherited from CM_hb.
    del self["Standby"]

    self.check_transitions = 0    # extra transition auditing off by default
    self.check_elections = 0      # extra election auditing off by default
    self.CIBsync = {}             # nodes we have already installed a CIB on

    # Fragments used to assemble self.default_cts_cib below.
    # NOTE(review): the indentation inside these XML literals was lost
    # in transit and has been reconstructed; it is cosmetic to the CIB.
    cib_prefix='''
<cib cib_feature_revision="1" num_updates="1" have_quorum="false" epoche="1">
  <configuration>
    <crm_config>'''
    cib_options='''
      <nvpair id="transition_idle_timeout" name="transition_idle_timeout" value="120s"/>
      <nvpair id="require_quorum" name="require_quorum" value="true"/>
      <nvpair id="symmetric_cluster" name="symetric_cluster" value="true"/>
      <nvpair id="suppress_cib_writes" name="suppress_cib_writes" value="true"/>
      <nvpair id="no_quorum_policy" name="no_quorum_policy" value="stop"/>'''
    cib_glue_1='''
    </crm_config>
    <nodes/>
    <resources>'''
    cib_glue_2='''
    </resources>
    <constraints>'''
    cib_suffix='''
    </constraints>
  </configuration>
  <status/>
</cib>
'''
    resources=''' '''
    constraints=''' '''
    cib_fencing = ""

    if self.Env["CIBResource"] == 1:
        self.log("Enabling DC resource")
        # One IPaddr resource that is pinned to whichever node is the DC.
        resources='''
      <resource id="DcIPaddr" class="ocf" type="IPaddr" provider="heartbeat" is_managed="1">
        <operations>
          <op id="1" name="monitor" interval="5s" timeout="20s"/>
        </operations>
        <instance_attributes>
          <attributes>
            <nvpair id="1" name="ip" value="%s"/>
          </attributes>
        </instance_attributes>
      </resource>''' % self.Env["IPBase"]

        # DcIPaddr cant run anywhere but the DC
        constraints='''
      <rsc_location id="run_DcIPaddr" rsc="DcIPaddr">
        <rule id="cant_run_DcIPaddr" score="-INFINITY" boolean_op="and">
          <expression attribute="#is_dc" operation="eq" value="false"/>
        </rule>
      </rsc_location>'''

        # One IPaddr per node, allocated from successive addresses above IPBase.
        fields = string.split(self.Env["IPBase"], '.')
        for node in self.Env["nodes"]:
            # These resources prefer to run on the node with the same name
            fields[3] = str(int(fields[3])+1)
            ip = string.join(fields, '.')
            node_resource=("""
      <resource id="%s" class="ocf" type="IPaddr" provider="heartbeat" is_managed="1">
        <operations>
          <op id="1" name="monitor" interval="5s" timeout="20s"/>
        </operations>
        <instance_attributes>
          <attributes>
            <nvpair id="1" name="ip" value="%s"/>
          </attributes>
        </instance_attributes>
      </resource>""" %("rsc_"+node, ip))
            resources = resources + node_resource
            node_constraint=("""
      <rsc_location id="run_%s" rsc="%s">
        <rule id="pref_run_%s" score="100" boolean_op="and">
          <expression attribute="#uname" operation="eq" value="%s"/>
        </rule>
      </rsc_location>""" % ("rsc_"+node, "rsc_"+node, "rsc_"+node, node))
            constraints = constraints + node_constraint

    # always add the fencing resource so that we test incarnations
    # NOTE(review): placed outside the CIBResource block per the comment
    # above ("always") -- confirm against the original file's nesting.
    nodelist = ""
    for node in self.Env["nodes"]:
        nodelist += node + " "
    stonith_resource=("""
      <incarnation id="DoFencing">
        <instance_attributes>
          <attributes>
            <nvpair id="1" name="incarnation_max" value="%d"/>
            <nvpair id="2" name="incarnation_node_max" value="1"/>
          </attributes>
        </instance_attributes>
        <resource id="child_DoFencing" class="stonith" type="ssh">
          <operations>
            <op id="1" name="monitor" interval="5s" timeout="20s"/>
          </operations>
          <instance_attributes>
            <attributes>
              <nvpair id="1" name="hostlist" value="%s"/>
            </attributes>
          </instance_attributes>
        </resource>
      </incarnation>""" %(len(self.Env["nodes"]), nodelist))
    resources = resources + stonith_resource

    if self.Env["DoFencing"] == 1:
        # Only turn STONITH on in the CIB options when requested.
        cib_options=cib_options + '''
      <nvpair id="stonith_enabled" name="stonith_enabled" value="true"/>'''

    self.default_cts_cib=cib_prefix + cib_options + cib_glue_1 + \
                         resources + cib_glue_2 + constraints + cib_suffix
    self.debug(self.default_cts_cib)
def errorstoignore(self):
    '''Return list of errors which are known and very noisey should be ignored'''
    # At some point implement a more elegant solution that
    # also produces a report at the end.
    # Fix: the old body wrapped this in "if 1:" followed by an
    # unreachable "return []" -- dead code with identical behaviour.
    return [
        "crmadmin:"
    ]
def install_config(self, node):
    '''Install a CIB on the given node, at most once per node, when
    ClobberCIB is enabled.

    Uses the generated default CIB unless the user supplied one via
    Env["CIBfilename"].  Raises ValueError if the remote copy fails.
    '''
    if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1:
        self.CIBsync[node] = 1    # remember so we never clobber twice
        if self.Env["CIBfilename"] == None:
            self.debug("Installing Generated CIB on node %s" %(node))
            # NOTE(review): the CIB text is echoed through the shell with
            # only surrounding single quotes -- fine for generated content,
            # fragile if the CIB ever contains quotes.
            os.system("rm -f /tmp/cts.default.cib")
            os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib")
            if 0!=self.rsh.cp("/tmp/cts.default.cib",
                              "root@" + (self["CIBfile"]%node)):
                raise ValueError("Can not scp file to %s "%node)
            os.system("rm -f /tmp/cts.default.cib")
        else:
            self.debug("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node))
            if 0!=self.rsh.cp(self.Env["CIBfilename"],
                              "root@" + (self["CIBfile"]%node)):
                raise ValueError("Can not scp file to %s "%node)
def prepare(self):
    '''Finish the Initialization process. Prepare to test...'''
    # Forget any previous idea of each node's state, then ask the
    # cluster manager for the real status so ShouldBeStatus is fresh.
    for member in self.Env["nodes"]:
        self.ShouldBeStatus[member] = ""
        self.StataCM(member)
def test_node_CM(self, node):
    '''Report the status of the cluster manager on a given node.

    Returns 0 (down), 1 (up but not yet stable) or 2 (up and stable).
    Also updates self.ShouldBeStatus[node] and logs any disagreement
    with what we previously believed.
    '''
    # Start watching the log *before* querying so a stability message
    # that arrives while crmadmin runs is not missed.
    watchpats = [ ]
    watchpats.append("Current state: (S_IDLE|S_NOT_DC)")
    watchpats.append(self["Pat:They_started"]%node)
    idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats)
    idle_watch.setwatch()

    out=self.rsh.readaline(node, self["StatusCmd"]%node)
    # crmadmin prints "ok" when the CM answered.
    ret= (string.find(out, 'ok') != -1)
    self.debug("Node %s status: %s" %(node, out))

    if not ret:
        # No answer: the node is down.  Complain if we thought it was up.
        if self.ShouldBeStatus[node] == self["up"]:
            self.log(
                "Node status for %s is %s but we think it should be %s"
                %(node, self["down"], self.ShouldBeStatus[node]))
        self.ShouldBeStatus[node]=self["down"]
        return 0

    if self.ShouldBeStatus[node] == self["down"]:
        self.log(
            "Node status for %s is %s but we think it should be %s: %s"
            %(node, self["up"], self.ShouldBeStatus[node], out))
    self.ShouldBeStatus[node]=self["up"]

    # check the output first - because syslog-ng looses messages
    if string.find(out, 'S_NOT_DC') != -1:
        # Up and stable
        return 2
    if string.find(out, 'S_IDLE') != -1:
        # Up and stable
        return 2

    # fall back to syslog-ng and wait
    if not idle_watch.look():
        # just up
        self.debug("Warn: Node %s is unstable: %s" %(node, out))
        return 1

    # Up and stable
    return 2
# Is the node up or is the node down
def StataCM(self, node):
    '''Report the status of the cluster manager on a given node'''
    # Collapse test_node_CM's three-way answer (0/1/2) into
    # up (1) / down (None).
    if self.test_node_CM(node) <= 0:
        return None
    return 1
# Being up and being stable is not the same question...
def node_stable(self, node):
    '''Report the status of the cluster manager on a given node'''
    # Only state 2 (up *and* idle) counts as stable.
    stable = (self.test_node_CM(node) == 2)
    if not stable:
        self.log("Warn: Node %s not stable" %(node))
        return None
    return 1
def cluster_stable(self, timeout=None):
    '''Return 1 if the cluster settles to an idle state within timeout
    seconds (default: DeadTime), otherwise None.

    A cluster with no live members is trivially considered stable.
    '''
    watchpats = [ ]
    watchpats.append("Current state: S_IDLE")
    watchpats.append(self["Pat:DC_IDLE"])
    if timeout == None:
        timeout = self["DeadTime"]

    idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats, timeout)
    idle_watch.setwatch()

    any_up = 0
    for node in self.Env["nodes"]:
        # have each node dump its current state
        if self.ShouldBeStatus[node] == self["up"]:
            self.rsh.readaline(node, (self["StatusCmd"] %node) )
            any_up = 1

    if any_up == 0 or idle_watch.look():
        return 1

    self.log("Warn: Cluster Master not IDLE")
    return None
def is_node_dc(self, node, status_line=None):
    '''Return 1 if the node's crmadmin status shows a DC-only state,
    otherwise 0.  Queries the node unless status_line is supplied.'''
    if not status_line:
        status_line = self.rsh.readaline(node, self["StatusCmd"]%node)
    if not status_line:
        return 0

    # Each of these FSA states is only ever entered by the DC.
    dc_states = [ 'S_IDLE', 'S_INTEGRATION', 'S_FINALIZE_JOIN',
                  'S_POLICY_ENGINE', 'S_TRANSITION_ENGINE' ]
    for state in dc_states:
        if string.find(status_line, state) != -1:
            self.debug("%s _is_ the DC" % node)
            return 1
    return 0
def isolate_node(self, node, allowlist):
    '''isolate the communication between the nodes

    Breaks communication from `node` to everything except `allowlist`.
    Returns 1 on success, None on failure.
    '''
    rc = self.rsh(node, self["BreakCommCmd2"]%allowlist)
    if rc == 0:
        return 1
    # Fix: the message was previously passed as two arguments
    # ("...%s", node), so the node name was never interpolated.
    self.log("Could not break the communication from node: %s" % node)
    return None
def Configuration(self):
    '''Return the <configuration> DOM element of the cluster CIB.

    When ClobberCIB is set, parse the CIB we would install (generated,
    or the user-supplied CIBfilename); otherwise fetch the live CIB
    from the first cluster node.  Raises ValueError if the copy fails.
    '''
    if self.Env["ClobberCIB"] == 1:
        if self.Env["CIBfilename"] == None:
            # Round-trip the generated CIB through a temp file so
            # minidom can parse it.
            os.system("rm -f /tmp/cts.default.cib")
            os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib")
            cib=parse("/tmp/cts.default.cib")
            # os.system("rm -f /tmp/cts.default.cib")
        else:
            cib=parse(self.Env["CIBfilename"])
    else:
        # Pull the live CIB off the first node into a unique local file.
        local_cib = "%s/cts_cib_%s.xml"%(self["TmpDir"],str(os.getpid()))
        if 0!=self.rsh.cp("root@"+self["CIBfile"]%self.Env["nodes"][0],local_cib):
            raise ValueError("Can not copy file to %s, maybe permission denied"%self["TmpDir"])
        cib=parse(local_cib)
        os.remove(local_cib)
    return cib.getElementsByTagName('configuration')[0]
def Resources(self):
    '''Return the HAResources defined in the cluster CIB, expanding
    each <incarnation> into one resource object per instance.'''
    ResourceList = []
    #read resources in cib
    configuration = self.Configuration()
    resources = configuration.getElementsByTagName('resources')[0]
    rscs = configuration.getElementsByTagName('resource')
    for rsc in rscs:
        # Only direct children of <resources>; incarnation children
        # are expanded separately below.
        if rsc in resources.childNodes:
            ResourceList.append(HAResource(self,rsc))

    incs = configuration.getElementsByTagName('incarnation')
    for inc in incs:
        # Fix: renamed local "max" so it no longer shadows the builtin.
        inc_max = 0
        inc_name = inc.getAttribute("id")
        instance_attributes = inc.getElementsByTagName('instance_attributes')[0]
        attributes = instance_attributes.getElementsByTagName('attributes')[0]
        nvpairs = attributes.getElementsByTagName('nvpair')
        for nvpair in nvpairs:
            if nvpair.getAttribute("name") == "incarnation_max":
                inc_max = int(nvpair.getAttribute("value"))
        inc_rsc = inc.getElementsByTagName('resource')[0]
        # One logical resource per incarnation instance, with a
        # composite id of the form "<inc>:<child>:<n>".
        for i in range(0, inc_max):
            rsc = HAResource(self, inc_rsc)
            rsc.inc_no = i
            rsc.inc_name = inc_name
            rsc.inc_max = inc_max
            rsc.rid = inc_name + ":" + rsc.rid + ":%d" % i
            rsc.Instance = rsc.rid
            ResourceList.append(rsc)
    return ResourceList
def Dependancies(self):
    '''Return the rsc_to_rsc constraints from the CIB, one dict per
    constraint with keys id/from/to/type/strength.'''
    DependancyList = []
    #read dependancy in cib
    configuration = self.Configuration()
    constraints = configuration.getElementsByTagName('constraints')[0]
    for rel in configuration.getElementsByTagName('rsc_to_rsc'):
        dependancy = {}
        for field in ("id", "from", "to", "type", "strength"):
            dependancy[field] = rel.getAttribute(field)
        DependancyList.append(dependancy)
    return DependancyList
def find_partitions(self):
    '''Ask every live node which CCM partition it belongs to and
    return the list of distinct partition strings seen.'''
    ccm_partitions = []
    for node in self.Env["nodes"]:
        if self.ShouldBeStatus[node] != self["up"]:
            continue
        partition = self.rsh.readaline(node, self["ParitionCmd"])
        if not partition:
            self.log("no partition details for %s" %node)
        elif len(partition) > 2:
            partition = partition[:-1]   # strip the trailing newline
            # Fix: the old code iterated over ccm_partitions and
            # appended on every mismatch -- so the first partition was
            # never recorded (empty list => loop body never ran) and
            # later ones could be appended while iterating.  A simple
            # membership test is what was intended.
            if partition not in ccm_partitions:
                ccm_partitions.append(partition)
        else:
            self.log("bad partition details for %s" %node)
    return ccm_partitions
def HasQuorum(self, node_list):
    # If we are auditing a partition, then one side will
    # have quorum and the other not.
    # So the caller needs to tell us which we are checking
    # If no value for node_list is specified... assume all nodes
    if not node_list:
        node_list = self.Env["nodes"]

    # Ask the first live node we find; ccm_tool prints "1" for quorum.
    for member in node_list:
        if self.ShouldBeStatus[member] != self["up"]:
            continue
        quorum = self.rsh.readaline(member, self["QuorumCmd"])
        return string.find(quorum, "1") != -1
    return 0
def Components(self):
    '''Return the list of cluster processes CTS may test by killing.'''
    complist = [
        Process("lrmd", self),
        Process("crmd", self),
    ]
    # stonithd only runs when fencing is configured.
    if self.Env["DoFencing"] == 1:
        complist.append(Process("stonithd", self))
    complist.append(Process("heartbeat", self))
    return complist
def NodeUUID(self, node):
    '''Look up the cluster UUID for the given node name.

    Parses "crmadmin -N" output lines of the form
    "... <node> ... (<uuid>)".  Returns "" when no line matches.
    '''
    lines = self.rsh.readlines(node, self["UUIDQueryCmd"])
    for line in lines:
        # Fix: escape the node name -- hostnames may contain regex
        # metacharacters (dots at the very least).
        m = re.search(r'%s.+\((.+)\)' % re.escape(node), line)
        if m:
            return m.group(1)
    return ""
def StandbyStatus(self, node):
    '''Return "on" if the node is in standby mode, "off" if active,
    or "" if the CIB could not be fetched or parsed.'''
    # Fix: the module only does "from xml.dom.minidom import *", which
    # never binds the name "xml" -- so the except clause below used to
    # raise NameError whenever parsing actually failed.  A local import
    # makes the exception type resolvable.
    import xml.parsers.expat

    check_cib_cmd = self["CIBQueryCmd"] % node

    lines = self.rsh.readlines(node, check_cib_cmd)
    if not lines:
        return ""

    cib_data = "".join(lines)
    try:
        cib = parseString(cib_data)
    except xml.parsers.expat.ExpatError:
        return ""

    standby_status = "off"
    for ha_node in cib.getElementsByTagName('node'):
        if ha_node.getAttribute("uname") != node:
            continue
        for nvpair in ha_node.getElementsByTagName('nvpair'):
            if nvpair.getAttribute('name') == 'standby':
                if nvpair.getAttribute('value') == 'on':
                    standby_status = "on"
                # Found the standby nvpair; no need to look further.
                break

    return standby_status
# status == "on" : Enter Standby mode
# status == "off": Enter Active mode
def SetStandbyMode(self, node, status):
    '''Put the node into ("on") or take it out of ("off") standby mode.

    Returns True if the node is already in the requested state or the
    crmadmin command was issued; False for an unrecognised status.
    '''
    current_status = self.StandbyStatus(node)
    if current_status == status:
        return True

    if status == "on":
        cmd = self["StandbyOnCmd"] % self.NodeUUID(node)
    elif status == "off":
        cmd = self["StandbyOffCmd"] % self.NodeUUID(node)
    else:
        return False

    # Fix: dropped the unused "ret =" binding.
    # NOTE(review): the command's exit status is deliberately ignored,
    # so a failed mode change is not reported to the caller -- confirm
    # that is intended before tightening.
    self.rsh(node, cmd)
    return True
class HAResource(Resource):
    '''A cluster resource as described by a <resource> element of the CIB.'''

    def __init__(self, cm, node):
        '''
        Get information from xml node
        '''
        self.rid = str(node.getAttribute('id'))
        self.rclass = str(node.getAttribute('class'))
        self.rtype = str(node.getAttribute('type'))
        self.inc_name = None    # set by LinuxHAv2.Resources() for incarnations
        self.inc_no = -1
        self.inc_max = -1
        self.rparameters = {}
        # Fix: renamed local "list" so it no longer shadows the builtin.
        attr_sets = node.getElementsByTagName('instance_attributes')
        if len(attr_sets) > 0:
            attributes = attr_sets[0]
            attr_lists = attributes.getElementsByTagName('attributes')
            if len(attr_lists) > 0:
                parameters = attr_lists[0]
                for nvpair in parameters.getElementsByTagName('nvpair'):
                    name = nvpair.getAttribute('name')
                    value = nvpair.getAttribute('value')
                    self.rparameters[name] = value
        Resource.__init__(self, cm, self.rtype, self.rid)

    def IsRunningOn(self, nodename):
        '''
        This member function returns true if our resource is running
        on the given node in the cluster.
        We call the status operation for the resource script.
        '''
        out = self.CM.rsh.readaline(nodename, self.CM["IsRscRunning"] % self.rid)
        return re.search("0", out)

    def RunningNodes(self):
        '''Return the list of live cluster nodes the resource runs on.'''
        ResourceNodes = []
        for node in self.CM.Env["nodes"]:
            if self.CM.ShouldBeStatus[node] == self.CM["up"]:
                if self.IsRunningOn(node):
                    ResourceNodes.append(node)
        return ResourceNodes

    def _ResourceOperation(self, operation, nodename):
        '''
        Execute an operation on the resource
        '''
        self.CM.rsh.readaline(nodename, self.CM["ExecuteRscOp"] % (self.rid, operation))
        return self.CM.rsh.lastrc == 0

    def Start(self, nodename):
        '''
        This member function starts or activates the resource.
        '''
        return self._ResourceOperation("start", nodename)

    def Stop(self, nodename):
        '''
        This member function stops or deactivates the resource.
        '''
        return self._ResourceOperation("stop", nodename)

    def IsWorkingCorrectly(self, nodename):
        '''Return true if a "monitor" operation on the resource succeeds.'''
        return self._ResourceOperation("monitor", nodename)
#######################################################################
#
# A little test code...
#
# Which you are advised to completely ignore...
#
#######################################################################
if __name__ == '__main__':
    # No standalone behaviour; this module is driven by the CTS harness.
    pass
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Apr 21, 8:11 PM (6 h, 48 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665693
Default Alt Text
(22 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment