
diff --git a/cts/CM_LinuxHAv2.py.in b/cts/CM_LinuxHAv2.py.in
index 852d4d2134..483441f850 100755
--- a/cts/CM_LinuxHAv2.py.in
+++ b/cts/CM_LinuxHAv2.py.in
@@ -1,545 +1,603 @@
#!@PYTHON@
'''CTS: Cluster Testing System: LinuxHA v2 dependent modules...
'''
__copyright__='''
Author: Huang Zhen <zhenhltc@cn.ibm.com>
Copyright (C) 2004 International Business Machines
Additional Audits, Revised Start action, Default Configuration:
Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import os,sys,CTS,CTSaudits,CTStests
from CTS import *
from CM_hb import HeartbeatCM
from xml.dom.minidom import *
+# make the ExpatError reference in StandbyStatus() explicit
+import xml.parsers.expat
from CTSaudits import ClusterAudit
from CTStests import *
#######################################################################
#
# LinuxHA v2 dependent modules
#
#######################################################################
class LinuxHAv2(HeartbeatCM):
'''
The linux-ha version 2 cluster manager class.
It implements the things we need to talk to and manipulate
linux-ha version 2 clusters
'''
def __init__(self, Environment, randseed=None):
HeartbeatCM.__init__(self, Environment, randseed=randseed)
self.update({
"Name" : "linux-ha-v2",
"DeadTime" : 300,
"StartTime" : 300, # Max time to start up
"StableTime" : 30,
"StartCmd" : "@libdir@/heartbeat/ha_logd -d >/dev/null 2>&1; @libdir@/heartbeat/heartbeat >/dev/null 2>&1",
"StopCmd" : "@libdir@/heartbeat/heartbeat -k",
"ElectionCmd" : "@libdir@/heartbeat/crmadmin -E %s",
"StatusCmd" : "@libdir@/heartbeat/crmadmin -S %s 2>/dev/null",
"EpocheCmd" : "@libdir@/heartbeat/ccm_tool -e",
"QuorumCmd" : "@libdir@/heartbeat/ccm_tool -q",
"ParitionCmd" : "@libdir@/heartbeat/ccm_tool -p",
"IsRscRunning" : "@libdir@/heartbeat/lrmadmin -E %s monitor 0 0 EVERYTIME 2>/dev/null|grep return",
"ExecuteRscOp" : "@libdir@/heartbeat/lrmadmin -E %s %s 0 0 EVERYTIME 2>/dev/null",
"CIBfile" : "%s:@HA_VARLIBDIR@/heartbeat/crm/cib.xml",
"TmpDir" : "/tmp",
"BreakCommCmd2" : "/usr/lib/heartbeat/TestHeartbeatComm break-communication %s>/dev/null 2>&1",
"IsIPAddrRscRunning" : "",
+ "StandbyOnCmd" : "@libdir@/heartbeat/crmadmin -s %s",
+ "StandbyOffCmd" : "@libdir@/heartbeat/crmadmin -a %s",
+ "UUIDQueryCmd" : "@libdir@/heartbeat/crmadmin -N",
+ "CIBQueryCmd" : "@libdir@/heartbeat/cibadmin -Ql -h %s",
+
# Patterns to look for in the log files for various occasions...
"Pat:DC_IDLE" : "crmd:.*State transition.*-> S_IDLE",
# This won't work if we have multiple partitions
# Use: "Pat:They_started" : "%s crmd:.*State transition.*-> S_NOT_DC",
"Pat:They_started" : "Updating node state to member for %s",
"Pat:We_started" : "%s crmd:.*State transition.*-> S_IDLE",
"Pat:We_stopped" : "%s heartbeat.*Heartbeat shutdown complete",
"Pat:They_stopped" : "%s crmd:.*LOST:.* %s ",
"Pat:All_stopped" : "%s .*heartbeat.*Heartbeat shutdown complete",
"Pat:They_dead" : "node %s.*: is dead",
"Pat:TransitionComplete" : "Transition status: Complete: complete",
# Bad news Regexes. Should never occur.
"BadRegexes" : (
r"Shutting down\.",
r"Forcing shutdown\.",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r", exiting\.",
r"WARN.*Ignoring HA message.*vote.*not in our membership list",
r"pengine:.*Attempting recovery of resource",
r"pengine:.*Handling failed ",
r"tengine:.*is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"ERROR:",
r"CRIT:",
),
})
del self["Standby"]
self.check_transitions = 0
self.check_elections = 0
self.CIBsync = {}
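# The XML fragments below are stitched together into default_cts_cib,
# the configuration installed when ClobberCIB is enabled and no
# CIBfilename is supplied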
cib_prefix='''
<cib cib_feature_revision="1" num_updates="1" have_quorum="false" epoche="1">
<configuration>
<crm_config>'''
cib_options='''
<nvpair id="transition_idle_timeout" name="transition_idle_timeout" value="120s"/>
<nvpair id="require_quorum" name="require_quorum" value="true"/>
<nvpair id="symmetric_cluster" name="symetric_cluster" value="true"/>
<nvpair id="suppress_cib_writes" name="suppress_cib_writes" value="true"/>
<nvpair id="no_quorum_policy" name="no_quorum_policy" value="stop"/>'''
cib_glue_1='''
</crm_config>
<nodes/>
<resources>'''
cib_glue_2='''
</resources>
<constraints>'''
cib_suffix='''
</constraints>
</configuration>
<status/>
</cib>
'''
resources=''' '''
constraints=''' '''
cib_fencing = ""
if self.Env["CIBResource"] == 1:
self.log("Enabling DC resource")
resources='''
<resource id="DcIPaddr" class="ocf" type="IPaddr" provider="heartbeat" is_managed="1">
<operations>
<op id="1" name="monitor" interval="5s" timeout="20s"/>
</operations>
<instance_attributes>
<attributes>
<nvpair id="1" name="ip" value="%s"/>
</attributes>
</instance_attributes>
</resource>''' % self.Env["IPBase"]
# DcIPaddr can't run anywhere but the DC
constraints='''
<rsc_location id="run_DcIPaddr" rsc="DcIPaddr">
<rule id="cant_run_DcIPaddr" score="-INFINITY" boolean_op="and">
<expression attribute="#is_dc" operation="eq" value="false"/>
</rule>
</rsc_location>'''
fields = string.split(self.Env["IPBase"], '.')
for node in self.Env["nodes"]:
# These resources prefer to run on the node with the same name
fields[3] = str(int(fields[3])+1)
ip = string.join(fields, '.')
node_resource=("""
<resource id="%s" class="ocf" type="IPaddr" provider="heartbeat" is_managed="1">
<operations>
<op id="1" name="monitor" interval="5s" timeout="20s"/>
</operations>
<instance_attributes>
<attributes>
<nvpair id="1" name="ip" value="%s"/>
</attributes>
</instance_attributes>
</resource>""" %("rsc_"+node, ip))
resources = resources + node_resource
node_constraint=("""
<rsc_location id="run_%s" rsc="%s">
<rule id="pref_run_%s" score="100" boolean_op="and">
<expression attribute="#uname" operation="eq" value="%s"/>
</rule>
</rsc_location>""" % ("rsc_"+node, "rsc_"+node, "rsc_"+node, node))
constraints = constraints + node_constraint
# always add the fencing resource so that we test incarnations
nodelist = ""
for node in self.Env["nodes"]:
nodelist += node + " "
stonith_resource=("""
<incarnation id="DoFencing">
<instance_attributes>
<attributes>
<nvpair id="1" name="incarnation_max" value="%d"/>
<nvpair id="2" name="incarnation_node_max" value="1"/>
</attributes>
</instance_attributes>
<resource id="child_DoFencing" class="stonith" type="ssh">
<operations>
<op id="1" name="monitor" interval="5s" timeout="20s"/>
</operations>
<instance_attributes>
<attributes>
<nvpair id="1" name="hostlist" value="%s"/>
</attributes>
</instance_attributes>
</resource>
</incarnation>""" %(len(self.Env["nodes"]), nodelist))
resources = resources + stonith_resource
if self.Env["DoFencing"] == 1:
cib_options=cib_options + '''
<nvpair id="stonith_enabled" name="stonith_enabled" value="true"/>'''
self.default_cts_cib=cib_prefix + cib_options + cib_glue_1 + \
resources + cib_glue_2 + constraints + cib_suffix
self.debug(self.default_cts_cib)
def errorstoignore(self):
# At some point implement a more elegant solution that
# also produces a report at the end
'''Return the list of known, very noisy errors that should be ignored'''
if 1:
return [
"crmadmin:"
]
return []
def install_config(self, node):
if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1:
self.CIBsync[node] = 1
if self.Env["CIBfilename"] == None:
self.debug("Installing Generated CIB on node %s" %(node))
os.system("rm -f /tmp/cts.default.cib")
os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib")
if 0!=self.rsh.cp("/tmp/cts.default.cib",
"root@" + (self["CIBfile"]%node)):
raise ValueError("Can not scp file to %s "%node)
os.system("rm -f /tmp/cts.default.cib")
else:
self.debug("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node))
if 0!=self.rsh.cp(self.Env["CIBfilename"],
"root@" + (self["CIBfile"]%node)):
raise ValueError("Can not scp file to %s "%node)
def prepare(self):
'''Finish the Initialization process. Prepare to test...'''
for node in self.Env["nodes"]:
self.ShouldBeStatus[node] = ""
self.StataCM(node)
def test_node_CM(self, node):
'''Report the status of the cluster manager on a given node'''
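# Returns 0 if the CM is down, 1 if it is up but not yet stable,
# and 2 if it is up and stable (S_NOT_DC or S_IDLE)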
watchpats = [ ]
watchpats.append("Current state: (S_IDLE|S_NOT_DC)")
watchpats.append(self["Pat:They_started"]%node)
idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats)
idle_watch.setwatch()
out=self.rsh.readaline(node, self["StatusCmd"]%node)
ret= (string.find(out, 'ok') != -1)
self.debug("Node %s status: %s" %(node, out))
if not ret:
if self.ShouldBeStatus[node] == self["up"]:
self.log(
"Node status for %s is %s but we think it should be %s"
%(node, self["down"], self.ShouldBeStatus[node]))
self.ShouldBeStatus[node]=self["down"]
return 0
if self.ShouldBeStatus[node] == self["down"]:
self.log(
"Node status for %s is %s but we think it should be %s: %s"
%(node, self["up"], self.ShouldBeStatus[node], out))
self.ShouldBeStatus[node]=self["up"]
# check the output first - because syslog-ng loses messages
if string.find(out, 'S_NOT_DC') != -1:
# Up and stable
return 2
if string.find(out, 'S_IDLE') != -1:
# Up and stable
return 2
# fall back to syslog-ng and wait
if not idle_watch.look():
# just up
self.debug("Warn: Node %s is unstable: %s" %(node, out))
return 1
# Up and stable
return 2
# Is the node up or down?
def StataCM(self, node):
'''Report the status of the cluster manager on a given node'''
if self.test_node_CM(node) > 0:
return 1
return None
# Being up and being stable is not the same question...
def node_stable(self, node):
'''Report the status of the cluster manager on a given node'''
if self.test_node_CM(node) == 2:
return 1
self.log("Warn: Node %s not stable" %(node))
return None
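# The whole cluster is considered stable once the DC reaches S_IDLE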
def cluster_stable(self, timeout=None):
watchpats = [ ]
watchpats.append("Current state: S_IDLE")
watchpats.append(self["Pat:DC_IDLE"])
if timeout == None:
timeout = self["DeadTime"]
idle_watch = CTS.LogWatcher(self["LogFileName"], watchpats, timeout)
idle_watch.setwatch()
any_up = 0
for node in self.Env["nodes"]:
# have each node dump its current state
if self.ShouldBeStatus[node] == self["up"]:
self.rsh.readaline(node, (self["StatusCmd"] %node) )
any_up = 1
if any_up == 0 or idle_watch.look():
return 1
self.log("Warn: Cluster Master not IDLE")
return None
def is_node_dc(self, node, status_line=None):
rc = 0
if not status_line:
status_line = self.rsh.readaline(node, self["StatusCmd"]%node)
if not status_line:
rc = 0
elif string.find(status_line, 'S_IDLE') != -1:
rc = 1
elif string.find(status_line, 'S_INTEGRATION') != -1:
rc = 1
elif string.find(status_line, 'S_FINALIZE_JOIN') != -1:
rc = 1
elif string.find(status_line, 'S_POLICY_ENGINE') != -1:
rc = 1
elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1:
rc = 1
if rc == 1:
self.debug("%s _is_ the DC" % node)
return rc
def isolate_node(self, node, allowlist):
'''isolate the communication between the nodes'''
rc = self.rsh(node, self["BreakCommCmd2"]%allowlist)
if rc == 0:
return 1
else:
self.log("Could not break the communication from node: %s",node)
return None
def Configuration(self):
if self.Env["ClobberCIB"] == 1:
if self.Env["CIBfilename"] == None:
os.system("rm -f /tmp/cts.default.cib")
os.system("echo \'" + self.default_cts_cib + "\' > /tmp/cts.default.cib")
cib=parse("/tmp/cts.default.cib")
# os.system("rm -f /tmp/cts.default.cib")
else:
cib=parse(self.Env["CIBfilename"])
else:
local_cib = "%s/cts_cib_%s.xml"%(self["TmpDir"],str(os.getpid()))
if 0!=self.rsh.cp("root@"+self["CIBfile"]%self.Env["nodes"][0],local_cib):
raise ValueError("Can not copy file to %s, maybe permission denied"%self["TmpDir"])
cib=parse(local_cib)
os.remove(local_cib)
return cib.getElementsByTagName('configuration')[0]
def Resources(self):
ResourceList = []
# Read the resources defined in the CIB
configuration = self.Configuration()
resources = configuration.getElementsByTagName('resources')[0]
rscs = configuration.getElementsByTagName('resource')
for rsc in rscs:
if rsc in resources.childNodes:
ResourceList.append(HAResource(self,rsc))
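# Expand each incarnation (cloned resource) into incarnation_max
# individual HAResource instances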
incs = configuration.getElementsByTagName('incarnation')
for inc in incs:
max = 0
inc_name = inc.getAttribute("id")
instance_attributes = inc.getElementsByTagName('instance_attributes')[0]
attributes = instance_attributes.getElementsByTagName('attributes')[0]
nvpairs = attributes.getElementsByTagName('nvpair')
for nvpair in nvpairs:
if nvpair.getAttribute("name") == "incarnation_max":
max = int(nvpair.getAttribute("value"))
inc_rsc = inc.getElementsByTagName('resource')[0]
for i in range(0,max):
rsc = HAResource(self,inc_rsc)
rsc.inc_no = i
rsc.inc_name = inc_name
rsc.inc_max = max
rsc.rid = inc_name+":"+rsc.rid + ":%d"%i
rsc.Instance = rsc.rid
ResourceList.append(rsc)
return ResourceList
def Dependancies(self):
DependancyList = []
# Read the dependencies (rsc_to_rsc constraints) defined in the CIB
configuration=self.Configuration()
constraints=configuration.getElementsByTagName('constraints')[0]
rsc_to_rscs=configuration.getElementsByTagName('rsc_to_rsc')
for node in rsc_to_rscs:
dependancy = {}
dependancy["id"]=node.getAttribute('id')
dependancy["from"]=node.getAttribute('from')
dependancy["to"]=node.getAttribute('to')
dependancy["type"]=node.getAttribute('type')
dependancy["strength"]=node.getAttribute('strength')
DependancyList.append(dependancy)
return DependancyList
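# Collect the distinct CCM membership partitions reported by the nodes
# that are up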
def find_partitions(self):
ccm_partitions = []
for node in self.Env["nodes"]:
if self.ShouldBeStatus[node] == self["up"]:
partition = self.rsh.readaline(node, self["ParitionCmd"])
if not partition:
self.log("no partition details for %s" %node)
elif len(partition) > 2:
partition = partition[:-1]
# record each distinct partition string only once
if partition not in ccm_partitions:
ccm_partitions.append(partition)
else:
self.log("bad partition details for %s" %node)
return ccm_partitions
def HasQuorum(self, node_list):
# If we are auditing a partition, then one side will
# have quorum and the other not.
# So the caller needs to tell us which we are checking
# If no value for node_list is specified... assume all nodes
if not node_list:
node_list = self.Env["nodes"]
for node in node_list:
if self.ShouldBeStatus[node] == self["up"]:
quorum = self.rsh.readaline(node, self["QuorumCmd"])
return string.find(quorum,"1") != -1
return 0
def Components(self):
complist = [Process("lrmd",self),Process("crmd",self)]
if self.Env["DoFencing"] == 1 :
complist.append(Process("stonithd",self))
complist.append(Process("heartbeat",self))
return complist
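+
+ # Map a node's uname to its cluster UUID by parsing "crmadmin -N" output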
+ def NodeUUID(self, node):
+ lines = self.rsh.readlines(node, self["UUIDQueryCmd"])
+ for line in lines:
+ m = re.search(r'%s.+\((.+)\)' % node, line)
+ if m:
+ return m.group(1)
+ return ""
+
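+ # Report whether a node is in standby by reading its CIB and looking for
+ # a "standby" nvpair set to "on" under the matching <node> entry;
+ # returns "" if the CIB could not be read or parsed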
+ def StandbyStatus(self, node):
+ check_cib_cmd = self["CIBQueryCmd"] % node
+
+ lines = self.rsh.readlines(node, check_cib_cmd)
+ if not lines:
+ return ""
+
+ cib_data = "".join(lines)
+ try:
+ cib = parseString(cib_data)
+ except xml.parsers.expat.ExpatError:
+ return ""
+
+ standby_status = "off"
+ nodes = cib.getElementsByTagName('node')
+
+ for ha_node in nodes:
+ if ha_node.getAttribute("uname") == node:
+ nvpairs = ha_node.getElementsByTagName('nvpair')
+ for nvpair in nvpairs:
+ if nvpair.getAttribute('name') == 'standby':
+ if nvpair.getAttribute('value') == 'on':
+ standby_status = "on"
+ break
+
+ return standby_status
+
+ # status == "on" : Enter Standby mode
+ # status == "off": Enter Active mode
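+ # The change is requested with crmadmin using the node's UUID; the exit
+ # code is not checked, so callers should re-query StandbyStatus() if they
+ # need confirmation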
+ def SetStandbyMode(self, node, status):
+ current_status = self.StandbyStatus(node)
+ if current_status == status:
+ return True
+
+ if status == "on":
+ cmd = self["StandbyOnCmd"] % self.NodeUUID(node)
+ elif status == "off":
+ cmd = self["StandbyOffCmd"] % self.NodeUUID(node)
+ else:
+ return False
+
+ ret = self.rsh(node, cmd)
+
+ return True
+
class HAResource(Resource):
def __init__(self, cm, node):
'''
Get information from xml node
'''
self.rid = str(node.getAttribute('id'))
self.rclass = str(node.getAttribute('class'))
self.rtype = str(node.getAttribute('type'))
self.inc_name = None
self.inc_no = -1
self.inc_max = -1
self.rparameters = {}
list = node.getElementsByTagName('instance_attributes')
if len(list) > 0:
attributes = list[0]
list = attributes.getElementsByTagName('attributes')
if len(list) > 0:
parameters = list[0]
nvpairs = parameters.getElementsByTagName('nvpair')
for nvpair in nvpairs:
name=nvpair.getAttribute('name')
value=nvpair.getAttribute('value')
self.rparameters[name]=value
Resource.__init__(self, cm, self.rtype, self.rid)
def IsRunningOn(self, nodename):
'''
This member function returns true if our resource is running
on the given node in the cluster.
We call the status operation for the resource script.
'''
out=self.CM.rsh.readaline(nodename, self.CM["IsRscRunning"]%self.rid)
return re.search("0",out)
def RunningNodes(self):
ResourceNodes = []
for node in self.CM.Env["nodes"]:
if self.CM.ShouldBeStatus[node] == self.CM["up"]:
if self.IsRunningOn(node):
ResourceNodes.append(node)
return ResourceNodes
def _ResourceOperation(self, operation, nodename):
'''
Execute an operation on the resource
'''
self.CM.rsh.readaline(nodename, self.CM["ExecuteRscOp"]%(self.rid,operation))
return self.CM.rsh.lastrc == 0
def Start(self, nodename):
'''
This member function starts or activates the resource.
'''
return self._ResourceOperation("start", nodename)
def Stop(self, nodename):
'''
This member function stops or deactivates the resource.
'''
return self._ResourceOperation("stop", nodename)
def IsWorkingCorrectly(self, nodename):
return self._ResourceOperation("monitor", nodename)
#######################################################################
#
# A little test code...
#
# Which you are advised to completely ignore...
#
#######################################################################
if __name__ == '__main__':
pass
