diff --git a/cts/CM_hb.py.in b/cts/CM_hb.py.in index 41682d4762..355a1463f3 100755 --- a/cts/CM_hb.py.in +++ b/cts/CM_hb.py.in @@ -1,649 +1,649 @@ #!@PYTHON@ '''CTS: Cluster Testing System: heartbeat dependent modules... Classes related to testing high-availability clusters... Lots of things are implemented. Lots of things are not implemented. We have many more ideas of what to do than we've implemented. ''' __copyright__=''' -Copyright (C) 2000,2001 Alan Robertson +Copyright (C) 2000,2001,2005 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from CTS import * class HeartbeatCM(ClusterManager): ''' The heartbeat cluster manager class. It implements the things we need to talk to and manipulate heartbeat clusters ''' def __init__(self, Environment, randseed=None): self.ResourceDirs = ["@sysconfdir@/ha.d/resource.d", "@sysconfdir@/rc.d/init.d", "@sysconfdir@/rc.d/"] self.ResourceFile = Environment["HAdir"] + "/haresources" self.ConfigFile = Environment["HAdir"]+ "/ha.cf" ClusterManager.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "heartbeat", "DeadTime" : 30, "StableTime" : 30, "StartCmd" : "@libdir@/heartbeat/ha_logd -d >/dev/null 2>&1; MALLOC_CHECK_=2 @libdir@/heartbeat/heartbeat >/dev/null 2>&1", "StopCmd" : "@libdir@/heartbeat/heartbeat -k", "StatusCmd" : "@libdir@/heartbeat/heartbeat -s", "RereadCmd" : "@libdir@/heartbeat/heartbeat -r", "StartDRBDCmd" : "@sysconfdir@/init.d/drbd start >/dev/null 2>&1", "StopDRBDCmd" : "@sysconfdir@/init.d/drbd stop", "StatusDRBDCmd" : "@sysconfdir@/init.d/drbd status", "DRBDCheckconf" : "@sysconfdir@/init.d/drbd checkconfig >/var/run/drbdconf 2>&1", "BreakCommCmd" : "@libdir@/heartbeat/TestHeartbeatComm break-communication >/dev/null 2>&1", "FixCommCmd" : "@libdir@/heartbeat/TestHeartbeatComm fix-communication >/dev/null 2>&1", "DelFileCommCmd" : "/usr/lib/heartbeat/TestHeartbeatComm delete-testingfile >/dev/null 2>&1", "SaveFileCmd" : "/usr/lib/heartbeat/TestHeartbeatComm save-testingfile /tmp/OnlyForTesting >/dev/null 2>&1", "ReduceCommCmd" : "/usr/lib/heartbeat/TestHeartbeatComm reduce-communication %s %s>/dev/null 2>&1", "RestoreCommCmd" : "/usr/lib/heartbeat/TestHeartbeatComm restore-communication /tmp/OnlyForTesting >/dev/null 2>&1", "IPaddrCmd" : "@sysconfdir@/ha.d/resource.d/IPaddr %s status", "Standby" : "@libdir@/heartbeat/hb_standby >/dev/null 2>&1", "TestConfigDir" : "@sysconfdir@/ha.d/testconfigs", "LogFileName" : Environment["LogFileName"], # Patterns to look for in the log files for various occasions... 
"Pat:We_started" : " (%s) .* Initial resource acquisition complete", "Pat:They_started" : " (%s) .* Initial resource acquisition complete", "Pat:We_stopped" : "%s heartbeat.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s logd:.*Exiting write process", "Pat:They_stopped" : "%s heartbeat.*node (%s).*: is dead", "Pat:They_dead" : "node (%s).*: is dead", "Pat:All_stopped" : " (%s).*heartbeat.*Heartbeat shutdown complete", "Pat:StandbyOK" : "Standby resource acquisition done", "Pat:StandbyNONE" : "No reply to standby request", "Pat:StandbyTRANSIENT" : "standby message.*ignored.*in flux", "Pat:Return_partition" : "Cluster node %s returning after partition", # Bad news Regexes. Should never occur. "BadRegexes" : ( r"Shutting down\.", r"Forcing shutdown\.", r"Both machines own .* resources!", r"No one owns .* resources!", r", exiting\.", r"ERROR:", r"CRIT.*:", ), }) self.cf=HBConfig(Environment["HAdir"]) self._finalConditions() def SetClusterConfig(self, configpath="default", nodelist=None): '''Activate the named test configuration throughout the cluster. This code is specialized to heartbeat. ''' rc=1 Command=''' cd %s%s%s; : cd to test configuration directory for j in * do if [ -f "@sysconfdir@/ha.d/$j" ]; then if cmp $j @sysconfdir@/ha.d/$j >/dev/null 2>&1; then : Config file $j is already up to correct. else echo "Touching $j" cp $j @sysconfdir@/ha.d/$j fi fi done ''' % (self["TestConfigDir"], os.sep, configpath) if nodelist == None: nodelist=self.Env["nodes"] for node in nodelist: if not self.rsh(node, Command): rc=None self.rereadall() return rc def ResourceGroups(self): ''' Return the list of resources groups defined in this configuration. This code is specialized to heartbeat. We make the assumption that the resource file on the local machine is the same as that of a cluster member. We aren't necessarily a member of the cluster (In fact, we usually aren't). ''' RscGroups=[] file = open(self.ResourceFile, "r") while (1): line = file.readline() if line == "": break idx=string.find(line, '#') if idx >= 0: line=line[:idx] if line == "": continue line = string.strip(line) # Is this wrong? tokens = re.split("[ \t]+", line) # Ignore the default server for this resource group del tokens[0] Group=[] for token in tokens: if token != "": idx=string.find(token, "::") if idx > 0: tuple=string.split(token, "::") else: # # Is this an IPaddr default resource type? # if re.match("^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$" , token): tuple=["IPaddr", token] else: tuple = [token, None] Resource = self.hbResource(tuple[0], tuple[1]) Group.append(Resource) RscGroups.append(Group) file.close() return RscGroups def InternalCommConfig(self): ''' Return a list of communication paths. Each path consists of a tuple like this: mediatype serial | ip interface/dev name eth0 | /dev/ttyS0... protocol tcp?? 
| udp | None port Number | None ''' Path = {"mediatype" : None, "interface": None, "protocol" : None, "port": None} cf = self.cf for cfp in cf.Parameters: if cfp == "serial": if Path["mediatype"] == None: Path["mediatype"] = ["serial"] else: Path["mediatype"].append("serial") if Path["interface"] == None: Path["interface"] = cf.Parameters["serial"] else: for serial in cf.Parameters["serial"]: Path["interface"].append(serial) if cfp == "bcast" or cfp == "mcast" or cfp == "ucast" : if Path["mediatype"] == None: Path["mediatype"] = ["ip"] else: Path["mediatype"].append("ip") if cfp == "bcast": interfaces = cf.Parameters[cfp] if cfp == "ucast": interfaces = [cf.Parameters[cfp][0]] if cfp == "mcast": Path["port"] = [cf.Parameters[cfp][0][2]] Path["protocol"] = "udp" interfaces = [cf.Parameters[cfp][0][0]] if Path["interface"] == None: Path["interface"] = interfaces else: for interface in interfaces: if interface not in Path["interface"]: Path["interface"].append(interface) if cfp == "udpport": Path["port"] = cf.Parameters["udpport"] Path["protocol"] = ["udp"] if Path["port"] == None: Path["port"] = [694] return Path def HasQuorum(self, node_list): ( '''Return TRUE if the cluster currently has quorum. According to current heartbeat code this means one node is up. ''') return self.upcount() >= 1 def hbResource(self, type, instance): ''' Our job is to create the right kind of resource. For most resources, we just create an HBResource object, but for IP addresses, we create an HBipResource instead. Some other types of resources may also be added as special cases. ''' if type == "IPaddr": return HBipResource(self, type, instance) return HBResource(self, type, instance) class HBResource(Resource): def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. We call the status operation for the resource script. ''' return self._ResourceOperation("status", "OK|running", nodename) def _ResourceOperation(self, operation, pattern, nodename): ''' We call the requested operation for the resource script. We don't particularly care what kind of operation we were called to do. When we were created, we were bound to a cluster manager, which has its own remote execution method (which we use here). ''' if self.Instance == None: instance = "" else: instance = self.Instance Rlist = 'LIST="' for dir in self.CM.ResourceDirs: Rlist = Rlist + " " + dir Rlist = Rlist + '"; ' Script= Rlist + ''' T="''' + self.ResourceType + '''"; I="''' + instance + '''"; for dir in $LIST; do if [ -f "$dir/$T" -a -x "$dir/$T" ] then "$dir/$T" $I ''' + operation + ''' exit $? fi done 2>&1; exit 1;''' #print "Running " + Script + "\n" line = self.CM.rsh.readaline(nodename, Script) if operation == "status": if re.search(pattern, line): return 1 return self.CM.rsh.lastrc == 0 def Start(self, nodename): ''' This member function starts or activates the resource. ''' return self._ResourceOperation("start", None, nodename) def Stop(self, nodename): ''' This member function stops or deactivates the resource. ''' return self._ResourceOperation("stop", None, nodename) # def IsWorkingCorrectly(self, nodename): # "We default to returning TRUE for this one..."
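To make `_ResourceOperation` concrete: it assembles a one-shot shell script that walks the configured resource directories, runs the first matching resource script it finds, and propagates the exit code. Below is a minimal sketch of that assembly outside the class, with hypothetical directory paths and an invented IPaddr status probe; it only prints the script that would be handed to the cluster manager's remote-execution method.

```python
# Sketch only: mirrors how _ResourceOperation composes its remote script.
# The directories and the IPaddr example are hypothetical values.
ResourceDirs = ["/etc/ha.d/resource.d", "/etc/rc.d/init.d"]  # assumed paths
resource_type = "IPaddr"
instance = "10.0.0.1"
operation = "status"

Rlist = 'LIST="'
for dir in ResourceDirs:
    Rlist = Rlist + " " + dir
Rlist = Rlist + '"; '

script = Rlist + '''
T="''' + resource_type + '''";
I="''' + instance + '''";
for dir in $LIST; do
  if [ -f "$dir/$T" -a -x "$dir/$T" ]
  then
    "$dir/$T" $I ''' + operation + '''
    exit $?
  fi
done 2>&1; exit 1;'''

print script   # the one-shot script passed to CM.rsh.readaline()
```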
# if self.Instance == None: # self.CM.log("Faking out: " + self.ResourceType) # else: # self.CM.log("Faking out: " + self.ResourceType + self.Instance) # return 1 def IsWorkingCorrectly(self, nodename): return self._ResourceOperation("monitor", "OK", nodename) class HBipResource(HBResource): ''' We are a specialized IP address resource which knows how to test to see if our resource type is actually being served. We cheat and run the IPaddr resource script on the current machine, because it's a more interesting case. ''' def IsWorkingCorrectly(self, nodename): return self._ResourceOperation("monitor", "OK", self.CM.OurNode) # # A heartbeat configuration class... # It reads and parses the heartbeat config # files # class HBConfig: # Which options have multiple words on the line? MultiTokenKeywords = {"mcast" : None , "stonith_host": None} def __init__(self, configdir="/etc/ha.d"): self.Parameters = {} self.ResourceGroups = {} self._ReadConfig(os.path.join(configdir, "ha.cf")) FirstUp_NodeSelection() LastUp_NodeSelection() no_failback = NoAutoFailbackPolicy() auto_failback = AutoFailbackPolicy() # # We allow each resource group to have its own failover/back # policies # if self.Parameters.has_key("nice_failback") \ and self.Parameters["nice_failback"] == "on": HBConfig.DefaultFailbackPolicy = no_failback elif self.Parameters.has_key("auto_failback") \ and self.Parameters["auto_failback"] == "off": HBConfig.DefaultFailbackPolicy = no_failback else: HBConfig.DefaultFailbackPolicy = auto_failback HBConfig.DefaultNodeSelectionPolicy = NodeSelectionPolicies["FirstUp"] self._ReadResourceGroups(os.path.join(configdir, "haresources")) # Read ha.cf config file def _ReadConfig(self, ConfigFile): self.ConfigPath = ConfigFile; fp = open(ConfigFile) while 1: line=fp.readline() if not line: return line = re.sub("#.*", "", line) line = string.rstrip(line) if len(line) < 1: continue tokens = line.split() key = tokens[0] values = tokens[1:] if HBConfig.MultiTokenKeywords.has_key(key): # group items from this line together, and separate # from the items on other lines values = [values] if self.Parameters.has_key(key): if key == "node": self.Parameters[key].extend(values) else: self.Parameters[key].append(values[0]) else: self.Parameters[key] = values # Read a line from the haresources file... # - allow for \ continuations... def _GetRscLine(self, fp): linesofar = None continuation=1 while continuation: continuation = 0 line=fp.readline() if not line: break line = re.sub("#.*", "", line) if line[len(line)-2] == "\\": line = line[0:len(line)-2] + "\n" continuation=1 if linesofar == None: linesofar = line else: linesofar = linesofar + line return linesofar def _ReadResourceGroups(self, RscFile): self.RscPath = RscFile; fp = open(RscFile) thisline = "" while 1: line=self._GetRscLine(fp) if not line: return line = line.strip() if len(line) < 1: continue tokens = line.split() node = tokens[0] resources = tokens[1:] rscargs=[] for resource in resources: name=resource.split("::", 1) if len(name) > 1: args=name[1].split("::") else: args=None name = name[0] rscargs.append(Resource(name, args)) name = tokens[0] + "__" + tokens[1] assert not self.ResourceGroups.has_key(name) # # Create the resource group # self.ResourceGroups[name] = ResourceGroup(name \ , rscargs , node.split(",") # Provide default value , HBConfig.DefaultNodeSelectionPolicy , HBConfig.DefaultFailbackPolicy) # # Return the list of nodes in the cluster...
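Three parsing rules in `_ReadConfig` above are easy to miss: `mcast`/`stonith_host` lines keep their tokens grouped per line, repeated `node` lines accumulate, and any other repeated keyword contributes only the first value of each extra line. Here is a standalone sketch of the same rules against an invented ha.cf fragment (the parameter values are fabricated for illustration):

```python
import re, string

MultiTokenKeywords = {"mcast": None, "stonith_host": None}

def parse_ha_cf(lines):
    # Same accumulation rules as HBConfig._ReadConfig, minus the file I/O.
    Parameters = {}
    for line in lines:
        line = string.rstrip(re.sub("#.*", "", line))
        if len(line) < 1:
            continue
        tokens = line.split()
        key, values = tokens[0], tokens[1:]
        if MultiTokenKeywords.has_key(key):
            values = [values]            # keep this line's tokens together
        if Parameters.has_key(key):
            if key == "node":
                Parameters[key].extend(values)     # node lists accumulate
            else:
                Parameters[key].append(values[0])  # others: first value only
        else:
            Parameters[key] = values
    return Parameters

# Invented sample configuration:
sample = ["node alpha beta", "node gamma", "udpport 694",
          "mcast eth0 225.0.0.1 694 1 0"]
print parse_ha_cf(sample)
# node -> ['alpha', 'beta', 'gamma'], udpport -> ['694'],
# mcast -> [['eth0', '225.0.0.1', '694', '1', '0']]
```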
# def nodes(self): result = self.Parameters["node"] result.sort() return result class ClusterState: pass class ResourceGroup: def __init__(self, name, resourcelist, possiblenodes , nodeselection_policy, failback_policy): self.name = name self.resourcelist = resourcelist self.possiblenodes = possiblenodes self.nodeselection_policy = nodeselection_policy self.failback_policy = failback_policy self.state = None self.attributes = {} self.history = [] def __str__(self): result = string.join(self.possiblenodes, ",") for rsc in self.resourcelist: result = result + " " + str(rsc) return result class Resource: def __init__(self, name, arguments=None): self.name = name self.arguments = arguments def __str__(self): result = self.name try: for arg in self.arguments: result = result + "::" + arg except TypeError: pass return result ####################################################################### # # Base class defining policies for where we put resources # when we're starting, or when a failure has occurred... # ####################################################################### NodeSelectionPolicies = {} class NodeSelectionPolicy: def __init__(self, name): self.name = name NodeSelectionPolicies[name] = self def name(self): return self.name # # nodenames: the list of nodes eligible to run this resource # ResourceGroup: the group to be started... # ClusterState: Cluster state information # def SelectNode(self, nodenames, ResourceGroup, ClusterState): return None # # Choose the first node in the list... # class FirstUp_NodeSelection(NodeSelectionPolicy): def __init__(self): NodeSelectionPolicy.__init__(self, "FirstUp") def SelectNode(self, nodenames, ResourceGroup, ClusterState): return nodenames[0] # # Choose the last node in the list... # (kind of a dumb example) # class LastUp_NodeSelection(NodeSelectionPolicy): def __init__(self): NodeSelectionPolicy.__init__(self, "LastUp") def SelectNode(self, nodenames, ResourceGroup, ClusterState): return nodenames[len(nodenames)-1] ####################################################################### # # Failback policies... # # Where to locate a resource group when an eligible node rejoins # the cluster... # ####################################################################### FailbackPolicies = {} class FailbackPolicy: def __init__(self, name): self.name = name FailbackPolicies[name] = self def name(self): return self.name # # currentnode: The node the service is currently on # returningnode: The node which just rejoined # eligiblenodes: Permitted nodes which are up # SelectionPolicy: the normal NodeSelectionPolicy # Cluster state information... # def SelectNewNode(self, currentnode, returningnode, eligiblenodes , SelectionPolicy, ResourceGroup, ClusterState): return None # # This FailbackPolicy is like "normal failback" in heartbeat # class AutoFailbackPolicy(FailbackPolicy): def __init__(self): FailbackPolicy.__init__(self, "failback") def SelectNewNode(self, currentnode, returningnode, eligiblenodes , SelectionPolicy, ResourceGroup, ClusterState): # Select where it should run based on current normal policy # just as though we were starting it for the first time. return SelectionPolicy(eligiblenodes, ResourceGroup, ClusterState) # # This FailbackPolicy is like "nice failback" in heartbeat # class NoAutoFailbackPolicy(FailbackPolicy): def __init__(self): FailbackPolicy.__init__(self, "failuresonly") def SelectNewNode(self, currentnode, returningnode, eligiblenodes , SelectionPolicy, ResourceGroup, ClusterState): # Always leave the resource where it is...
return currentnode ####################################################################### # # A little test code... # # Which you are advised to completely ignore... # ####################################################################### if __name__ == '__main__': FirstUp_NodeSelection() LastUp_NodeSelection() no_failback = NoAutoFailbackPolicy() auto_failback = AutoFailbackPolicy() cf=HBConfig("/etc/ha.d") print "Cluster configuration:\n" print "Nodes:", cf.nodes(), "\n" print "Config Parameters:", cf.Parameters, "\n" for groupname in cf.ResourceGroups.keys(): print "Resource Group %s:\n\t%s\n" % (groupname, cf.ResourceGroups[groupname]) diff --git a/cts/CTSaudits.py.in b/cts/CTSaudits.py.in index bc98a139ef..1b182e87ea 100755 --- a/cts/CTSaudits.py.in +++ b/cts/CTSaudits.py.in @@ -1,553 +1,553 @@ #!@PYTHON@ '''CTS: Cluster Testing System: Audit module ''' __copyright__=''' -Copyright (C) 2000, 2001 Alan Robertson +Copyright (C) 2000, 2001,2005 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import time, os, popen2, string import CTS import os import popen2 class ClusterAudit: def __init__(self, cm): self.CM = cm def __call__(self): raise ValueError("Abstract Class member (__call__)") def is_applicable(self): '''Return TRUE if we are applicable in the current test configuration''' raise ValueError("Abstract Class member (is_applicable)") return 1 def name(self): raise ValueError("Abstract Class member (name)") AllAuditClasses = [ ] class ResourceAudit(ClusterAudit): def name(self): return "ResourceAudit" def _doauditRsc(self, resource): ResourceNodes = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: if resource.IsRunningOn(node): ResourceNodes.append(node) return ResourceNodes def _doaudit(self): '''Check to see if all resources are running in exactly one place in the cluster. We also verify that the members of a resource group are all running on the same node in the cluster, and we monitor that they are all running "properly". ''' Fatal = 0 result = [] # Thought: use self.CM.find_partitions() and make this audit # aware of partitions. Since in a split cluster one # partition may have quorum (and permission to run resources) # and the other not. Groups = self.CM.ResourceGroups() for group in Groups: GrpServedBy = None lastResource = None for resource in group: # # _doauditRsc returns the set of nodes serving # the given resource. This is normally a single node. # ResourceNodes = self._doauditRsc(resource) # Is the resource served without quorum present? if not self.CM.HasQuorum(None) and len(ResourceNodes) != 0 and resource.needs_quorum: result.append("Resource " + repr(resource) + " active without Quorum: " + repr(ResourceNodes)) # Is the resource served at all? 
elif len(ResourceNodes) == 0 and self.CM.HasQuorum(None): result.append("Resource " + repr(resource) + " not served anywhere.") # Is the resource served too many times? elif len(ResourceNodes) > 1: result.append("Resource " + repr(resource) + " served too many times: " + repr(ResourceNodes)) self.CM.log("Resource " + repr(resource) + " served too many times: " + repr(ResourceNodes)) Fatal = 1 elif GrpServedBy == None: GrpServedBy = ResourceNodes # Are all the members of the Rsc Grp served by the same node? elif GrpServedBy != ResourceNodes: result.append("Resource group resource " + repr(resource) + " running on different nodes: " + repr(ResourceNodes)+" vs "+repr(GrpServedBy) + " (otherRsc = " + repr(lastResource) + ")") self.CM.log("Resource group resource " + repr(resource) + " running on different nodes: " + repr(ResourceNodes)+" vs "+repr(GrpServedBy) + " (otherRsc = " + repr(lastResource) + ")") Fatal = 1 if self.CM.Env.has_key("SuppressMonitoring") and \ self.CM.Env["SuppressMonitoring"]: continue # Is the resource working correctly ? if not Fatal and len(ResourceNodes) == 1: beforearpchild = popen2.Popen3("date;/sbin/arp -n|cut -c1-15,26-50,75-" , None) beforearpchild.tochild.close() # /dev/null if not resource.IsWorkingCorrectly(ResourceNodes[0]): afterarpchild = popen2.Popen3("/sbin/arp -n|cut -c1-15,26-50,75-" , None) afterarpchild.tochild.close() # /dev/null result.append("Resource " + repr(resource) + " not operating properly." + " Resource is running on " + ResourceNodes[0]); Fatal = 1 self.CM.log("ARP table before failure ========"); for line in beforearpchild.fromchild.readlines(): self.CM.log(line) self.CM.log("ARP table after failure ========"); for line in afterarpchild.fromchild.readlines(): self.CM.log(line) self.CM.log("End of ARP tables ========"); try: beforearpchild.wait() afterarpchild.wait() except OSError: pass afterarpchild.fromchild.close() beforearpchild.fromchild.close() lastResource = resource if (Fatal): result.insert(0, "FATAL") # Kludgy. return result def __call__(self): # # Audit the resources. Since heartbeat doesn't really # know when resource acquisition is complete, we will # poll until things get stable. # # Having a resource active in more than one place is a Fatal Error # with no tolerance granted. # audresult = self._doaudit() # # Probably the constant below should be a CM parameter. # Then it could be 0 for FailSafe. # Of course, it really depends on what resources # you have in the test suite, and how long it takes # for them to settle. # Recently, we've changed heartbeat so we know better when # resource acquisition is done.
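The comment block above introduces a bounded poll: re-run the audit until it comes back clean or FATAL, or until the attempts run out. The `audcount` loop that follows implements exactly that; the same pattern as a free-standing sketch, with `doaudit` standing in for `_doaudit`:

```python
import time

def poll_audit(doaudit, attempts=5, delay=1):
    # Re-run the audit until it is clean, fatal, or out of attempts.
    # 'doaudit' returns a (possibly empty) list of complaints, with
    # "FATAL" inserted at the front for unrecoverable failures.
    result = doaudit()
    while attempts > 0:
        result = doaudit()
        if len(result) <= 0 or result[0] == "FATAL":
            attempts = 0
        else:
            attempts = attempts - 1
            if attempts > 0:
                time.sleep(delay)
    return result
```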
# audcount=5; while(audcount > 0): audresult = self._doaudit() if (len(audresult) <= 0 or audresult[0] == "FATAL"): audcount=0 else: audcount = audcount - 1 if (audcount > 0): time.sleep(1) if (len(audresult) > 0): self.CM.log("Fatal Audit error: " + repr(audresult)) return (len(audresult) == 0) def is_applicable(self): if self.CM["Name"] == "heartbeat": return 1 return 0 class HAResourceAudit(ClusterAudit): def __init__(self, cm): self.CM = cm def _RscRunningNodes(self, resource): ResourceNodes = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: if resource.IsRunningOn(node): ResourceNodes.append(node) return ResourceNodes def __call__(self): passed = 1 NodeofRsc = {} NumofInc = {} MaxofInc = {} self.CM.debug("Do Audit HAResourceAudit") #Calculate the count of active nodes up_count = 0; for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == self.CM["up"]: up_count += 1 #Make sure the resources are running on one and only one node Resources = self.CM.Resources() for resource in Resources : RunningNodes = self._RscRunningNodes(resource) NodeofRsc[resource.rid]=RunningNodes if resource.inc_name == None: #Is the resource served without quorum present? if not self.CM.HasQuorum(None) and len(RunningNodes) != 0 and resource.needs_quorum: self.CM.log("Resource " + repr(resource) + " active without Quorum: " + repr(RunningNodes)) passed = 0 #Is the resource served at all? elif len(RunningNodes) == 0 and self.CM.HasQuorum(None): self.CM.log("Resource " + repr(resource) + " not served anywhere.") passed = 0 # Is the resource served too many times? elif len(RunningNodes) > 1: self.CM.log("Resource " + repr(resource) + " served too many times: " + repr(RunningNodes)) passed = 0 else: if not NumofInc.has_key(resource.inc_name): NumofInc[resource.inc_name]=0 MaxofInc[resource.inc_name]=resource.inc_max running = 1 #Is the resource served without quorum present? if not self.CM.HasQuorum(None) and len(RunningNodes) != 0 and resource.needs_quorum == 1: self.CM.log("Resource " + repr(resource) + " active without Quorum: " + repr(RunningNodes)) passed = 0 #Is the resource served at all? elif len(RunningNodes) == 0 : running = 0 # Is the resource served too many times?
elif len(RunningNodes) > 1: self.CM.log("Resource " + repr(resource) + " served too many times: " + repr(RunningNodes)) passed = 0 if running: NumofInc[resource.inc_name] += 1 if self.CM.HasQuorum(None): for inc_name in NumofInc.keys(): if NumofInc[inc_name] != min(up_count,MaxofInc[inc_name]): passed = 0 self.CM.log("Incarnation %s has %d instances (max %d instances).\ Now %d nodes are up"%(str(inc_name),NumofInc[inc_name], \ MaxofInc[inc_name],up_count)) #Make sure the resources with "must","placement" constraints are running on the same node Dependencies = self.CM.Dependencies() for dependency in Dependencies: if dependency["type"] == "placement" and dependency["strength"] == "must": if NodeofRsc[dependency["from"]] != NodeofRsc[dependency["to"]]: print dependency["from"] + " and " + dependency["to"] + " should run on the same node" passed = 0 return passed def is_applicable(self): if self.CM["Name"] == "linux-ha-v2" and self.CM.Env["ResCanStop"] == 0: return 1 return 0 def name(self): return "HAResourceAudit" class CrmdStateAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def __call__(self): passed = 1 up_are_down = 0 down_are_up = 0 unstable_list = [] self.CM.debug("Do Audit %s"%self.name()) for node in self.CM.Env["nodes"]: should_be = self.CM.ShouldBeStatus[node] rc = self.CM.StataCM(node) if rc: if should_be == self.CM["down"]: down_are_up = down_are_up + 1 if not self.CM.node_stable(node): unstable_list.append(node) elif should_be == self.CM["up"]: up_are_down = up_are_down + 1 if len(unstable_list) > 0: passed = 0 self.CM.log("Cluster is not stable: %d (of %d): %s" %(len(unstable_list), self.CM.upcount(), repr(unstable_list))) if up_are_down > 0: passed = 0 self.CM.log("%d (of %d) nodes expected to be up were down." %(up_are_down, len(self.CM.Env["nodes"]))) if down_are_up > 0: passed = 0 self.CM.log("%d (of %d) nodes expected to be down were up."
%(down_are_up, len(self.CM.Env["nodes"]))) return passed def name(self): return "CrmdStateAudit" def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 class PartitionAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} self.NodeEpoche={} self.NodeState={} self.NodeQuorum={} def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def __call__(self): self.CM.debug("Do Audit %s"%self.name()) passed = 1 ccm_partitions = self.CM.find_partitions() if len(ccm_partitions) == 0: return 1 if len(ccm_partitions) > 1: self.CM.log("Warn: %d cluster partitions detected:" %len(ccm_partitions)) for partition in ccm_partitions: self.CM.log("\t %s" %partition) for partition in ccm_partitions: partition_passed = 0 if self.audit_partition(partition) == 0: passed = 0 return passed def trim_string(self, avalue): if not avalue: return None if len(avalue) > 1: return avalue[:-1] def trim2int(self, avalue): if not avalue: return None if len(avalue) > 1: return int(avalue[:-1]) def audit_partition(self, partition): passed = 0 dc_found = [] dc_allowed_list = [] lowest_epoche = None node_list = partition.split() self.CM.debug("Auditing partition: %s" %(partition)) for node in node_list: if self.CM.ShouldBeStatus[node] != self.CM["up"]: self.CM.log("Warn: Node %s appeared out of nowhere" %(node)) self.CM.ShouldBeStatus[node] = self.CM["up"] # not in itself a reason to fail the audit (not what we're # checking for in this audit) self.NodeState[node] = self.CM.rsh.readaline( node, self.CM["StatusCmd"]%node) self.NodeEpoche[node] = self.CM.rsh.readaline( node, self.CM["EpocheCmd"]) self.NodeQuorum[node] = self.CM.rsh.readaline( node, self.CM["QuorumCmd"]) self.NodeState[node] = self.trim_string(self.NodeState[node]) self.NodeEpoche[node] = self.trim2int(self.NodeEpoche[node]) self.NodeQuorum[node] = self.trim_string(self.NodeQuorum[node]) if lowest_epoche == None or self.NodeEpoche[node] < lowest_epoche: lowest_epoche = self.NodeEpoche[node] for node in node_list: if self.CM.ShouldBeStatus[node] == self.CM["up"]: if self.CM.is_node_dc(node, self.NodeState[node]): dc_found.append(node) if self.NodeEpoche[node] == lowest_epoche: passed = 1 elif not self.NodeEpoche[node]: self.CM.log("Can't determine epoche for DC %s" %(node)) passed = 0 else: self.CM.log("DC %s is not the oldest node (%d vs.
%d)" %(node, self.NodeEpoche[node], lowest_epoche)) passed = 0 if len(dc_found) == 0: self.CM.log("DC not found on any of the %d allowed nodes: %s (of %s)" %(len(dc_allowed_list), str(dc_allowed_list), str(node_list))) elif len(dc_found) > 1: self.CM.log("%d DCs (%s) found in cluster partition: %s" %(len(dc_found), str(dc_found), str(node_list))) passed = 0 if passed == 0: for node in node_list: if self.CM.ShouldBeStatus[node] == self.CM["up"]: self.CM.log("epoche %s : %s" %(self.NodeEpoche[node], self.NodeState[node])) if self.CM.Env["CIBResource"] == 1 and len(dc_found) > 0 and self.NodeQuorum[dc_found[0]]: if self.audit_dc_resources(node_list, dc_found) == 0: passed = 0 Resources = self.CM.Resources() for node in node_list: for resource in Resources: if resource.rid == "rsc_"+node: if resource.IsRunningOn(node) == 0: self.CM.log("Node %s is not running its own resource" %(node)) passed = 0 elif self.CM.Env["CIBResource"] == 1: # no quorum means no resource management self.CM.debug("Not auditing resources - no quorum") return passed def audit_dc_resources(self, node_list, dc_list): passed = 1 Resources = self.CM.Resources() for resource in Resources: if resource.rid == "DcIPaddr": self.CM.debug("Auditing resource: %s" %(resource)) # All DCs are running the resource for dc in dc_list: if self.NodeQuorum[dc]: if resource.IsRunningOn(dc) == 0: self.CM.log("Resource %s not running on DC: %s" %(resource, dc)) passed = 0 # All nodes running the resource are DCs for node in node_list: if resource.IsRunningOn(node): if self.CM.is_node_dc(node, self.NodeState[node]) == 0: self.CM.log("Resource %s is running on non-DC node %s" %("DcIPaddr", node)) passed = 0 return passed def name(self): return "PartitionAudit" def is_applicable(self): if self.CM["Name"] == "linux-ha-v2": return 1 return 0 AllAuditClasses.append(CrmdStateAudit) AllAuditClasses.append(PartitionAudit) AllAuditClasses.append(ResourceAudit) AllAuditClasses.append(HAResourceAudit) def AuditList(cm): result = [] for auditclass in AllAuditClasses: result.append(auditclass(cm)) return result diff --git a/cts/CTSlab.py.in b/cts/CTSlab.py.in index e652db79fd..031e6fa62a 100755 --- a/cts/CTSlab.py.in +++ b/cts/CTSlab.py.in @@ -1,716 +1,716 @@ #!@PYTHON@ '''CTS: Cluster Testing System: Lab environment module ''' __copyright__=''' -Copyright (C) 2001 Alan Robertson +Copyright (C) 2001,2005 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
from UserDict import UserDict import sys, time, types, syslog, whrandom, os, struct, string from CTS import ClusterManager from CM_hb import HeartbeatCM from CTStests import BSC_AddResource from socket import gethostbyname_ex class ResetMechanism: def reset(self, node): raise ValueError("Abstract class member (reset)") class Stonith(ResetMechanism): def __init__(self, sttype="ssh", parm="foobar" , path="@sbindir@/stonith"): self.pathname=path self.configstring=parm self.stonithtype=sttype def reset(self, node): cmdstring = "%s -t '%s' -p '%s' '%s' 2>/dev/null" % (self.pathname , self.stonithtype, self.configstring, node) return (os.system(cmdstring) == 0) class Stonithd(ResetMechanism): def __init__(self, nodes, sttype = 'ssh'): self.sttype = sttype self.nodes = nodes self.query_cmd_pat = '/usr/lib/heartbeat/stonithdtest/apitest 0 %s 4000 0' self.reset_cmd_pat = '/usr/lib/heartbeat/stonithdtest/apitest 1 %s 4000 0' self.poweron_cmd_pat = '/usr/lib/heartbeat/stonithdtest/apitest 2 %s 4000 0' self.poweroff_cmd_pat= '/usr/lib/heartbeat/stonithdtest/apitest 3 %s 4000 0' self.lrmd_add_pat = '/usr/lib/heartbeat/lrmadmin -A %s stonith ' + sttype + ' NULL hostlist=%s' self.lrmd_start_pat = '/usr/lib/heartbeat/lrmadmin -E %s start 0 0 0' self.lrmd_stop_pat = '/usr/lib/heartbeat/lrmadmin -E %s stop 0 0 0' self.lrmd_del_pat = '/usr/lib/heartbeat/lrmadmin -D %s' self.rsc_id = 'my_stonithd_id' self.command = "/usr/bin/ssh -l root -n -x" self.command_noblock = "/usr/bin/ssh -f -l root -n -x" self.stonithd_started_nodes = [] self.fail_reason = '' def _remote_exec(self, node, cmnd): return (os.system("%s %s %s > /dev/null" % (self.command, node, cmnd)) == 0) def _remote_readlines(self, node, cmnd): f = os.popen("%s %s %s" % (self.command, node, cmnd)) return f.readlines() def _stonithd_started(self, node): return node in self.stonithd_started_nodes def _start_stonithd(self, node, hosts): hostlist = string.join(hosts, ',') lrmd_add_cmd = self.lrmd_add_pat % (self.rsc_id, hostlist) ret = self._remote_exec(node, lrmd_add_cmd) if not ret:return ret lrmd_start_cmd = self.lrmd_start_pat % self.rsc_id ret = self._remote_exec(node, lrmd_start_cmd) if not ret:return ret self.stonithd_started_nodes.append(node) return 1 def _stop_stonithd(self, node): lrmd_stop_cmd = self.lrmd_stop_pat % self.rsc_id ret = self._remote_exec(node, lrmd_stop_cmd) if not ret:return ret lrmd_del_cmd = self.lrmd_del_pat % self.rsc_id ret = self._remote_exec(node, lrmd_del_cmd) if not ret:return ret self.stonithd_started_nodes.remove(node) return 1 def _do_stonith(self, init_node, target_node, action): stonithd_started = self._stonithd_started(init_node) if not stonithd_started: ret = self._start_stonithd(init_node, [target_node]) if not ret: self.fail_reason = "failed to start stonithd on node %s" % init_node return ret command = "" if action == "RESET": command = self.reset_cmd_pat % target_node elif action == "POWEROFF": command = self.poweroff_cmd_pat % target_node elif action == "POWERON": command = self.poweron_cmd_pat % target_node else: self.fail_reason = "unknown operation type %s" % action return 0 lines = self._remote_readlines(init_node, command) result = "".join(lines) if not stonithd_started: self._stop_stonithd(init_node) index = result.find("result=0") if index == -1: self.fail_reason = "unexpected stonithd status: %s" % result return 0 return 1 # Should we randomly choose a node as init_node here if init_node is not specified?
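`Stonithd` above never speaks to stonithd directly; it expands `%s` templates into `lrmadmin` and `apitest` command lines and ships them over ssh. A sketch of what the templates expand to for an invented target node (`node1` is hypothetical; the paths and resource id are the ones hard-coded in the class):

```python
# Hypothetical expansion of the Stonithd command templates.
rsc_id = 'my_stonithd_id'
target = 'node1'                       # invented node name

lrmd_add = '/usr/lib/heartbeat/lrmadmin -A %s stonith ssh NULL hostlist=%s' \
           % (rsc_id, target)
lrmd_start = '/usr/lib/heartbeat/lrmadmin -E %s start 0 0 0' % rsc_id
reset_cmd = '/usr/lib/heartbeat/stonithdtest/apitest 1 %s 4000 0' % target

for cmd in (lrmd_add, lrmd_start, reset_cmd):
    print cmd      # each line is what _remote_exec() runs via ssh
```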
def reset(self, init_node, target_node): return self._do_stonith(init_node, target_node, "RESET") def poweron(self, init_node, target_node): return self._do_stonith(init_node, target_node, "POWERON") def poweroff(self, init_node, target_node): return self._do_stonith(init_node, target_node, "POWEROFF") class Logger: TimeFormat = "%b %d %H:%M:%S\t" def __call__(self, lines): raise ValueError("Abstract class member (__call__)") class SysLog(Logger): # http://docs.python.org/lib/module-syslog.html defaultsource="CTS" defaultfacility= syslog.LOG_LOCAL7 map = { "kernel": syslog.LOG_KERN, "user": syslog.LOG_USER, "mail": syslog.LOG_MAIL, "daemon": syslog.LOG_DAEMON, "auth": syslog.LOG_AUTH, "lpr": syslog.LOG_LPR, "news": syslog.LOG_NEWS, "uucp": syslog.LOG_UUCP, "cron": syslog.LOG_CRON, "local0": syslog.LOG_LOCAL0, "local1": syslog.LOG_LOCAL1, "local2": syslog.LOG_LOCAL2, "local3": syslog.LOG_LOCAL3, "local4": syslog.LOG_LOCAL4, "local5": syslog.LOG_LOCAL5, "local6": syslog.LOG_LOCAL6, "local7": syslog.LOG_LOCAL7, } def __init__(self, labinfo): if labinfo.has_key("syslogsource"): self.source=labinfo["syslogsource"] else: self.source=SysLog.defaultsource if labinfo.has_key("SyslogFacility"): self.facility=labinfo["SyslogFacility"] if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] else: self.facility=SysLog.defaultfacility syslog.openlog(self.source, 0, self.facility) def setfacility(self, facility): self.facility = facility if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] syslog.closelog() syslog.openlog(self.source, 0, self.facility) def __call__(self, lines): if isinstance(lines, types.StringType): syslog.syslog(lines) else: for line in lines: syslog.syslog(line) def name(self): return "Syslog" class StdErrLog(Logger): def __init__(self, labinfo): pass def __call__(self, lines): t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): sys.__stderr__.writelines([t, lines, "\n"]) else: for line in lines: sys.__stderr__.writelines([t, line, "\n"]) sys.__stderr__.flush() def name(self): return "StdErrLog" class FileLog(Logger): def __init__(self, labinfo, filename=None): if filename == None: filename=labinfo["LogFileName"] self.logfile=filename import os self.hostname = os.uname()[1]+" " self.source = "CTS: " def __call__(self, lines): fd = open(self.logfile, "a") t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): fd.writelines([t, self.hostname, self.source, lines, "\n"]) else: for line in lines: fd.writelines([t, self.hostname, self.source, line, "\n"]) fd.close() def name(self): return "FileLog" class CtsLab(UserDict): '''This class defines the Lab Environment for the Cluster Test System. It defines those things which are expected to change from test environment to test environment for the same cluster manager. It is where you define the set of nodes that are in your test lab, what kind of reset mechanism you use, etc. This class is derived from a UserDict because we hold many different parameters of different kinds, and this provides a uniform and extensible interface useful for any kind of communication between the user/administrator/tester and CTS. At this point in time, it is the intent of this class to model static configuration and/or environmental data about the environment which doesn't change as the tests proceed. Well-known names (keys) are an important concept in this class.
The HasMinimalKeys member function knows the minimal set of well-known names for the class. The following names are standard (well-known) at this time: nodes An array of the nodes in the cluster reset A ResetMechanism object logger An array of objects that log strings... CMclass The type of ClusterManager we are running (This is a class object, not a class instance) RandSeed Random seed. It is a triple of bytes. (optional) HAdir Base directory for HA installation The CTS code ignores names it doesn't know about/need. The individual tests have access to this information, and it is perfectly acceptable to provide hints, tweaks, fine-tuning directions or other information to the tests through this mechanism. ''' def __init__(self, nodes): self.data = {} self["nodes"] = nodes self.MinimalKeys=["nodes", "reset", "logger", "CMclass", "HAdir"] def HasMinimalKeys(self): 'Return TRUE if our object has the minimal set of keys/values in it' result = 1 for key in self.MinimalKeys: if not self.has_key(key): result = None return result def SupplyDefaults(self): if not self.has_key("logger"): self["logger"] = (SysLog(self), StdErrLog(self)) if not self.has_key("reset"): self["reset"] = Stonith() if not self.has_key("CMclass"): self["CMclass"] = HeartbeatCM if not self.has_key("HAdir"): self["HAdir"] = "@sysconfdir@/ha.d" if not self.has_key("LogFileName"): self["LogFileName"] = "/var/log/ha-log" # # Now set up our random number generator... # self.RandomGen = whrandom.whrandom() # Get a random seed for the random number generator. if self.has_key("RandSeed"): randseed = self["RandSeed"] else: f=open("/dev/urandom", "r") string=f.read(3) f.close() randseed=struct.unpack("BBB", string) self.log("Random seed is: " + str(randseed)) self.randseed=randseed self.RandomGen.seed(randseed[0], randseed[1], randseed[2]) def log(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: logfcn(string.strip(args)) def debug(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: if logfcn.name() != "StdErrLog": logfcn(string.strip(args)) def __setitem__(self, key, value): '''Since this function gets called whenever we modify the dictionary (object), we can (and do) validate those keys that we know how to validate. For the most part, we know how to validate the "MinimalKeys" elements. ''' # # List of nodes in the system # if key == "nodes": self.Nodes = {} for node in value: # I don't think I need the IP address, etc. but this validates # the node name against /etc/hosts and/or DNS, so it's a # GoodThing(tm). self.Nodes[node] = gethostbyname_ex(node) # # Reset Mechanism # elif key == "reset": if not issubclass(value.__class__, ResetMechanism): raise ValueError("'reset' Value must be a subclass" " of ResetMechanism") # # List of Logging Mechanism(s) # elif key == "logger": if len(value) < 1: raise ValueError("Must have at least one logging mechanism") for logger in value: if not callable(logger): raise ValueError("'logger' elements must be callable") self._logfunctions = value # # Cluster Manager Class # elif key == "CMclass": if not issubclass(value, ClusterManager): raise ValueError("'CMclass' must be a subclass of" " ClusterManager") # # Initial Random seed... 
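`SupplyDefaults` earlier in this class seeds the generator from three bytes of `/dev/urandom` unless a `RandSeed` triple was supplied, and logs the seed so any run can be reproduced by passing the same triple back in. The seeding in isolation, using the same Python 2 `whrandom` module this file imports:

```python
import struct, whrandom

f = open("/dev/urandom", "r")
three = f.read(3)
f.close()
randseed = struct.unpack("BBB", three)   # e.g. (156, 104, 218)

rng = whrandom.whrandom()
rng.seed(randseed[0], randseed[1], randseed[2])
print "Random seed is:", randseed        # log it so the run is repeatable
```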
# elif key == "RandSeed": if len(value) != 3: raise ValueError("'Randseed' must be a 3-element list/tuple") for elem in value: if not isinstance(elem, types.IntType): raise ValueError("'Randseed' list must all be ints") self.data[key] = value def IsValidNode(self, node): 'Return TRUE if the given node is valid' return self.Nodes.has_key(node) def __CheckNode(self, node): "Raise a ValueError if the given node isn't valid" if not self.IsValidNode(node): raise ValueError("Invalid node [%s] in CheckNode" % node) def RandomNode(self): '''Choose a random node from the cluster''' return self.RandomGen.choice(self["nodes"]) def ResetNode(self, node): "Reset a node, (normally) using a hardware mechanism" self.__CheckNode(node) return self["reset"].reset(node) def ResetNode2(self, init_node, target_node, reasons): self.__CheckNode(target_node) stonithd = Stonithd(self["nodes"]) ret = stonithd.reset(init_node, target_node) if not ret: reasons.append(stonithd.fail_reason) return ret def usage(arg): print "Illegal argument " + arg print "usage: " + sys.argv[0] \ + " --directory config-directory" \ + " -D config-directory" \ + " --logfile system-logfile-name" \ + " --trunc (truncate logfile before starting)" \ + " -L system-logfile-name" \ + " --limit-nodes maxnumnodes" \ + " --xmit-loss lost-rate(0.0-1.0)" \ + " --recv-loss lost-rate(0.0-1.0)" \ + " --suppressmonitoring" \ + " --syslog-facility syslog-facility" \ + " --facility syslog-facility" \ + " --choose testcase-name" \ + " --test-ip-base ip" \ + " (-2 |"\ + " -v2 |"\ + " --crm |"\ + " --classic)"\ + " --resource-can-stop" \ + " --stonith (1 | 0 | yes | no)" \ + " --standby (1 | 0 | yes | no)" \ + " --fencing (1 | 0 | yes | no)" \ + " --suppress_cib_writes (1 | 0 | yes | no)" \ + " [number-of-iterations]" sys.exit(1) # # A little test code... # if __name__ == '__main__': from CTSaudits import AuditList from CTStests import TestList,RandomTests from CTS import Scenario, InitClusterManager, PingFest, PacketLoss, BasicSanityCheck import CM_hb HAdir = "/etc/ha.d" LogFile = "/var/log/ha-log-local7" DoStonith = 1 DoStandby = 1 DoFencing = 0 NumIter = 500 SuppressMonitoring = None Version = 1 CIBfilename = None CIBResource = 0 ClobberCIB = 0 LimitNodes = 0 TestCase = None LogFacility = None TruncateLog = 0 ResCanStop = 0 XmitLoss = "0.0" RecvLoss = "0.0" IPBase = "127.0.0.10" SuppressCib = 1 DoBSC = 0 # # The values of the rest of the parameters are now properly derived from # the configuration files. # # Stonith is configurable because it's slow, I have a few machines which # don't reboot very reliably, and it can mild damage to your machine if # you're using a real power switch. # # Standby is configurable because the test is very heartbeat specific # and I haven't written the code to set it properly yet. Patches are # being accepted... # Process arguments... 
skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-D" or args[i] == "--directory": skipthis=1 HAdir = args[i+1] elif args[i] == "-l" or args[i] == "--limit-nodes": skipthis=1 LimitNodes = int(args[i+1]) elif args[i] == "-r": CIBResource = 1 elif args[i] == "-L" or args[i] == "--logfile": skipthis=1 LogFile = args[i+1] elif args[i] == "--test-ip-base": skipthis=1 IPBase = args[i+1] elif args[i] == "--trunc": TruncateLog=1 elif args[i] == "-v2": Version=2 elif args[i] == "--stonith": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoStonith=1 elif args[i+1] == "0" or args[i+1] == "no": DoStonith=0 else: usage(args[i+1]) elif args[i] == "--suppress-cib-writes": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": SuppressCib=1 elif args[i+1] == "0" or args[i+1] == "no": SuppressCib=0 else: usage(args[i+1]) elif args[i] == "--bsc": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoBSC=1 elif args[i+1] == "0" or args[i+1] == "no": DoBSC=0 else: usage(args[i+1]) elif args[i] == "--standby": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoStandby=1 elif args[i+1] == "0" or args[i+1] == "no": DoStandby=0 else: usage(args[i+1]) elif args[i] == "--fencing": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": DoFencing=1 elif args[i+1] == "0" or args[i+1] == "no": DoFencing=0 else: usage(args[i+1]) elif args[i] == "--suppressmonitoring": SuppressMonitoring = 1 elif args[i] == "--resource-can-stop": ResCanStop = 1 elif args[i] == "-2" or args[i] == "--crm": Version = 2 elif args[i] == "-1" or args[i] == "--classic": Version = 1 elif args[i] == "--clobber-cib" or args[i] == "-c": ClobberCIB = 1 elif args[i] == "--cib-filename": skipthis=1 CIBfilename = args[i+1] elif args[i] == "--xmit-loss": try: float(args[i+1]) except ValueError: print ("--xmit-loss parameter should be float") usage(args[i+1]) skipthis=1 XmitLoss = args[i+1] elif args[i] == "--recv-loss": try: float(args[i+1]) except ValueError: print ("--recv-loss parameter should be float") usage(args[i+1]) skipthis=1 RecvLoss = args[i+1] elif args[i] == "--choose": skipthis=1 TestCase = args[i+1] elif args[i] == "--syslog-facility" or args[i] == "--facility": skipthis=1 LogFacility = args[i+1] else: NumIter=int(args[i]) # # This reading of HBconfig here is ugly, and I suppose ought to # be done by the Cluster manager. This would probably mean moving the # list of cluster nodes into the ClusterManager class. A good thought # for our Copious Spare Time in the future...
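The loop above parses options with a `skipthis` flag rather than getopt: any option that consumes a value sets `skipthis=1` so the next iteration swallows that value, and an unrecognized bare argument becomes the iteration count. The idiom reduced to a toy option table (the option names here are illustrative only):

```python
import sys

args = sys.argv[1:]
NumIter = 500
LogFile = None
skipthis = None
for i in range(0, len(args)):
    if skipthis:                 # previous option already consumed args[i]
        skipthis = None
        continue
    elif args[i] == "-L" or args[i] == "--logfile":
        skipthis = 1             # value-taking option: eat the next token
        LogFile = args[i+1]
    else:
        NumIter = int(args[i])   # bare argument = number of iterations
```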
# config = CM_hb.HBConfig(HAdir) node_list = config.Parameters["node"] if LogFacility == None: if config.Parameters.has_key("logfacility"): LogFacility = config.Parameters["logfacility"][0] else: LogFacility = "local7" if LimitNodes > 0: if len(node_list) > LimitNodes: print("Limiting the number of nodes configured=%d (max=%d)" %(len(node_list), LimitNodes)) while len(node_list) > LimitNodes: node_list.pop(len(node_list)-1) Environment = CtsLab(node_list) Environment["HAdir"] = HAdir Environment["ClobberCIB"] = ClobberCIB Environment["CIBfilename"] = CIBfilename Environment["CIBResource"] = CIBResource Environment["LogFileName"] = LogFile Environment["DoStonith"] = DoStonith Environment["SyslogFacility"] = LogFacility Environment["DoStandby"] = DoStandby Environment["DoFencing"] = DoFencing Environment["ResCanStop"] = ResCanStop Environment["SuppressMonitoring"] = SuppressMonitoring Environment["XmitLoss"] = XmitLoss Environment["RecvLoss"] = RecvLoss Environment["IPBase"] = IPBase Environment["SuppressCib"] = SuppressCib Environment["DoBSC"] = DoBSC Environment["use_logd"] = 0 if config.Parameters.has_key("use_logd"): Environment["use_logd"] = 1 if Version == 2: from CM_LinuxHAv2 import LinuxHAv2 Environment['CMclass']=LinuxHAv2 #Environment["RandSeed"] = (156, 104, 218) Environment["reset"] = Stonith(sttype="external/ssh", parm=string.join(node_list, " ")) Environment.SupplyDefaults() # Your basic start up the world type of test scenario... #scenario = Scenario( #[ InitClusterManager(Environment) #, PingFest(Environment)]) if Environment["DoBSC"]: scenario = Scenario([ BasicSanityCheck(Environment) ]) NumIter = 2 else: scenario = Scenario( [ InitClusterManager(Environment), PacketLoss(Environment)]) # Create the Cluster Manager object cm = Environment['CMclass'](Environment) if TruncateLog: lf = open(LogFile, "w"); if lf != None: lf.truncate(0) lf.close() cm.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ") cm.log("HA configuration directory: " + Environment["HAdir"]) cm.log("System log files: " + Environment["LogFileName"]) cm.log("Enable Stonith: " + ("%d" % Environment["DoStonith"])) cm.log("Enable Standby: " + ("%d" % Environment["DoStandby"])) if Environment.has_key("SuppressMonitoring") \ and Environment["SuppressMonitoring"]: cm.log("Resource Monitoring is disabled") cm.log("Cluster nodes: " + repr(config.Parameters["node"])) Audits = AuditList(cm) Tests = [] if Environment["DoBSC"]: test = BSC_AddResource(cm) Tests.append(test) elif TestCase != None: for test in TestList(cm): if test.name == TestCase: Tests.append(test) if Tests == []: usage("--choose: No applicable/valid tests chosen") else: Tests = TestList(cm) tests = RandomTests(scenario, cm, Tests, Audits) Environment.RandomTests = tests overall, detailed = tests.run(NumIter) tests.summarize()
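Stripped of option handling, the main block above wires the harness together in four steps: build a CtsLab from the configured node list, choose a cluster-manager class, wrap startup in a Scenario, and hand everything to RandomTests. A condensed sketch of that flow, using the same classes this file already imports and defines, and omitting the many Environment[...] knobs, logging, and error handling the real driver sets:

```python
# Condensed restatement of the CTSlab main block (sketch, not a
# replacement for it).  Assumes CtsLab from this file and the same
# sibling modules it imports.
from CTSaudits import AuditList
from CTStests import TestList, RandomTests
from CTS import Scenario, InitClusterManager, PacketLoss
import CM_hb

config = CM_hb.HBConfig("/etc/ha.d")          # cluster nodes from ha.cf
Environment = CtsLab(config.Parameters["node"])
Environment["HAdir"] = "/etc/ha.d"
Environment.SupplyDefaults()                  # logger, reset, CMclass, ...

scenario = Scenario([InitClusterManager(Environment),
                     PacketLoss(Environment)])
cm = Environment["CMclass"](Environment)      # e.g. HeartbeatCM

tests = RandomTests(scenario, cm, TestList(cm), AuditList(cm))
overall, detailed = tests.run(500)            # 500 iterations, as defaulted
tests.summarize()
```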