diff --git a/cts/CM_ais.py b/cts/CM_ais.py index bddc95a781..48f23ad5d6 100644 --- a/cts/CM_ais.py +++ b/cts/CM_ais.py @@ -1,352 +1,352 @@ '''CTS: Cluster Testing System: AIS dependent modules... ''' __copyright__=''' Copyright (C) 2007 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import os, sys, warnings from cts.CTSvars import * from cts.CM_lha import crm_lha from cts.CTS import Process ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class crm_ais(crm_lha): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_lha.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-ais", "EpocheCmd" : "crm_node -e --openais", "QuorumCmd" : "crm_node -q --openais", "ParitionCmd" : "crm_node -p --openais", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:ChildExit" : "Child process .* exited", # Bad news Regexes. Should never occur. "BadRegexes" : ( r"ERROR:", r"CRIT:", r"TRACE:", r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"Child process .* terminated with signal 11", r"Executing .* fencing operation", r"LogActions: Recover", # Not inherently bad, but worth tracking - r"No need to invoke the TE", - r"ping.*: DEBUG: Updated connected = 0", - r"Digest mis-match:", + #r"No need to invoke the TE", + #r"ping.*: DEBUG: Updated connected = 0", + #r"Digest mis-match:", ), }) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "crm_mon:", "crmadmin:", "update_trace_data", "async_notify: strange, client not found", "ERROR: Message hist queue is filling up" ] return [] def NodeUUID(self, node): return node def ais_components(self): fullcomplist = {} self.complist = [] self.common_ignore = [ "Pending action:", "ERROR: crm_log_message_adv:", "ERROR: MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "apply_xml_diff: Diff application failed!", "crmd: .*Action A_RECOVER .* not supported", "pingd: .*ERROR: send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", "crmd:.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", "ERROR: attrd_connection_destroy: Lost connection to attrd", "nfo: te_fence_node: Executing .* fencing operation", ] fullcomplist["cib"] = Process(self, "cib", pats = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "Respawning .* attrd", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Child process crmd exited .* rc=2", "Child process attrd exited .* rc=1", "crmd: .*Input I_TERMINATE from do_recover", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "crmd:.*do_exit: Could not recover from internal error", ], badnews_ignore = self.common_ignore) fullcomplist["lrmd"] = Process(self, "lrmd", pats = [ "State transition .* S_RECOVERY", "LRM Connection failed", "Respawning .* crmd", "crmd: .*I_ERROR.*lrm_connection_destroy", "Child process crmd exited .* rc=2", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", ], badnews_ignore = self.common_ignore) fullcomplist["crmd"] = Process(self, "crmd", pats = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State transition .* -> S_IDLE", ], badnews_ignore = self.common_ignore) fullcomplist["attrd"] = Process(self, "attrd", pats = [ "crmd: .*ERROR: attrd_connection_destroy: Lost connection to attrd" ], badnews_ignore = self.common_ignore) fullcomplist["pengine"] = Process(self, "pengine", dc_pats = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "Child process crmd exited .* rc=2", "crmd: .*pe_connection_destroy: Connection to the Policy Engine failed", "crmd: .*I_ERROR.*save_cib_contents", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", ], badnews_ignore = self.common_ignore) stonith_ignore = [ "update_failcount: Updating failcount for child_DoFencing", "ERROR: te_connect_stonith: Sign-in failed: triggered a retry", ] stonith_ignore.extend(self.common_ignore) fullcomplist["stonith-ng"] = Process(self, "stonith-ng", process="stonithd", pats = [ "CRIT: stonith_dispatch.* Lost connection to the STONITH service", "tengine_stonith_connection_destroy: Fencing daemon connection failed", "Attempting connection to fencing daemon", "te_connect_stonith: Connected", ], badnews_ignore = stonith_ignore) vgrind = self.Env["valgrind-procs"].split() for key in fullcomplist.keys(): if self.Env["valgrind-tests"]: if key in vgrind: # Processes running under valgrind can't be shot with "killall -9 processname" self.log("Filtering %s from the component list as it is being profiled by valgrind" % key) continue if key == "stonith-ng" and not self.Env["DoFencing"]: continue self.complist.append(fullcomplist[key]) #self.complist = [ fullcomplist["pengine"] ] return self.complist class crm_whitetank(crm_ais): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_ais.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-whitetank", "StartCmd" : CTSvars.INITDIR+"/openais start", "StopCmd" : CTSvars.INITDIR+"/openais stop", "Pat:We_stopped" : "%s.*openais.*pcmk_shutdown: Shutdown complete", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "openais:.*Node %s is now: lost", "Pat:ChildKilled" : "%s openais.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s openais.*Respawning failed child process: %s", "Pat:ChildExit" : "Child process .* exited", }) def Components(self): self.ais_components() aisexec_ignore = [ "ERROR: ais_dispatch: Receiving message .* failed", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "cib: .*ERROR: cib_ais_destroy: AIS connection terminated", #"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated", "crmd:.*do_exit: Could not recover from internal error", "crmd: .*I_TERMINATE.*do_recover", "attrd: .*CRIT: attrd_ais_destroy: Lost connection to OpenAIS service!", "stonithd: .*ERROR: AIS connection terminated", ] aisexec_ignore.extend(self.common_ignore) self.complist.append(Process(self, "aisexec", pats = [ "ERROR: ais_dispatch: AIS connection failed", "crmd: .*ERROR: do_exit: Could not recover from internal error", "pengine: .*Scheduling Node .* for STONITH", "stonithd: .*requests a STONITH operation RESET on node", "stonithd: .*Succeeded to STONITH the node", ], badnews_ignore = aisexec_ignore)) class crm_flatiron(crm_ais): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_ais.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-flatiron", "StartCmd" : "service corosync start", "StopCmd" : "service corosync stop", # The next pattern is too early # "Pat:We_stopped" : "%s.*Service engine unloaded: Pacemaker Cluster Manager", # The next pattern would be preferred, but it doesn't always come out # "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting with status", "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "corosync:.*Node %s is now: lost", "Pat:ChildKilled" : "%s corosync.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s corosync.*Respawning failed child process: %s", }) def Components(self): self.ais_components() corosync_ignore = [ "ERROR: ais_dispatch: Receiving message .* failed", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "cib: .*ERROR: cib_ais_destroy: AIS connection terminated", #"crmd: .*ERROR: crm_ais_destroy: AIS connection terminated", "crmd:.*do_exit: Could not recover from internal error", "crmd: .*I_TERMINATE.*do_recover", "attrd: .*CRIT: attrd_ais_destroy: Lost connection to Corosync service!", "stonithd: .*ERROR: AIS connection terminated", ] # corosync_ignore.extend(self.common_ignore) # self.complist.append(Process(self, "corosync", pats = [ # "ERROR: ais_dispatch: AIS connection failed", # "crmd: .*ERROR: do_exit: Could not recover from internal error", # "pengine: .*Scheduling Node .* for STONITH", # "stonithd: .*requests a STONITH operation RESET on node", # "stonithd: .*Succeeded to STONITH the node", # ], badnews_ignore = corosync_ignore)) return self.complist class crm_mcp(crm_flatiron): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_flatiron.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-mcp", "StartCmd" : "service corosync start; service pacemaker start", "StopCmd" : "service pacemaker stop; service corosync stop", "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "crmd:.*Node %s: .* state=lost .new", "Pat:ChildKilled" : "%s pacemakerd.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s pacemakerd.*Respawning failed child process: %s", }) class crm_cman(crm_flatiron): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, Environment, randseed=None): crm_flatiron.__init__(self, Environment, randseed=randseed) self.update({ "Name" : "crm-cman", "StartCmd" : "service cman start; service pacemaker start", "StopCmd" : "service pacemaker stop; service cman stop;", "EpocheCmd" : "crm_node -e --cman", "QuorumCmd" : "crm_node -q --cman", "ParitionCmd" : "crm_node -p --cman", "Pat:We_stopped" : "%s.*Service engine unloaded: corosync cluster quorum service", "Pat:They_stopped" : "%s crmd:.*Node %s: .* state=lost .new", "Pat:They_dead" : "crmd:.*Node %s: .* state=lost .new", "Pat:ChildKilled" : "%s pacemakerd.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s pacemakerd.*Respawning failed child process: %s", }) diff --git a/cts/CM_lha.py b/cts/CM_lha.py index 2acc0de34c..a4dae13106 100755 --- a/cts/CM_lha.py +++ b/cts/CM_lha.py @@ -1,625 +1,625 @@ '''CTS: Cluster Testing System: LinuxHA v2 dependent modules... ''' __copyright__=''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Additional Audits, Revised Start action, Default Configuration: Copyright (C) 2004 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import os, sys, warnings from cts import CTS from cts.CTSvars import * from cts.CTS import * from cts.CIB import * from cts.CTStests import AuditResource try: from xml.dom.minidom import * except ImportError: sys.__stdout__.write("Python module xml.dom.minidom not found\n") sys.__stdout__.write("Please install python-xml or similar before continuing\n") sys.__stdout__.flush() sys.exit(1) ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class crm_lha(ClusterManager): ''' The linux-ha version 2 cluster manager class. It implements the things we need to talk to and manipulate linux-ha version 2 clusters ''' def __init__(self, Environment, randseed=None): ClusterManager.__init__(self, Environment, randseed=randseed) #HeartbeatCM.__init__(self, Environment, randseed=randseed) self.fastfail = 0 self.clear_cache = 0 self.cib_installed = 0 self.config = None self.cluster_monitor = 0 self.use_short_names = 1 self.update({ "Name" : "crm-lha", "DeadTime" : 300, "StartTime" : 300, # Max time to start up "StableTime" : 30, "StartCmd" : CTSvars.INITDIR+"/heartbeat start > /dev/null 2>&1", "StopCmd" : CTSvars.INITDIR+"/heartbeat stop > /dev/null 2>&1", "ElectionCmd" : "crmadmin -E %s", "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null", "EpocheCmd" : "crm_node -H -e", "QuorumCmd" : "crm_node -H -q", "ParitionCmd" : "crm_node -H -p", "CibQuery" : "cibadmin -Ql", # 300,000 == 5 minutes "ExecuteRscOp" : "lrmadmin -n %s -E %s %s 300000 %d EVERYTIME 2>&1", "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null "ReduceCommCmd" : "", "RestoreCommCmd" : "tc qdisc del dev lo root", "LogFileName" : Environment["LogFileName"], "UUIDQueryCmd" : "crmadmin -N", "StandbyCmd" : "crm_attribute -Q -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_attribute -GQ -U %s -n standby -l forever -d off 2>/dev/null", # Patterns to look for in the log files for various occasions... "Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions "Pat:Local_started" : "%s crmd:.*The local CRM is operational", "Pat:Slave_started" : "%s crmd:.*State transition.*-> S_NOT_DC", "Pat:Master_started" : "%s crmd:.* State transition.*-> S_IDLE", "Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s logd:.*Exiting write process", "Pat:They_stopped" : "%s crmd:.*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", "Pat:ChildKilled" : "%s heartbeat.*%s.*killed by signal 9", "Pat:ChildRespawn" : "%s heartbeat.*Respawning client.*%s", "Pat:ChildExit" : "ERROR: Client .* exited with return code", "Pat:They_fenced" : "stonith-ng: .* Operation 'reboot' .* for host '%s' with device .* returned: 0", "Pat:They_fenced_offset" : "for host '", # Bad news Regexes. Should never occur. "BadRegexes" : ( r"ERROR:", r"CRIT:", r"Shutting down\.", r"Forcing shutdown\.", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r"No need to invoke the TE", r"global_timer_callback:", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", ), }) if self.Env["DoBSC"]: del self["Pat:They_stopped"] del self["Pat:Logd_stopped"] self.Env["use_logd"] = 0 self._finalConditions() self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} self.CibFactory = ConfigFactory(self) self.cib = self.CibFactory.createConfig(self.Env["Schema"]) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "ERROR: crm_abort: crm_glib_handler: ", "ERROR: Message hist queue is filling up", "stonithd: .*CRIT: external_hostlist: 'vmware gethosts' returned an empty hostlist", "stonithd: .*ERROR: Could not list nodes for stonith RA external/vmware.", "pengine: Preventing .* from re-starting", ] return [] def install_config(self, node): if not self.ns.WaitForNodeToComeUp(node): self.log("Node %s is not up." % node) return None if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") # Only install the CIB on the first node, all the other ones will pick it up from there if self.cib_installed == 1: return None self.cib_installed = 1 if self.Env["CIBfilename"] == None: self.log("Installing Generated CIB on node %s" %(node)) self.cib.install(node) else: self.log("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node)) if 0 != self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s %d"%(node)) self.rsh(node, "chown "+CTSvars.CRM_DAEMON_USER+" "+CTSvars.CRM_CONFIG_DIR+"/cib.xml") def prepare(self): '''Finish the Initialization process. Prepare to test...''' self.partitions_expected = 1 for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.unisolate_node(node) self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' watchpats = [ ] watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)") watchpats.append(self["Pat:Slave_started"]%node) watchpats.append(self["Pat:Master_started"]%node) idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterIdle") idle_watch.setwatch() out = self.rsh(node, self["StatusCmd"]%node, 1) self.debug("Node %s status: '%s'" %(node, out)) if not out or string.find(out, 'ok') < 0: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" %(node, "down", self.ShouldBeStatus[node])) self.ShouldBeStatus[node]="down" return 0 if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s: %s" %(node, "up", self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node]="up" # check the output first - because syslog-ng looses messages if string.find(out, 'S_NOT_DC') != -1: # Up and stable return 2 if string.find(out, 'S_IDLE') != -1: # Up and stable return 2 # fall back to syslog-ng and wait if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" %(node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... def node_stable(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) == 2: return 1 self.log("Warn: Node %s not stable" %(node)) return None def partition_stable(self, nodes, timeout=None): watchpats = [ ] watchpats.append("Current ping state: S_IDLE") watchpats.append(self["Pat:DC_IDLE"]) self.debug("Waiting for cluster stability...") if timeout == None: timeout = self["DeadTime"] idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterStable", timeout) idle_watch.setwatch() any_up = 0 for node in self.Env["nodes"]: # have each node dump its current state if self.ShouldBeStatus[node] == "up": self.rsh(node, self["StatusCmd"] %node, 1) any_up = 1 if any_up == 0: self.debug("Cluster is inactive") return 1 ret = idle_watch.look() while ret: self.debug(ret) for node in nodes: if re.search(node, ret): return 1 ret = idle_watch.look() self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout)) return None def cluster_stable(self, timeout=None, double_check=False): partitions = self.find_partitions() for partition in partitions: if not self.partition_stable(partition, timeout): return None if double_check: # Make sure we are really stable and that all resources, # including those that depend on transient node attributes, # are started if they were going to be time.sleep(5) for partition in partitions: if not self.partition_stable(partition, timeout): return None return 1 def is_node_dc(self, node, status_line=None): rc = 0 if not status_line: status_line = self.rsh(node, self["StatusCmd"]%node, 1) if not status_line: rc = 0 elif string.find(status_line, 'S_IDLE') != -1: rc = 1 elif string.find(status_line, 'S_INTEGRATION') != -1: rc = 1 elif string.find(status_line, 'S_FINALIZE_JOIN') != -1: rc = 1 elif string.find(status_line, 'S_POLICY_ENGINE') != -1: rc = 1 elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1: rc = 1 return rc def active_resources(self, node): # [SM].* {node} matches Started, Slave, Master # Stopped wont be matched as it wont include {node} (rc, output) = self.rsh(node, """crm_resource -c""", None) resources = [] for line in output: if re.search("^Resource", line): tmp = AuditResource(self, line) if tmp.type == "primitive" and tmp.host == node: resources.append(tmp.id) return resources def ResourceOp(self, resource, op, node, interval=0, app="lrmadmin"): ''' Execute an operation on a resource ''' cmd = self["ExecuteRscOp"] % (app, resource, op, interval) (rc, lines) = self.rsh(node, cmd, None) if rc == 127: self.log("Command '%s' failed. Binary not installed?" % cmd) for line in lines: self.log("Output: "+line) return rc def ResourceLocation(self, rid): ResourceNodes = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": dummy = 0 rc = self.ResourceOp(rid, "monitor", node) # Strange error codes from remote_py # 65024 == not installed # 2048 == 8 # 1792 == 7 # 0 == 0 if rc == 127: dummy = 1 elif rc == 254 or rc == 65024: dummy = 1 #self.debug("%s is not installed on %s: %d" % (rid, node, rc)) elif rc == 0 or rc == 2048 or rc == 8: ResourceNodes.append(node) elif rc == 7 or rc == 1792: dummy = 1 #self.debug("%s is not running on %s: %d" % (rid, node, rc)) else: # not active on this node? self.log("Unknown rc code for %s on %s: %d" % (rid, node, rc)) return ResourceNodes def find_partitions(self): ccm_partitions = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": partition = self.rsh(node, self["ParitionCmd"], 1) if not partition: self.log("no partition details for %s" %node) elif len(partition) > 2: partition = partition[:-1] found=0 for a_partition in ccm_partitions: if partition == a_partition: found = 1 if found == 0: self.debug("Adding partition from %s: %s" %(node, partition)) ccm_partitions.append(partition) else: self.debug("Partition '%s' from %s is consistent with existing entries" %(partition, node)) else: self.log("bad partition details for %s" %node) else: self.debug("Node %s is down... skipping" %node) return ccm_partitions def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes if not node_list: node_list = self.Env["nodes"] for node in node_list: if self.ShouldBeStatus[node] == "up": quorum = self.rsh(node, self["QuorumCmd"], 1) if string.find(quorum, "1") != -1: return 1 elif string.find(quorum, "0") != -1: return 0 else: self.debug("WARN: Unexpected quorum test result from "+ node +":"+ quorum) return 0 def Components(self): complist = [] common_ignore = [ "Pending action:", "ERROR: crm_log_message_adv:", "ERROR: MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "crmd: .*Action A_RECOVER .* not supported", "ERROR: stonithd_op_result_ready: not signed on", "pingd: .*ERROR: send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", "crmd:.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", ] stonith_ignore = [ "ERROR: stonithd_signon: ", "update_failcount: Updating failcount for child_DoFencing", "ERROR: te_connect_stonith: Sign-in failed: triggered a retry", "lrmd: .*ERROR: cl_get_value: wrong argument (reply)", "lrmd: .*ERROR: is_expected_msg:.* null message", "lrmd: .*ERROR: stonithd_receive_ops_result failed.", ] stonith_ignore.extend(common_ignore) ccm_ignore = [ "ERROR: get_channel_token: No reply message - disconnected" ] ccm_ignore.extend(common_ignore) ccm = Process(self, "ccm", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "CCM connection appears to have failed", "crmd: .*Action A_RECOVER .* not supported", "crmd: .*Input I_TERMINATE from do_recover", "Exiting to recover from CCM connection failure", "crmd:.*do_exit: Could not recover from internal error", "crmd: .*I_ERROR.*(ccm_dispatch|crmd_cib_connection_destroy)", "crmd .*exited with return code 2.", "attrd .*exited with return code 1.", "cib .*exited with return code 2.", # Not if it was fenced # "A new node joined the cluster", # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", # "Processing I_NODE_JOIN:.* cause=C_HA_MESSAGE", # "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN", "State transition S_STARTING -> S_PENDING", ], badnews_ignore = ccm_ignore) cib = Process(self, "cib", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "Lost connection to the CIB service", "Connection to the CIB terminated...", "crmd: .*Input I_TERMINATE from do_recover", "crmd: .*I_ERROR.*crmd_cib_connection_destroy", "crmd:.*do_exit: Could not recover from internal error", "crmd .*exited with return code 2.", "attrd .*exited with return code 1.", ], badnews_ignore = common_ignore) lrmd = Process(self, "lrmd", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "LRM Connection failed", "crmd: .*I_ERROR.*lrm_connection_destroy", "State transition S_STARTING -> S_PENDING", "crmd: .*Input I_TERMINATE from do_recover", "crmd:.*do_exit: Could not recover from internal error", "crmd .*exited with return code 2.", ], badnews_ignore = common_ignore) crmd = Process(self, "crmd", triggersreboot=self.fastfail, pats = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", "State transition .* S_IDLE", "State transition S_STARTING -> S_PENDING", ], badnews_ignore = common_ignore) pengine = Process(self, "pengine", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "crmd .*exited with return code 2.", "crmd: .*Input I_TERMINATE from do_recover", "crmd: .*do_exit: Could not recover from internal error", "crmd: .*CRIT: pe_connection_destroy: Connection to the Policy Engine failed", "crmd: .*I_ERROR.*save_cib_contents", "crmd .*exited with return code 2.", ], badnews_ignore = common_ignore, dc_only=1) if self.Env["DoFencing"] == 1 : complist.append(Process(self, "stoniths", triggersreboot=self.fastfail, dc_pats = [ "crmd: .*CRIT: tengine_stonith_connection_destroy: Fencing daemon connection failed", "Attempting connection to fencing daemon", "te_connect_stonith: Connected", ], badnews_ignore = stonith_ignore)) if self.fastfail == 0: ccm.pats.extend([ "attrd .* exited with return code 1", "ERROR: Respawning client .*attrd", "cib .* exited with return code 2", "ERROR: Respawning client .*cib", "crmd .* exited with return code 2", "ERROR: Respawning client .*crmd" ]) cib.pats.extend([ "attrd .* exited with return code 1", "ERROR: Respawning client .*attrd", "crmd .* exited with return code 2", "ERROR: Respawning client .*crmd" ]) lrmd.pats.extend([ "crmd .* exited with return code 2", "ERROR: Respawning client .*crmd" ]) pengine.pats.extend([ "ERROR: Respawning client .*crmd" ]) complist.append(ccm) complist.append(cib) complist.append(lrmd) complist.append(crmd) complist.append(pengine) return complist def NodeUUID(self, node): lines = self.rsh(node, self["UUIDQueryCmd"], 1) for line in lines: self.debug("UUIDLine:"+ line) m = re.search(r'%s.+\((.+)\)' % node, line) if m: return m.group(1) return "" def StandbyStatus(self, node): out=self.rsh(node, self["StandbyQueryCmd"]%node, 1) if not out: return "off" out = out[:-1] self.debug("Standby result: "+out) return out # status == "on" : Enter Standby mode # status == "off": Enter Active mode def SetStandbyMode(self, node, status): current_status = self.StandbyStatus(node) cmd = self["StandbyCmd"] % (node, status) ret = self.rsh(node, cmd) return True ####################################################################### # # A little test code... # # Which you are advised to completely ignore... # ####################################################################### if __name__ == '__main__': pass diff --git a/cts/CTS.py b/cts/CTS.py index e50395fa63..b67efa3db5 100644 --- a/cts/CTS.py +++ b/cts/CTS.py @@ -1,1451 +1,1451 @@ '''CTS: Cluster Testing System: Main module Classes related to testing high-availability clusters... ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import types, string, select, sys, time, re, os, struct, signal import time, syslog, random, traceback, base64, pickle, binascii, fcntl from socket import gethostbyname_ex from UserDict import UserDict from subprocess import Popen,PIPE from cts.CTSvars import * class CtsLab(UserDict): '''This class defines the Lab Environment for the Cluster Test System. It defines those things which are expected to change from test environment to test environment for the same cluster manager. It is where you define the set of nodes that are in your test lab what kind of reset mechanism you use, etc. This class is derived from a UserDict because we hold many different parameters of different kinds, and this provides provide a uniform and extensible interface useful for any kind of communication between the user/administrator/tester and CTS. At this point in time, it is the intent of this class to model static configuration and/or environmental data about the environment which doesn't change as the tests proceed. Well-known names (keys) are an important concept in this class. The HasMinimalKeys member function knows the minimal set of well-known names for the class. The following names are standard (well-known) at this time: nodes An array of the nodes in the cluster reset A ResetMechanism object logger An array of objects that log strings... CMclass The type of ClusterManager we are running (This is a class object, not a class instance) RandSeed Random seed. It is a triple of bytes. (optional) The CTS code ignores names it doesn't know about/need. The individual tests have access to this information, and it is perfectly acceptable to provide hints, tweaks, fine-tuning directions or other information to the tests through this mechanism. ''' def __init__(self): self.data = {} self.rsh = RemoteExec(self) self.RandomGen = random.Random() self.Scenario = None # Get a random seed for the random number generator. self["LogWatcher"] = "any" self["LogFileName"] = "/var/log/messages" self["OutputFile"] = None self["SyslogFacility"] = "daemon" self["CMclass"] = None self["logger"] = ([StdErrLog(self)]) self.SeedRandom() def SeedRandom(self, seed=None): if not seed: seed = int(time.time()) if self.has_key("RandSeed"): self.log("New random seed is: " + str(seed)) else: self.log("Random seed is: " + str(seed)) self["RandSeed"] = seed self.RandomGen.seed(str(seed)) def HasMinimalKeys(self): 'Return TRUE if our object has the minimal set of keys/values in it' result = 1 for key in self.MinimalKeys: if not self.has_key(key): result = None return result def log(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: logfcn(string.strip(args)) def debug(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: if logfcn.name() != "StdErrLog": logfcn("debug: %s" % string.strip(args)) def dump(self): keys = [] for key in self.keys(): keys.append(key) keys.sort() for key in keys: self.debug("Environment["+key+"]:\t"+str(self[key])) def run(self, Scenario, Iterations): if not Scenario: self.log("No scenario was defined") return 1 self.log("Cluster nodes: ") for node in self["nodes"]: self.log(" * %s" % (node)) if not Scenario.SetUp(): return 1 try : Scenario.run(Iterations) except : self.log("Exception by %s" % sys.exc_info()[0]) for logmethod in self["logger"]: traceback.print_exc(50, logmethod) Scenario.summarize() Scenario.TearDown() return 1 #ClusterManager.oprofileSave(Iterations) Scenario.TearDown() Scenario.summarize() if Scenario.Stats["failure"] > 0: return Scenario.Stats["failure"] elif Scenario.Stats["success"] != Iterations: self.log("No failure count but success != requested iterations") return 1 return 0 def __setitem__(self, key, value): '''Since this function gets called whenever we modify the dictionary (object), we can (and do) validate those keys that we know how to validate. For the most part, we know how to validate the "MinimalKeys" elements. ''' # # List of nodes in the system # if key == "nodes": self.Nodes = {} for node in value: # I don't think I need the IP address, etc. but this validates # the node name against /etc/hosts and/or DNS, so it's a # GoodThing(tm). try: self.Nodes[node] = gethostbyname_ex(node) except: print node+" not found in DNS... aborting" raise # # List of Logging Mechanism(s) # elif key == "logger": if len(value) < 1: raise ValueError("Must have at least one logging mechanism") for logger in value: if not callable(logger): raise ValueError("'logger' elements must be callable") self._logfunctions = value # # Cluster Manager Class # elif key == "CMclass": if value and not issubclass(value, ClusterManager): raise ValueError("'CMclass' must be a subclass of" " ClusterManager") # # Initial Random seed... # #elif key == "RandSeed": # if len(value) != 3: # raise ValueError("'Randseed' must be a 3-element list/tuple") # for elem in value: # if not isinstance(elem, types.IntType): # raise ValueError("'Randseed' list must all be ints") self.data[key] = value def IsValidNode(self, node): 'Return TRUE if the given node is valid' return self.Nodes.has_key(node) def __CheckNode(self, node): "Raise a ValueError if the given node isn't valid" if not self.IsValidNode(node): raise ValueError("Invalid node [%s] in CheckNode" % node) def RandomNode(self): '''Choose a random node from the cluster''' return self.RandomGen.choice(self["nodes"]) class Logger: TimeFormat = "%b %d %H:%M:%S\t" def __call__(self, lines): raise ValueError("Abstract class member (__call__)") def write(self, line): return self(line.rstrip()) def writelines(self, lines): for s in lines: self.write(s) return 1 def flush(self): return 1 def isatty(self): return None class SysLog(Logger): # http://docs.python.org/lib/module-syslog.html defaultsource="CTS" map = { "kernel": syslog.LOG_KERN, "user": syslog.LOG_USER, "mail": syslog.LOG_MAIL, "daemon": syslog.LOG_DAEMON, "auth": syslog.LOG_AUTH, "lpr": syslog.LOG_LPR, "news": syslog.LOG_NEWS, "uucp": syslog.LOG_UUCP, "cron": syslog.LOG_CRON, "local0": syslog.LOG_LOCAL0, "local1": syslog.LOG_LOCAL1, "local2": syslog.LOG_LOCAL2, "local3": syslog.LOG_LOCAL3, "local4": syslog.LOG_LOCAL4, "local5": syslog.LOG_LOCAL5, "local6": syslog.LOG_LOCAL6, "local7": syslog.LOG_LOCAL7, } def __init__(self, labinfo): if labinfo.has_key("syslogsource"): self.source=labinfo["syslogsource"] else: self.source=SysLog.defaultsource self.facility="daemon" if labinfo.has_key("SyslogFacility") and labinfo["SyslogFacility"]: if SysLog.map.has_key(labinfo["SyslogFacility"]): self.facility=labinfo["SyslogFacility"] else: raise ValueError("%s: bad syslog facility"%labinfo["SyslogFacility"]) self.facility=SysLog.map[self.facility] syslog.openlog(self.source, 0, self.facility) def setfacility(self, facility): self.facility = facility if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] syslog.closelog() syslog.openlog(self.source, 0, self.facility) def __call__(self, lines): if isinstance(lines, types.StringType): syslog.syslog(lines) else: for line in lines: syslog.syslog(line) def name(self): return "Syslog" class StdErrLog(Logger): def __init__(self, labinfo): pass def __call__(self, lines): t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): sys.__stderr__.writelines([t, lines, "\n"]) else: for line in lines: sys.__stderr__.writelines([t, line, "\n"]) sys.__stderr__.flush() def name(self): return "StdErrLog" class FileLog(Logger): def __init__(self, labinfo, filename=None): if filename == None: filename=labinfo["LogFileName"] self.logfile=filename import os self.hostname = os.uname()[1]+" " self.source = "CTS: " def __call__(self, lines): fd = open(self.logfile, "a") t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): fd.writelines([t, self.hostname, self.source, lines, "\n"]) else: for line in lines: fd.writelines([t, self.hostname, self.source, line, "\n"]) fd.close() def name(self): return "FileLog" class RemoteExec: '''This is an abstract remote execution class. It runs a command on another machine - somehow. The somehow is up to us. This particular class uses ssh. Most of the work is done by fork/exec of ssh or scp. ''' def __init__(self, Env=None, silent=False): self.Env = Env self.silent = silent # -n: no stdin, -x: no X11, # -o ServerAliveInterval=5 disconnect after 3*5s if the server stops responding self.Command = "ssh -l root -n -x -o ServerAliveInterval=5 -o ConnectTimeout=10 -o TCPKeepAlive=yes -o ServerAliveCountMax=3 " # -B: batch mode, -q: no stats (quiet) self.CpCommand = "scp -B -q" self.OurNode=string.lower(os.uname()[1]) def enable_qarsh(self): # http://nstraz.wordpress.com/2008/12/03/introducing-qarsh/ self.log("Using QARSH for connections to cluster nodes") self.Command = "qarsh -l root" self.CpCommand = "qacp" def _fixcmd(self, cmd): return re.sub("\'", "'\\''", cmd) def _cmd(self, *args): '''Compute the string that will run the given command on the given remote system''' args= args[0] sysname = args[0] command = args[1] #print "sysname: %s, us: %s" % (sysname, self.OurNode) if sysname == None or string.lower(sysname) == self.OurNode or sysname == "localhost": ret = command else: ret = self.Command + " " + sysname + " '" + self._fixcmd(command) + "'" #print ("About to run %s\n" % ret) return ret def log(self, args): if not self.silent: if not self.Env: print (args) else: self.Env.log(args) def debug(self, args): if not self.silent: if not self.Env: print (args) else: self.Env.debug(args) def __call__(self, node, command, stdout=0, synchronous=1, silent=False, blocking=True): '''Run the given command on the given remote system If you call this class like a function, this is the function that gets called. It just runs it roughly as though it were a system() call on the remote machine. The first argument is name of the machine to run it on. ''' rc = 0 result = None if not synchronous: proc = Popen(self._cmd([node, command]), stdout = PIPE, stderr = PIPE, close_fds = True, shell = True) if not silent: self.debug("cmd: async: target=%s, rc=%d: %s" % (node, proc.pid, command)) if proc.pid > 0: return 0 return -1 proc = Popen(self._cmd([node, command]), stdout = PIPE, stderr = PIPE, close_fds = True, shell = True) #if not blocking: # fcntl.fcntl(proc.stdout.fileno(), fcntl.F_SETFL, os.O_NONBLOCK) if proc.stdout: if stdout == 1: result = proc.stdout.readline() else: result = proc.stdout.readlines() proc.stdout.close() else: self.log("No stdout stream") rc = proc.wait() if not silent: self.debug("cmd: target=%s, rc=%d: %s" % (node, rc, command)) if stdout == 1: return result if proc.stderr: errors = proc.stderr.readlines() proc.stderr.close() if not silent: for err in errors: self.debug("cmd: stderr: %s" % err) if stdout == 0: if not silent and result: for line in result: self.debug("cmd: stdout: %s" % line) return rc return (rc, result) def cp(self, source, target, silent=False): '''Perform a remote copy''' cpstring = self.CpCommand + " \'" + source + "\'" + " \'" + target + "\'" rc = os.system(cpstring) if not silent: self.debug("cmd: rc=%d: %s" % (rc, cpstring)) return rc has_log_watcher = {} log_watcher_bin = "/tmp/cts_log_watcher.py" log_watcher = """ import sys, os, fcntl ''' Remote logfile reader for CTS Reads a specified number of lines from the supplied offset Returns the current offset Contains logic for handling truncation ''' limit = 0 offset = 0 prefix = '' filename = '/var/log/messages' skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == '-l' or args[i] == '--limit': skipthis=1 limit = int(args[i+1]) elif args[i] == '-f' or args[i] == '--filename': skipthis=1 filename = args[i+1] elif args[i] == '-o' or args[i] == '--offset': skipthis=1 offset = args[i+1] elif args[i] == '-p' or args[i] == '--prefix': skipthis=1 prefix = args[i+1] logfile=open(filename, 'r') logfile.seek(0, os.SEEK_END) newsize=logfile.tell() if offset != 'EOF': offset = int(offset) if newsize >= offset: logfile.seek(offset) else: print prefix + ('File truncated from %d to %d' % (offset, newsize)) if (newsize*1.05) < offset: logfile.seek(0) # else: we probably just lost a few logs after a fencing op # continue from the new end # TODO: accept a timestamp and discard all messages older than it # Don't block when we reach EOF fcntl.fcntl(logfile.fileno(), fcntl.F_SETFL, os.O_NONBLOCK) count = 0 while True: if logfile.tell() > newsize: break elif limit and count >= limit: break line = logfile.readline() if not line: break print line.strip() count += 1 print prefix + 'Last read: %d, limit=%d, count=%d' % (logfile.tell(), limit, count) logfile.close() """ class SearchObj: def __init__(self, Env, filename, host=None): self.Env = Env self.host = host self.filename = filename self.cache = [] self.offset = "EOF" if host == None: host = "localhost" global has_log_watcher if not has_log_watcher.has_key(host): global log_watcher global log_watcher_bin self.debug("Installing %s on %s" % (log_watcher_bin, host)) self.Env.rsh(host, '''echo "%s" > %s''' % (log_watcher, log_watcher_bin), silent=True) has_log_watcher[host] = 1 self.next() def __str__(self): if self.host: return "%s:%s" % (self.host, self.filename) return self.filename def log(self, args): message = "lw: %s: %s" % (self, args) if not self.Env: print (message) else: self.Env.log(message) def debug(self, args): message = "lw: %s: %s" % (self, args) if not self.Env: print (message) else: self.Env.debug(message) def next(self): cache = [] if not len(self.cache): global log_watcher_bin (rc, lines) = self.Env.rsh( self.host, "python %s -p CTSwatcher: -f %s -o %s" % (log_watcher_bin, self.filename, self.offset), stdout=None, silent=True, blocking=False) for line in lines: match = re.search("^CTSwatcher:Last read: (\d+)", line) if match: last_offset = self.offset self.offset = match.group(1) #if last_offset == "EOF": self.debug("Got %d lines, new offset: %s" % (len(lines), self.offset)) elif re.search("^CTSwatcher:.*truncated", line): self.log(line) elif re.search("^CTSwatcher:", line): self.debug("Got control line: "+ line) else: cache.append(line) return cache class LogWatcher(RemoteExec): '''This class watches logs for messages that fit certain regular expressions. Watching logs for events isn't the ideal way to do business, but it's better than nothing :-) On the other hand, this class is really pretty cool ;-) The way you use this class is as follows: Construct a LogWatcher object Call setwatch() when you want to start watching the log Call look() to scan the log looking for the patterns ''' def __init__(self, Env, log, regexes, name="Anon", timeout=10, debug_level=None, silent=False): '''This is the constructor for the LogWatcher class. It takes a log name to watch, and a list of regular expressions to watch for." ''' RemoteExec.__init__(self, Env) # Validate our arguments. Better sooner than later ;-) for regex in regexes: assert re.compile(regex) self.name = name self.regexes = regexes self.filename = log self.debug_level = debug_level self.whichmatch = -1 self.unmatched = None self.file_list = [] self.line_cache = [] if not silent: for regex in self.regexes: self.debug("Looking for regex: "+regex) self.Timeout = int(timeout) self.returnonlymatch = None def debug(self, args): message = "lw: %s: %s" % (self.name, args) if not self.Env: print (message) else: self.Env.debug(message) def setwatch(self): '''Mark the place to start watching the log from. ''' if self.Env["LogWatcher"] == "remote": for node in self.Env["nodes"]: self.file_list.append(SearchObj(self.Env, self.filename, node)) else: self.file_list.append(SearchObj(self.Env, self.filename)) def __del__(self): if self.debug_level > 1: self.debug("Destroy") def ReturnOnlyMatch(self, onlymatch=1): '''Specify one or more subgroups of the match to return rather than the whole string http://www.python.org/doc/2.5.2/lib/match-objects.html ''' self.returnonlymatch = onlymatch def __get_lines(self): if not len(self.file_list): raise ValueError("No sources to read from") for f in self.file_list: lines = f.next() if len(lines): self.line_cache.extend(lines) def look(self, timeout=None, silent=False): '''Examine the log looking for the given patterns. It starts looking from the place marked by setwatch(). This function looks in the file in the fashion of tail -f. It properly recovers from log file truncation, but not from removing and recreating the log. It would be nice if it recovered from this as well :-) We return the first line which matches any of our patterns. ''' if timeout == None: timeout = self.Timeout lines=0 begin=time.time() end=begin+timeout+1 if self.debug_level > 2: self.debug("starting single search: timeout=%d, begin=%d, end=%d" % (timeout, begin, end)) self.__get_lines() while True: if len(self.line_cache): lines += 1 line = self.line_cache[0] self.line_cache.remove(line) which=-1 if re.search("CTS:", line): continue if self.debug_level > 2: self.debug("Processing: "+ line) for regex in self.regexes: which=which+1 if self.debug_level > 2: self.debug("Comparing line to: "+ regex) #matchobj = re.search(string.lower(regex), string.lower(line)) matchobj = re.search(regex, line) if matchobj: self.whichmatch=which if self.returnonlymatch: return matchobj.group(self.returnonlymatch) else: self.debug("Matched: "+line) if self.debug_level > 1: self.debug("With: "+ regex) return line elif timeout > 0 and end > time.time(): time.sleep(1) self.__get_lines() elif timeout > 0: # Grab any relevant messages that might have arrived since # the last time the buffer was populated self.__get_lines() # Don't come back here again timeout = 0 else: self.debug("Single search terminated: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), lines)) return None self.debug("How did we get here") return None def lookforall(self, timeout=None, allow_multiple_matches=None, silent=False): '''Examine the log looking for ALL of the given patterns. It starts looking from the place marked by setwatch(). We return when the timeout is reached, or when we have found ALL of the regexes that were part of the watch ''' if timeout == None: timeout = self.Timeout save_regexes = self.regexes returnresult = [] if not silent: self.debug("starting search: timeout=%d" % timeout) for regex in self.regexes: if self.debug_level > 2: self.debug("Looking for regex: "+regex) while (len(self.regexes) > 0): oneresult = self.look(timeout) if not oneresult: self.unmatched = self.regexes self.matched = returnresult self.regexes = save_regexes return None returnresult.append(oneresult) if not allow_multiple_matches: del self.regexes[self.whichmatch] else: # Allow multiple regexes to match a single line tmp_regexes = self.regexes self.regexes = [] which = 0 for regex in tmp_regexes: matchobj = re.search(regex, oneresult) if not matchobj: self.regexes.append(regex) self.unmatched = None self.matched = returnresult self.regexes = save_regexes return returnresult class NodeStatus: def __init__(self, Env): self.Env = Env def IsNodeBooted(self, node): '''Return TRUE if the given node is booted (responds to pings)''' return self.Env.rsh("localhost", "ping -nq -c1 -w1 %s" % node, silent=True) == 0 def IsSshdUp(self, node): rc = self.Env.rsh(node, "true", silent=True) return rc == 0 def WaitForNodeToComeUp(self, node, Timeout=300): '''Return TRUE when given node comes up, or None/FALSE if timeout''' timeout=Timeout anytimeouts=0 while timeout > 0: if self.IsNodeBooted(node) and self.IsSshdUp(node): if anytimeouts: # Fudge to wait for the system to finish coming up time.sleep(30) self.Env.debug("Node %s now up" % node) return 1 time.sleep(30) if (not anytimeouts): self.Env.debug("Waiting for node %s to come up" % node) anytimeouts=1 timeout = timeout - 1 self.Env.log("%s did not come up within %d tries" % (node, Timeout)) answer = raw_input('Continue? [nY]') if answer and answer == "n": raise ValueError("%s did not come up within %d tries" % (node, Timeout)) def WaitForAllNodesToComeUp(self, nodes, timeout=300): '''Return TRUE when all nodes come up, or FALSE if timeout''' for node in nodes: if not self.WaitForNodeToComeUp(node, timeout): return None return 1 class ClusterManager(UserDict): '''The Cluster Manager class. This is an subclass of the Python dictionary class. (this is because it contains lots of {name,value} pairs, not because it's behavior is that terribly similar to a dictionary in other ways.) This is an abstract class which class implements high-level operations on the cluster and/or its cluster managers. Actual cluster managers classes are subclassed from this type. One of the things we do is track the state we think every node should be in. ''' def __InitialConditions(self): #if os.geteuid() != 0: # raise ValueError("Must Be Root!") None def _finalConditions(self): for key in self.keys(): if self[key] == None: raise ValueError("Improper derivation: self[" + key + "] must be overridden by subclass.") def __init__(self, Environment, randseed=None): self.Env = Environment self.__InitialConditions() self.clear_cache = 0 self.TestLoggingLevel=0 self.data = { "up" : "up", # Status meaning up "down" : "down", # Status meaning down "StonithCmd" : "stonith -t baytech -p '10.10.10.100 admin admin' %s", "DeadTime" : 30, # Max time to detect dead node... "StartTime" : 90, # Max time to start up # # These next values need to be overridden in the derived class. # "Name" : None, "StartCmd" : None, "StopCmd" : None, "StatusCmd" : None, #"RereadCmd" : None, "BreakCommCmd" : None, "FixCommCmd" : None, #"TestConfigDir" : None, "LogFileName" : None, #"Pat:Master_started" : None, #"Pat:Slave_started" : None, "Pat:We_stopped" : None, "Pat:They_stopped" : None, "BadRegexes" : None, # A set of "bad news" regexes # to apply to the log } self.rsh = self.Env.rsh self.ShouldBeStatus={} self.ns = NodeStatus(self.Env) self.OurNode=string.lower(os.uname()[1]) def key_for_node(self, node): return node def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] def log(self, args): self.Env.log(args) def debug(self, args): self.Env.debug(args) def prepare(self): '''Finish the Initialization process. Prepare to test...''' for node in self.Env["nodes"]: if self.StataCM(node): self.ShouldBeStatus[node]="up" else: self.ShouldBeStatus[node]="down" self.unisolate_node(node) def upcount(self): '''How many nodes are up?''' count=0 for node in self.Env["nodes"]: if self.ShouldBeStatus[node]=="up": count=count+1 return count def install_helper(self, filename, nodes=None): file_with_path="%s/%s" % (CTSvars.CTS_home, filename) if not nodes: nodes = self.Env["nodes"] self.debug("Installing %s to %s on %s" % (filename, CTSvars.CTS_home, repr(self.Env["nodes"]))) for node in nodes: self.rsh(node, "mkdir -p %s" % CTSvars.CTS_home) self.rsh.cp(file_with_path, "root@%s:%s" % (node, file_with_path)) return file_with_path def install_config(self, node): return None def clear_all_caches(self): if self.clear_cache: for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "down": self.debug("Removing cache file on: "+node) self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache") else: self.debug("NOT Removing cache file on: "+node) def prepare_fencing_watcher(self, node): # If we don't have quorum now but get it as a result of starting this node, # then a bunch of nodes might get fenced if self.HasQuorum(None): return None if not self.has_key("Pat:They_fenced"): return None if not self.has_key("Pat:They_fenced_offset"): return None stonith = None stonithPats = [] for peer in self.Env["nodes"]: if peer != node and self.ShouldBeStatus[peer] != "up": stonithPats.append(self["Pat:They_fenced"] % peer) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonith = LogWatcher(self.Env, self["LogFileName"], stonithPats, "StartaCM", 0) stonith.setwatch() return stonith def fencing_cleanup(self, node, stonith): peer_list = [] # If we just started a node, we may now have quorum (and permission to fence) # Make sure everyone is online before continuing self.ns.WaitForAllNodesToComeUp(self.Env["nodes"]) if not stonith: return peer_list if not self.HasQuorum(None) and len(self.Env["nodes"]) > 2: # We didn't gain quorum - we shouldn't have shot anyone return peer_list # Now see if any states need to be updated self.debug("looking for: " + repr(stonith.regexes)) shot = stonith.look(0) while shot: line = repr(shot) self.debug("Found: "+ line) # Extract node name start = line.find(self["Pat:They_fenced_offset"]) + len(self["Pat:They_fenced_offset"]) peer = line[start:].split("' ")[0] self.debug("Found peer: "+ peer) peer_list.append(peer) self.ShouldBeStatus[peer]="down" self.log(" Peer %s was fenced as a result of %s starting" % (peer, node)) # Get the next one shot = stonith.look(60) # Poll until it comes up if self.Env["at-boot"]: if not self.StataCM(peer): time.sleep(self["StartTime"]) if not self.StataCM(peer): self.log("ERROR: Peer %s failed to restart after being fenced" % peer) return None self.ShouldBeStatus[peer]="up" return peer_list def StartaCM(self, node, verbose=False): '''Start up the cluster manager on a given node''' if verbose: self.log("Starting %s on node %s" %(self["Name"], node)) else: self.debug("Starting %s on node %s" %(self["Name"], node)) ret = 1 if not self.ShouldBeStatus.has_key(node): self.ShouldBeStatus[node] = "down" if self.ShouldBeStatus[node] != "down": return 1 patterns = [] # Technically we should always be able to notice ourselves starting patterns.append(self["Pat:Local_started"] % node) if self.upcount() == 0: patterns.append(self["Pat:Master_started"] % node) else: patterns.append(self["Pat:Slave_started"] % node) watch = LogWatcher( self.Env, self["LogFileName"], patterns, "StartaCM", self["StartTime"]+10) watch.setwatch() self.install_config(node) self.ShouldBeStatus[node] = "any" if self.StataCM(node) and self.cluster_stable(self["DeadTime"]): self.log ("%s was already started" %(node)) return 1 # Clear out the host cache so autojoin can be exercised if self.clear_cache: self.debug("Removing cache file on: "+node) self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache") if not(self.Env["valgrind-tests"]): startCmd = self["StartCmd"] else: if self.Env["valgrind-prefix"]: prefix = self.Env["valgrind-prefix"] else: prefix = "cts" startCmd = """G_SLICE=always-malloc HA_VALGRIND_ENABLED='%s' VALGRIND_OPTS='%s --log-file=/tmp/%s-%s.valgrind' %s""" % ( self.Env["valgrind-procs"], self.Env["valgrind-opts"], prefix, """%p""", self["StartCmd"]) stonith = self.prepare_fencing_watcher(node) if self.rsh(node, startCmd) != 0: self.log ("Warn: Start command failed on node %s" %(node)) return None self.ShouldBeStatus[node]="up" watch_result = watch.lookforall() self.fencing_cleanup(node, stonith) if watch.unmatched: for regex in watch.unmatched: self.log ("Warn: Startup pattern not found: %s" %(regex)) if watch_result and self.cluster_stable(self["DeadTime"]): #self.debug("Found match: "+ repr(watch_result)) return 1 elif self.StataCM(node) and self.cluster_stable(self["DeadTime"]): return 1 self.log ("Warn: Start failed for node %s" %(node)) return None def StartaCMnoBlock(self, node, verbose=False): '''Start up the cluster manager on a given node with none-block mode''' if verbose: self.log("Starting %s on node %s" %(self["Name"], node)) else: self.debug("Starting %s on node %s" %(self["Name"], node)) # Clear out the host cache so autojoin can be exercised if self.clear_cache: self.debug("Removing cache file on: "+node) self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache") if not(self.Env["valgrind-tests"]): startCmd = self["StartCmd"] else: if self.Env["valgrind-prefix"]: prefix = self.Env["valgrind-prefix"] else: prefix = "cts" startCmd = """G_SLICE=always-malloc HA_VALGRIND_ENABLED='%s' VALGRIND_OPTS='%s --log-file=/tmp/%s-%s.valgrind' %s""" % ( self.Env["valgrind-procs"], self.Env["valgrind-opts"], prefix, """%p""", self["StartCmd"]) self.rsh(node, startCmd, synchronous=0) self.ShouldBeStatus[node]="up" return 1 def StopaCM(self, node, verbose=False): '''Stop the cluster manager on a given node''' if verbose: self.log("Stopping %s on node %s" %(self["Name"], node)) else: self.debug("Stopping %s on node %s" %(self["Name"], node)) if self.ShouldBeStatus[node] != "up": return 1 if self.rsh(node, self["StopCmd"]) == 0: # Make sure we can continue even if corosync leaks self.rsh(node, "rm -f /dev/shm/fdata-*") self.ShouldBeStatus[node]="down" self.cluster_stable(self["DeadTime"]) return 1 else: self.log ("Could not stop %s on node %s" %(self["Name"], node)) return None def StopaCMnoBlock(self, node): '''Stop the cluster manager on a given node with none-block mode''' self.debug("Stopping %s on node %s" %(self["Name"], node)) self.rsh(node, self["StopCmd"], synchronous=0) self.ShouldBeStatus[node]="down" return 1 def cluster_stable(self, timeout = None): time.sleep(self["StableTime"]) return 1 def node_stable(self, node): return 1 def RereadCM(self, node): '''Force the cluster manager on a given node to reread its config This may be a no-op on certain cluster managers. ''' rc=self.rsh(node, self["RereadCmd"]) if rc == 0: return 1 else: self.log ("Could not force %s on node %s to reread its config" % (self["Name"], node)) return None def StataCM(self, node): '''Report the status of the cluster manager on a given node''' out=self.rsh(node, self["StatusCmd"], 1) ret= (string.find(out, 'stopped') == -1) try: if ret: if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s" % (node, "up", self.ShouldBeStatus[node])) else: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" % (node, "down", self.ShouldBeStatus[node])) except KeyError: pass if ret: self.ShouldBeStatus[node]="up" else: self.ShouldBeStatus[node]="down" return ret def startall(self, nodelist=None, verbose=False): '''Start the cluster manager on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' ret = 1 map = {} if not nodelist: nodelist=self.Env["nodes"] for node in nodelist: if self.ShouldBeStatus[node] == "down": if not self.StartaCM(node, verbose=verbose): ret = 0 return ret def stopall(self, nodelist=None, verbose=False): '''Stop the cluster managers on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' ret = 1 map = {} if not nodelist: nodelist=self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": if not self.StopaCM(node, verbose=verbose): ret = 0 return ret def rereadall(self, nodelist=None): '''Force the cluster managers on every node in the cluster to reread their config files. We can do it on a subset of the cluster if nodelist is not None. ''' map = {} if not nodelist: nodelist=self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": self.RereadCM(node) def statall(self, nodelist=None): '''Return the status of the cluster managers in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' result={} if not nodelist: nodelist=self.Env["nodes"] for node in nodelist: if self.StataCM(node): result[node] = "up" else: result[node] = "down" return result def isolate_node(self, target, nodes=None): '''isolate the communication between the nodes''' if not nodes: nodes = self.Env["nodes"] for node in nodes: if node != target: rc = self.rsh(target, self["BreakCommCmd"] % self.key_for_node(node)) if rc != 0: self.log("Could not break the communication between %s and %s: %d" % (target, node, rc)) return None else: self.debug("Communication cut between %s and %s" % (target, node)) return 1 def unisolate_node(self, target, nodes=None): '''fix the communication between the nodes''' if not nodes: nodes = self.Env["nodes"] for node in nodes: if node != target: restored = 0 # Limit the amount of time we have asynchronous connectivity for # Restore both sides as simultaneously as possible self.rsh(target, self["FixCommCmd"] % self.key_for_node(node), synchronous=0) self.rsh(node, self["FixCommCmd"] % self.key_for_node(target), synchronous=0) self.debug("Communication restored between %s and %s" % (target, node)) def reducecomm_node(self,node): '''reduce the communication between the nodes''' rc = self.rsh(node, self["ReduceCommCmd"]%(self.Env["XmitLoss"],self.Env["RecvLoss"])) if rc == 0: return 1 else: self.log("Could not reduce the communication between the nodes from node: %s" % node) return None def restorecomm_node(self,node): '''restore the saved communication between the nodes''' rc = 0 if float(self.Env["XmitLoss"])!=0 or float(self.Env["RecvLoss"])!=0 : rc = self.rsh(node, self["RestoreCommCmd"]); if rc == 0: return 1 else: self.log("Could not restore the communication between the nodes from node: %s" % node) return None def HasQuorum(self, node_list): "Return TRUE if the cluster currently has quorum" # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes raise ValueError("Abstract Class member (HasQuorum)") def Components(self): raise ValueError("Abstract Class member (Components)") def oprofileStart(self, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileStart(n) elif node in self.Env["oprofile"]: self.debug("Enabling oprofile on %s" % node) self.rsh(node, "opcontrol --init") self.rsh(node, "opcontrol --setup --no-vmlinux --separate=lib --callgraph=20 --image=all") self.rsh(node, "opcontrol --start") self.rsh(node, "opcontrol --reset") def oprofileSave(self, test, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileSave(test, n) elif node in self.Env["oprofile"]: self.rsh(node, "opcontrol --dump") self.rsh(node, "opcontrol --save=cts.%d" % test) # Read back with: opreport -l session:cts.0 image:/usr/lib/heartbeat/c* if None: self.rsh(node, "opcontrol --reset") else: self.oprofileStop(node) self.oprofileStart(node) def oprofileStop(self, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileStop(n) elif node in self.Env["oprofile"]: self.debug("Stopping oprofile on %s" % node) self.rsh(node, "opcontrol --reset") self.rsh(node, "opcontrol --shutdown 2>&1 > /dev/null") class Resource: ''' This is an HA resource (not a resource group). A resource group is just an ordered list of Resource objects. ''' def __init__(self, cm, rsctype=None, instance=None): self.CM = cm self.ResourceType = rsctype self.Instance = instance self.needs_quorum = 1 def Type(self): return self.ResourceType def Instance(self, nodename): return self.Instance def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. It is analagous to the "status" operation on SystemV init scripts and heartbeat scripts. FailSafe calls it the "exclusive" operation. ''' raise ValueError("Abstract Class member (IsRunningOn)") return None def IsWorkingCorrectly(self, nodename): ''' This member function returns true if our resource is operating correctly on the given node in the cluster. Heartbeat does not require this operation, but it might be called the Monitor operation, which is what FailSafe calls it. For remotely monitorable resources (like IP addresses), they *should* be monitored remotely for testing. ''' raise ValueError("Abstract Class member (IsWorkingCorrectly)") return None def Start(self, nodename): ''' This member function starts or activates the resource. ''' raise ValueError("Abstract Class member (Start)") return None def Stop(self, nodename): ''' This member function stops or deactivates the resource. ''' raise ValueError("Abstract Class member (Stop)") return None def __repr__(self): if (self.Instance and len(self.Instance) > 1): return "{" + self.ResourceType + "::" + self.Instance + "}" else: return "{" + self.ResourceType + "}" class Component: def kill(self, node): None class Process(Component): def __init__(self, cm, name, process=None, dc_only=0, pats=[], dc_pats=[], badnews_ignore=[], triggersreboot=0): self.name = str(name) self.dc_only = dc_only self.pats = pats self.dc_pats = dc_pats self.CM = cm self.badnews_ignore = badnews_ignore self.triggersreboot = triggersreboot if process: self.proc = str(process) else: self.proc = str(name) self.KillCmd = "killall -9 " + self.proc def kill(self, node): if self.CM.rsh(node, self.KillCmd) != 0: self.CM.log ("ERROR: Kill %s failed on node %s" %(self.name,node)) return None return 1 diff --git a/cts/CTSaudits.py b/cts/CTSaudits.py index 5bc15a5f42..5672dd3c2b 100755 --- a/cts/CTSaudits.py +++ b/cts/CTSaudits.py @@ -1,782 +1,782 @@ '''CTS: Cluster Testing System: Audit module ''' __copyright__=''' Copyright (C) 2000, 2001,2005 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import time, os, string, re import CTS class ClusterAudit: def __init__(self, cm): self.CM = cm def __call__(self): raise ValueError("Abstract Class member (__call__)") def is_applicable(self): '''Return TRUE if we are applicable in the current test configuration''' raise ValueError("Abstract Class member (is_applicable)") return 1 def log(self, args): self.CM.log("audit: %s" % args) def debug(self, args): self.CM.debug("audit: %s" % args) def name(self): raise ValueError("Abstract Class member (name)") AllAuditClasses = [ ] class LogAudit(ClusterAudit): def name(self): return "LogAudit" def __init__(self, cm): self.CM = cm def RestartClusterLogging(self, nodes=None): if not nodes: nodes = self.CM.Env["nodes"] self.CM.debug("Restarting logging on: %s" % repr(nodes)) for node in nodes: cmd="service %s restart 2>&1 > /dev/null" % self.CM.Env["syslogd"] if self.CM.rsh(node, cmd, synchronous=0) != 0: self.CM.log ("ERROR: Cannot restart logging on %s [%s failed]" % (node, cmd)) def TestLogging(self): patterns = [] prefix = "Test message from" watch_syslog = None watch_remote = None for node in self.CM.Env["nodes"]: # Look for the node name in two places to make sure # that syslog is logging with the correct hostname patterns.append("%s.*%s %s" % (node, prefix, node)) watch_pref = self.CM.Env["LogWatcher"] if watch_pref == "any" or watch_pref == "syslog": self.CM.Env["LogWatcher"] = "syslog" if watch_pref == "any": self.CM.log("Testing for %s logs" % self.CM.Env["LogWatcher"]) watch_syslog = CTS.LogWatcher(self.CM.Env, self.CM.Env["LogFileName"], patterns, "LogAudit", 30, silent=True) watch_syslog.setwatch() if watch_pref == "any" or watch_pref == "remote": self.CM.Env["LogWatcher"] = "remote" if watch_pref == "any": self.CM.log("Testing for %s logs" % self.CM.Env["LogWatcher"]) watch_remote = CTS.LogWatcher(self.CM.Env, self.CM.Env["LogFileName"], patterns, "LogAudit", 30, silent=True) watch_remote.setwatch() for node in self.CM.Env["nodes"]: cmd="logger -p %s.info %s %s" % (self.CM.Env["SyslogFacility"], prefix, node) if self.CM.rsh(node, cmd, synchronous=0, silent=True) != 0: self.CM.log ("ERROR: Cannot execute remote command [%s] on %s" % (cmd, node)) if watch_syslog: watch = watch_syslog self.CM.Env["LogWatcher"] = "syslog" watch_result = watch.lookforall(silent=True) if not watch.unmatched: if watch_pref == "any": self.CM.log ("Continuing with %s-based log reader" % (self.CM.Env["LogWatcher"])) return 1 if watch_remote: watch = watch_remote self.CM.Env["LogWatcher"] = "remote" watch_result = watch.lookforall(silent=True) if not watch.unmatched: if watch_pref == "any": self.CM.log ("Continuing with %s-based log reader" % (self.CM.Env["LogWatcher"])) return 1 if watch_syslog and watch_syslog.unmatched: for regex in watch_syslog.unmatched: self.CM.log ("Test message [%s] not found in syslog logs." % regex) if watch_remote and watch_remote.unmatched: for regex in watch_remote.unmatched: self.CM.log ("Test message [%s] not found in remote logs." % regex) return 0 def __call__(self): max=3 attempt=0 self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"]) while attempt <= max and self.TestLogging() == 0: attempt = attempt + 1 self.RestartClusterLogging() time.sleep(60*attempt) if attempt > max: self.CM.log("ERROR: Cluster logging unrecoverable.") return 0 return 1 def is_applicable(self): if self.CM.Env["DoBSC"]: return 0 return 1 class DiskAudit(ClusterAudit): def name(self): return "DiskspaceAudit" def __init__(self, cm): self.CM = cm def __call__(self): result=1 dfcmd="df -k /var/log | tail -1 | tr -s ' ' | cut -d' ' -f2" self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"]) for node in self.CM.Env["nodes"]: dfout=self.CM.rsh(node, dfcmd, 1) if not dfout: self.CM.log ("ERROR: Cannot execute remote df command [%s] on %s" % (dfcmd, node)) else: try: idfout = int(dfout) except (ValueError, TypeError): self.CM.log("Warning: df output from %s was invalid [%s]" % (node, dfout)) else: if idfout == 0: self.CM.log("CRIT: Completely out of log disk space on %s" % node) result=None elif idfout <= 1000: self.CM.log("WARN: Low on log disk space (%d Mbytes) on %s" % (idfout, node)) return result def is_applicable(self): if self.CM.Env["DoBSC"]: return 0 return 1 class AuditResource: def __init__(self, cm, line): fields = line.split() self.CM = cm self.line = line self.type = fields[1] self.id = fields[2] self.clone_id = fields[3] self.parent = fields[4] self.rprovider = fields[5] self.rclass = fields[6] self.rtype = fields[7] self.host = fields[8] self.needs_quorum = fields[9] self.flags = int(fields[10]) self.flags_s = fields[11] if self.parent == "NA": self.parent = None def unique(self): if self.flags & int("0x00000020", 16): return 1 return 0 def orphan(self): if self.flags & int("0x00000001", 16): return 1 return 0 def managed(self): if self.flags & int("0x00000002", 16): return 1 return 0 class AuditConstraint: def __init__(self, cm, line): fields = line.split() self.CM = cm self.line = line self.type = fields[1] self.id = fields[2] self.rsc = fields[3] self.target = fields[4] self.score = fields[5] self.rsc_role = fields[6] self.target_role = fields[7] if self.rsc_role == "NA": self.rsc_role = None if self.target_role == "NA": self.target_role = None class PrimitiveAudit(ClusterAudit): def name(self): return "PrimitiveAudit" def __init__(self, cm): self.CM = cm def doResourceAudit(self, resource, quorum): rc=1 active = self.CM.ResourceLocation(resource.id) if len(active) == 1: if quorum: self.debug("Resource %s active on %s" % (resource.id, repr(active))) elif resource.needs_quorum == 1: self.CM.log("Resource %s active without quorum: %s" % (resource.id, repr(active))) rc=0 elif not resource.managed(): self.CM.log("Resource %s not managed. Active on %s" % (resource.id, repr(active))) elif not resource.unique(): # TODO: Figure out a clever way to actually audit these resource types if len(active) > 1: self.debug("Non-unique resource %s is active on: %s" % (resource.id, repr(active))) else: self.debug("Non-unique resource %s is not active" % resource.id) elif len(active) > 1: self.CM.log("Resource %s is active multiple times: %s" % (resource.id, repr(active))) rc=0 elif resource.orphan(): self.debug("Resource %s is an inactive orphan" % resource.id) elif len(self.inactive_nodes) == 0: self.CM.log("WARN: Resource %s not served anywhere" % resource.id) rc=0 elif self.CM.Env["warn-inactive"] == 1: if quorum or not resource.needs_quorum: self.CM.log("WARN: Resource %s not served anywhere (Inactive nodes: %s)" % (resource.id, repr(self.inactive_nodes))) else: self.debug("Resource %s not served anywhere (Inactive nodes: %s)" % (resource.id, repr(self.inactive_nodes))) elif quorum or not resource.needs_quorum: self.debug("Resource %s not served anywhere (Inactive nodes: %s)" % (resource.id, repr(self.inactive_nodes))) return rc def setup(self): self.target = None self.resources = [] self.constraints = [] self.active_nodes = [] self.inactive_nodes = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.active_nodes.append(node) else: self.inactive_nodes.append(node) for node in self.CM.Env["nodes"]: if self.target == None and self.CM.ShouldBeStatus[node] == "up": self.target = node if not self.target: # TODO: In Pacemaker 1.0 clusters we'll be able to run crm_resource # with CIB_file=/path/to/cib.xml even when the cluster isn't running self.debug("No nodes active - skipping %s" % self.name()) return 0 (rc, lines) = self.CM.rsh(self.target, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): self.resources.append(AuditResource(self.CM, line)) elif re.search("^Constraint", line): self.constraints.append(AuditConstraint(self.CM, line)) else: self.CM.log("Unknown entry: %s" % line); return 1 def __call__(self): rc = 1 if not self.setup(): return 1 quorum = self.CM.HasQuorum(None) for resource in self.resources: if resource.type == "primitive": if self.doResourceAudit(resource, quorum) == 0: rc = 0 return rc def is_applicable(self): if self.CM["Name"] == "crm-lha": return 1 if self.CM["Name"] == "crm-ais": return 1 return 0 class GroupAudit(PrimitiveAudit): def name(self): return "GroupAudit" def __call__(self): rc = 1 if not self.setup(): return 1 for group in self.resources: if group.type == "group": first_match = 1 group_location = None for child in self.resources: if child.parent == group.id: nodes = self.CM.ResourceLocation(child.id) if first_match and len(nodes) > 0: group_location = nodes[0] first_match = 0 if len(nodes) > 1: rc = 0 self.CM.log("Child %s of %s is active more than once: %s" % (child.id, group.id, repr(nodes))) elif len(nodes) == 0: # Groups are allowed to be partially active # However we do need to make sure later children aren't running group_location = None self.debug("Child %s of %s is stopped" % (child.id, group.id)) elif nodes[0] != group_location: rc = 0 self.CM.log("Child %s of %s is active on the wrong node (%s) expected %s" % (child.id, group.id, nodes[0], group_location)) else: self.debug("Child %s of %s is active on %s" % (child.id, group.id, nodes[0])) return rc class CloneAudit(PrimitiveAudit): def name(self): return "CloneAudit" def __call__(self): rc = 1 if not self.setup(): return 1 for clone in self.resources: if clone.type == "clone": for child in self.resources: if child.parent == clone.id and child.type == "primitive": self.debug("Checking child %s of %s..." % (child.id, clone.id)) # Check max and node_max # Obtain with: # crm_resource -g clone_max --meta -r child.id # crm_resource -g clone_node_max --meta -r child.id return rc class ColocationAudit(PrimitiveAudit): def name(self): return "ColocationAudit" def crm_location(self, resource): (rc, lines) = self.CM.rsh(self.target, "crm_resource -W -r %s -Q"%resource, None) hosts = [] if rc == 0: for line in lines: fields = line.split() hosts.append(fields[0]) return hosts def __call__(self): rc = 1 if not self.setup(): return 1 for coloc in self.constraints: if coloc.type == "rsc_colocation": source = self.crm_location(coloc.rsc) target = self.crm_location(coloc.target) if len(source) == 0: self.debug("Colocation audit (%s): %s not running" % (coloc.id, coloc.rsc)) else: for node in source: if not node in target: rc = 0 self.CM.log("Colocation audit (%s): %s running on %s (not in %s)" % (coloc.id, coloc.rsc, node, repr(target))) else: self.debug("Colocation audit (%s): %s running on %s (in %s)" % (coloc.id, coloc.rsc, node, repr(target))) return rc class CrmdStateAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def __call__(self): passed = 1 up_are_down = 0 down_are_up = 0 unstable_list = [] for node in self.CM.Env["nodes"]: should_be = self.CM.ShouldBeStatus[node] rc = self.CM.test_node_CM(node) if rc > 0: if should_be == "down": down_are_up = down_are_up + 1 if rc == 1: unstable_list.append(node) elif should_be == "up": up_are_down = up_are_down + 1 if len(unstable_list) > 0: passed = 0 self.CM.log("Cluster is not stable: %d (of %d): %s" %(len(unstable_list), self.CM.upcount(), repr(unstable_list))) if up_are_down > 0: passed = 0 self.CM.log("%d (of %d) nodes expected to be up were down." %(up_are_down, len(self.CM.Env["nodes"]))) if down_are_up > 0: passed = 0 self.CM.log("%d (of %d) nodes expected to be down were up." %(down_are_up, len(self.CM.Env["nodes"]))) return passed def name(self): return "CrmdStateAudit" def is_applicable(self): if self.CM["Name"] == "crm-lha": return 1 if self.CM["Name"] == "crm-ais": return 1 return 0 class CIBAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def __call__(self): passed = 1 ccm_partitions = self.CM.find_partitions() if len(ccm_partitions) == 0: self.debug("\tNo partitions to audit") return 1 for partition in ccm_partitions: self.debug("\tAuditing CIB consistency for: %s" %partition) partition_passed = 0 if self.audit_cib_contents(partition) == 0: passed = 0 return passed def audit_cib_contents(self, hostlist): passed = 1 node0 = None node0_xml = None partition_hosts = hostlist.split() for node in partition_hosts: node_xml = self.store_remote_cib(node, node0) if node_xml == None: self.CM.log("Could not perform audit: No configuration from %s" % node) passed = 0 elif node0 == None: node0 = node node0_xml = node_xml elif node0_xml == None: self.CM.log("Could not perform audit: No configuration from %s" % node0) passed = 0 else: (rc, result) = self.CM.rsh( node0, "crm_diff -VV -cf --new %s --original %s" % (node_xml, node0_xml), None) if rc != 0: self.CM.log("Diff between %s and %s failed: %d" % (node0_xml, node_xml, rc)) passed = 0 for line in result: if not re.search("", line): passed = 0 self.debug("CibDiff[%s-%s]: %s" % (node0, node, line)) else: self.debug("CibDiff[%s-%s] Ignoring: %s" % (node0, node, line)) # self.CM.rsh(node0, "rm -f %s" % node_xml) # self.CM.rsh(node0, "rm -f %s" % node0_xml) return passed def store_remote_cib(self, node, target): combined = "" filename="/tmp/ctsaudit.%s.xml" % node if not target: target = node (rc, lines) = self.CM.rsh(node, self.CM["CibQuery"], None) if rc != 0: self.CM.log("Could not retrieve configuration") return None self.CM.rsh("localhost", "rm -f %s" % filename) for line in lines: self.CM.rsh("localhost", "echo \'%s\' >> %s" % (line[:-1], filename), silent=True) if self.CM.rsh.cp(filename, "root@%s:%s" % (target, filename), silent=True) != 0: self.CM.log("Could not store configuration") return None return filename def name(self): return "CibAudit" def is_applicable(self): if self.CM["Name"] == "crm-lha": return 1 if self.CM["Name"] == "crm-ais": return 1 return 0 class PartitionAudit(ClusterAudit): def __init__(self, cm): self.CM = cm self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} self.NodeEpoche={} self.NodeState={} self.NodeQuorum={} def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def __call__(self): passed = 1 ccm_partitions = self.CM.find_partitions() if ccm_partitions == None or len(ccm_partitions) == 0: return 1 self.CM.cluster_stable(double_check=True) if len(ccm_partitions) != self.CM.partitions_expected: self.CM.log("ERROR: %d cluster partitions detected:" %len(ccm_partitions)) passed = 0 for partition in ccm_partitions: self.CM.log("\t %s" %partition) for partition in ccm_partitions: partition_passed = 0 if self.audit_partition(partition) == 0: passed = 0 return passed def trim_string(self, avalue): if not avalue: return None if len(avalue) > 1: return avalue[:-1] def trim2int(self, avalue): if not avalue: return None if len(avalue) > 1: return int(avalue[:-1]) def audit_partition(self, partition): passed = 1 dc_found = [] dc_allowed_list = [] lowest_epoche = None node_list = partition.split() self.debug("Auditing partition: %s" %(partition)) for node in node_list: if self.CM.ShouldBeStatus[node] != "up": self.CM.log("Warn: Node %s appeared out of nowhere" %(node)) self.CM.ShouldBeStatus[node] = "up" # not in itself a reason to fail the audit (not what we're # checking for in this audit) self.NodeState[node] = self.CM.rsh(node, self.CM["StatusCmd"]%node, 1) self.NodeEpoche[node] = self.CM.rsh(node, self.CM["EpocheCmd"], 1) self.NodeQuorum[node] = self.CM.rsh(node, self.CM["QuorumCmd"], 1) self.debug("Node %s: %s - %s - %s." %(node, self.NodeState[node], self.NodeEpoche[node], self.NodeQuorum[node])) self.NodeState[node] = self.trim_string(self.NodeState[node]) self.NodeEpoche[node] = self.trim2int(self.NodeEpoche[node]) self.NodeQuorum[node] = self.trim_string(self.NodeQuorum[node]) if not self.NodeEpoche[node]: self.CM.log("Warn: Node %s dissappeared: cant determin epoche" %(node)) self.CM.ShouldBeStatus[node] = "down" # not in itself a reason to fail the audit (not what we're # checking for in this audit) elif lowest_epoche == None or self.NodeEpoche[node] < lowest_epoche: lowest_epoche = self.NodeEpoche[node] if not lowest_epoche: self.CM.log("Lowest epoche not determined in %s" % (partition)) passed = 0 for node in node_list: if self.CM.ShouldBeStatus[node] == "up": if self.CM.is_node_dc(node, self.NodeState[node]): dc_found.append(node) if self.NodeEpoche[node] == lowest_epoche: self.debug("%s: OK" % node) elif not self.NodeEpoche[node]: self.debug("Check on %s ignored: no node epoche" % node) elif not lowest_epoche: self.debug("Check on %s ignored: no lowest epoche" % node) else: self.CM.log("DC %s is not the oldest node (%d vs. %d)" %(node, self.NodeEpoche[node], lowest_epoche)) passed = 0 if len(dc_found) == 0: self.CM.log("DC not found on any of the %d allowed nodes: %s (of %s)" %(len(dc_allowed_list), str(dc_allowed_list), str(node_list))) elif len(dc_found) > 1: self.CM.log("%d DCs (%s) found in cluster partition: %s" %(len(dc_found), str(dc_found), str(node_list))) passed = 0 if passed == 0: for node in node_list: if self.CM.ShouldBeStatus[node] == "up": self.CM.log("epoche %s : %s" %(self.NodeEpoche[node], self.NodeState[node])) return passed def name(self): return "PartitionAudit" def is_applicable(self): if self.CM["Name"] == "crm-lha": return 1 if self.CM["Name"] == "crm-ais": return 1 return 0 AllAuditClasses.append(DiskAudit) AllAuditClasses.append(LogAudit) AllAuditClasses.append(CrmdStateAudit) AllAuditClasses.append(PartitionAudit) AllAuditClasses.append(PrimitiveAudit) AllAuditClasses.append(GroupAudit) AllAuditClasses.append(CloneAudit) AllAuditClasses.append(ColocationAudit) AllAuditClasses.append(CIBAudit) def AuditList(cm): result = [] for auditclass in AllAuditClasses: a = auditclass(cm) if a.is_applicable(): result.append(a) return result diff --git a/cts/CTSlab.py b/cts/CTSlab.py index 73cce0a4ac..9fd08c7ec5 100755 --- a/cts/CTSlab.py +++ b/cts/CTSlab.py @@ -1,530 +1,530 @@ #!/usr/bin/python '''CTS: Cluster Testing System: Lab environment module ''' __copyright__=''' Copyright (C) 2001,2005 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. from UserDict import UserDict import sys, types, string, string, signal, os, socket pdir=os.path.dirname(sys.path[0]) sys.path.insert(0, pdir) # So that things work from the source directory try: from cts.CTSvars import * from cts.CM_ais import * from cts.CM_lha import crm_lha from cts.CTSaudits import AuditList from cts.CTStests import TestList from cts.CTSscenarios import * except ImportError: sys.stderr.write("abort: couldn't find cts libraries in [%s]\n" % ' '.join(sys.path)) sys.stderr.write("(check your install and PYTHONPATH)\n") # Now do it again to get more details from cts.CTSvars import * from cts.CM_ais import * from cts.CM_lha import crm_lha from cts.CTSaudits import AuditList from cts.CTStests import TestList from cts.CTSscenarios import * sys.exit(-1) cm = None Tests = [] Chosen = [] scenario = None # Not really used, the handler in def sig_handler(signum, frame) : if cm: cm.log("Interrupted by signal %d"%signum) if scenario: scenario.summarize() if signum == 15 : if scenario: scenario.TearDown() sys.exit(1) class LabEnvironment(CtsLab): def __init__(self): CtsLab.__init__(self) # Get a random seed for the random number generator. self["DoStandby"] = 1 self["DoFencing"] = 1 self["XmitLoss"] = "0.0" self["RecvLoss"] = "0.0" self["ClobberCIB"] = 0 self["CIBfilename"] = None self["CIBResource"] = 0 self["DoBSC"] = 0 self["use_logd"] = 0 self["oprofile"] = [] self["warn-inactive"] = 0 self["ListTests"] = 0 self["benchmark"] = 0 self["Schema"] = "pacemaker-1.0" self["Stack"] = "openais" self["stonith-type"] = "external/ssh" self["stonith-params"] = "hostlist=all,livedangerously=yes" self["logger"] = ([StdErrLog(self)]) self["loop-minutes"] = 60 self["valgrind-prefix"] = None self["valgrind-procs"] = "cib crmd attrd pengine stonith-ng" self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp""" #self["valgrind-opts"] = """--trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp""" self["experimental-tests"] = 0 self["valgrind-tests"] = 0 self["unsafe-tests"] = 1 self["loop-tests"] = 1 self["scenario"] = "random" master = socket.gethostname() # Use the IP where possible to avoid name lookup failures for ip in socket.gethostbyname_ex(master)[2]: if ip != "127.0.0.1": master = ip break; self["cts-master"] = master def usage(arg, status=1): print "Illegal argument " + arg print "usage: " + sys.argv[0] +" [options] number-of-iterations" print "\nCommon options: " print "\t [--at-boot (1|0)], does the cluster software start at boot time" print "\t [--nodes 'node list'], list of cluster nodes separated by whitespace" print "\t [--limit-nodes max], only use the first 'max' cluster nodes supplied with --nodes" print "\t [--stack (heartbeat|ais)], which cluster stack is installed" print "\t [--logfile path], where should the test software look for logs from cluster nodes" print "\t [--outputfile path], optional location for the test software to write logs to" print "\t [--syslog-facility name], which syslog facility should the test software log to" print "\t [--choose testcase-name], run only the named test" print "\t [--list-tests], list the valid tests" print "\t [--benchmark], add the timing information" print "\t " print "Options for release testing: " print "\t [--clobber-cib | -c ] Erase any existing configuration" print "\t [--populate-resources | -r] Generate a sample configuration" print "\t [--test-ip-base ip] Offset for generated IP address resources" print "\t " print "Additional (less common) options: " print "\t [--trunc (truncate logfile before starting)]" print "\t [--xmit-loss lost-rate(0.0-1.0)]" print "\t [--recv-loss lost-rate(0.0-1.0)]" print "\t [--standby (1 | 0 | yes | no)]" print "\t [--fencing (1 | 0 | yes | no)]" print "\t [--stonith (1 | 0 | yes | no | rhcs | lha)]" print "\t [--stonith-type type]" print "\t [--stonith-args name=value]" print "\t [--bsc]" print "\t [--once], run all valid tests once" print "\t [--no-loop-tests], dont run looping/time-based tests" print "\t [--no-unsafe-tests], dont run tests that are unsafe for use with ocfs2/drbd" print "\t [--valgrind-tests], include tests using valgrind" print "\t [--experimental-tests], include experimental tests" print "\t [--oprofile 'node list'], list of cluster nodes to run oprofile on]" print "\t [--qarsh] Use the QARSH backdoor to access nodes instead of SSH" print "\t [--seed random_seed]" print "\t [--set option=value]" sys.exit(status) # # A little test code... # if __name__ == '__main__': Environment = LabEnvironment() rsh = RemoteExec(None, silent=True) NumIter = 0 Version = 1 LimitNodes = 0 TruncateLog = 0 ListTests = 0 HaveSeed = 0 node_list = '' # Set the signal handler signal.signal(15, sig_handler) signal.signal(10, sig_handler) # Process arguments... skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-l" or args[i] == "--limit-nodes": skipthis=1 LimitNodes = int(args[i+1]) elif args[i] == "-r" or args[i] == "--populate-resources": Environment["CIBResource"] = 1 Environment["ClobberCIB"] = 1 elif args[i] == "-L" or args[i] == "--logfile": skipthis=1 Environment["LogFileName"] = args[i+1] elif args[i] == "--outputfile": skipthis=1 Environment["OutputFile"] = args[i+1] elif args[i] == "--test-ip-base": skipthis=1 Environment["IPBase"] = args[i+1] Environment["CIBResource"] = 1 Environment["ClobberCIB"] = 1 elif args[i] == "--oprofile": skipthis=1 Environment["oprofile"] = args[i+1].split(' ') elif args[i] == "--trunc": Environment["TruncateLog"]=1 elif args[i] == "--list-tests" or args[i] == "--list" : Environment["ListTests"]=1 elif args[i] == "--benchmark": Environment["benchmark"]=1 elif args[i] == "--bsc": Environment["DoBSC"] = 1 Environment["scenario"] = "basic-sanity" elif args[i] == "--qarsh": Environment.rsh.enable_qarsh() elif args[i] == "--fencing": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoFencing"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoFencing"] = 0 else: usage(args[i+1]) elif args[i] == "--stonith": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoFencing"]=1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoFencing"]=0 elif args[i+1] == "rhcs": Environment["DoStonith"]=1 Environment["stonith-type"] = "fence_xvm" Environment["stonith-params"] = "pcmk_arg_map=domain:uname" elif args[i+1] == "lha": Environment["DoStonith"]=1 Environment["stonith-type"] = "external/ssh" Environment["stonith-params"] = "hostlist=all,livedangerously=yes" else: usage(args[i+1]) elif args[i] == "--stonith-type": Environment["stonith-type"] = args[i+1] skipthis=1 elif args[i] == "--stonith-args": Environment["stonith-params"] = args[i+1] skipthis=1 elif args[i] == "--standby": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["DoStandby"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["DoStandby"] = 0 else: usage(args[i+1]) elif args[i] == "--clobber-cib" or args[i] == "-c": Environment["ClobberCIB"] = 1 elif args[i] == "--cib-filename": skipthis=1 Environment["CIBfilename"] = args[i+1] elif args[i] == "--xmit-loss": try: float(args[i+1]) except ValueError: print ("--xmit-loss parameter should be float") usage(args[i+1]) skipthis=1 Environment["XmitLoss"] = args[i+1] elif args[i] == "--recv-loss": try: float(args[i+1]) except ValueError: print ("--recv-loss parameter should be float") usage(args[i+1]) skipthis=1 Environment["RecvLoss"] = args[i+1] elif args[i] == "--choose": skipthis=1 Chosen.append(args[i+1]) Environment["scenario"] = "sequence" elif args[i] == "--nodes": skipthis=1 node_list = args[i+1].split(' ') elif args[i] == "--syslog-facility" or args[i] == "--facility": skipthis=1 Environment["SyslogFacility"] = args[i+1] elif args[i] == "--seed": skipthis=1 Environment.SeedRandom(args[i+1]) elif args[i] == "--warn-inactive": Environment["warn-inactive"] = 1 elif args[i] == "--schema": skipthis=1 Environment["Schema"] = args[i+1] elif args[i] == "--ais": Environment["Stack"] = "openais" elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot": skipthis=1 if args[i+1] == "1" or args[i+1] == "yes": Environment["at-boot"] = 1 elif args[i+1] == "0" or args[i+1] == "no": Environment["at-boot"] = 0 else: usage(args[i+1]) elif args[i] == "--heartbeat" or args[i] == "--lha": Environment["Stack"] = "heartbeat" elif args[i] == "--hae": Environment["Stack"] = "openais" Environment["Schema"] = "hae" elif args[i] == "--stack": Environment["Stack"] = args[i+1] skipthis=1 elif args[i] == "--once": Environment["scenario"] = "all-once" elif args[i] == "--valgrind-tests": Environment["valgrind-tests"] = 1 elif args[i] == "--no-loop-tests": Environment["loop-tests"] = 0 elif args[i] == "--loop-minutes": skipthis=1 try: Environment["loop-minutes"]=int(args[i+1]) except ValueError: usage(args[i]) elif args[i] == "--no-unsafe-tests": Environment["unsafe-tests"] = 0 elif args[i] == "--experimental-tests": Environment["experimental-tests"] = 1 elif args[i] == "--set": skipthis=1 (name, value) = args[i+1].split('=') Environment[name] = value else: try: NumIter=int(args[i]) except ValueError: usage(args[i]) if Environment["DoBSC"]: NumIter = 2 LimitNodes = 1 Chosen.append("AddResource") Environment["ClobberCIB"] = 1 Environment["CIBResource"] = 0 Environment["logger"].append(FileLog(Environment, Environment["LogFileName"])) elif Environment["OutputFile"]: Environment["logger"].append(FileLog(Environment, Environment["OutputFile"])) elif Environment["SyslogFacility"]: Environment["logger"].append(SysLog(Environment)) if Environment["Stack"] == "heartbeat" or Environment["Stack"] == "lha": Environment["Stack"] = "heartbeat" Environment['CMclass'] = crm_lha elif Environment["Stack"] == "openais" or Environment["Stack"] == "ais" or Environment["Stack"] == "whitetank": Environment["Stack"] = "openais (whitetank)" Environment['CMclass'] = crm_whitetank Environment["use_logd"] = 0 elif Environment["Stack"] == "corosync" or Environment["Stack"] == "cs" or Environment["Stack"] == "flatiron": Environment["Stack"] = "corosync (flatiron)" Environment['CMclass'] = crm_flatiron Environment["use_logd"] = 0 elif Environment["Stack"] == "cman": Environment["Stack"] = "corosync (cman)" Environment['CMclass'] = crm_cman Environment["use_logd"] = 0 elif Environment["Stack"] == "mcp": Environment["Stack"] = "corosync (mcp)" Environment['CMclass'] = crm_mcp Environment["use_logd"] = 0 else: print "Unknown stack: "+Environment["Stack"] sys.exit(1) if len(node_list) < 1: print "No nodes specified!" sys.exit(1) if LimitNodes > 0: if len(node_list) > LimitNodes: print("Limiting the number of nodes configured=%d (max=%d)" %(len(node_list), LimitNodes)) while len(node_list) > LimitNodes: node_list.pop(len(node_list)-1) Environment["nodes"] = node_list discover = random.Random().choice(Environment["nodes"]) # Detect syslog variant if not Environment.has_key("syslogd") or not Environment["syslogd"]: if not Environment.has_key("syslogd") or not Environment["syslogd"]: # SYS-V Environment["syslogd"] = rsh(discover, "chkconfig | grep log.*on | awk '{print $1}' | head -n 1", stdout=1) if not Environment.has_key("syslogd") or not Environment["syslogd"]: # Systemd Environment["syslogd"] = rsh(discover, "systemctl list-units | grep log.*\.service.*active.*running | sed 's:.service.*::'", stdout=1) if not Environment.has_key("syslogd") or not Environment["syslogd"]: Environment["syslogd"] = "rsyslogd" # Detect if the cluster starts at boot if not Environment.has_key("at-boot"): atboot = 0 # SYS-V atboot = atboot or not rsh(discover, "chkconfig | grep -e corosync.*on -e heartbeat.*on -e pacemaker.*on") # Systemd atboot = atboot or not rsh(discover, "systemctl is-enabled heartbeat.service") atboot = atboot or not rsh(discover, "systemctl is-enabled corosync.service") atboot = atboot or not rsh(discover, "systemctl is-enabled pacemaker.service") Environment["at-boot"] = atboot # Try to determinw an offset for IPaddr resources if Environment["CIBResource"] and not Environment.has_key("IPBase"): network=rsh(discover, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", stdout=1).strip() Environment["IPBase"] = rsh(discover, "nmap -sn %s | grep 'scan report' | sort -u | tail -n 1 | awk '{print $5}'" % network, stdout=1).strip() if not Environment["IPBase"]: Environment["IPBase"] = "127.0.0.10" Environment.log("Could not determine an offset for IPaddr resources. Perhaps nmap is not installed on the nodes.") Environment.log("Defaulting to '%s', use --test-ip-base to override" % Environment["IPBase"]) # Create the Cluster Manager object cm = Environment['CMclass'](Environment) if TruncateLog: Environment.log("Truncating %s" % LogFile) lf = open(LogFile, "w"); if lf != None: lf.truncate(0) lf.close() Audits = AuditList(cm) if Environment["ListTests"] == 1 : Tests = TestList(cm, Audits) Environment.log("Total %d tests"%len(Tests)) for test in Tests : Environment.log(str(test.name)); sys.exit(0) if len(Chosen) == 0: Tests = TestList(cm, Audits) else: for TestCase in Chosen: match = None for test in TestList(cm, Audits): if test.name == TestCase: match = test if not match: usage("--choose: No applicable/valid tests chosen") else: Tests.append(match) # Scenario selection if Environment["scenario"] == "basic-sanity": scenario = RandomTests(cm, [ BasicSanityCheck(Environment) ], Audits, Tests) elif Environment["scenario"] == "all-once": NumIter = len(Tests) scenario = AllOnce( cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests) elif Environment["scenario"] == "sequence": scenario = Sequence( cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests) else: scenario = RandomTests( cm, [ InitClusterManager(Environment), PacketLoss(Environment) ], Audits, Tests) Environment.log(">>>>>>>>>>>>>>>> BEGINNING " + repr(NumIter) + " TESTS ") Environment.log("Stack: %s" % Environment["Stack"]) Environment.log("Schema: %s" % Environment["Schema"]) Environment.log("Scenario: %s" % scenario.__doc__) Environment.log("CTS Master: %s" % Environment["cts-master"]) Environment.log("Random Seed: %s" % Environment["RandSeed"]) Environment.log("Syslog variant: %s" % Environment["syslogd"].strip()) Environment.log("System log files: %s" % Environment["LogFileName"]) # Environment.log(" ") if Environment.has_key("IPBase"): Environment.log("Base IP for resources: %s" % Environment["IPBase"]) Environment.log("Cluster starts at boot: %d" % Environment["at-boot"]) Environment.dump() rc = Environment.run(scenario, NumIter) sys.exit(rc) diff --git a/cts/CTStests.py b/cts/CTStests.py index 1ef24f3f37..2b07adb7ef 100644 --- a/cts/CTStests.py +++ b/cts/CTStests.py @@ -1,2244 +1,2244 @@ '''CTS: Cluster Testing System: Tests module There are a few things we want to do here: ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. Add RecourceRecover testcase Zhao Kai ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. # # SPECIAL NOTE: # # Tests may NOT implement any cluster-manager-specific code in them. # EXTEND the ClusterManager object to provide the base capabilities # the test needs if you need to do something that the current CM classes # do not. Otherwise you screw up the whole point of the object structure # in CTS. # # Thank you. # import time, os, re, types, string, tempfile, sys from stat import * from cts import CTS from cts.CTSaudits import * AllTestClasses = [ ] class CTSTest: ''' A Cluster test. We implement the basic set of properties and behaviors for a generic cluster test. Cluster tests track their own statistics. We keep each of the kinds of counts we track as separate {name,value} pairs. ''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.Audits = [] self.timeout=120 self.passed = 1 self.is_loop = 0 self.is_unsafe = 0 self.is_experimental = 0 self.is_valgrind = 0 self.benchmark = 0 # which tests to benchmark self.timer = {} # timers def has_key(self, key): return self.Stats.has_key(key) def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): return self.Stats[key] def log_mark(self, msg): self.CM.debug("MARK: test %s %s %d" % (self.name,msg,time.time())) return def get_timer(self,key = "test"): try: return self.timer[key] except: return 0 def set_timer(self,key = "test"): self.timer[key] = time.time() return self.timer[key] def log_timer(self,key = "test"): elapsed = 0 if key in self.timer: elapsed = time.time() - self.timer[key] s = key == "test" and self.name or "%s:%s" %(self.name,key) self.CM.debug("%s runtime: %.2f" % (s, elapsed)) del self.timer[key] return elapsed def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 # Reset the test passed boolean if name == "calls": self.passed = 1 def failure(self, reason="none"): '''Increment the failure count''' self.passed = 0 self.incr("failure") self.CM.log(("Test %s" % self.name).ljust(35) +" FAILED: %s" % reason) return None def success(self): '''Increment the success count''' self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") self.incr("calls") return self.failure() def audit(self): passed = 1 if len(self.Audits) > 0: for audit in self.Audits: if not audit(): self.CM.log("Internal %s Audit %s FAILED." % (self.name, audit.name())) self.incr("auditfail") passed = 0 return passed def setup(self, node): '''Setup the given test''' return self.success() def teardown(self, node): '''Tear down the given test''' return self.success() def create_watch(self, patterns, timeout, name=None): if not name: name = self.name return CTS.LogWatcher(self.CM.Env, self.CM["LogFileName"], patterns, name, timeout) def local_badnews(self, prefix, watch, local_ignore=[]): errcount = 0 if not prefix: prefix = "LocalBadNews:" ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append(prefix) ignorelist.extend(local_ignore) while errcount < 100: match=watch.look(0) if match: add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): add_err = 0 if add_err == 1: self.CM.log(prefix + " " + match) errcount=errcount+1 else: break else: self.CM.log("Too many errors!") return errcount def is_applicable(self): return self.is_applicable_common() def is_applicable_common(self): '''Return TRUE if we are applicable in the current test configuration''' #raise ValueError("Abstract Class member (is_applicable)") if self.is_loop and not self.CM.Env["loop-tests"]: return 0 elif self.is_unsafe and not self.CM.Env["unsafe-tests"]: return 0 elif self.is_valgrind and not self.CM.Env["valgrind-tests"]: return 0 elif self.is_experimental and not self.CM.Env["experimental-tests"]: return 0 elif self.CM.Env["benchmark"] and self.benchmark == 0: return 0 return 1 def find_ocfs2_resources(self, node): self.r_o2cb = None self.r_ocfs2 = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "o2cb" and r.parent != "NA": self.CM.debug("Found o2cb: %s" % self.r_o2cb) self.r_o2cb = r.parent if re.search("^Constraint", line): c = AuditConstraint(self.CM, line) if c.type == "rsc_colocation" and c.target == self.r_o2cb: self.r_ocfs2.append(c.rsc) self.CM.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2)) return len(self.r_ocfs2) def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] ################################################################### class StopTest(CTSTest): ################################################################### '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stop" def __call__(self, node): '''Perform the 'stop' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] != "up": return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # patterns.append(self.CM["Pat:Logd_stopped"] % node) # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions for other in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[other] == "up" and other != node: patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) #self.debug("Checking %s will notice %s left"%(other, node)) watch = self.create_watch(patterns, self.CM["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() failreason=None UnmatchedList = "||" if watch.unmatched: (rc, output) = self.CM.rsh(node, "/bin/ps axf", None) for line in output: self.CM.debug(line) for regex in watch.unmatched: self.CM.log ("ERROR: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; failreason="Missing shutdown pattern" self.CM.cluster_stable(self.CM["DeadTime"]) if not watch.unmatched or self.CM.upcount() == 0: return self.success() if len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) if failreason == None: return self.success() else: return self.failure(failreason) # # We don't register StopTest because it's better when called by # another test... # ################################################################### class StartTest(CTSTest): ################################################################### '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name="start" self.debug = debug def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != "down": return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" %(self.CM["Name"], node)) # # We don't register StartTest because it's better when called by # another test... # ################################################################### class FlipTest(CTSTest): ################################################################### '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == "up": self.incr("stopped") ret = self.stop(node) type="up->down" # Give the cluster time to recognize it's gone... time.sleep(self.CM["StableTime"]) elif self.CM.ShouldBeStatus[node] == "down": self.incr("started") ret = self.start(node) type="down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) ################################################################### class RestartTest(CTSTest): ################################################################### '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Restart" self.start = StartTest(cm) self.stop = StopTest(cm) self.benchmark = 1 def __call__(self, node): '''Perform the 'restart' test. ''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.set_timer() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) ################################################################### class StonithdTest(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self, cm) self.name="Stonithd" self.startall = SimulStartLite(cm) self.benchmark = 1 def __call__(self, node): self.incr("calls") if len(self.CM.Env["nodes"]) < 2: return self.skipped() ret = self.startall(None) if not ret: return self.failure("Setup failed") is_dc = self.CM.is_node_dc(node) watchpats = [] watchpats.append("stonith-ng:.*Operation .* for host '%s' with device .* returned: 0" % node) watchpats.append("tengine_stonith_notify: Peer %s was terminated .*: OK" % node) if is_dc: watchpats.append("tengine_stonith_notify: Target was our leader .*%s" % node) else: watchpats.append("tengine_stonith_callback: .*: OK ") if self.CM.Env["LogWatcher"] != "remote" or not is_dc: # Often remote logs aren't flushed to disk by the time the node is shot, # so we wont be able to find them # Remote syslog doesn't suffer this problem because they're already on # the loghost when the node is shot watchpats.append("Node %s will be fenced because termination was requested" % node) watchpats.append("Scheduling Node %s for STONITH" % node) watchpats.append("Executing .* fencing operation") if self.CM.Env["at-boot"] == 0: self.CM.debug("Expecting %s to stay down" % node) self.CM.ShouldBeStatus[node]="down" else: self.CM.debug("Expecting %s to come up again %d" % (node, self.CM.Env["at-boot"])) watchpats.append("%s crmd: .* S_STARTING -> S_PENDING" % node) watchpats.append("%s crmd: .* S_PENDING -> S_NOT_DC" % node) watch = self.create_watch(watchpats, 30 + self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() self.CM.rsh(node, "crm_attribute --node %s --type status --attr-name terminate --attr-value true" % node) self.set_timer("fence") matched = watch.lookforall() self.log_timer("fence") self.set_timer("reform") if watch.unmatched: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") self.log_timer("reform") return self.success() def errorstoignore(self): return [ "Executing .* fencing operation" ] def is_applicable(self): if not self.is_applicable_common(): return 0 if self.CM.Env.has_key("DoFencing"): return self.CM.Env["DoFencing"] return 1 AllTestClasses.append(StonithdTest) ################################################################### class StartOnebyOne(CTSTest): ################################################################### '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StartOnebyOne" self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns=CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed=[] self.set_timer() for node in self.CM.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) ################################################################### class SimulStart(CTSTest): ################################################################### '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) ################################################################### class SimulStop(CTSTest): ################################################################### '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) ################################################################### class StopOnebyOne(CTSTest): ################################################################### '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") failed=[] self.set_timer() for node in self.CM.Env["nodes"]: if not self.stop(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to stop: " + repr(failed)) self.CM.clear_all_caches() return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(StopOnebyOne) ################################################################### class RestartOnebyOne(CTSTest): ################################################################### '''Restart all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RestartOnebyOne" self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'RestartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") did_fail=[] self.set_timer() self.restart = RestartTest(self.CM) for node in self.CM.Env["nodes"]: if not self.restart(node): did_fail.append(node) if did_fail: return self.failure("Could not restart %d nodes: %s" %(len(did_fail), repr(did_fail))) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(RestartOnebyOne) ################################################################### class PartialStart(CTSTest): ################################################################### '''Start a node - but tell it to stop before it finishes starting up''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="PartialStart" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) self.stop = StopTest(cm) #self.is_unsafe = 1 def __call__(self, node): '''Perform the 'PartialStart' test. ''' self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Setup failed") # FIXME! This should use the CM class to get the pattern # then it would be applicable in general watchpats = [] watchpats.append("Starting crmd") watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.CM.StartaCMnoBlock(node) ret = watch.lookforall() if not ret: self.CM.log("Patterns not found: " + repr(watch.unmatched)) return self.failure("Setup of %s failed" % node) ret = self.stop(node) if not ret: return self.failure("%s did not stop in time" % node) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(PartialStart) ####################################################################### class StandbyTest(CTSTest): ####################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Standby" self.benchmark = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) # make sure the node is active # set the node to standby mode # check resources, none resource should be running on the node # set the node to active mode # check resouces, resources should have been migrated back (SHOULD THEY?) def __call__(self, node): self.incr("calls") ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") self.CM.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.CM.debug("Getting resources running on node %s" % node) rsc_on_node = self.CM.active_resources(node) watchpats = [] watchpats.append("do_state_transition:.*input=I_PE_SUCCESS") watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.CM.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) self.set_timer("on") ret = watch.lookforall() if not ret: self.CM.log("Patterns not found: " + repr(watch.unmatched)) self.CM.SetStandbyMode(node, "off") return self.failure("cluster didn't react to standby change on %s" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.log_timer("on") self.CM.debug("Checking resources") bad_run = self.CM.active_resources(node) if len(bad_run) > 0: rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run))) self.CM.debug("Setting node %s to active mode" % node) self.CM.SetStandbyMode(node, "off") return rc self.CM.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.set_timer("off") self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.log_timer("off") return self.success() AllTestClasses.append(StandbyTest) ####################################################################### class ValgrindTest(CTSTest): ####################################################################### '''Check for memory leaks''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Valgrind" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_valgrind = 1 self.is_loop = 1 def setup(self, node): self.incr("calls") ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") # Enable valgrind self.logPat = "/tmp/%s-*.valgrind" % self.name self.CM.Env["valgrind-prefix"] = self.name self.CM.rsh(node, "rm -f %s" % self.logPat, None) ret=self.startall(None) if not ret: return self.failure("Start all nodes failed") for node in self.CM.Env["nodes"]: (rc, output) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) for line in output: self.CM.debug(line) return self.success() def teardown(self, node): # Disable valgrind self.CM.Env["valgrind-prefix"] = None # Return all nodes to normal ret=self.stopall(None) if not ret: return self.failure("Stop all nodes failed") return self.success() def find_leaks(self): # Check for leaks leaked = [] self.stop = StopTest(self.CM) for node in self.CM.Env["nodes"]: (rc, ps_out) = self.CM.rsh(node, "ps u --ppid `pidofproc aisexec`", None) rc = self.stop(node) if not rc: self.failure("Couldn't shut down %s" % node) rc = self.CM.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e ERROR.*SUMMARY:.*[1-9].*errors %s" % self.logPat, 0) if rc != 1: leaked.append(node) self.failure("Valgrind errors detected on %s" % node) for line in ps_out: self.CM.log(line) (rc, output) = self.CM.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logPat, None) for line in output: self.CM.log(line) (rc, output) = self.CM.rsh(node, "cat %s" % self.logPat, None) for line in output: self.CM.debug(line) self.CM.rsh(node, "rm -f %s" % self.logPat, None) return leaked def __call__(self, node): leaked = self.find_leaks() if len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """cib:.*readCibXmlFile:""", """HA_VALGRIND_ENABLED""" ] ####################################################################### class StandbyLoopTest(ValgrindTest): ####################################################################### '''Check for memory leaks by putting a node in and out of standby for an hour''' def __init__(self, cm): ValgrindTest.__init__(self,cm) self.name="StandbyLoop" def __call__(self, node): lpc = 0 delay = 2 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 while time.time() <= done and not failed: lpc = lpc + 1 time.sleep(delay) if not self.CM.SetStandbyMode(node, "on"): self.failure("can't set node %s to standby mode" % node) failed = lpc time.sleep(delay) if not self.CM.SetStandbyMode(node, "off"): self.failure("can't set node %s to active mode" % node) failed = lpc leaked = self.find_leaks() if failed: return self.failure("Iteration %d failed" % failed) elif len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() AllTestClasses.append(StandbyLoopTest) ############################################################################## class BandwidthTest(CTSTest): ############################################################################## # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. '''Test the bandwidth which heartbeat uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) self.tempfile = tempfile.mktemp(".cts") self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount()<1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.CM.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.CM.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.CM.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.CM.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.CM.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" % rc) def countbandwidth(self, file): fp = open(file, "r") fp.seek(0) count = 0 sum = 0 while 1: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count=count+1 linesplit = string.split(line," ") for j in range(len(linesplit)-1): if linesplit[j]=="udp": break if linesplit[j]=="length:": break try: sum = sum + int(linesplit[j+1]) except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T1 = linesplit[0] timesplit = string.split(T1,":") time2split = string.split(timesplit[2],".") time1 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 break while count < 100: line = fp.readline() if not line: return None if re.search("udp",line) or re.search("UDP,", line): count = count+1 linessplit = string.split(line," ") for j in range(len(linessplit)-1): if linessplit[j] =="udp": break if linesplit[j]=="length:": break try: sum=int(linessplit[j+1])+sum except ValueError: self.CM.log("Invalid tcpdump line: %s" % line) return None T2 = linessplit[0] timesplit = string.split(T2,":") time2split = string.split(timesplit[2],".") time2 = (long(timesplit[0])*60+long(timesplit[1]))*60+long(time2split[0])+long(time2split[1])*0.000001 time = time2-time1 if (time <= 0): return 0 return (sum*8)/time def is_applicable(self): '''BandwidthTest never applicable''' return 0 AllTestClasses.append(BandwidthTest) ################################################################### class ResourceRecover(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ResourceRecover" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max=30 self.rid=None #self.is_unsafe = 1 self.benchmark = 1 # these are the values used for the new LRM API call self.action = "asyncmon" self.interval = 0 def __call__(self, node): '''Perform the 'ResourceRecover' test. ''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed") resourcelist = self.CM.active_resources(node) # if there are no resourcelist, return directly if len(resourcelist)==0: self.CM.log("No active resources on %s" % node) return self.skipped() self.rid = self.CM.Env.RandomGen.choice(resourcelist) rsc = None (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.id == self.rid: rsc = tmp # Handle anonymous clones that get renamed self.rid = rsc.clone_id break if not rsc: return self.failure("Could not find %s in the resource list" % self.rid) self.CM.debug("Shooting %s aka. %s" % (rsc.clone_id, rsc.id)) pats = [] pats.append("Updating failcount for %s on .* after .* %s" % (self.rid, self.action)) if rsc.managed(): pats.append("crmd:.* Performing .* op=%s_stop_0" % self.rid) if rsc.unique(): pats.append("crmd:.* Performing .* op=%s_start_0" % self.rid) pats.append("crmd:.* LRM operation %s_start_0.*confirmed.*ok" % self.rid) else: # Anonymous clones may get restarted with a different clone number pats.append("crmd:.* Performing .* op=.*_start_0") pats.append("crmd:.* LRM operation .*_start_0.*confirmed.*ok") watch = self.create_watch(pats, 60) watch.setwatch() self.CM.rsh(node, "crm_resource -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover") watch.lookforall() self.log_timer("recover") self.CM.cluster_stable() recovered=self.CM.ResourceLocation(self.rid) if watch.unmatched: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif rsc.unique() and len(recovered) > 1: return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered))) elif len(recovered) > 0: self.CM.debug("%s is running on: %s" %(self.rid, repr(recovered))) elif rsc.managed(): return self.failure("%s was not recovered and is inactive" % self.rid) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """Updating failcount for %s""" % self.rid, """LogActions: Recover %s""" % self.rid, """Unknown operation: fail""", """ERROR: sending stonithRA op to stonithd failed.""", """ERROR: process_lrm_event: LRM operation %s_%s_%d""" % (self.rid, self.action, self.interval), """ERROR: process_graph_event: Action %s_%s_%d .* initiated outside of a transition""" % (self.rid, self.action, self.interval), ] AllTestClasses.append(ResourceRecover) ################################################################### class ComponentFail(CTSTest): ################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="ComponentFail" self.startall = SimulStartLite(cm) self.complist = cm.Components() self.patterns = [] self.okerrpatterns = [] self.is_unsafe = 1 def __call__(self, node): '''Perform the 'ComponentFail' test. ''' self.incr("calls") self.patterns = [] self.okerrpatterns = [] # start all nodes ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.CM.cluster_stable(self.CM["StableTime"]): return self.failure("Setup failed - unstable") node_is_dc = self.CM.is_node_dc(node, None) # select a component to kill chosen = self.CM.Env.RandomGen.choice(self.complist) while chosen.dc_only == 1 and node_is_dc == 0: chosen = self.CM.Env.RandomGen.choice(self.complist) self.CM.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot)) self.incr(chosen.name) if chosen.name != "aisexec": if self.CM["Name"] != "crm-lha" or chosen.name != "pengine": self.patterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.patterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.patterns.extend(chosen.pats) if node_is_dc: self.patterns.extend(chosen.dc_pats) # In an ideal world, this next stuff should be in the "chosen" object as a member function if self.CM["Name"] == "crm-lha" and chosen.triggersreboot: # Make sure the node goes down and then comes back up if it should reboot... for other in self.CM.Env["nodes"]: if other != node: self.patterns.append(self.CM["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) self.patterns.append(self.CM["Pat:Slave_started"] % node) self.patterns.append(self.CM["Pat:Local_started"] % node) if chosen.dc_only: # Sometimes these will be in the log, and sometimes they won't... self.okerrpatterns.append("%s crmd:.*Process %s:.* exited" %(node, chosen.name)) self.okerrpatterns.append("%s crmd:.*I_ERROR.*crmdManagedChildDied" %node) self.okerrpatterns.append("%s crmd:.*The %s subsystem terminated unexpectedly" %(node, chosen.name)) self.okerrpatterns.append("ERROR: Client .* exited with return code") else: # Sometimes this won't be in the log... self.okerrpatterns.append(self.CM["Pat:ChildKilled"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildRespawn"] %(node, chosen.name)) self.okerrpatterns.append(self.CM["Pat:ChildExit"]) # supply a copy so self.patterns doesnt end up empty tmpPats = [] tmpPats.extend(self.patterns) self.patterns.extend(chosen.badnews_ignore) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonithPats = [] stonithPats.append("stonith-ng:.*Operation .* for host '%s' with device .* returned: 0" % node) stonith = self.create_watch(stonithPats, 0) stonith.setwatch() # set the watch for stable watch = self.create_watch( tmpPats, self.CM["DeadTime"] + self.CM["StableTime"] + self.CM["StartTime"]) watch.setwatch() # kill the component chosen.kill(node) # check to see Heartbeat noticed matched = watch.lookforall(allow_multiple_matches=1) if watch.unmatched: self.CM.log("Patterns not found: " + repr(watch.unmatched)) if self.CM.Env["at-boot"] == 0: self.CM.debug("Checking if %s was shot" % node) shot = stonith.look(60) if shot: self.CM.debug("Found: "+ repr(shot)) self.CM.ShouldBeStatus[node]="down" self.CM.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.CM.debug("Waiting for any STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.CM.Env["nodes"], 600) self.CM.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.CM["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Note that okerrpatterns refers to the last time we ran this test # The good news is that this works fine for us... self.okerrpatterns.extend(self.patterns) return self.okerrpatterns AllTestClasses.append(ComponentFail) #################################################################### class SplitBrainTest(CTSTest): #################################################################### '''It is used to test split-brain. when the path between the two nodes break check the two nodes both take over the resource''' def __init__(self,cm): CTSTest.__init__(self,cm) self.name = "SplitBrain" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.is_experimental = 1 def isolate_partition(self, partition): other_nodes = [] other_nodes.extend(self.CM.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"]) + " from " +repr(partition)) if len(other_nodes) == 0: return 1 self.CM.debug("Creating partition: " + repr(partition)) self.CM.debug("Everyone else: " + repr(other_nodes)) for node in partition: if not self.CM.isolate_node(node, other_nodes): self.CM.log("Could not isolate %s" % node) return 0 return 1 def heal_partition(self, partition): other_nodes = [] other_nodes.extend(self.CM.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.CM.log("Node "+node+" not in " + repr(self.CM.Env["nodes"])) if len(other_nodes) == 0: return 1 self.CM.debug("Healing partition: " + repr(partition)) self.CM.debug("Everyone else: " + repr(other_nodes)) for node in partition: self.CM.unisolate_node(node, other_nodes) def __call__(self, node): '''Perform split-brain test''' self.incr("calls") self.passed = 1 partitions = {} ret = self.startall(None) if not ret: return self.failure("Setup failed") while 1: # Retry until we get multiple partitions partitions = {} p_max = len(self.CM.Env["nodes"]) for node in self.CM.Env["nodes"]: p = self.CM.Env.RandomGen.randint(1, p_max) if not partitions.has_key(p): partitions[p]= [] partitions[p].append(node) p_max = len(partitions.keys()) if p_max > 1: break # else, try again self.CM.debug("Created %d partitions" % p_max) for key in partitions.keys(): self.CM.debug("Partition["+str(key)+"]:\t"+repr(partitions[key])) # Disabling STONITH to reduce test complexity for now self.CM.rsh(node, "crm_attribute -n stonith-enabled -v false") for key in partitions.keys(): self.isolate_partition(partitions[key]) count = 30 while count > 0: if len(self.CM.find_partitions()) != p_max: time.sleep(10) else: break else: self.failure("Expected partitions were not created") # Target number of partitions formed - wait for stability if not self.CM.cluster_stable(): self.failure("Partitioned cluster not stable") # Now audit the cluster state self.CM.partitions_expected = p_max if not self.audit(): self.failure("Audits failed") self.CM.partitions_expected = 1 # And heal them again for key in partitions.keys(): self.heal_partition(partitions[key]) # Wait for a single partition to form count = 30 while count > 0: if len(self.CM.find_partitions()) != 1: time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not reform") # Wait for it to have the right number of members count = 30 while count > 0: members = [] partitions = self.CM.find_partitions() if len(partitions) > 0: members = partitions[0].split() if len(members) != len(self.CM.Env["nodes"]): time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not completely reform") # Wait up to 20 minutes - the delay is more preferable than # trying to continue with in a messed up state if not self.CM.cluster_stable(1200): self.failure("Reformed cluster not stable") answer = raw_input('Continue? [nY]') if answer and answer == "n": raise ValueError("Reformed cluster not stable") # Turn fencing back on if self.CM.Env["DoFencing"]: self.CM.rsh(node, "crm_attribute -D -n stonith-enabled") self.CM.cluster_stable() if self.passed: return self.success() return self.failure("See previous errors") def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ "Another DC detected:", "ERROR: attrd_cib_callback: .*Application of an update diff failed", "crmd_ha_msg_callback:.*not in our membership list", "CRIT:.*node.*returning after partition", ] def is_applicable(self): if not self.is_applicable_common(): return 0 return len(self.CM.Env["nodes"]) > 2 AllTestClasses.append(SplitBrainTest) #################################################################### class Reattach(CTSTest): #################################################################### def __init__(self, cm): CTSTest.__init__(self,cm) self.name="Reattach" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) self.is_unsafe = 0 # Handled by canrunnow() def setup(self, node): attempt=0 if not self.startall(None): return None # Make sure we are really _really_ stable and that all # resources, including those that depend on transient node # attributes, are started while not self.CM.cluster_stable(double_check=True): if attempt < 5: attempt += 1 self.CM.debug("Not stable yet, re-testing") else: self.CM.log("Cluster is not stable") return None return 1 def teardown(self, node): # Make sure 'node' is up start = StartTest(self.CM) start(node) is_managed = self.CM.rsh(node, "crm_attribute -GQ -t crm_config -n is-managed-default -d true", 1) is_managed = is_managed[:-1] # Strip off the newline if is_managed != "true": self.CM.log("Attempting to re-enable resource management on %s (%s)" % (node, is_managed)) managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.rsh(node, "crm_attribute -D -n is-managed-default") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) self.CM.log("Could not re-enable resource management") return 0 return 1 def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' if self.find_ocfs2_resources(node): self.CM.log("Detach/Reattach scenarios are not possible with OCFS2 services present") return 0 return 1 def __call__(self, node): self.incr("calls") pats = [] managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.debug("Disable resource management") self.CM.rsh(node, "crm_attribute -n is-managed-default -v false") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not disabled") pats = [] pats.append("crmd:.*Performing.*_stop_0") pats.append("crmd:.*Performing.*_start_0") pats.append("crmd:.*Performing.*_promote_0") pats.append("crmd:.*Performing.*_demote_0") pats.append("crmd:.*Performing.*_migrate_.*_0") watch = self.create_watch(pats, 60, "ShutdownActivity") watch.setwatch() self.CM.debug("Shutting down the cluster") ret = self.stopall(None) if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") return self.failure("Couldn't shut down the cluster") self.CM.debug("Bringing the cluster back up") ret = self.startall(None) time.sleep(5) # allow ping to update the CIB if not ret: self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") return self.failure("Couldn't restart the cluster") if self.local_badnews("ResourceActivity:", watch): self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") return self.failure("Resources stopped or started during cluster restart") watch = self.create_watch(pats, 60, "StartupActivity") watch.setwatch() managed = self.create_watch(["is-managed-default"], 60) managed.setwatch() self.CM.debug("Re-enable resource management") self.CM.rsh(node, "crm_attribute -D -n is-managed-default") if not managed.lookforall(): self.CM.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not enabled") self.CM.cluster_stable() # Ignore actions for STONITH resources ignore = [] (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.CM.debug("Ignoring: crmd:.*Performing.*op=%s_.*_0" % r.id) ignore.append("crmd:.*Performing.*op=%s_.*_0" % r.id) if self.local_badnews("ResourceActivity:", watch, ignore): return self.failure("Resources stopped or started after resource management was re-enabled") return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [ "You may ignore this error if it is unmanaged.", "pingd: .*ERROR: send_ipc_message:", "pingd: .*ERROR: send_update:", "lrmd: .*ERROR: notify_client:", ] def is_applicable(self): if self.CM["Name"] == "crm-lha": return None return 1 AllTestClasses.append(Reattach) #################################################################### class SpecialTest1(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Could not stop all nodes") # Start the selected node ret = self.restart1(node) if not ret: return self.failure("Could not start "+node) # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Could not start the remaining nodes") return self.success() AllTestClasses.append(SpecialTest1) #################################################################### class HAETest(CTSTest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="HAETest" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_loop = 1 def setup(self, node): # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") return self.success() def wait_on_state(self, node, resource, expected_clones, attempts=240): while attempts > 0: active=0 (rc, lines) = self.CM.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None) # Hack until crm_resource does the right thing if rc == 0 and lines: active = len(lines) if len(lines) == expected_clones: return 1 elif rc == 1: self.CM.debug("Resource %s is still inactive" % resource) elif rc == 234: self.CM.log("Unknown resource %s" % resource) return 0 elif rc == 246: self.CM.log("Cluster is inactive") return 0 elif rc != 0: self.CM.log("Call to crm_resource failed, rc=%d" % rc) return 0 else: self.CM.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones)) attempts -= 1 time.sleep(1) return 0 def find_dlm(self, node): self.r_dlm = None (rc, lines) = self.CM.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "controld" and r.parent != "NA": self.CM.debug("Found dlm: %s" % self.r_dlm) self.r_dlm = r.parent return 1 return 0 def find_hae_resources(self, node): self.r_dlm = None self.r_o2cb = None self.r_ocfs2 = [] if self.find_dlm(node): self.find_ocfs2_resources(node) def is_applicable(self): if not self.is_applicable_common(): return 0 if self.CM.Env["Schema"] == "hae": return 1 return None #################################################################### class HAERoleTest(HAETest): #################################################################### def __init__(self, cm): '''Lars' mount/unmount test for the HA extension. ''' HAETest.__init__(self,cm) self.name="HAERoleTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_resource -r %s -p target-role -v %s --meta" % (resource, target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 delay = 2 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "Stopped") if not self.wait_on_state(node, self.r_dlm, 0): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "Started") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAERoleTest) #################################################################### class HAEStandbyTest(HAETest): #################################################################### '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): HAETest.__init__(self,cm) self.name="HAEStandbyTest" def change_state(self, node, resource, target): rc = self.CM.rsh(node, "crm_standby -l reboot -v %s" % (target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 done=time.time() + self.CM.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.CM.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "true") if not self.wait_on_state(node, self.r_dlm, clone_max-1): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "false") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAEStandbyTest) ################################################################### class NearQuorumPointTest(CTSTest): ################################################################### ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. ''' self.incr("calls") startset = [] stopset = [] #decide what to do with each node for node in self.CM.Env["nodes"]: action = self.CM.Env.RandomGen.choice(["start","stop"]) #action = self.CM.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.CM.debug("start nodes:" + repr(startset)) self.CM.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == "up": watchpats.append(self.CM["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": #watchpats.append(self.CM["Pat:Slave_started"] % node) watchpats.append(self.CM["Pat:Local_started"] % node) else: for stopping in stopset: if self.CM.ShouldBeStatus[stopping] == "up": watchpats.append(self.CM["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping))) if len(watchpats) == 0: return self.skipped() if len(startset) != 0: watchpats.append(self.CM["Pat:DC_IDLE"]) watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() return self.success() self.CM.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) if upnodes == [] and downnodes == []: self.CM.cluster_stable() # Make sure they're completely down with no residule for node in stopset: self.CM.rsh(node, self.CM["StopCmd"]) return self.success() if len(upnodes) > 0: self.CM.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.CM.log("Warn: Unstartable nodes: " + repr(downnodes)) return self.failure() AllTestClasses.append(NearQuorumPointTest) ################################################################### class RollingUpgradeTest(CTSTest): ################################################################### '''Perform a rolling upgrade of the cluster''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="RollingUpgrade" self.start = StartTest(cm) self.stop = StopTest(cm) self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def setup(self, node): # Start all remaining nodes ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.downgrade(node, None): return self.failure("Couldn't downgrade %s" % node) ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.CM.Env["nodes"]: if not self.upgrade(node, None): return self.failure("Couldn't upgrade %s" % node) return self.success() def install(self, node, version, start=1, flags="--force"): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.CM.Env["rpm-dir"], version) self.CM.log("Installing %s on %s with %s" % (version, node, flags)) if not self.stop(node): return self.failure("stop failure: "+node) rc = self.CM.rsh(node, "mkdir -p %s" % target_dir) rc = self.CM.rsh(node, "rm -f %s/*.rpm" % target_dir) (rc, lines) = self.CM.rsh(node, "ls -1 %s/*.rpm" % src_dir, None) for line in lines: line = line[:-1] rc = self.CM.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir)) rc = self.CM.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir)) if start and not self.start(node): return self.failure("start failure: "+node) return self.success() def upgrade(self, node, start=1): return self.install(node, self.CM.Env["current-version"], start) def downgrade(self, node, start=1): return self.install(node, self.CM.Env["previous-version"], start, "--force --nodeps") def __call__(self, node): '''Perform the 'Rolling Upgrade' test. ''' self.incr("calls") for node in self.CM.Env["nodes"]: if self.upgrade(node): return self.failure("Couldn't upgrade %s" % node) self.CM.cluster_stable() return self.success() def is_applicable(self): if not self.is_applicable_common(): return None if not self.CM.Env.has_key("rpm-dir"): return None if not self.CM.Env.has_key("current-version"): return None if not self.CM.Env.has_key("previous-version"): return None return 1 # Register RestartTest as a good test to run AllTestClasses.append(RollingUpgradeTest) ################################################################### class BSC_AddResource(CTSTest): ################################################################### '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name="AddResource" self.resource_offset = 0 self.cib_cmd="""cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.incr("calls") self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "crmd.*%s_start_0.*confirmed.*ok" patterns = [] patterns.append(start_pat % r_id) watch = self.create_watch(patterns, self.CM["DeadTime"]) watch.setwatch() fields = string.split(self.CM.Env["IPBase"], '.') fields[3] = str(int(fields[3])+1) ip = string.join(fields, '.') self.CM.Env["IPBase"] = ip if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.CM["DeadTime"]): return self.failure("Unstable cluster") return self.success() def make_ip_resource(self, node, id, rclass, type, ip): self.CM.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, type, id, id, ip) node_constraint=""" """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("constraints", node_constraint), None) if rc != 0: self.CM.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.CM.rsh(node, self.cib_cmd % ("resources", rsc_xml), None) if rc != 0: self.CM.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.CM.Env["DoBSC"]: return 1 return None AllTestClasses.append(BSC_AddResource) class SimulStopLite(CTSTest): ################################################################### '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... watchpats = [ ] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.incr("WasStarted") watchpats.append(self.CM["Pat:We_stopped"] % node) #if self.CM.Env["use_logd"]: # watchpats.append(self.CM["Pat:Logd_stopped"] % node) if len(watchpats) == 0: self.CM.clear_all_caches() return self.success() # Stop all the nodes - at about the same time... watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() self.set_timer() for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) if watch.lookforall(): self.CM.clear_all_caches() # Make sure they're completely down with no residule for node in self.CM.Env["nodes"]: self.CM.rsh(node, self.CM["StopCmd"]) return self.success() did_fail=0 up_nodes = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 1: did_fail=1 up_nodes.append(node) if did_fail: return self.failure("Active nodes exist: " + repr(up_nodes)) self.CM.log("Warn: All nodes stopped but CTS didnt detect: " + repr(watch.unmatched)) self.CM.clear_all_caches() return self.failure("Missing log message: "+repr(watch.unmatched)) def is_applicable(self): '''SimulStopLite is a setup test and never applicable''' return 0 ################################################################### class SimulStartLite(CTSTest): ################################################################### '''Start any stopped nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name="SimulStartLite" def __call__(self, dummy): '''Perform the 'SimulStartList' setup work. ''' self.incr("calls") self.CM.debug("Setup: " + self.name) # We ignore the "node" parameter... node_list = [] for node in self.CM.Env["nodes"]: if self.CM.ShouldBeStatus[node] == "down": self.incr("WasStopped") node_list.append(node) self.set_timer() while len(node_list) > 0: watchpats = [ ] uppat = self.CM["Pat:Slave_started"] if self.CM.upcount() == 0: uppat = self.CM["Pat:Local_started"] watchpats.append(self.CM["Pat:DC_IDLE"]) for node in node_list: watchpats.append(uppat % node) # Start all the nodes - at about the same time... watch = self.create_watch(watchpats, self.CM["DeadTime"]+10) watch.setwatch() stonith = self.CM.prepare_fencing_watcher(self.name) for node in node_list: self.CM.StartaCMnoBlock(node) watch.lookforall() node_list = self.CM.fencing_cleanup(self.name, stonith) # Remove node_list messages from watch.unmatched for node in node_list: if watch.unmatched: watch.unmatched.remove(uppat % node) if watch.unmatched: for regex in watch.unmatched: self.CM.log ("Warn: Startup pattern not found: %s" %(regex)) if not self.CM.cluster_stable(): return self.failure("Cluster did not stabilize") did_fail=0 unstable = [] for node in self.CM.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.CM.Env["nodes"]: if not self.CM.node_stable(node): did_fail=1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) return self.success() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 def TestList(cm, audits): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) return result # vim:ts=4:sw=4:et: diff --git a/cts/OCFIPraTest.py b/cts/OCFIPraTest.py index bb0747291e..11519f08e6 100755 --- a/cts/OCFIPraTest.py +++ b/cts/OCFIPraTest.py @@ -1,176 +1,176 @@ #!/usr/bin/python '''OCF IPaddr/IPaddr2 Resource Agent Test''' __copyright__=''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import string,sys,struct,os,random,time,syslog from cts.CTSvars import * def usage(): print "usage: " + sys.argv[0] \ + " [-2]"\ + " [--ipbase|-i first-test-ip]"\ + " [--ipnum|-n test-ip-num]"\ + " [--help|-h]"\ + " [--perform|-p op]"\ + " [number-of-iterations]" sys.exit(1) def perform_op(ra, ip, op): os.environ["OCF_RA_VERSION_MAJOR"] = "1" os.environ["OCF_RA_VERSION_MINOR"] = "0" os.environ["OCF_ROOT"] = CTSvars.OCF_ROOT_DIR os.environ["OCF_RESOURCE_INSTANCE"] = ip os.environ["OCF_RESOURCE_TYPE"] = ra os.environ["OCF_RESKEY_ip"] = ip os.environ["HA_LOGFILE"] = "/dev/null" os.environ["HA_LOGFACILITY"] = "local7" path = CTSvars.OCF_ROOT_DIR +"/resource.d/heartbeat/" + ra return os.spawnvpe(os.P_WAIT, path, [ra, op], os.environ) def audit(ra, iplist, ipstatus, summary): passed = 1 for ip in iplist: ret = perform_op(ra, ip, "monitor") if ret != ipstatus[ip]: passed = 0 log("audit: status of %s should be %d but it is %d\t [failure]"% (ip,ipstatus[ip],ret)) ipstatus[ip] = ret summary["audit"]["called"] += 1; if passed : summary["audit"]["success"] += 1 else : summary["audit"]["failure"] += 1 def log(towrite): t = time.strftime("%Y/%m/%d_%H:%M:%S\t", time.localtime(time.time())) logstr = t + " "+str(towrite) syslog.syslog(logstr) print logstr if __name__ == '__main__': ra = "IPaddr" ipbase = "127.0.0.10" ipnum = 1 itnum = 50 perform = None summary = { "start":{"called":0,"success":0,"failure":0}, "stop" :{"called":0,"success":0,"failure":0}, "audit":{"called":0,"success":0,"failure":0} } syslog.openlog(sys.argv[0], 0, syslog.LOG_LOCAL7) # Process arguments... skipthis = None args = sys.argv[1:] for i in range(0, len(args)) : if skipthis : skipthis = None continue elif args[i] == "-2" : ra = "IPaddr2" elif args[i] == "--ip" or args[i] == "-i" : skipthis = 1 ipbase = args[i+1] elif args[i] == "--ipnum" or args[i] == "-n" : skipthis = 1 ipnum = int(args[i+1]) elif args[i] == "--perform" or args[i] == "-p" : skipthis = 1 perform = args[i+1] elif args[i] == "--help" or args[i] == "-h" : usage() else: itnum = int(args[i]) log("Begin OCF IPaddr/IPaddr2 Test") # Generate the test ips iplist = [] ipstatus = {} fields = string.split(ipbase, '.') for i in range(0, ipnum) : ip = string.join(fields, '.') iplist.append(ip) ipstatus[ip]=perform_op(ra,ip,"monitor") fields[3] = str(int(fields[3])+1) log("Test ip:" + str(iplist)) # If use ask perform an operation if perform != None: log("Perform opeartion %s"%perform) for ip in iplist: perform_op(ra, ip, perform) log("Done") sys.exit() log("RA Type:" + ra) log("Test Count:" + str(itnum)) # Prepare Random f = open("/dev/urandom", "r") seed=struct.unpack("BBB", f.read(3)) f.close() #seed=(123,321,231) rand = random.Random() rand.seed(seed[0]) log("Test Random Seed:" + str(seed)) # # Begin Tests log(">>>>>>>>>>>>>>>>>>>>>>>>") for i in range(0, itnum): ip = rand.choice(iplist) if ipstatus[ip] == 0: op = "stop" elif ipstatus[ip] == 7: op = "start" else : op = rand.choice(["start","stop"]) ret = perform_op(ra, ip, op) # update status if op == "start" and ret == 0: ipstatus[ip] = 0 elif op == "stop" and ret == 0: ipstatus[ip] = 7 else : ipstatus[ip] = 1 result = "" if ret == 0: result = "success" else : result = "failure" summary[op]["called"] += 1 summary[op][result] += 1 log( "%d:%s %s \t[%s]"%(i, op, ip, result)) audit(ra, iplist, ipstatus, summary) log("<<<<<<<<<<<<<<<<<<<<<<<<") log("start:\t" + str(summary["start"])) log("stop: \t" + str(summary["stop"])) log("audit:\t" + str(summary["audit"])) diff --git a/cts/extracttests.py b/cts/extracttests.py index 5e547f2b2d..0d6b39ddec 100644 --- a/cts/extracttests.py +++ b/cts/extracttests.py @@ -1,107 +1,107 @@ #!/usr/bin/python __copyright__=''' Copyright: (C) 2005 International Business Machines, Inc. Author: Alan Robertson Support: linux-ha-dev@lists.tummy.com License: GNU General Public License (GPL) ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import sys import re #Numeric comparison sorting def sorttestnum(lhs, rhs): return int(lhs) - int(rhs) # # Compress sorted list of tests into runs (ranges) of sequential tests # So that (1,2,3,4) gets compressed down into (1,5) # We add one so we get the marker showing the beginning of the next test # # # def testruns(list): if len(list) < 1: return None, None first=int(list.pop(0)) last=first+1 while len(list) >= 1 and list[0] == last: last=list.pop(0)+1 return (int(first), int(last)) # # Scanning for particular tests... # class ExtractTests: def expandtestranges(self, testlist): outlist = [] for j in range(len(testlist)): match = re.match("([0-9]+)[-:]([0-9]+)", testlist[j]) if match: for k in range(int(match.groups()[0]),int(match.groups()[1])+1): outlist.append(k) else: outlist.append(testlist[j]) return outlist def __init__(self, filename, testlist): self.file = open(filename, "r") testlist = self.expandtestranges(testlist) testlist.sort(sorttestnum) self.testlist = testlist print "Extracting tests ", self.testlist self.regex=re.compile(" CTS: Running test .*(\[|\s)([0-9]+)") self.CTSregex=re.compile(" CTS: ") self.DEBUGregex=re.compile(" CTS: (BadNews|debug):") def __call__(self): first, last = testruns(self.testlist) curtest=0 while 1: line = self.file.readline() lineprinted=None if line == None or line == "": break regexmatchobj=self.regex.search(line) if regexmatchobj: curtest= int(regexmatchobj.groups()[1]) if curtest == 0: # Print the summary if self.CTSregex.search(line): if not self.DEBUGregex.search(line): sys.stdout.write(line) lineprinted=1 if curtest >= first and not lineprinted: sys.stdout.write(line) if curtest >= last: first, last = testruns(self.testlist) if first == None: break if len(sys.argv) < 3: print "Usage:", sys.argv[0] , "logfilename testnumber ..." sys.exit(1) foo = ExtractTests (sys.argv[1], sys.argv[2:]) foo() diff --git a/include/crm_config.h.in b/include/crm_config.h.in index e50b6ed2aa..19a57be0f5 100755 --- a/include/crm_config.h.in +++ b/include/crm_config.h.in @@ -1,72 +1,72 @@ /* crm_config.h.in */ /* * Copyright (C) 2006 - 2007 * Andrew Beekhof * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_CONFIG__H__IN #define CRM_CONFIG__H__IN /* Pacemaker configuration */ /* Current pacemaker version */ #undef PACEMAKER_VERSION /* Build version */ #undef BUILD_VERSION /* Set of enabled features */ #undef CRM_FEATURES /* Where to keep CIB configuration files */ #undef CRM_CONFIG_DIR /* Group to run Pacemaker daemons as */ #undef CRM_DAEMON_GROUP /* User to run Pacemaker daemons as */ #undef CRM_DAEMON_USER /* Where to keep state files and sockets */ #undef CRM_STATE_DIR /* Location for the Pacemaker Relax-NG Schema */ #undef CRM_DTD_DIRECTORY /* Current version of the Pacemaker Relax-NG Schema */ #undef CRM_DTD_VERSION /* Where to keep PEngine outputs */ #undef PE_STATE_DIR /* Support the Heartbeat messaging and membership layer */ #undef SUPPORT_HEARTBEAT /* Support the Corosync messaging and membership layer */ #undef SUPPORT_COROSYNC /* Compatability alias for SUPPORT_COROSYNC */ #undef SUPPORT_AIS /* Compatability alias for SUPPORT_COROSYNC */ #undef AIS_COROSYNC /* Use g_hash_table compatibility functions */ #undef USE_GHASH_COMPAT #endif /* CRM_CONFIG__H__IN */ diff --git a/include/crm_internal.h b/include/crm_internal.h index 0b682558b9..16965f70e3 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -1,90 +1,90 @@ /* crm_internal.h */ /* * Copyright (C) 2006 - 2008 * Andrew Beekhof * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_INTERNAL__H #define CRM_INTERNAL__H #include #include /* Prototypes for libreplace functions */ #ifndef HAVE_DAEMON int daemon(int nochdir, int noclose); #endif #ifndef HAVE_SETENV int setenv(const char *name, const char * value, int overwrite); #endif #ifndef HAVE_UNSETENV int unsetenv(const char *name); #endif #ifndef HAVE_STRERROR char * strerror(int errnum); #endif #ifndef HAVE_SCANDIR # include int scandir (const char *dir_name, struct dirent ***array, int (*select_fn) (const struct dirent *), int (*compare_fn) (const void *, const void *)); #endif #ifndef HAVE_ALPHASORT # include int alphasort(const void *dirent1, const void *dirent2); #endif #ifndef HAVE_INET_PTON int inet_pton(int af, const char *src, void *dst); #endif #ifndef HAVE_STRNLEN size_t strnlen(const char *s, size_t maxlen); #endif #ifndef HAVE_STRNDUP char *strndup(const char *str, size_t len); #endif #ifndef HAVE_STRLCPY size_t strlcpy(char * dest, const char *source, size_t len); #endif #ifndef HAVE_STRLCAT size_t strlcat(char * dest, const char *source, size_t len); #endif /* * Some compilers do not define __FUNCTION__ */ /* Sun studio compiler */ # ifdef __SUNPRO_C # define __FUNCTION__ __func__ # endif # ifdef __MY_UNKNOWN_C # define __FUNCTION__ "__FUNCTION__" # endif #endif /* CRM_INTERNAL__H */ diff --git a/include/portability.h b/include/portability.h index 47d7b3a657..4604f4d59b 100644 --- a/include/portability.h +++ b/include/portability.h @@ -1,104 +1,104 @@ #ifndef PORTABILITY_H # define PORTABILITY_H /* * Copyright (C) 2001 Alan Robertson * This software licensed under the GNU LGPL. * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public + * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #define EOS '\0' #define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) #define STRLEN_CONST(conststr) ((size_t)((sizeof(conststr)/sizeof(char))-1)) #define STRNCMP_CONST(varstr, conststr) strncmp((varstr), conststr, STRLEN_CONST(conststr)+1) #define STRLEN(c) STRLEN_CONST(c) /* Needs to be defined before any other includes, otherwise some system * headers do not behave as expected! Major black magic... */ #undef _GNU_SOURCE /* in case it was defined on the command line */ #define _GNU_SOURCE /* Please leave this as the first #include - Solaris needs it there */ #ifdef HAVE_CONFIG_H #include #endif #include #ifdef BSD # define SCANSEL_CAST (void *) #else # define SCANSEL_CAST /* Nothing */ #endif #if defined(ANSI_ONLY) && !defined(inline) # define inline /* nothing */ # undef NETSNMP_ENABLE_INLINE # define NETSNMP_NO_INLINE 1 #endif #ifndef HAVE_DAEMON /* We supply a replacement function, but need a prototype */ int daemon(int nochdir, int noclose); #endif /* HAVE_DAEMON */ #ifndef HAVE_SETENV /* We supply a replacement function, but need a prototype */ int setenv(const char *name, const char * value, int why); #endif /* HAVE_SETENV */ #ifndef HAVE_STRERROR /* We supply a replacement function, but need a prototype */ char * strerror(int errnum); #endif /* HAVE_STRERROR */ #ifndef HAVE_ALPHASORT # include int alphasort(const void *dirent1, const void *dirent2); #endif /* HAVE_ALPHASORT */ #ifndef HAVE_INET_PTON /* We supply a replacement function, but need a prototype */ int inet_pton(int af, const char *src, void *dst); #endif /* HAVE_INET_PTON */ #ifndef HAVE_STRNLEN size_t strnlen(const char *s, size_t maxlen); #else # define USE_GNU #endif #ifndef HAVE_STRNDUP char *strndup(const char *str, size_t len); #else # define USE_GNU #endif #ifdef HAVE_STRUCT_UCRED_DARWIN # include # ifndef SYS_NMLN # define SYS_NMLN _SYS_NAMELEN # endif /* SYS_NMLN */ #endif #define POINTER_TO_SIZE_T(p) ((size_t)(p)) /*pointer cast as size_t*/ #define POINTER_TO_SSIZE_T(p) ((ssize_t)(p)) /*pointer cast as ssize_t*/ #define POINTER_TO_ULONG(p) ((unsigned long)(p)) /*pointer cast as unsigned long*/ #endif /* PORTABILITY_H */ diff --git a/lib/cib/cib_acl.c b/lib/cib/cib_acl.c index fe13a1978a..0ee03dbf47 100644 --- a/lib/cib/cib_acl.c +++ b/lib/cib/cib_acl.c @@ -1,783 +1,783 @@ /* * Copyright (C) 2009 Yan Gao * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include typedef struct acl_obj_s { const char *mode; const char *tag; const char *ref; const char *xpath; const char *attribute; } acl_obj_t; typedef struct xml_perm_s { const char *mode; GHashTable *attribute_perms; } xml_perm_t; static gboolean req_by_privileged(xmlNode *request); static xmlNode *diff_xml_object_orig(xmlNode *old, xmlNode *new, gboolean suppress, xmlNode *new_diff); static gboolean unpack_user_acl(xmlNode *xml_acls, const char *user, GListPtr *user_acl); static gboolean user_match(const char *user, const char *uid); static gboolean unpack_acl(xmlNode *xml_acls, xmlNode *xml_acl, GListPtr *acl); static gboolean unpack_role_acl(xmlNode *xml_acls, const char *role, GListPtr *acl); static gboolean acl_append(xmlNode *acl_child, GListPtr *acl); static void free_acl(GListPtr acl); static gboolean parse_acl_xpath(xmlNode *xml, GListPtr acl, GListPtr *parsed_acl); static gboolean gen_xml_perms(xmlNode *xml, GListPtr acl, GHashTable **xml_perms); static int search_xml_children(GListPtr *children, xmlNode *root, const char *tag, const char *field, const char *value, gboolean search_matches); static int search_xpath_objects(GListPtr *objects, xmlNode *xml_obj, const char *xpath); static gboolean update_xml_perms(xmlNode *xml, acl_obj_t *acl_obj, GHashTable *xml_perms); static gboolean update_xml_children_perms(xmlNode *xml, const char *mode, GHashTable *xml_perms); static void free_xml_perm(gpointer xml_perm); static gboolean acl_filter_xml(xmlNode *xml, GHashTable *xml_perms); static gboolean acl_check_diff_xml(xmlNode *xml, GHashTable *xml_perms); gboolean acl_enabled(GHashTable *config_hash) { const char *value = NULL; gboolean rc = FALSE; value = cib_pref(config_hash, "enable-acl"); rc = crm_is_true(value); crm_debug("CIB ACL is %s", rc?"enabled":"disabled"); return rc; } /* rc = TRUE if orig_cib has been filtered*/ /* That means *filtered_cib rather than orig_cib should be exploited afterwards*/ gboolean acl_filter_cib(xmlNode *request, xmlNode *current_cib, xmlNode *orig_cib, xmlNode **filtered_cib) { const char *user = NULL; xmlNode *xml_acls = NULL; xmlNode *tmp_cib = NULL; GListPtr user_acl = NULL; GHashTable *xml_perms = NULL; *filtered_cib = NULL; if (req_by_privileged(request)) { return FALSE; } if (orig_cib == NULL) { return FALSE; } if (current_cib == NULL) { return TRUE; } xml_acls = get_object_root(XML_CIB_TAG_ACLS, current_cib); if (xml_acls == NULL) { crm_warn("Ordinary users cannot access the CIB without any defined ACLs: '%s'", user); return TRUE; } user = crm_element_value(request, F_CIB_USER); unpack_user_acl(xml_acls, user, &user_acl); tmp_cib = copy_xml(orig_cib); gen_xml_perms(tmp_cib, user_acl, &xml_perms); if (acl_filter_xml(tmp_cib, xml_perms)) { crm_warn("User '%s' doesn't have the permission for the whole CIB", user); tmp_cib = NULL; } g_hash_table_destroy(xml_perms); free_acl(user_acl); *filtered_cib = tmp_cib; return TRUE; } /* rc = TRUE if the request passes the ACL check */ /* rc = FALSE if the permission is denied */ gboolean acl_check_diff(xmlNode *request, xmlNode *current_cib, xmlNode *result_cib, xmlNode *diff) { const char *user = NULL; xmlNode *xml_acls = NULL; GListPtr user_acl = NULL; xmlNode *orig_diff = NULL; int rc = FALSE; if (req_by_privileged(request)) { return TRUE; } if (diff == NULL) { return TRUE; } if (current_cib == NULL) { return FALSE; } xml_acls = get_object_root(XML_CIB_TAG_ACLS, current_cib); if (xml_acls == NULL) { crm_warn("Ordinary users cannot access the CIB without any defined ACLs: '%s'", user); return FALSE; } user = crm_element_value(request, F_CIB_USER); unpack_user_acl(xml_acls, user, &user_acl); orig_diff = diff_xml_object_orig(current_cib, result_cib, FALSE, diff); xml_child_iter( orig_diff, diff_child, const char *tag = crm_element_name(diff_child); GListPtr parsed_acl = NULL; crm_debug("Preparing ACL checking on '%s'", tag); if (crm_str_eq(tag, XML_TAG_DIFF_REMOVED, TRUE)) { crm_debug("Parsing any xpaths under the ACL according to the current CIB"); parse_acl_xpath(current_cib, user_acl, &parsed_acl); } else if (crm_str_eq(tag, XML_TAG_DIFF_ADDED, TRUE)) { crm_debug("Parsing any xpaths under the ACL according to the result CIB"); parse_acl_xpath(result_cib, user_acl, &parsed_acl); } else { continue; } xml_child_iter( diff_child, diff_cib, GHashTable *xml_perms = NULL; gen_xml_perms(diff_cib, parsed_acl, &xml_perms); rc = acl_check_diff_xml(diff_cib, xml_perms); g_hash_table_destroy(xml_perms); if (rc == FALSE) { crm_warn("User '%s' doesn't have enough permission to modify the CIB objects", user); goto done; } ); free_acl(parsed_acl); ); done: free_xml(orig_diff); free_acl(user_acl); return rc; } static gboolean req_by_privileged(xmlNode *request) { const char *user = crm_element_value(request, F_CIB_USER); if (user == NULL || strcmp(user, "") == 0) { crm_debug("Request without an explicit client user: op=%s, origin=%s, client=%s", crm_element_value(request, F_CIB_OPERATION), crm_element_value(request, F_ORIG)?crm_element_value(request, F_ORIG):"local", crm_element_value(request, F_CIB_CLIENTNAME)); return TRUE; } if (is_privileged(user)) { return TRUE; } return FALSE; } /* Borrowed from lib/common/xml.c: diff_xml_object() */ /* But if a new format of diff ("new_diff") exists, we could reuse its "diff-removed" part */ /* So it would be more time-saving than generating the diff from start */ static xmlNode * diff_xml_object_orig(xmlNode *old, xmlNode *new, gboolean suppress, xmlNode *new_diff) { xmlNode *tmp1 = NULL; xmlNode *diff = create_xml_node(NULL, "diff"); xmlNode *removed = NULL; xmlNode *added = NULL; crm_xml_add(diff, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); if (new_diff && (tmp1 = find_xml_node(new_diff, "diff-removed", FALSE))) { removed = add_node_copy(diff, tmp1); } else { removed = create_xml_node(diff, "diff-removed"); tmp1 = subtract_xml_object(removed, old, new, FALSE, "removed:top"); if(suppress && tmp1 != NULL && can_prune_leaf(tmp1)) { free_xml_from_parent(removed, tmp1); } } added = create_xml_node(diff, "diff-added"); tmp1 = subtract_xml_object(added, new, old, FALSE, "added:top"); if(suppress && tmp1 != NULL && can_prune_leaf(tmp1)) { free_xml_from_parent(added, tmp1); } if(added->children == NULL && removed->children == NULL) { free_xml(diff); diff = NULL; } return diff; } static gboolean unpack_user_acl(xmlNode *xml_acls, const char *user, GListPtr *user_acl) { if (xml_acls == NULL) { return FALSE; } xml_child_iter( xml_acls, xml_acl, const char *tag = crm_element_name(xml_acl); const char *id = crm_element_value(xml_acl, XML_ATTR_ID); if (crm_str_eq(tag, XML_ACL_TAG_USER, TRUE)) { if (user_match(user, id)) { crm_debug("Unpacking ACL of user: '%s'", id); unpack_acl(xml_acls, xml_acl, user_acl); return TRUE; } } ); return FALSE; } static gboolean user_match(const char *user, const char *uid) { CRM_CHECK(user != NULL && user[0] != '\0' && uid != NULL && uid[0] != '\0', return FALSE); if (crm_str_eq(user, uid, TRUE)) { return TRUE; } return FALSE; } static gboolean unpack_acl(xmlNode *xml_acls, xmlNode *xml_acl, GListPtr *acl) { xml_child_iter( xml_acl, acl_child, const char *tag = crm_element_name(acl_child); if (crm_str_eq(XML_ACL_TAG_ROLE_REF, tag, TRUE)) { const char *ref_role = crm_element_value(acl_child, XML_ATTR_ID); if (ref_role) { unpack_role_acl(xml_acls, ref_role, acl); } } else if (crm_str_eq(XML_ACL_TAG_READ, tag, TRUE) || crm_str_eq(XML_ACL_TAG_WRITE, tag, TRUE) || crm_str_eq(XML_ACL_TAG_DENY, tag, TRUE)) acl_append(acl_child, acl); ); return TRUE; } static gboolean unpack_role_acl(xmlNode *xml_acls, const char *role, GListPtr *acl) { xml_child_iter_filter( xml_acls, xml_acl, XML_ACL_TAG_ROLE, const char *role_id = crm_element_value(xml_acl, XML_ATTR_ID); if (role_id && crm_str_eq(role, role_id, TRUE)) { crm_debug("Unpacking ACL of the referenced role: '%s'", role); unpack_acl(xml_acls, xml_acl, acl); return TRUE; } ); return FALSE; } static gboolean acl_append(xmlNode *acl_child, GListPtr *acl) { acl_obj_t *acl_obj = NULL; const char *tag = crm_element_value(acl_child, XML_ACL_ATTR_TAG); const char *ref = crm_element_value(acl_child, XML_ACL_ATTR_REF); const char *xpath = crm_element_value(acl_child, XML_ACL_ATTR_XPATH); if (tag == NULL && ref == NULL && xpath == NULL) { return FALSE; } crm_malloc0(acl_obj, sizeof(acl_obj_t)); if (acl_obj == NULL) { return FALSE; } acl_obj->mode = crm_element_name(acl_child); acl_obj->tag = tag; acl_obj->ref = ref; acl_obj->xpath = xpath; acl_obj->attribute = crm_element_value(acl_child, XML_ACL_ATTR_ATTRIBUTE); *acl = g_list_append(*acl, acl_obj); crm_debug_3("ACL object appended: mode=%s, tag=%s, ref=%s, xpath=%s, attribute=%s", acl_obj->mode, acl_obj->tag, acl_obj->ref, acl_obj->xpath, acl_obj->attribute); return TRUE; } static void free_acl(GListPtr acl) { GListPtr iterator = acl; while(iterator != NULL) { crm_free(iterator->data); iterator = iterator->next; } if(acl != NULL) { g_list_free(acl); } } static gboolean parse_acl_xpath(xmlNode *xml, GListPtr acl, GListPtr *parsed_acl) { GListPtr acl_iterator = acl; acl_obj_t *new_acl_obj = NULL; *parsed_acl = NULL; while (acl_iterator != NULL) { acl_obj_t *acl_obj = acl_iterator->data; if (acl_obj->tag || acl_obj->ref) { crm_malloc0(new_acl_obj, sizeof(acl_obj_t)); if (new_acl_obj == NULL) { return FALSE; } memcpy(new_acl_obj, acl_obj, sizeof(acl_obj_t)); *parsed_acl = g_list_append(*parsed_acl, new_acl_obj); crm_debug_3("Copied ACL object: mode=%s, tag=%s, ref=%s, xpath=%s, attribute=%s", new_acl_obj->mode, new_acl_obj->tag, new_acl_obj->ref, new_acl_obj->xpath, new_acl_obj->attribute); } else if (acl_obj->xpath) { GListPtr children = NULL; GListPtr children_iterator = NULL; search_xpath_objects(&children, xml, acl_obj->xpath); children_iterator = children; while (children_iterator != NULL) { crm_malloc0(new_acl_obj, sizeof(acl_obj_t)); if (new_acl_obj == NULL) { return FALSE; } new_acl_obj->mode = acl_obj->mode; new_acl_obj->tag = crm_element_name((xmlNode*)children_iterator->data); new_acl_obj->ref = crm_element_value(children_iterator->data, XML_ATTR_ID); new_acl_obj->attribute = acl_obj->attribute; *parsed_acl = g_list_append(*parsed_acl, new_acl_obj); crm_debug_3("Parsed the ACL object with xpath '%s' to: mode=%s, tag=%s, ref=%s, xpath=%s, attribute=%s", acl_obj->xpath, new_acl_obj->mode, new_acl_obj->tag, new_acl_obj->ref, new_acl_obj->xpath, new_acl_obj->attribute); children_iterator = children_iterator->next; } g_list_free(children); } acl_iterator = acl_iterator->next; } return TRUE; } static gboolean gen_xml_perms(xmlNode *xml, GListPtr acl, GHashTable **xml_perms) { GListPtr acl_iterator = acl; if (*xml_perms == NULL) { *xml_perms = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free_xml_perm); } while (acl_iterator != NULL) { acl_obj_t *acl_obj = acl_iterator->data; GListPtr children = NULL; GListPtr children_iterator = NULL; crm_debug("Generating permissions with ACL: mode=%s, tag=%s, ref=%s, xpath=%s, attribute=%s", acl_obj->mode, acl_obj->tag, acl_obj->ref, acl_obj->xpath, acl_obj->attribute); if (acl_obj->tag || acl_obj->ref) { search_xml_children(&children, xml, acl_obj->tag, XML_ATTR_ID, acl_obj->ref, TRUE); } else if (acl_obj->xpath) { /* Never be here for a modification operation */ /* Already parse_acl_xpath() previously */ search_xpath_objects(&children, xml, acl_obj->xpath); } children_iterator = children; while (children_iterator != NULL) { update_xml_perms(children_iterator->data, acl_obj, *xml_perms); children_iterator = children_iterator->next; } g_list_free(children); acl_iterator = acl_iterator->next; } return TRUE; } /* Borrowed from lib/common/xml.c: find_xml_children() */ /* But adding the original xmlNode pointers into a GList */ static int search_xml_children(GListPtr *children, xmlNode *root, const char *tag, const char *field, const char *value, gboolean search_matches) { int match_found = 0; CRM_CHECK(root != NULL, return FALSE); CRM_CHECK(children != NULL, return FALSE); if(tag != NULL && safe_str_neq(tag, crm_element_name(root))) { } else if(value != NULL && safe_str_neq(value, crm_element_value(root, field))) { } else { *children = g_list_append(*children, root); match_found = 1; } if(search_matches || match_found == 0) { xml_child_iter( root, child, match_found += search_xml_children( children, child, tag, field, value, search_matches); ); } return match_found; } static int search_xpath_objects(GListPtr *objects, xmlNode *xml_obj, const char *xpath) { int match_found = 0; xmlXPathObjectPtr xpathObj = NULL; if(xpath == NULL) { return 0; } xpathObj = xpath_search(xml_obj, xpath); if(xpathObj == NULL || xpathObj->nodesetval == NULL || xpathObj->nodesetval->nodeNr < 1) { crm_debug("No match for %s in %s", xpath, xmlGetNodePath(xml_obj)); } else if(xpathObj->nodesetval->nodeNr > 0) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for(lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); if (match == NULL) { continue; } *objects = g_list_append(*objects, match); match_found++; } } if(xpathObj) { xmlXPathFreeObject(xpathObj); } return match_found; } static gboolean update_xml_perms(xmlNode *xml, acl_obj_t *acl_obj, GHashTable *xml_perms) { xml_perm_t *perm = NULL; if (g_hash_table_lookup_extended(xml_perms, xml, NULL, (gpointer)&perm)) { if (perm->mode != NULL) { return FALSE; } } else { crm_malloc0(perm, sizeof(xml_perm_t)); if (perm == NULL) { return FALSE; } g_hash_table_insert(xml_perms, xml, perm); } if (acl_obj->attribute == NULL) { perm->mode = acl_obj->mode; crm_debug_3("Permission for element: element_mode=%s, tag=%s, id=%s", perm->mode, crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID)); xml_child_iter( xml, child, update_xml_children_perms(child, perm->mode, xml_perms); ); } else { if (perm->attribute_perms == NULL || (g_hash_table_lookup_extended(perm->attribute_perms, acl_obj->attribute, NULL, NULL) == FALSE)) { if (perm->attribute_perms == NULL) { perm->attribute_perms = g_hash_table_new_full( crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } g_hash_table_insert(perm->attribute_perms, crm_strdup(acl_obj->attribute), crm_strdup(acl_obj->mode)); crm_debug_3("Permission for attribute: attribute_mode=%s, tag=%s, id=%s attribute=%s", acl_obj->mode, crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID), acl_obj->attribute); } } return TRUE; } static gboolean update_xml_children_perms(xmlNode *xml, const char *mode, GHashTable *xml_perms) { xml_perm_t *perm = NULL; if (g_hash_table_lookup_extended(xml_perms, xml, NULL, (gpointer)&perm)) { if (perm->mode != NULL) { return FALSE; } } else { crm_malloc0(perm, sizeof(xml_perm_t)); if (perm == NULL) { return FALSE; } g_hash_table_insert(xml_perms, xml, perm); } perm->mode = mode; crm_debug_4("Permission for child element: element_mode=%s, tag=%s, id=%s", mode, crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID)); xml_child_iter( xml, child, update_xml_children_perms(child, mode, xml_perms); ); return TRUE; } static void free_xml_perm(gpointer xml_perm) { xml_perm_t *perm = xml_perm; if (perm == NULL) { return; } if (perm->attribute_perms != NULL) { g_hash_table_destroy(perm->attribute_perms); } crm_free(perm); } #define can_read(mode) (crm_str_eq(mode, XML_ACL_TAG_READ, TRUE) \ || crm_str_eq(mode, XML_ACL_TAG_WRITE, TRUE)) #define can_write(mode) crm_str_eq(mode, XML_ACL_TAG_WRITE, TRUE) /* rc = TRUE if the xml is filtered out*/ static gboolean acl_filter_xml(xmlNode *xml, GHashTable *xml_perms) { int children_counter = 0; xml_perm_t *perm = NULL; int allow_counter = 0; xml_child_iter( xml, child, if (acl_filter_xml(child, xml_perms) == FALSE) { children_counter++; } ); g_hash_table_lookup_extended(xml_perms, xml, NULL, (gpointer)&perm); if (perm == NULL) { crm_debug_4("No ACL defined to read the element: tag=%s, id=%s", crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID)); goto end_filter; } if (perm->attribute_perms == NULL) { if (can_read(perm->mode)) { return FALSE; } else { crm_debug_4("No enough permission to read the element: element_mode=%s, tag=%s, id=%s", perm->mode, crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID)); goto end_filter; } } xml_prop_iter(xml, prop_name, prop_value, gpointer mode = NULL; if (g_hash_table_lookup_extended(perm->attribute_perms, prop_name, NULL, &mode)) { if (can_read(mode)) { allow_counter++; } else { xml_remove_prop(xml, prop_name); crm_debug_4("Filtered out the attribute: attribute_mode=%s, tag=%s, id=%s, attribute=%s", (char *)mode, crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID), prop_name); } } else { if (can_read(perm->mode)) { allow_counter++; } else if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE) == FALSE) { xml_remove_prop(xml, prop_name); crm_debug_4("Filtered out the attribute: element_mode=%s, tag=%s, id=%s, attribute=%s", perm->mode, crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID), prop_name); } } ); if (allow_counter) { return FALSE; } if (can_read(perm->mode)) { return FALSE; } end_filter: if (children_counter) { crm_debug_4("Don't filter out the element (tag=%s, id=%s) because user can read its children", crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID)); return FALSE; } free_xml_from_parent(NULL, xml); crm_debug_4("Filtered out the element: tag=%s, id=%s", crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID)); return TRUE; } static gboolean acl_check_diff_xml(xmlNode *xml, GHashTable *xml_perms) { xml_perm_t *perm = NULL; xml_child_iter( xml, child, if (acl_check_diff_xml(child, xml_perms) == FALSE) { return FALSE; } ); g_hash_table_lookup_extended(xml_perms, xml, NULL, (gpointer)&perm); xml_prop_iter(xml, prop_name, prop_value, gpointer mode = NULL; if (crm_str_eq(crm_element_name(xml), XML_TAG_CIB, TRUE)) { if (crm_str_eq(prop_name, XML_ATTR_GENERATION, TRUE) || crm_str_eq(prop_name, XML_ATTR_NUMUPDATES, TRUE) || crm_str_eq(prop_name, XML_ATTR_GENERATION_ADMIN, TRUE)) { continue; } } if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } if (crm_str_eq(prop_name, XML_DIFF_MARKER, TRUE) && xml_has_children(xml)) { continue; } if (perm == NULL) { crm_warn("No ACL defined to modify the element: tag=%s, id=%s, attribute=%s", crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID), prop_name); return FALSE; } if (perm->attribute_perms == NULL) { if (can_write(perm->mode)) { return TRUE; } else { crm_warn("No enough permission to modify the element: element_mode=%s, tag=%s, id=%s, attribute=%s", perm->mode, crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID), prop_name); return FALSE; } } if (g_hash_table_lookup_extended(perm->attribute_perms, prop_name, NULL, &mode)) { if (can_write(mode) == FALSE) { crm_warn("No enough permission to modify the attribute: attribute_mode=%s, tag=%s, id=%s, attribute=%s", (char *)mode, crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID), prop_name); return FALSE; } } else if (can_write(perm->mode) == FALSE) { crm_warn("No enough permission to modify the element and the attribute: element_mode=%s, tag=%s, id=%s, attribute=%s", perm->mode, crm_element_name(xml), crm_element_value(xml, XML_ATTR_ID), prop_name); return FALSE; } ); return TRUE; } diff --git a/lib/plugins/lrm/raexecstonith.c b/lib/plugins/lrm/raexecstonith.c index 5528c29d16..04caeffdf7 100644 --- a/lib/plugins/lrm/raexecstonith.c +++ b/lib/plugins/lrm/raexecstonith.c @@ -1,270 +1,270 @@ /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * File: raexecocf.c * Author: Sun Jiang Dong * Copyright (c) 2004 International Business Machines * * This code implements the Resource Agent Plugin Module for LSB style. * It's a part of Local Resource Manager. Currently it's used by lrmd only. */ #include #include #include #include #include #include #include #include #include #include #include #if HAVE_HB_CONFIG_H #include #endif #if HAVE_GLUE_CONFIG_H #include #endif #include #include #include #include #include /* Add it for compiling on OSX */ #include #include #include #include # define PIL_PLUGINTYPE RA_EXEC_TYPE # define PIL_PLUGINTYPE_S "RAExec" # define PIL_PLUGINLICENSE LICENSE_PUBDOM # define PIL_PLUGINLICENSEURL URL_PUBDOM # define PIL_PLUGIN stonith # define PIL_PLUGIN_S "stonith" static PIL_rc close_stonithRA(PILInterface*, void* ud_interface); /* The begin of exported function list */ static int execra(const char * rsc_id, const char * rsc_type, const char * provider, const char * op_type, const int timeout, GHashTable * params); static uniform_ret_execra_t map_ra_retvalue(int ret_execra , const char * op_type, const char * std_output); static int get_resource_list(GList ** rsc_info); static char* get_resource_meta(const char* rsc_type, const char* provider); static int get_provider_list(const char* op_type, GList ** providers); /* The end of exported function list */ /* Rource agent execution plugin operations */ static struct RAExecOps raops = { execra, map_ra_retvalue, get_resource_list, get_provider_list, get_resource_meta }; PIL_PLUGIN_BOILERPLATE2("1.0", Debug); static const PILPluginImports* PluginImports; static PILPlugin* OurPlugin; static PILInterface* OurInterface; static void* OurImports; static void* interfprivate; /* * Our plugin initialization and registration function * It gets called when the plugin gets loaded. */ PIL_rc PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports); PIL_rc PIL_PLUGIN_INIT(PILPlugin * us, const PILPluginImports* imports) { /* Force the compiler to do a little type checking */ (void)(PILPluginInitFun)PIL_PLUGIN_INIT; PluginImports = imports; OurPlugin = us; /* Register ourself as a plugin */ imports->register_plugin(us, &OurPIExports); /* Register our interfaces */ return imports->register_interface(us, PIL_PLUGINTYPE_S, PIL_PLUGIN_S, &raops, close_stonithRA, &OurInterface, &OurImports, interfprivate); } static PIL_rc close_stonithRA(PILInterface* pif, void* ud_interface) { return PIL_OK; } static int execra(const char *rsc_id, const char *rsc_type, const char *provider, const char *op_type, const int timeout, GHashTable *params) { int rc = 0; stonith_key_value_t *device_params = NULL; stonith_t *stonith_api = NULL; provider = get_stonith_provider(rsc_type, provider); crm_log_init("lrm-stonith", LOG_INFO, FALSE, FALSE, 0, NULL); if ( 0 == STRNCMP_CONST(op_type, "meta-data")) { char *meta = get_resource_meta(rsc_type, provider); printf("%s", meta); free(meta); exit(0); } stonith_api = stonith_api_new(); rc = stonith_api->cmds->connect(stonith_api, "lrmd", NULL); if(provider == NULL) { crm_err("No such legacy stonith device: %s", rsc_type); rc = st_err_unknown_device; } else if ( 0 == STRNCMP_CONST(op_type, "monitor") ) { rc = stonith_api->cmds->call( stonith_api, st_opt_sync_call, rsc_id, op_type, NULL, timeout); } else if ( 0 == STRNCMP_CONST(op_type, "start") ) { char *key = NULL; char *value = NULL; GHashTableIter iter; const char *agent = rsc_type; if(0 == STRNCMP_CONST(provider, "heartbeat")) { agent = "fence_legacy"; g_hash_table_replace(params, strdup("plugin"), strdup(rsc_type)); } g_hash_table_iter_init( &iter, params ); while (g_hash_table_iter_next (&iter, (gpointer *)&key, (gpointer *)&value)) { device_params = stonith_key_value_add(device_params, key, value); } rc = stonith_api->cmds->register_device( stonith_api, st_opt_sync_call, rsc_id, provider, agent, device_params); } else if ( 0 == STRNCMP_CONST(op_type, "stop") ) { rc = stonith_api->cmds->remove_device( stonith_api, st_opt_sync_call, rsc_id); } crm_debug("%s_%s returned %d", rsc_id, op_type, rc); stonith_api->cmds->disconnect(stonith_api); stonith_api_delete(stonith_api); /* cl_log(LOG_DEBUG, "stonithRA orignal exit code=%d", exit_value); */ exit(map_ra_retvalue(rc, op_type, NULL)); } static uniform_ret_execra_t map_ra_retvalue(int rc, const char * op_type, const char * std_output) { if(rc == st_err_unknown_device) { if ( 0 == STRNCMP_CONST(op_type, "stop") ) { rc = 0; } else if ( 0 == STRNCMP_CONST(op_type, "start") ) { rc = 5; } else { rc = 7; } } else if (rc < 0 || rc > EXECRA_STATUS_UNKNOWN) { crm_warn("Mapped the invalid return code %d.", rc); rc = EXECRA_UNKNOWN_ERROR; } return rc; } static int get_resource_list(GList ** rsc_info) { stonith_t *stonith_api = NULL; stonith_key_value_t *devices = NULL; stonith_key_value_t *dIter = NULL; if ( rsc_info == NULL ) { crm_err("Parameter error: get_resource_list"); return -2; } stonith_api = stonith_api_new(); stonith_api->cmds->list(stonith_api, st_opt_sync_call, NULL, &devices, 0); stonith_api_delete(stonith_api); for(dIter = devices; dIter; dIter = dIter->next) { *rsc_info = g_list_append(*rsc_info, dIter->value); } stonith_key_value_freeall(devices, 1, 0); return 0; } static int get_provider_list(const char* op_type, GList ** providers) { if(providers == NULL) { return -1; } if(op_type == NULL) { *providers = g_list_append(*providers, g_strdup("redhat")); *providers = g_list_append(*providers, g_strdup("heartbeat")); return 2; } else { const char *provider = get_stonith_provider(op_type, NULL); if(provider) { *providers = g_list_append(*providers, g_strdup(provider)); return 1; } } return 0; } static char * get_resource_meta(const char* rsc_type, const char* provider) { char *buffer = NULL; stonith_t *stonith_api = stonith_api_new(); stonith_api->cmds->metadata( stonith_api, st_opt_sync_call, rsc_type, provider, &buffer, 0); stonith_api_delete(stonith_api); crm_debug("stonithRA plugin: got metadata: %s", buffer); /* TODO: Convert to XML and ensure our standard actions exist */ return buffer; } diff --git a/pacemaker.spec.in b/pacemaker.spec.in index 58db7a3a73..2e2723fc8d 100644 --- a/pacemaker.spec.in +++ b/pacemaker.spec.in @@ -1,702 +1,709 @@ %global gname haclient %global uname hacluster %global pcmk_docdir %{_docdir}/%{name} %global specversion 0 %global upstream_version tip %global upstream_prefix pacemaker # Compatibility macros for distros (fedora) that don't provide Python macros by default # Do this instead of trying to conditionally include {_rpmconfigdir}/macros.python %{!?py_ver: %{expand: %%global py_ver %%(echo `python -c "import sys; print sys.version[:3]"`)}} %{!?py_prefix: %{expand: %%global py_prefix %%(echo `python -c "import sys; print sys.prefix"`)}} %{!?py_libdir: %{expand: %%global py_libdir %%{expand:%%%%{py_prefix}/%%%%{_lib}/python%%%%{py_ver}}}} %{!?py_sitedir: %{expand: %%global py_sitedir %%{expand:%%%%{py_libdir}/site-packages}}} # Compatibility macro wrappers for legacy RPM versions that do not # support conditional builds %{!?bcond_without: %{expand: %%global bcond_without() %%{expand:%%%%{!?_without_%%{1}:%%%%global with_%%{1} 1}}}} %{!?bcond_with: %{expand: %%global bcond_with() %%{expand:%%%%{?_with_%%{1}:%%%%global with_%%{1} 1}}}} %{!?with: %{expand: %%global with() %%{expand:%%%%{?with_%%{1}:1}%%%%{!?with_%%{1}:0}}}} %{!?without: %{expand: %%global without() %%{expand:%%%%{?with_%%{1}:0}%%%%{!?with_%%{1}:1}}}} # Conditionals # Invoke "rpmbuild --without " or "rpmbuild --with " # to disable or enable specific features # Supported cluster stacks, must support at least one %bcond_with cman %bcond_without doc %bcond_without ais %bcond_without heartbeat # ESMTP is not available in RHEL, only in EPEL. Allow people to build # the RPM without ESMTP in case they choose not to use EPEL packages %bcond_without esmtp %bcond_without snmp # Build with/without support for profiling tools %bcond_with profiling %bcond_with gcov # Support additional trace logging %bcond_without tracedata # We generate some docs using Publican, but its not available everywhere %bcond_without publican # Use a different versioning scheme %bcond_with pre_release %if %{with profiling} # This disables -debuginfo package creation and also the stripping binaries/libraries # Useful if you want sane profiling data %global debug_package %{nil} %endif %if %{with pre_release} %global pcmk_release 0.%{specversion}.%{upstream_version}.hg %else %global pcmk_release %{specversion} %endif Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: 1.1.6 Release: %{pcmk_release}%{?dist} License: GPLv2+ and LGPLv2+ Url: http://www.clusterlabs.org Group: System Environment/Daemons Source0: pacemaker-%{upstream_version}.tar.bz2 BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) AutoReqProv: on Requires(pre): cluster-glue Requires: resource-agents Requires: python >= 2.4 Conflicts: heartbeat < 2.99 %if 0%{?fedora} || 0%{?centos} > 4 || 0%{?rhel} > 4 Requires: perl(:MODULE_COMPAT_%(eval "`%{__perl} -V:version`"; echo $version)) -BuildRequires: help2man libtool-ltdl-devel +BuildRequires: libtool-ltdl-devel %endif %if 0%{?suse_version} # net-snmp-devel on SLES10 does not suck in tcpd-devel automatically -BuildRequires: help2man tcpd-devel +BuildRequires: tcpd-devel # Suse splits this off into a separate package Requires: python-curses python-xml BuildRequires: python-curses python-xml %endif # Required for core functionality BuildRequires: automake autoconf libtool pkgconfig python BuildRequires: glib2-devel cluster-glue-libs-devel libxml2-devel libxslt-devel BuildRequires: pkgconfig python-devel gcc-c++ bzip2-devel gnutls-devel pam-devel # Enables optional functionality BuildRequires: ncurses-devel openssl-devel libselinux-devel docbook-style-xsl resource-agents %ifarch alpha %{ix86} x86_64 BuildRequires: lm_sensors-devel %endif %if %{with cman} BuildRequires: clusterlib-devel %endif %if %{with esmtp} BuildRequires: libesmtp-devel %endif %if %{with snmp} BuildRequires: net-snmp-devel %endif %if %{with ais} # Do not require corosync, the admin should select which stack to use and install it BuildRequires: corosynclib-devel %endif %if %{with heartbeat} # Do not require heartbeat, the admin should select which stack to use and install it BuildRequires: heartbeat-devel heartbeat-libs >= 3.0.0 %endif %if %{with doc} BuildRequires: asciidoc help2man %if %{with publican} BuildRequires: publican inkscape %endif %endif %description Pacemaker is an advanced, scalable High-Availability cluster resource manager for Linux-HA (Heartbeat) and/or OpenAIS. It supports "n-node" clusters with significant capabilities for managing resources and dependencies. It will run scripts at initialization, when machines go up or down, when related resources fail and can be configured to periodically check resource health. Available rpmbuild rebuild options: --without : heartbeat ais snmp esmtp publican %package cli License: GPLv2+ and LGPLv2+ Summary: Command line tools for controlling the Pacemaker cluster resource manager Group: System Environment/Daemons Requires: %{name}-libs = %{version}-%{release} %description cli Pacemaker is an advanced, scalable High-Availability cluster resource manager for Linux-HA (Heartbeat) and/or OpenAIS. It supports "n-node" clusters with significant capabilities for managing resources and dependencies. It will run scripts at initialization, when machines go up or down, when related resources fail and can be configured to periodically check resource health. %package -n %{name}-libs License: GPLv2+ and LGPLv2+ Summary: Libraries used by the Pacemaker cluster resource manager and its clients Group: System Environment/Daemons %description -n %{name}-libs Pacemaker is an advanced, scalable High-Availability cluster resource manager for Linux-HA (Heartbeat) and/or OpenAIS. It supports "n-node" clusters with significant capabilities for managing resources and dependencies. It will run scripts at initialization, when machines go up or down, when related resources fail and can be configured to periodically check resource health. %package -n %{name}-libs-devel License: GPLv2+ and LGPLv2+ Summary: Pacemaker development package Group: Development/Libraries Requires: %{name}-libs = %{version}-%{release} Requires: cluster-glue-libs-devel libtool-ltdl-devel Requires: libxml2-devel libxslt-devel bzip2-devel glib2-devel %if %{with ais} Requires: corosynclib-devel %endif %if %{with heartbeat} Requires: heartbeat-devel %endif %description -n %{name}-libs-devel Headers and shared libraries for developing tools for Pacemaker. Pacemaker is an advanced, scalable High-Availability cluster resource manager for Linux-HA (Heartbeat) and/or OpenAIS. It supports "n-node" clusters with significant capabilities for managing resources and dependencies. It will run scripts at initialization, when machines go up or down, when related resources fail and can be configured to periodically check resource health. %package cts License: GPLv2+ and LGPLv2+ Summary: Test framework for cluster-related technologies like Pacemaker Group: System Environment/Daemons Requires: python %description cts Test framework for cluster-related technologies like Pacemaker %package doc License: GPLv2+ and LGPLv2+ Summary: Documentation for Pacemaker Group: Documentation %description doc Documentation for Pacemaker. Pacemaker is an advanced, scalable High-Availability cluster resource manager for Linux-HA (Heartbeat) and/or OpenAIS. It supports "n-node" clusters with significant capabilities for managing resources and dependencies. It will run scripts at initialization, when machines go up or down, when related resources fail and can be configured to periodically check resource health. %prep %setup -q -n %{upstream_prefix}-%{upstream_version} # Force the local time # # 'hg archive' sets the file date to the date of the last commit. # This can result in files having been created in the future # when building on machines in timezones 'behind' the one the # commit occurred in - which seriously confuses 'make' find . -exec touch \{\} \; %build ./autogen.sh %if %{with snmp} eval `objdump --headers --private-headers /usr/bin/perl | grep RPATH | awk '{print "export LD_LIBRARY_PATH="$2}'` %endif # RHEL <= 5 does not support --docdir docdir=%{pcmk_docdir} %{configure} \ %{!?with_heartbeat: --without-heartbeat} \ %{!?with_ais: --without-ais} \ %{!?with_esmtp: --without-esmtp} \ %{!?with_snmp: --without-snmp} \ %{?with_cman: --with-cman} \ %{?with_profiling: --with-profiling} \ %{?with_gcov: --with-gcov} \ %{?with_tracedata: --with-tracedata} \ --with-initdir=%{_initrddir} \ --localstatedir=%{_var} \ --with-version=%{version}-%{release} \ --enable-fatal-warnings=no make %{_smp_mflags} docdir=%{pcmk_docdir} %install rm -rf %{buildroot} make DESTDIR=%{buildroot} docdir=%{pcmk_docdir} install mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig install -m 644 mcp/pacemaker.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/pacemaker # Scripts that need should be executable chmod a+x %{buildroot}/%{_libdir}/heartbeat/hb2openais-helper.py chmod a+x %{buildroot}/%{_datadir}/pacemaker/tests/cts/CTSlab.py chmod a+x %{buildroot}/%{_datadir}/pacemaker/tests/cts/extracttests.py # These are not actually scripts find %{buildroot} -name '*.xml' -type f -print0 | xargs -0 chmod a-x find %{buildroot} -name '*.xsl' -type f -print0 | xargs -0 chmod a-x find %{buildroot} -name '*.rng' -type f -print0 | xargs -0 chmod a-x find %{buildroot} -name '*.dtd' -type f -print0 | xargs -0 chmod a-x # Dont package static libs find %{buildroot} -name '*.a' -type f -print0 | xargs -0 rm -f find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f # Do not package these either rm -f %{buildroot}/%{_libdir}/heartbeat/crm_primitive.* rm -f %{buildroot}/%{_libdir}/heartbeat/atest rm -f %{buildroot}/%{_libdir}/service_crm.so %if %{with gcov} GCOV_BASE=%{buildroot}/%{_var}/lib/pacemaker/gcov mkdir -p $GCOV_BASE find . -name '*.gcno' -type f | while read F ; do D=`dirname $F` mkdir -p ${GCOV_BASE}/$D cp $F ${GCOV_BASE}/$D done %endif %clean rm -rf %{buildroot} %post /sbin/chkconfig --add pacemaker || : %preun if [ $1 -eq 0 ]; then /sbin/service pacemaker stop &>/dev/null || : /sbin/chkconfig --del pacemaker || : fi %post -n %{name}-libs -p /sbin/ldconfig %postun -n %{name}-libs -p /sbin/ldconfig %files ########################################################### %defattr(-,root,root) %exclude %{_datadir}/pacemaker/tests %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker %{_sbindir}/pacemakerd %{_initrddir}/pacemaker %if %{defined _unitdir} %{_unitdir}/pacemaker.service %endif %{_datadir}/pacemaker %{_datadir}/snmp/mibs/PCMK-MIB.txt %{_libdir}/heartbeat/* %{_sbindir}/crm_attribute %{_sbindir}/crm_master %{_sbindir}/attrd_updater %{_sbindir}/fence_legacy %{_sbindir}/stonith_admin %doc COPYING %doc AUTHORS %doc ChangeLog %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/heartbeat/crm %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pengine %dir %attr (750, %{uname}, %{gname}) %{_var}/run/crm %dir /usr/lib/ocf %dir /usr/lib/ocf/resource.d /usr/lib/ocf/resource.d/pacemaker %if %{with ais} %{_libexecdir}/lcrso/pacemaker.lcrso %endif %files cli +%defattr(-,root,root) %{_sbindir}/cibadmin %{_sbindir}/crm_diff %{_sbindir}/crm_failcount %{_sbindir}/crm_mon %{_sbindir}/crm %{_sbindir}/crm_resource %{_sbindir}/crm_standby %{_sbindir}/crm_verify %{_sbindir}/crmadmin %{_sbindir}/iso8601 %{_sbindir}/ptest %{_sbindir}/crm_shadow %{_sbindir}/cibpipe %{_sbindir}/crm_node %{_sbindir}/crm_simulate %{_sbindir}/crm_report %{_sbindir}/crm_ticket %{py_sitedir}/crm -%doc %{_mandir} +%if %{with doc} +%doc %{_mandir}/man7/* +%doc %{_mandir}/man8/* +%endif %if %{with heartbeat} %{_sbindir}/crm_uuid %else %exclude %{_sbindir}/crm_uuid %endif %doc COPYING %doc AUTHORS %doc ChangeLog %files -n %{name}-libs %defattr(-,root,root) %{_libdir}/libcib.so.* %{_libdir}/libcrmcommon.so.* %{_libdir}/libcrmcluster.so.* %{_libdir}/libpe_status.so.* %{_libdir}/libpe_rules.so.* %{_libdir}/libpengine.so.* %{_libdir}/libtransitioner.so.* %{_libdir}/libstonithd.so.* %doc COPYING.LIB %doc AUTHORS %files doc %defattr(-,root,root) %doc %{pcmk_docdir} %files cts %defattr(-,root,root) %{py_sitedir}/cts %{_datadir}/pacemaker/tests/cts %doc COPYING.LIB %doc AUTHORS %files -n %{name}-libs-devel %defattr(-,root,root) %exclude %{_datadir}/pacemaker/tests/cts %{_datadir}/pacemaker/tests %{_includedir}/pacemaker %{_libdir}/*.so %if %{with gcov} %{_var}/lib/pacemaker %endif %{_libdir}/pkgconfig/*.pc %doc COPYING.LIB %doc AUTHORS %changelog +* Wed Aug 24 2011 Andrew Beekhof 1.1.6-1 +- Pre-release + * Fri Feb 11 2011 Andrew Beekhof 1.1.5-1 - Update source tarball to revision: baad6636a053 - Statistics: Changesets: 184 Diff: 605 files changed, 46103 insertions(+), 26417 deletions(-) - See included ChangeLog file or http://hg.clusterlabs.org/pacemaker/1.1/file/tip/ChangeLog for details * Wed Oct 20 2010 Andrew Beekhof 1.1.4-1 - Moved all the interesting parts of the changelog into a separate file as per the Fedora policy :-/ - Update source tarball to revision: 75406c3eb2c1 tip - Significant performance enhancements to the Policy Engine and CIB - Statistics: Changesets: 169 Diff: 772 files changed, 56172 insertions(+), 39309 deletions(-) - See included ChangeLog file or http://hg.clusterlabs.org/pacemaker/1.1/file/tip/ChangeLog for details * Tue Sep 21 2010 Andrew Beekhof 1.1.3-1 - Update source tarball to revision: e3bb31c56244 tip - Statistics: Changesets: 352 Diff: 481 files changed, 14130 insertions(+), 11156 deletions(-) * Wed May 12 2010 Andrew Beekhof 1.1.2-1 - Update source tarball to revision: c25c972a25cc tip - Statistics: Changesets: 339 Diff: 708 files changed, 37918 insertions(+), 10584 deletions(-) * Tue Feb 16 2010 Andrew Beekhof - 1.1.1-1 - First public release of Pacemaker 1.1 - Package reference documentation in a doc subpackage - Move cts into a subpackage so that it can be easily consumed by others - Update source tarball to revision: 17d9cd4ee29f + New stonith daemon that supports global notifications + Service placement influenced by the physical resources + A new tool for simulating failures and the cluster’s reaction to them + Ability to serialize an otherwise unrelated a set of resource actions (eg. Xen migrations) * Wed Feb 10 2010 Andrew Beekhof - 1.0.7-4 - Rebuild for heartbeat 3.0.2-2 * Wed Feb 10 2010 Andrew Beekhof - 1.0.7-3 - Rebuild for cluster-glue 1.0.3 * Tue Jan 19 2010 Andrew Beekhof - 1.0.7-2 - Rebuild for corosync 1.2.0 * Mon Jan 18 2010 Andrew Beekhof - 1.0.7-1 - Update source tarball to revision: 2eed906f43e9 (stable-1.0) tip - Statistics: Changesets: 193 Diff: 220 files changed, 15933 insertions(+), 8782 deletions(-) * Thu Oct 29 2009 Andrew Beekhof - 1.0.5-4 - Include the fixes from CoroSync integration testing - Move the resource templates - they are not documentation - Ensure documentation is placed in a standard location - Exclude documentation that is included elsewhere in the package - Update the tarball from upstream to version ee19d8e83c2a + High: cib: Correctly clean up when both plaintext and tls remote ports are requested + High: PE: Bug bnc#515172 - Provide better defaults for lt(e) and gt(e) comparisions + High: PE: Bug lf#2197 - Allow master instances placemaker to be influenced by colocation constraints + High: PE: Make sure promote/demote pseudo actions are created correctly + High: PE: Prevent target-role from promoting more than master-max instances + High: ais: Bug lf#2199 - Prevent expected-quorum-votes from being populated with garbage + High: ais: Prevent deadlock - dont try to release IPC message if the connection failed + High: cib: For validation errors, send back the full CIB so the client can display the errors + High: cib: Prevent use-after-free for remote plaintext connections + High: crmd: Bug lf#2201 - Prevent use-of-NULL when running heartbeat * Wed Oct 13 2009 Andrew Beekhof - 1.0.5-3 - Update the tarball from upstream to version 38cd629e5c3c + High: Core: Bug lf#2169 - Allow dtd/schema validation to be disabled + High: PE: Bug lf#2106 - Not all anonymous clone children are restarted after configuration change + High: PE: Bug lf#2170 - stop-all-resources option had no effect + High: PE: Bug lf#2171 - Prevent groups from starting if they depend on a complex resource which can not + High: PE: Disable resource management if stonith-enabled=true and no stonith resources are defined + High: PE: do not include master score if it would prevent allocation + High: ais: Avoid excessive load by checking for dead children every 1s (instead of 100ms) + High: ais: Bug rh#525589 - Prevent shutdown deadlocks when running on CoroSync + High: ais: Gracefully handle changes to the AIS nodeid + High: crmd: Bug bnc#527530 - Wait for the transition to complete before leaving S_TRANSITION_ENGINE + High: crmd: Prevent use-after-free with LOG_DEBUG_3 + Medium: xml: Mask the "symmetrical" attribute on rsc_colocation constraints (bnc#540672) + Medium (bnc#520707): Tools: crm: new templates ocfs2 and clvm + Medium: Build: Invert the disable ais/heartbeat logic so that --without (ais|heartbeat) is available to rpmbuild + Medium: PE: Bug lf#2178 - Indicate unmanaged clones + Medium: PE: Bug lf#2180 - Include node information for all failed ops + Medium: PE: Bug lf#2189 - Incorrect error message when unpacking simple ordering constraint + Medium: PE: Correctly log resources that would like to start but can not + Medium: PE: Stop ptest from logging to syslog + Medium: ais: Include version details in plugin name + Medium: crmd: Requery the resource metadata after every start operation * Fri Aug 21 2009 Tomas Mraz - 1.0.5-2.1 - rebuilt with new openssl * Wed Aug 19 2009 Andrew Beekhof - 1.0.5-2 - Add versioned perl dependency as specified by https://fedoraproject.org/wiki/Packaging/Perl#Packages_that_link_to_libperl - No longer remove RPATH data, it prevents us finding libperl.so and no other libraries were being hardcoded - Compile in support for heartbeat - Conditionally add heartbeat-devel and corosynclib-devel to the -devel requirements depending on which stacks are supported * Mon Aug 17 2009 Andrew Beekhof - 1.0.5-1 - Add dependency on resource-agents - Use the version of the configure macro that supplies --prefix, --libdir, etc - Update the tarball from upstream to version 462f1569a437 (Pacemaker 1.0.5 final) + High: Tools: crm_resource - Advertise --move instead of --migrate + Medium: Extra: New node connectivity RA that uses system ping and attrd_updater + Medium: crmd: Note that dc-deadtime can be used to mask the brokeness of some switches * Tue Aug 11 2009 Ville Skyttä - 1.0.5-0.7.c9120a53a6ae.hg - Use bzipped upstream tarball. * Wed Jul 29 2009 Andrew Beekhof - 1.0.5-0.6.c9120a53a6ae.hg - Add back missing build auto* dependancies - Minor cleanups to the install directive * Tue Jul 28 2009 Andrew Beekhof - 1.0.5-0.5.c9120a53a6ae.hg - Add a leading zero to the revision when alphatag is used * Tue Jul 28 2009 Andrew Beekhof - 1.0.5-0.4.c9120a53a6ae.hg - Incorporate the feedback from the cluster-glue review - Realistically, the version is a 1.0.5 pre-release - Use the global directive instead of define for variables - Use the haclient/hacluster group/user instead of daemon - Use the _configure macro - Fix install dependancies * Fri Jul 24 2009 Andrew Beekhof - 1.0.4-3 - Initial Fedora checkin - Include an AUTHORS and license file in each package - Change the library package name to pacemaker-libs to be more Fedora compliant - Remove execute permissions from xml related files - Reference the new cluster-glue devel package name - Update the tarball from upstream to version c9120a53a6ae + High: PE: Only prevent migration if the clone dependency is stopping/starting on the target node + High: PE: Bug 2160 - Dont shuffle clones due to colocation + High: PE: New implementation of the resource migration (not stop/start) logic + Medium: Tools: crm_resource - Prevent use-of-NULL by requiring a resource name for the -A and -a options + Medium: PE: Prevent use-of-NULL in find_first_action() * Tue Jul 14 2009 Andrew Beekhof - 1.0.4-2 - Reference authors from the project AUTHORS file instead of listing in description - Change Source0 to reference the Mercurial repo - Cleaned up the summaries and descriptions - Incorporate the results of Fedora package self-review * Thu Jun 04 2009 Andrew Beekhof - 1.0.4-1 - Update source tarball to revision: 1d87d3e0fc7f (stable-1.0) - Statistics: Changesets: 209 Diff: 266 files changed, 12010 insertions(+), 8276 deletions(-) * Wed Apr 08 2009 Andrew Beekhof - 1.0.3-1 - Update source tarball to revision: b133b3f19797 (stable-1.0) tip - Statistics: Changesets: 383 Diff: 329 files changed, 15471 insertions(+), 15119 deletions(-) * Mon Feb 16 2009 Andrew Beekhof - 1.0.2-1 - Update source tarball to revision: d232d19daeb9 (stable-1.0) tip - Statistics: Changesets: 441 Diff: 639 files changed, 20871 insertions(+), 21594 deletions(-) * Tue Nov 18 2008 Andrew Beekhof - 1.0.1-1 - Update source tarball to revision: 6fc5ce8302ab (stable-1.0) tip - Statistics: Changesets: 170 Diff: 816 files changed, 7633 insertions(+), 6286 deletions(-) * Thu Oct 16 2008 Andrew Beekhof - 1.0.0-1 - Update source tarball to revision: 388654dfef8f tip - Statistics: Changesets: 261 Diff: 3021 files changed, 244985 insertions(+), 111596 deletions(-) * Mon Sep 22 2008 Andrew Beekhof - 0.7.3-1 - Update source tarball to revision: 33e677ab7764+ tip - Statistics: Changesets: 133 Diff: 89 files changed, 7492 insertions(+), 1125 deletions(-) * Wed Aug 20 2008 Andrew Beekhof - 0.7.1-1 - Update source tarball to revision: f805e1b30103+ tip - Statistics: Changesets: 184 Diff: 513 files changed, 43408 insertions(+), 43783 deletions(-) * Fri Jul 18 2008 Andrew Beekhof - 0.7.0-19 - Update source tarball to revision: 007c3a1c50f5 (unstable) tip - Statistics: Changesets: 108 Diff: 216 files changed, 4632 insertions(+), 4173 deletions(-) * Wed Jun 25 2008 Andrew Beekhof - 0.7.0-1 - Update source tarball to revision: bde0c7db74fb tip - Statistics: Changesets: 439 Diff: 676 files changed, 41310 insertions(+), 52071 deletions(-) * Thu Jun 19 2008 Andrew Beekhof - 0.6.5-1 - Update source tarball to revision: b9fe723d1ac5 tip - Statistics: Changesets: 48 Diff: 37 files changed, 1204 insertions(+), 234 deletions(-) * Thu May 22 2008 Andrew Beekhof - 0.6.4-1 - Update source tarball to revision: 226d8e356924 tip - Statistics: Changesets: 55 Diff: 199 files changed, 7103 insertions(+), 12378 deletions(-) * Wed Apr 23 2008 Andrew Beekhof - 0.6.3-1 - Update source tarball to revision: fd8904c9bc67 tip - Statistics: Changesets: 117 Diff: 354 files changed, 19094 insertions(+), 11338 deletions(-) * Thu Feb 14 2008 Andrew Beekhof - 0.6.2-1 - Update source tarball to revision: 28b1a8c1868b tip - Statistics: Changesets: 11 Diff: 7 files changed, 58 insertions(+), 18 deletions(-) * Tue Feb 12 2008 Andrew Beekhof - 0.6.1-1 - Update source tarball to revision: e7152d1be933 tip - Statistics: Changesets: 25 Diff: 37 files changed, 1323 insertions(+), 227 deletions(-) * Mon Jan 14 2008 Andrew Beekhof - 0.6.0-2 - This is the first release of the Pacemaker Cluster Resource Manager formerly part of Heartbeat. - For those looking for the GUI, mgmtd, CIM or TSA components, they are now found in the new pacemaker-pygui project. Build dependancies prevent them from being included in Heartbeat (since the built-in CRM is no longer supported) and, being non-core components, are not included with Pacemaker. - Update source tarball to revision: c94b92d550cf - Statistics: Changesets: 347 Diff: 2272 files changed, 132508 insertions(+), 305991 deletions(-) - Test hardware: + 6-node vmware cluster (sles10-sp1/256Mb/vmware stonith) on a single host (opensuse10.3/2Gb/2.66Ghz Quad Core2) + 7-node EMC Centera cluster (sles10/512Mb/2Ghz Xeon/ssh stonith) - Notes: Heartbeat Stack + All testing was performed with STONITH enabled + The CRM was enabled using the "crm respawn" directive - Notes: OpenAIS Stack + This release contains a preview of support for the OpenAIS cluster stack + The current release of the OpenAIS project is missing two important patches that we require. OpenAIS packages containing these patches are available for most major distributions at: http://download.opensuse.org/repositories/server:/ha-clustering + The OpenAIS stack is not currently recommended for use in clusters that have shared data as STONITH support is not yet implimented + pingd is not yet available for use with the OpenAIS stack + 3 significant OpenAIS issues were found during testing of 4 and 6 node clusters. We are activly working together with the OpenAIS project to get these resolved. - Pending bugs encountered during testing: + OpenAIS #1736 - Openais membership took 20s to stabilize + Heartbeat #1750 - ipc_bufpool_update: magic number in head does not match + OpenAIS #1793 - Assertion failure in memb_state_gather_enter() + OpenAIS #1796 - Cluster message corruption * Mon Dec 10 2007 Andrew Beekhof - 0.6.0-1 - Initial opensuse package check-in diff --git a/replace/NoSuchFunctionName.c b/replace/NoSuchFunctionName.c index 373eabdeeb..45717d5035 100644 --- a/replace/NoSuchFunctionName.c +++ b/replace/NoSuchFunctionName.c @@ -1,31 +1,31 @@ /* * Copyright (C) 2002 Alan Robertson * This software licensed under the GNU LGPL. * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ void nosuchfunctionname(void); /* * This is a completely useless function put here only to make OpenBSD make * procedures happy. I hope no one ever makes such a function ;-) */ void nosuchfunctionname(void) { return; } diff --git a/replace/setenv.c b/replace/setenv.c index 0cb023b30e..8205013782 100644 --- a/replace/setenv.c +++ b/replace/setenv.c @@ -1,50 +1,50 @@ /* * Copyright (C) 2001 Alan Robertson * This software licensed under the GNU LGPL. * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include /* * Small replacement function for setenv() */ int setenv(const char *name, const char * value, int why) { int rc = -1; if ( name && value ) { char * envp = NULL; envp = malloc(strlen(name)+strlen(value)+2); if (envp) { /* * Unfortunately, the putenv API guarantees memory leaks when * changing environment variables repeatedly... :-( */ sprintf(envp, "%s=%s", name, value); /* Cannot free envp (!) */ rc = putenv(envp); } } return(rc); } diff --git a/replace/strerror.c b/replace/strerror.c index 0d996feea2..b686dbe5a6 100644 --- a/replace/strerror.c +++ b/replace/strerror.c @@ -1,37 +1,37 @@ /* * Copyright (C) 2002 Alan Robertson * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of version 2.1 of the GNU Lesser General Public * License as published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include extern const char * sys_err[]; extern int sys_nerr; char * strerror(int errnum) { static char whaterr[32]; if (errnum < 0) { return "negative errno"; } if (errnum >= sys_nerr) { snprintf(whaterr, sizeof(whaterr),"error %d", errnum); return whaterr; } return sys_err[sys_nerr]; } diff --git a/replace/strlcat.c b/replace/strlcat.c index dbd80880f8..3ebb8df937 100644 --- a/replace/strlcat.c +++ b/replace/strlcat.c @@ -1,33 +1,33 @@ #include #include /* * Copyright (C) 2007 Alan Robertson * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ size_t strlcat(char *dest, const char * src, size_t maxlen) { size_t curlen = strlen(dest); size_t addlen = strlen(src); size_t appendlen = (maxlen-1) - curlen; if (appendlen > 0) { strlcpy(dest+curlen, src, maxlen-curlen); } return curlen + addlen; } diff --git a/replace/strlcpy.c b/replace/strlcpy.c index 38a9916a1f..27128fa4c7 100644 --- a/replace/strlcpy.c +++ b/replace/strlcpy.c @@ -1,32 +1,32 @@ #include #include /* * Copyright (C) 2007 Alan Robertson * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ size_t strlcpy(char *dest, const char * src, size_t maxlen) { size_t srclen = strlen(src); if (maxlen > 0) { strncpy(dest, src, maxlen); dest[maxlen-1]=EOS; } return srclen; } diff --git a/replace/strndup.c b/replace/strndup.c index efbb3ff751..6d6a06fe70 100644 --- a/replace/strndup.c +++ b/replace/strndup.c @@ -1,38 +1,38 @@ #include #include #include /* * Copyright (C) 2004 Matt Soffen * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ /* Taken from the GlibC implementation of strndup */ char *strndup(const char *str, size_t len) { size_t n = strnlen(str,len); char *new = (char *) malloc (len+1); if (NULL == new) { return NULL; } new[n] = '\0'; return (char *)memcpy (new, str, len); } diff --git a/replace/strnlen.c b/replace/strnlen.c index b69a2105a2..332432f7ee 100644 --- a/replace/strnlen.c +++ b/replace/strnlen.c @@ -1,31 +1,31 @@ #include #include /* * Copyright (C) 2003 Alan Robertson * This software licensed under the GNU LGPL. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ size_t strnlen(const char *s, size_t maxlen) { const char * eospos; eospos = memchr(s, (int)'\0', maxlen); return (eospos == NULL ? maxlen : (size_t)(eospos-s)); } diff --git a/replace/unsetenv.c b/replace/unsetenv.c index 59f96908af..90d4b466ca 100644 --- a/replace/unsetenv.c +++ b/replace/unsetenv.c @@ -1,51 +1,51 @@ /* * Copyright (C) 2001 Alan Robertson * This software licensed under the GNU LGPL. * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #define __environ environ #ifndef HAVE_ENVIRON_DECL extern char **environ; #endif int unsetenv (const char *name) { const size_t len = strlen (name); char **ep; for (ep = __environ; *ep; ++ep) { if (!strncmp (*ep, name, len) && (*ep)[len] == '=') { /* Found it. */ /* Remove this pointer by moving later ones back. */ char **dp = ep; do dp[0] = dp[1]; while (*dp++); /* Continue the loop in case NAME appears again. */ } } return 0; } diff --git a/replace/uuid_parse.c b/replace/uuid_parse.c index d4a2aa5b5c..ba108ff00f 100644 --- a/replace/uuid_parse.c +++ b/replace/uuid_parse.c @@ -1,519 +1,519 @@ /* * uuid: emulation of e2fsprogs interface if implementation lacking. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * Original uuid implementation: copyright (C) Theodore Ts'o * * This importation into heartbeat: * Copyright (C) 2004 David Lee * */ #include #include #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #include #include #include /* * Local "replace" implementation of uuid functions. */ #include #include #include /* UUID Variant definitions */ #define UUID_VARIANT_NCS 0 #define UUID_VARIANT_DCE 1 #define UUID_VARIANT_MICROSOFT 2 #define UUID_VARIANT_OTHER 3 /* UUID Type definitions */ #define UUID_TYPE_DCE_TIME 1 #define UUID_TYPE_DCE_RANDOM 4 /* For uuid_compare() */ #define UUCMP(u1,u2) if (u1 != u2) return((u1 < u2) ? -1 : 1); /************************************ * Private types ************************************/ #define longlong long long /* * Offset between 15-Oct-1582 and 1-Jan-70 */ #define TIME_OFFSET_HIGH 0x01B21DD2 #define TIME_OFFSET_LOW 0x13814000 #if (SIZEOF_INT == 4) typedef unsigned int __u32; #elif (SIZEOF_LONG == 4) typedef unsigned long __u32; #endif #if (SIZEOF_INT == 2) typedef int __s16; typedef unsigned int __u16; #elif (SIZEOF_SHORT == 2) typedef short __s16; typedef unsigned short __u16; #endif typedef unsigned char __u8; struct uuid { __u32 time_low; __u16 time_mid; __u16 time_hi_and_version; __u16 clock_seq; __u8 node[6]; }; /************************************ * internal routines ************************************/ static void uuid_pack(const struct uuid *uu, uuid_t ptr) { __u32 tmp; unsigned char *out = ptr; tmp = uu->time_low; out[3] = (unsigned char) tmp; tmp >>= 8; out[2] = (unsigned char) tmp; tmp >>= 8; out[1] = (unsigned char) tmp; tmp >>= 8; out[0] = (unsigned char) tmp; tmp = uu->time_mid; out[5] = (unsigned char) tmp; tmp >>= 8; out[4] = (unsigned char) tmp; tmp = uu->time_hi_and_version; out[7] = (unsigned char) tmp; tmp >>= 8; out[6] = (unsigned char) tmp; tmp = uu->clock_seq; out[9] = (unsigned char) tmp; tmp >>= 8; out[8] = (unsigned char) tmp; memcpy(out+10, uu->node, 6); } static void uuid_unpack(const uuid_t in, struct uuid *uu) { const __u8 *ptr = in; __u32 tmp; tmp = *ptr++; tmp = (tmp << 8) | *ptr++; tmp = (tmp << 8) | *ptr++; tmp = (tmp << 8) | *ptr++; uu->time_low = tmp; tmp = *ptr++; tmp = (tmp << 8) | *ptr++; uu->time_mid = tmp; tmp = *ptr++; tmp = (tmp << 8) | *ptr++; uu->time_hi_and_version = tmp; tmp = *ptr++; tmp = (tmp << 8) | *ptr++; uu->clock_seq = tmp; memcpy(uu->node, ptr, 6); } /************************************ * Main routines, except uuid_generate*() ************************************/ void uuid_clear(uuid_t uu) { memset(uu, 0, 16); } int uuid_compare(const uuid_t uu1, const uuid_t uu2) { struct uuid uuid1, uuid2; uuid_unpack(uu1, &uuid1); uuid_unpack(uu2, &uuid2); UUCMP(uuid1.time_low, uuid2.time_low); UUCMP(uuid1.time_mid, uuid2.time_mid); UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version); UUCMP(uuid1.clock_seq, uuid2.clock_seq); return memcmp(uuid1.node, uuid2.node, 6); } void uuid_copy(uuid_t dst, const uuid_t src) { unsigned char *cp1; const unsigned char *cp2; int i; for (i=0, cp1 = dst, cp2 = src; i < 16; i++) *cp1++ = *cp2++; } /* if uu is the null uuid, return 1 else 0 */ int uuid_is_null(const uuid_t uu) { const unsigned char *cp; int i; for (i=0, cp = uu; i < 16; i++) if (*cp++) return 0; return 1; } /* 36byte-string=>uuid */ int uuid_parse(const char *in, uuid_t uu) { struct uuid uuid; int i; const char *cp; char buf[3]; if (strlen(in) != 36) return -1; for (i=0, cp = in; i <= 36; i++,cp++) { if ((i == 8) || (i == 13) || (i == 18) || (i == 23)) { if (*cp == '-') continue; else return -1; } if (i== 36) if (*cp == 0) continue; if (!isxdigit((int) *cp)) return -1; } uuid.time_low = strtoul(in, NULL, 16); uuid.time_mid = strtoul(in+9, NULL, 16); uuid.time_hi_and_version = strtoul(in+14, NULL, 16); uuid.clock_seq = strtoul(in+19, NULL, 16); cp = in+24; buf[2] = 0; for (i=0; i < 6; i++) { buf[0] = *cp++; buf[1] = *cp++; uuid.node[i] = strtoul(buf, NULL, 16); } uuid_pack(&uuid, uu); return 0; } /* uuid=>36byte-string-with-null */ void uuid_unparse(const uuid_t uu, char *out) { struct uuid uuid; uuid_unpack(uu, &uuid); sprintf(out, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", uuid.time_low, uuid.time_mid, uuid.time_hi_and_version, uuid.clock_seq >> 8, uuid.clock_seq & 0xFF, uuid.node[0], uuid.node[1], uuid.node[2], uuid.node[3], uuid.node[4], uuid.node[5]); } /************************************ * Main routines: uuid_generate*() ************************************/ #include #include #include #ifdef HAVE_SYS_IOCTL_H #include #endif #ifdef HAVE_SYS_SOCKET_H #include #endif #ifdef HAVE_SYS_SOCKIO_H #include #endif #ifdef HAVE_NET_IF_H #include #endif #ifdef HAVE_NETINET_IN_H #include #endif #ifdef HAVE_SRANDOM #define srand(x) srandom(x) #define rand() random() #endif static int get_random_fd(void) { struct timeval tv; static int fd = -2; int i; if (fd == -2) { gettimeofday(&tv, 0); fd = open("/dev/urandom", O_RDONLY); if (fd == -1) fd = open("/dev/random", O_RDONLY | O_NONBLOCK); srand((getpid() << 16) ^ getuid() ^ tv.tv_sec ^ tv.tv_usec); } /* Crank the random number generator a few times */ gettimeofday(&tv, 0); for (i = (tv.tv_sec ^ tv.tv_usec) & 0x1F; i > 0; i--) rand(); return fd; } /* * Generate a series of random bytes. Use /dev/urandom if possible, * and if not, use srandom/random. */ static void get_random_bytes(void *buf, int nbytes) { int i, n = nbytes, fd = get_random_fd(); int lose_counter = 0; unsigned char *cp = (unsigned char *) buf; if (fd >= 0) { while (n > 0) { i = read(fd, cp, n); if (i <= 0) { if (lose_counter++ > 16) break; continue; } n -= i; cp += i; lose_counter = 0; } } /* * We do this all the time, but this is the only source of * randomness if /dev/random/urandom is out to lunch. */ for (cp = buf, i = 0; i < nbytes; i++) *cp++ ^= (rand() >> 7) & 0xFF; return; } /* * Get the ethernet hardware address, if we can find it... */ static int get_node_id(unsigned char *node_id) { #ifdef HAVE_NET_IF_H int sd; struct ifreq ifr, *ifrp; struct ifconf ifc; char buf[1024]; int n, i; unsigned char *a; /* * BSD 4.4 defines the size of an ifreq to be * max(sizeof(ifreq), sizeof(ifreq.ifr_name)+ifreq.ifr_addr.sa_len * However, under earlier systems, sa_len isn't present, so the size is * just sizeof(struct ifreq) */ #ifdef HAVE_SA_LEN #ifndef max #define max(a,b) ((a) > (b) ? (a) : (b)) #endif #define ifreq_size(i) max(sizeof(struct ifreq),\ sizeof((i).ifr_name)+(i).ifr_addr.sa_len) #else #define ifreq_size(i) sizeof(struct ifreq) #endif /* HAVE_SA_LEN*/ sd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); if (sd < 0) { return -1; } memset(buf, 0, sizeof(buf)); ifc.ifc_len = sizeof(buf); ifc.ifc_buf = buf; if (ioctl (sd, SIOCGIFCONF, (char *)&ifc) < 0) { close(sd); return -1; } n = ifc.ifc_len; for (i = 0; i < n; i+= ifreq_size(*ifr) ) { ifrp = (struct ifreq *)((char *) ifc.ifc_buf+i); strncpy(ifr.ifr_name, ifrp->ifr_name, IFNAMSIZ); #ifdef SIOCGIFHWADDR if (ioctl(sd, SIOCGIFHWADDR, &ifr) < 0) continue; a = (unsigned char *) &ifr.ifr_hwaddr.sa_data; #else #ifdef SIOCGENADDR if (ioctl(sd, SIOCGENADDR, &ifr) < 0) continue; a = (unsigned char *) ifr.ifr_enaddr; #else /* * XXX we don't have a way of getting the hardware * address */ close(sd); return 0; #endif /* SIOCGENADDR */ #endif /* SIOCGIFHWADDR */ if (!a[0] && !a[1] && !a[2] && !a[3] && !a[4] && !a[5]) continue; if (node_id) { memcpy(node_id, a, 6); close(sd); return 1; } } close(sd); #endif return 0; } /* Assume that the gettimeofday() has microsecond granularity */ #define MAX_ADJUSTMENT 10 static int get_clock(__u32 *clock_high, __u32 *clock_low, __u16 *ret_clock_seq) { static int adjustment = 0; static struct timeval last = {0, 0}; static __u16 clock_seq; struct timeval tv; unsigned longlong clock_reg; try_again: gettimeofday(&tv, 0); if ((last.tv_sec == 0) && (last.tv_usec == 0)) { get_random_bytes(&clock_seq, sizeof(clock_seq)); clock_seq &= 0x1FFF; last = tv; last.tv_sec--; } if ((tv.tv_sec < last.tv_sec) || ((tv.tv_sec == last.tv_sec) && (tv.tv_usec < last.tv_usec))) { clock_seq = (clock_seq+1) & 0x1FFF; adjustment = 0; last = tv; } else if ((tv.tv_sec == last.tv_sec) && (tv.tv_usec == last.tv_usec)) { if (adjustment >= MAX_ADJUSTMENT) goto try_again; adjustment++; } else { adjustment = 0; last = tv; } clock_reg = tv.tv_usec*10 + adjustment; clock_reg += ((unsigned longlong) tv.tv_sec)*10000000; clock_reg += (((unsigned longlong) 0x01B21DD2) << 32) + 0x13814000; *clock_high = clock_reg >> 32; *clock_low = clock_reg; *ret_clock_seq = clock_seq; return 0; } /* create a new uuid, based on randomness */ void uuid_generate_random(uuid_t out) { uuid_t buf; struct uuid uu; get_random_bytes(buf, sizeof(buf)); uuid_unpack(buf, &uu); uu.clock_seq = (uu.clock_seq & 0x3FFF) | 0x8000; uu.time_hi_and_version = (uu.time_hi_and_version & 0x0FFF) | 0x4000; uuid_pack(&uu, out); } /* create a new uuid, based on time */ static void uuid_generate_time(uuid_t out) { static unsigned char node_id[6]; static int has_init = 0; struct uuid uu; __u32 clock_mid; if (!has_init) { if (get_node_id(node_id) <= 0) { get_random_bytes(node_id, 6); /* * Set multicast bit, to prevent conflicts * with IEEE 802 addresses obtained from * network cards */ node_id[0] |= 0x80; } has_init = 1; } get_clock(&clock_mid, &uu.time_low, &uu.clock_seq); uu.clock_seq |= 0x8000; uu.time_mid = (__u16) clock_mid; uu.time_hi_and_version = (clock_mid >> 16) | 0x1000; memcpy(uu.node, node_id, 6); uuid_pack(&uu, out); } void uuid_generate(uuid_t out) { if (get_random_fd() >= 0) { uuid_generate_random(out); }else{ uuid_generate_time(out); } } diff --git a/rpmlintrc b/rpmlintrc index 5a2b83babe..3283543675 100644 --- a/rpmlintrc +++ b/rpmlintrc @@ -1,10 +1,11 @@ addFilter("E: non-standard-dir-perm .* 0750") addFilter("E: hardcoded-library-path in /usr/lib/ocf") addFilter("W: spelling-error") addFilter("W: non-standard-uid .* hacluster") addFilter("W: non-standard-gid .* haclient") addFilter("W: shared-lib-calls-exit") addFilter("W: invalid-url Source0:") addFilter("pacemaker.src: W: strange-permission .* 0600") -addFilter("pacemaker.src: E: invalid-spec-name") addFilter("W: mixed-use-of-spaces-and-tabs") +addFilter("W: unstripped-binary-or-object") +addFilter("W: hidden-file-or-dir /var/lib/pacemaker/gcov") diff --git a/shell/modules/help.py.in b/shell/modules/help.py.in index d7d33b429d..685c0acf53 100644 --- a/shell/modules/help.py.in +++ b/shell/modules/help.py.in @@ -1,277 +1,277 @@ # Copyright (C) 2008 Dejan Muhamedagic # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # import os import re from cache import WCache from utils import odict, page_string from vars import gethomedir from msg import * # # help or make users feel less lonely # def add_shorthelp(topic,shorthelp,topic_help): ''' Join topics ("%s,%s") if they share the same short description. ''' for i in range(len(topic_help)): if topic_help[i][1] == shorthelp: topic_help[i][0] = "%s,%s" % (topic_help[i][0], topic) return topic_help.append([topic, shorthelp]) def dump_short_help(help_tab): topic_help = [] for topic in help_tab: if topic == '.': continue # with odict, for whatever reason, python parses differently: # help_tab["..."] = ("...","...") and # help_tab["..."] = ("...",""" # ...""") # a parser bug? if type(help_tab[topic][0]) == type(()): shorthelp = help_tab[topic][0][0] else: shorthelp = help_tab[topic][0] add_shorthelp(topic,shorthelp,topic_help) for t,d in topic_help: print "\t%-16s %s" % (t,d) def overview(help_tab): print "" print help_tab['.'][1] print "" print "Available commands:" print "" dump_short_help(help_tab) print "" def topic_help(help_tab,topic): if topic not in help_tab: print "There is no help for topic %s" % topic return if type(help_tab[topic][0]) == type(()): shorthelp = help_tab[topic][0][0] longhelp = help_tab[topic][0][1] else: shorthelp = help_tab[topic][0] longhelp = help_tab[topic][1] if longhelp: page_string(longhelp) else: print shorthelp def cmd_help(help_tab,topic = ''): "help!" # help_tab is an odict (ordered dictionary): # help_tab[topic] = (short_help,long_help) # topic '.' is a special entry for the top level if not help_tab: common_info("sorry, help not available") return if not topic: overview(help_tab) else: topic_help(help_tab,topic) def is_level(s): return len(s.split("_")) == 2 def help_short(s): r = re.search("help_[^,]+,(.*)\]\]", s) return r and r.group(1) or '' class HelpSystem(object): ''' The help system. All help is in the following form in the manual: [[cmdhelp__,]] === ... Long help text. ... [[cmdhelp__,]] Help for the level itself is like this: [[cmdhelp_,]] ''' help_text_file = "@datadir@/@PACKAGE@/crm.8.txt" index_file = os.path.join(gethomedir(),".crm_help_index") def __init__(self): self.key_pos = {} self.leveld = {} self.no_help_file = False # don't print repeatedly messages self.bad_index = False # don't print repeatedly warnings for bad index def open_file(self,name,mode): try: f = open(name,mode) return f except IOError,msg: common_err("%s open: %s"%(name,msg)) common_err("extensive help system is not available") self.no_help_file = True return None def drop_index(self): common_info("removing index") os.unlink(self.index_file) self.key_pos = {} self.leveld = {} self.bad_index = True def mk_index(self): ''' Prepare an index file, sorted by topic, with seek positions Do we need a hash on content? ''' if self.no_help_file: return False crm_help_v = os.getenv("CRM_HELP_FILE") if crm_help_v: self.help_text_file = crm_help_v help_f = self.open_file(self.help_text_file,"r") if not help_f: return False idx_f = self.open_file(self.index_file,"w") if not idx_f: return False common_debug("building help index") key_pos = odict() while 1: pos = help_f.tell() s = help_f.readline() if not s: break if s.startswith("[["): r = re.search(r'..([^,]+),', s) if r: key_pos[r.group(1)] = pos help_f.close() for key in key_pos: print >>idx_f, '%s %d' % (key,key_pos[key]) idx_f.close() return True def is_index_old(self): try: t_idx = os.path.getmtime(self.index_file) except: return True try: t_help = os.path.getmtime(self.help_text_file) except: return True return t_help > t_idx def load_index(self): if self.is_index_old(): self.mk_index() self.key_pos = {} self.leveld = {} idx_f = self.open_file(self.index_file,"r") if not idx_f: return False cur_lvl = '' for s in idx_f: a = s.split() if len(a) != 2: if not self.bad_index: common_err("index file corrupt") idx_f.close() self.drop_index() return self.load_index() # this runs only once return False key = a[0] fpos = long(a[1]) if key.startswith("cmdhelp_"): if is_level(key): if key != cur_lvl: cur_lvl = key self.leveld[cur_lvl] = [] else: self.leveld[cur_lvl].append(key) self.key_pos[key] = fpos idx_f.close() return True def __filter(self,s): if '<<' in s: return re.sub(r'<<[^,]+,(.+)>>', r'\1', s) else: return s def __load_help_one(self,key,skip = 2): longhelp = '' self.help_f.seek(self.key_pos[key]) shorthelp = help_short(self.help_f.readline()) for i in range(skip-1): self.help_f.readline() l = [] for s in self.help_f: if s.startswith("[[") or s.startswith("="): break l.append(self.__filter(s)) if l and l[-1] == '\n': # drop the last line of empty l.pop() if l: longhelp = ''.join(l) if not shorthelp or not longhelp: if not self.bad_index: common_warn("help topic %s not found" % key) self.drop_index() return shorthelp,longhelp def cmdhelp(self,s): if not self.key_pos and not self.load_index(): return None,None if not s in self.key_pos: if not self.bad_index: common_warn("help topic %s not found" % s) self.drop_index() return None,None return self.__load_help_one(s) def __load_level(self,lvl): ''' For the given level, create a help table. ''' if wcache.is_cached("lvl_help_tab_%s" % lvl): return wcache.retrieve("lvl_help_tab_%s" % lvl) if not self.key_pos and not self.load_index(): return None self.help_f = self.open_file(self.help_text_file,"r") if not self.help_f: return None lvl_s = "cmdhelp_%s" % lvl if not lvl_s in self.leveld: if not self.bad_index: common_warn("help table for level %s not found" % lvl) self.drop_index() return None common_debug("loading help table for level %s" % lvl) help_tab = odict() help_tab["."] = self.__load_help_one(lvl_s) try: for key in self.leveld[lvl_s]: cmd = key[len(lvl_s)+1:] help_tab[cmd] = self.__load_help_one(key) except: pass self.help_f.close() help_tab["quit"] = ("exit the program", "") help_tab["help"] = ("show help", "") help_tab["end"] = ("go back one level", "") return help_tab def load_level(self,lvl): help_tab = self.__load_level(lvl) if self.bad_index: # try again help_tab = self.__load_level(lvl) return wcache.store("lvl_help_tab_%s" % lvl, help_tab) wcache = WCache.getInstance() # vim:ts=4:sw=4:et: diff --git a/shell/modules/ra.py.in b/shell/modules/ra.py.in index e31b372465..8b5e8a3684 100644 --- a/shell/modules/ra.py.in +++ b/shell/modules/ra.py.in @@ -1,665 +1,665 @@ # Copyright (C) 2008 Dejan Muhamedagic # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # import os import sys import subprocess import copy import xml.dom.minidom import re import glob from userprefs import Options, UserPrefs from cache import WCache from vars import Vars, getuser from utils import * from msg import * # # Resource Agents interface (meta-data, parameters, etc) # ocf_root = os.getenv("OCF_ROOT") if not ocf_root: ocf_root = "@OCF_ROOT_DIR@" if not ocf_root: ocf_root = "/usr/lib/ocf" os.putenv("OCF_ROOT",ocf_root) class RaLrmd(object): ''' Getting information from the resource agents. ''' lrmadmin_prog = "lrmadmin" def __init__(self): self.good = self.is_lrmd_accessible() def lrmadmin(self, opts, xml = False): ''' Get information directly from lrmd using lrmadmin. ''' l = stdout2list("%s %s" % (self.lrmadmin_prog,opts)) if l and not xml: l = l[1:] # skip the first line return l def is_lrmd_accessible(self): if not (is_program(self.lrmadmin_prog) and is_process("lrmd")): return False return subprocess.call(\ add_sudo(">/dev/null 2>&1 %s -C" % self.lrmadmin_prog), \ shell=True) == 0 def meta(self, ra_class,ra_type,ra_provider): return self.lrmadmin("-M %s %s %s"%(ra_class,ra_type,ra_provider),True) def providers(self, ra_type,ra_class = "ocf"): 'List of providers for a class:type.' return self.lrmadmin("-P %s %s" % (ra_class,ra_type),True) def classes(self): 'List of classes.' return self.lrmadmin("-C") def types(self, ra_class = "ocf", ra_provider = ""): 'List of types for a class.' return self.lrmadmin("-T %s" % ra_class) class RaOS(object): ''' Getting information from the resource agents (direct). ''' def __init__(self): self.good = True def meta(self, ra_class,ra_type,ra_provider): l = [] if ra_class == "ocf": l = stdout2list("%s/resource.d/%s/%s meta-data" % \ (ocf_root,ra_provider,ra_type)) elif ra_class == "stonith": l = stdout2list("stonith -m -t %s" % ra_type) return l def providers(self, ra_type,ra_class = "ocf"): 'List of providers for a class:type.' l = [] if ra_class == "ocf": for s in glob.glob("%s/resource.d/*/%s" % (ocf_root,ra_type)): a = s.split("/") if len(a) == 7: l.append(a[5]) return l def classes(self): 'List of classes.' return "heartbeat lsb ocf stonith".split() def types(self, ra_class = "ocf", ra_provider = ""): 'List of types for a class.' l = [] prov = ra_provider and ra_provider or "*" if ra_class == "ocf": l = os_types_list("%s/resource.d/%s/*" % (ocf_root,prov)) elif ra_class == "lsb": l = os_types_list("/etc/init.d/*") elif ra_class == "stonith": l = stdout2list("stonith -L") l = list(set(l)) l.sort() return l def ra_if(): if vars.ra_if: return vars.ra_if if getuser() in ("root",vars.crm_daemon_user): vars.ra_if = RaLrmd() if not vars.ra_if or not vars.ra_if.good: vars.ra_if = RaOS() return vars.ra_if def ra_classes(): ''' List of RA classes. ''' if wcache.is_cached("ra_classes"): return wcache.retrieve("ra_classes") l = ra_if().classes() l.sort() return wcache.store("ra_classes",l) def ra_providers(ra_type,ra_class = "ocf"): 'List of providers for a class:type.' id = "ra_providers-%s-%s" % (ra_class,ra_type) if wcache.is_cached(id): return wcache.retrieve(id) l = ra_if().providers(ra_type,ra_class) l.sort() return wcache.store(id,l) def ra_providers_all(ra_class = "ocf"): ''' List of providers for a class. ''' id = "ra_providers_all-%s" % ra_class if wcache.is_cached(id): return wcache.retrieve(id) dir = ocf_root + "/resource.d" l = [] for s in os.listdir(dir): if os.path.isdir("%s/%s" % (dir,s)): l.append(s) l.sort() return wcache.store(id,l) def ra_types(ra_class = "ocf", ra_provider = ""): ''' List of RA type for a class. ''' if not ra_class: ra_class = "ocf" id = "ra_types-%s-%s" % (ra_class,ra_provider) if wcache.is_cached(id): return wcache.retrieve(id) if ra_provider: list = [] for ra in ra_if().types(ra_class): if ra_provider in ra_providers(ra,ra_class): list.append(ra) else: list = ra_if().types(ra_class) list.sort() return wcache.store(id,list) def get_pe_meta(): if not vars.pe_metadata: vars.pe_metadata = RAInfo("pengine","metadata") return vars.pe_metadata def get_crmd_meta(): if not vars.crmd_metadata: vars.crmd_metadata = RAInfo("crmd","metadata") vars.crmd_metadata.set_advanced_params(vars.crmd_advanced) return vars.crmd_metadata def get_stonithd_meta(): if not vars.stonithd_metadata: vars.stonithd_metadata = RAInfo("stonithd","metadata") return vars.stonithd_metadata def get_cib_meta(): if not vars.cib_metadata: vars.cib_metadata = RAInfo("cib","metadata") return vars.cib_metadata def get_properties_meta(): if not vars.crm_properties_metadata: get_pe_meta() get_crmd_meta() get_cib_meta() vars.crm_properties_metadata = copy.deepcopy(vars.crmd_metadata) vars.crm_properties_metadata.add_ra_params(vars.pe_metadata) vars.crm_properties_metadata.add_ra_params(vars.cib_metadata) return vars.crm_properties_metadata def get_properties_list(): try: return get_properties_meta().params().keys() except: return [] def prog_meta(prog): ''' Do external program metadata. ''' l = [] if is_program(prog): l = stdout2list("%s metadata" % prog) return l def get_nodes_text(n,tag): try: node = n.getElementsByTagName(tag)[0] for c in node.childNodes: if c.nodeType == c.TEXT_NODE: return c.data.strip() except: return '' def mk_monitor_name(role,depth): depth = depth != "0" and ("_%s" % depth) or "" return role and role != "Started" and \ "monitor_%s%s" % (role,depth) or \ "monitor%s" % depth def monitor_name_node(node): depth = node.getAttribute("depth") or '0' role = node.getAttribute("role") return mk_monitor_name(role,depth) def monitor_name_pl(pl): depth = find_value(pl, "depth") or '0' role = find_value(pl, "role") return mk_monitor_name(role,depth) def crm_msec(t): ''' See lib/common/utils.c:crm_get_msec(). ''' convtab = { 'ms': (1,1), 'msec': (1,1), 'us': (1,1000), 'usec': (1,1000), '': (1000,1), 's': (1000,1), 'sec': (1000,1), 'm': (60*1000,1), 'min': (60*1000,1), 'h': (60*60*1000,1), 'hr': (60*60*1000,1), } if not t: return -1 r = re.match("\s*(\d+)\s*([a-zA-Z]+)?", t) if not r: return -1 if not r.group(2): q = '' else: q = r.group(2).lower() try: mult,div = convtab[q] except: return -1 return (int(r.group(1))*mult)/div def crm_time_cmp(a, b): return crm_msec(a) - crm_msec(b) class RAInfo(object): ''' A resource agent and whatever's useful about it. ''' ra_tab = " " # four horses required_ops = ("start", "stop") skip_ops = ("meta-data", "validate-all") skip_op_attr = ("name", "depth", "role") def __init__(self,ra_class,ra_type,ra_provider = "heartbeat"): self.advanced_params = [] self.ra_class = ra_class self.ra_type = ra_type self.ra_provider = ra_provider if not self.ra_provider: self.ra_provider = "heartbeat" self.ra_node = None def ra_string(self): return self.ra_class == "ocf" and \ "%s:%s:%s" % (self.ra_class, self.ra_provider, self.ra_type) or \ "%s:%s" % (self.ra_class, self.ra_type) def error(self, s): common_err("%s: %s" % (self.ra_string(), s)) def warn(self, s): common_warn("%s: %s" % (self.ra_string(), s)) def set_advanced_params(self, l): self.advanced_params = l def filter_crmd_attributes(self): for n in self.ra_node.getElementsByTagName("parameter"): if not n.getAttribute("name") in vars.crmd_user_attributes: n.parentNode.removeChild(n) def add_ra_params(self,ra): ''' Add parameters from another RAInfo instance. ''' try: if not self.mk_ra_node() or not ra.mk_ra_node(): return except: return try: params_node = self.doc.getElementsByTagName("parameters")[0] except: params_node = self.doc.createElement("parameters") self.ra_node.appendChild(params_node) for n in ra.ra_node.getElementsByTagName("parameter"): params_node.appendChild(self.doc.importNode(n,1)) def mk_ra_node(self): ''' Return the resource_agent node. ''' if self.ra_node: return self.ra_node meta = self.meta() try: self.doc = xml.dom.minidom.parseString('\n'.join(meta)) except: self.error("could not parse meta-data: %s" % '\n'.join(meta)) self.ra_node = None return None try: self.ra_node = self.doc.getElementsByTagName("resource-agent")[0] except: self.error("meta-data contains no resource-agent element") self.ra_node = None return None if self.ra_class == "stonith": self.add_ra_params(get_stonithd_meta()) return self.ra_node def param_type_default(self,n): try: content = n.getElementsByTagName("content")[0] type = content.getAttribute("type") default = content.getAttribute("default") return type,default except: return None,None def params(self): ''' Construct a dict of dicts: parameters are keys and dictionary of attributes/values are values. Cached too. ''' id = "ra_params-%s" % self.ra_string() if wcache.is_cached(id): return wcache.retrieve(id) if not self.mk_ra_node(): return None d = {} for pset in self.ra_node.getElementsByTagName("parameters"): for c in pset.getElementsByTagName("parameter"): name = c.getAttribute("name") if not name: continue required = c.getAttribute("required") unique = c.getAttribute("unique") type,default = self.param_type_default(c) d[name] = { "required": required, "unique": unique, "type": type, "default": default, } return wcache.store(id,d) def completion_params(self): ''' Extra method for completion, for we want to filter some (advanced) parameters out. And we want this to be fast. ''' if not self.mk_ra_node(): return None return [c.getAttribute("name") for c in self.ra_node.getElementsByTagName("parameter") if c.getAttribute("name") and c.getAttribute("name") not in self.advanced_params ] def actions(self): ''' Construct a dict of dicts: actions are keys and dictionary of attributes/values are values. Cached too. ''' id = "ra_actions-%s" % self.ra_string() if wcache.is_cached(id): return wcache.retrieve(id) if not self.mk_ra_node(): return None d = {} for pset in self.ra_node.getElementsByTagName("actions"): for c in pset.getElementsByTagName("action"): name = c.getAttribute("name") if not name or name in self.skip_ops: continue if name == "monitor": name = monitor_name_node(c) d[name] = {} for a in c.attributes.keys(): if a in self.skip_op_attr: continue v = c.getAttribute(a) if v: d[name][a] = v # add monitor ops without role, if they don't already # exist d2 = {} for op in d.keys(): if re.match("monitor_[^0-9]", op): norole_op = re.sub(r'monitor_[^0-9_]+_(.*)', r'monitor_\1', op) if not norole_op in d: d2[norole_op] = d[op] d.update(d2) return wcache.store(id,d) def reqd_params_list(self): ''' List of required parameters. ''' d = self.params() if not d: return [] return [x for x in d if d[x]["required"] == '1'] def param_default(self,pname): ''' Parameter's default. ''' d = self.params() try: return d[pname]["default"] except: return None def sanity_check_params(self, id, pl): ''' pl is a list of (attribute,value) pairs. - are all required parameters defined - do all parameters exist ''' rc = 0 d = {} for p,v in pl: d[p] = v for p in self.reqd_params_list(): if p not in d: common_err("%s: required parameter %s not defined" % (id,p)) rc |= user_prefs.get_check_rc() for p in d: if p not in self.params(): common_err("%s: parameter %s does not exist" % (id,p)) rc |= user_prefs.get_check_rc() return rc def get_adv_timeout(self, op, node = None): if node and op == "monitor": name = monitor_name_node(node) else: name = op try: return self.actions()[name]["timeout"] except: return None def sanity_check_ops(self, id, ops, default_timeout): ''' ops is a dict, operation names are keys and values are lists of (attribute,value) pairs. - do all operations exist - are timeouts sensible ''' rc = 0 n_ops = {} for op in ops: n_op = op == "monitor" and monitor_name_pl(ops[op]) or op n_ops[n_op] = {} for p,v in ops[op]: if p in self.skip_op_attr: continue n_ops[n_op][p] = v for req_op in self.required_ops: if req_op not in n_ops: n_ops[req_op] = {} for op in n_ops: if op not in self.actions(): common_warn("%s: action %s not advertised in meta-data, it may not be supported by the RA" % (id,op)) rc |= 1 continue if "interval" in n_ops[op]: if n_ops[op]["interval"] != "0": if op == "start" or op == "stop": v = n_ops[op]["interval"] common_warn("%s: Specified interval for %s is %s, this is greater than 0 thus invalid" %(id,op,v)) try: adv_timeout = self.actions()[op]["timeout"] except: continue if "timeout" in n_ops[op]: v = n_ops[op]["timeout"] timeout_string = "specified timeout" else: v = default_timeout timeout_string = "default timeout" if crm_msec(v) < 0: continue if crm_time_cmp(adv_timeout,v) > 0: common_warn("%s: %s %s for %s is smaller than the advised %s" % \ (id,timeout_string,v,op,adv_timeout)) rc |= 1 return rc def meta(self): ''' RA meta-data as raw xml. ''' id = "ra_meta-%s" % self.ra_string() if wcache.is_cached(id): return wcache.retrieve(id) if self.ra_class in vars.meta_progs: l = prog_meta(self.ra_class) else: l = ra_if().meta(self.ra_class,self.ra_type,self.ra_provider) return wcache.store(id, l) def meta_pretty(self): ''' Print the RA meta-data in a human readable form. ''' if not self.mk_ra_node(): return '' l = [] title = self.meta_title() l.append(title) longdesc = get_nodes_text(self.ra_node,"longdesc") if longdesc: l.append(longdesc) if self.ra_class != "heartbeat": params = self.meta_parameters() if params: l.append(params.rstrip()) actions = self.meta_actions() if actions: l.append(actions) return '\n\n'.join(l) def get_shortdesc(self,n): name = n.getAttribute("name") shortdesc = get_nodes_text(n,"shortdesc") longdesc = get_nodes_text(n,"longdesc") if shortdesc and shortdesc not in (name,longdesc,self.ra_type): return shortdesc return '' def meta_title(self): s = self.ra_string() shortdesc = self.get_shortdesc(self.ra_node) if shortdesc: s = "%s (%s)" % (shortdesc,s) return s def meta_param_head(self,n): name = n.getAttribute("name") if not name: return None s = name if n.getAttribute("required") == "1": s = s + "*" type,default = self.param_type_default(n) if type and default: s = "%s (%s, [%s])" % (s,type,default) elif type: s = "%s (%s)" % (s,type) shortdesc = self.get_shortdesc(n) s = "%s: %s" % (s,shortdesc) return s def format_parameter(self,n): l = [] head = self.meta_param_head(n) if not head: self.error("no name attribute for parameter") return "" l.append(head) longdesc = get_nodes_text(n,"longdesc") if longdesc: longdesc = self.ra_tab + longdesc.replace("\n","\n"+self.ra_tab) + '\n' l.append(longdesc) return '\n'.join(l) def meta_parameter(self,param): if not self.mk_ra_node(): return '' l = [] for pset in self.ra_node.getElementsByTagName("parameters"): for c in pset.getElementsByTagName("parameter"): if c.getAttribute("name") == param: return self.format_parameter(c) def meta_parameters(self): if not self.mk_ra_node(): return '' l = [] for pset in self.ra_node.getElementsByTagName("parameters"): for c in pset.getElementsByTagName("parameter"): s = self.format_parameter(c) if s: l.append(s) if l: return "Parameters (* denotes required, [] the default):\n\n" + '\n'.join(l) def meta_action_head(self,n): name = n.getAttribute("name") if not name: return '' if name in self.skip_ops: return '' if name == "monitor": name = monitor_name_node(n) s = "%-13s" % name for a in n.attributes.keys(): if a in self.skip_op_attr: continue v = n.getAttribute(a) if v: s = "%s %s=%s" % (s,a,v) return s def meta_actions(self): l = [] for aset in self.ra_node.getElementsByTagName("actions"): for c in aset.getElementsByTagName("action"): s = self.meta_action_head(c) if s: l.append(self.ra_tab + s) if l: return "Operations' defaults (advisory minimum):\n\n" + '\n'.join(l) # # resource type definition # def ra_type_validate(s, ra_class, provider, rsc_type): ''' Only ocf ra class supports providers. ''' if not rsc_type: common_err("bad resource type specification %s"%s) return False if ra_class == "ocf": if not provider: common_err("provider could not be determined for %s"%s) return False else: if provider: common_warn("ra class %s does not support providers"%ra_class) return True return True def disambiguate_ra_type(s): ''' Unravel [class:[provider:]]type ''' l = s.split(':') if not l or len(l) > 3: return ["","",""] if len(l) == 3: return l elif len(l) == 2: ra_class,ra_type = l else: ra_class = "ocf" ra_type = l[0] ra_provider = '' if ra_class == "ocf": pl = ra_providers(ra_type,ra_class) if pl and len(pl) == 1: ra_provider = pl[0] elif not pl: ra_provider = 'heartbeat' return ra_class,ra_provider,ra_type wcache = WCache.getInstance() vars = Vars.getInstance() # vim:ts=4:sw=4:et: diff --git a/shell/modules/ui.py.in b/shell/modules/ui.py.in index c42718ddd5..a4ecb45f68 100644 --- a/shell/modules/ui.py.in +++ b/shell/modules/ui.py.in @@ -1,1730 +1,1730 @@ # Copyright (C) 2008 Dejan Muhamedagic # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # import sys import re import os import shlex import time import bz2 from help import HelpSystem, cmd_help from vars import Vars from levels import Levels from cibconfig import mkset_obj, CibFactory from cibstatus import CibStatus from template import LoadTemplate from cliformat import nvpairs2list from ra import * from msg import * from utils import * from xmlutil import * def cmd_end(cmd,dir = ".."): "Go up one level." levels.droplevel() def cmd_exit(cmd,rc = 0): "Exit the crm program" cmd_end(cmd) if options.interactive and not options.batch: print "bye" try: from readline import write_history_file write_history_file(vars.hist_file) except: pass for f in vars.tmpfiles: os.unlink(f) sys.exit(rc) class UserInterface(object): ''' Stuff common to all user interface classes. ''' global_cmd_aliases = { "quit": ("bye","exit"), "end": ("cd","up"), } def __init__(self): self.help_table = odict() self.cmd_table = odict() self.cmd_table["help"] = (self.help,(0,1),0,0) self.cmd_table["quit"] = (self.exit,(0,0),0,0) self.cmd_table["end"] = (self.end,(0,1),0,0) self.cmd_aliases = self.global_cmd_aliases.copy() if options.interactive: self.help_table = help_sys.load_level(self.lvl_name) def end_game(self, no_questions_asked = False): pass def help(self,cmd,topic = ''): "usage: help []" if not self.help_table: self.help_table = help_sys.load_level(self.lvl_name) setup_help_aliases(self) cmd_help(self.help_table,topic) def end(self,cmd,dir = ".."): "usage: end" self.end_game() cmd_end(cmd,dir) def exit(self,cmd): "usage: exit" self.end_game() cmd_exit(cmd) class CliOptions(UserInterface): ''' Manage user preferences ''' lvl_name = "options" desc_short = "user preferences" desc_long = """ Several user preferences are available. Note that it is possible to save the preferences to a startup file. """ def __init__(self): UserInterface.__init__(self) self.cmd_table["skill-level"] = (self.set_skill_level,(1,1),0,0) self.cmd_table["editor"] = (self.set_editor,(1,1),0,0) self.cmd_table["pager"] = (self.set_pager,(1,1),0,0) self.cmd_table["user"] = (self.set_crm_user,(0,1),0,0) self.cmd_table["output"] = (self.set_output,(1,1),0,0) self.cmd_table["colorscheme"] = (self.set_colors,(1,1),0,0) self.cmd_table["check-frequency"] = (self.set_check_frequency,(1,1),0,0) self.cmd_table["check-mode"] = (self.set_check_mode,(1,1),0,0) self.cmd_table["sort-elements"] = (self.set_sort_elements,(1,1),0,0) self.cmd_table["wait"] = (self.set_wait,(1,1),0,0) self.cmd_table["save"] = (self.save_options,(0,0),0,0) self.cmd_table["show"] = (self.show_options,(0,0),0,0) setup_aliases(self) def set_skill_level(self,cmd,skill_level): """usage: skill-level level: operator | administrator | expert""" return user_prefs.set_skill_level(skill_level) def set_editor(self,cmd,prog): "usage: editor " return user_prefs.set_editor(prog) def set_pager(self,cmd,prog): "usage: pager " return user_prefs.set_pager(prog) def set_crm_user(self,cmd,user = ''): "usage: user []" return user_prefs.set_crm_user(user) def set_output(self,cmd,otypes): "usage: output " return user_prefs.set_output(otypes) def set_colors(self,cmd,scheme): "usage: colorscheme " return user_prefs.set_colors(scheme) def set_check_frequency(self,cmd,freq): "usage: check-frequence " return user_prefs.set_check_freq(freq) def set_check_mode(self,cmd,mode): "usage: check-mode " return user_prefs.set_check_mode(mode) def set_sort_elements(self,cmd,opt): "usage: sort-elements {yes|no}" if not verify_boolean(opt): common_err("%s: bad boolean option"%opt) return True return user_prefs.set_sort_elems(opt) def set_wait(self,cmd,opt): "usage: wait {yes|no}" if not verify_boolean(opt): common_err("%s: bad boolean option"%opt) return True return user_prefs.set_wait(opt) def show_options(self,cmd): "usage: show" return user_prefs.write_rc(sys.stdout) def save_options(self,cmd): "usage: save" return user_prefs.save_options(vars.rc_file) def end_game(self, no_questions_asked = False): if no_questions_asked and not options.interactive: self.save_options("save") class CibShadow(UserInterface): ''' CIB shadow management class ''' lvl_name = "cib" desc_short = "manage shadow CIBs" desc_long = """ A shadow CIB is a regular cluster configuration which is kept in a file. The CRM and the CRM tools may manage a shadow CIB in the same way as the live CIB (i.e. the current cluster configuration). A shadow CIB may be applied to the cluster in one step. """ extcmd = ">/dev/null &1" % self.extcmd) except os.error: no_prog_err(self.extcmd) return False return True def new(self,cmd,name,*args): "usage: new [withstatus] [force] [empty]" if not is_filename_sane(name): return False for par in args: if not par in ("force","--force","withstatus","empty"): syntax_err((cmd,name,par), context = 'new') return False if "empty" in args: new_cmd = "%s -e '%s'" % (self.extcmd,name) else: new_cmd = "%s -c '%s'" % (self.extcmd,name) if user_prefs.get_force() or "force" in args or "--force" in args: new_cmd = "%s --force" % new_cmd if ext_cmd(new_cmd) == 0: common_info("%s shadow CIB created"%name) self.use("use",name) if "withstatus" in args: cib_status.load("shadow:%s" % name) def _find_pe(self,infile): 'Find a pe input' for p in ("%s/%s", "%s/%s.bz2", "%s/pe-*-%s.bz2"): fl = glob.glob(p % (vars.pe_dir,infile)) if fl: break if not fl: common_err("no %s pe input file"%infile) return '' if len(fl) > 1: common_err("more than one %s pe input file: %s" % \ (infile,' '.join(fl))) return '' return fl[0] def pe_import(self,cmd,infile,name = None): "usage: import {|} []" if name and not is_filename_sane(name): return False # where's the input? if not os.access(infile,os.F_OK): if "/" in infile: common_err("%s: no such file"%infile) return False infile = self._find_pe(infile) if not infile: return False if not name: name = os.path.basename(infile) # read input try: f = open(infile) except IOError,msg: common_err("open: %s"%msg) return s = ''.join(f) f.close() # decompresed and rename shadow if it ends with .bz2 if infile.endswith(".bz2"): name = name.replace(".bz2","") s = bz2.decompress(s) # copy input to the shadow try: f = open(shadowfile(name), "w") except IOError,msg: common_err("open: %s"%msg) return f.write(s) f.close() # use the shadow and load the status from there return self.use("use",name,"withstatus") def delete(self,cmd,name): "usage: delete " if not is_filename_sane(name): return False if vars.cib_in_use == name: common_err("%s shadow CIB is in use"%name) return False if ext_cmd("%s -D '%s' --force" % (self.extcmd,name)) == 0: common_info("%s shadow CIB deleted"%name) else: common_err("failed to delete %s shadow CIB"%name) return False def reset(self,cmd,name): "usage: reset " if not is_filename_sane(name): return False if ext_cmd("%s -r '%s'" % (self.extcmd,name)) == 0: common_info("copied live CIB to %s"%name) else: common_err("failed to copy live CIB to %s"%name) return False def commit(self,cmd,name): "usage: commit " if not is_filename_sane(name): return False if ext_cmd("%s -C '%s' --force" % (self.extcmd,name)) == 0: common_info("commited '%s' shadow CIB to the cluster"%name) else: common_err("failed to commit the %s shadow CIB"%name) return False def diff(self,cmd): "usage: diff" s = get_stdout(add_sudo("%s -d" % self.extcmd_stdout)) page_string(s) def list(self,cmd): "usage: list" if options.regression_tests: for t in listshadows(): print t else: multicolumn(listshadows()) def _use(self,name,withstatus): # Choose a shadow cib for further changes. If the name # provided is empty, then choose the live (cluster) cib. # Don't allow ' in shadow names if not name or name == "live": os.unsetenv(vars.shadow_envvar) vars.cib_in_use = "" if withstatus: cib_status.load("live") else: os.putenv(vars.shadow_envvar,name) vars.cib_in_use = name if withstatus: cib_status.load("shadow:%s" % name) def use(self,cmd,name = '', withstatus = ''): "usage: use [] [withstatus]" # check the name argument if name and not is_filename_sane(name): return False if name and name != "live": if not os.access(shadowfile(name),os.F_OK): common_err("%s: no such shadow CIB"%name) return False if withstatus and withstatus != "withstatus": syntax_err((cmd,withstatus), context = 'use') return False # If invoked from configure # take special precautions try: prev_level = levels.previous().myname() except: prev_level = '' if prev_level != "cibconfig": self._use(name,withstatus) return True if not cib_factory.has_cib_changed(): self._use(name,withstatus) # new CIB: refresh the CIB factory cib_factory.refresh() return True saved_cib = vars.cib_in_use self._use(name,'') # don't load the status yet if not cib_factory.is_current_cib_equal(silent = True): # user made changes and now wants to switch to a # different and unequal CIB; we refuse to cooperate common_err("the requested CIB is different from the current one") if user_prefs.get_force(): common_info("CIB overwrite forced") elif not ask("All changes will be dropped. Do you want to proceed?"): self._use(saved_cib,'') # revert to the previous CIB return False self._use(name,withstatus) # now load the status too return True def check_transition(inp,state,possible_l): if not state in possible_l: common_err("input (%s) in wrong state %s" % (inp,state)) return False return True class Template(UserInterface): ''' Configuration templates. ''' lvl_name = "template" def __init__(self): UserInterface.__init__(self) self.cmd_table["new"] = (self.new,(2,),1,0) self.cmd_table["load"] = (self.load,(0,1),1,0) self.cmd_table["edit"] = (self.edit,(0,1),1,0) self.cmd_table["delete"] = (self.delete,(1,2),1,0) self.cmd_table["show"] = (self.show,(0,1),0,0) self.cmd_table["apply"] = (self.apply,(0,2),1,0) self.cmd_table["list"] = (self.list,(0,1),0,0) setup_aliases(self) self.init_dir() self.curr_conf = '' def init_dir(self): '''Create the conf directory, link to templates''' if not os.path.isdir(vars.tmpl_conf_dir): try: os.makedirs(vars.tmpl_conf_dir) except os.error,msg: common_err("makedirs: %s"%msg) return def get_depends(self,tmpl): '''return a list of required templates''' # Not used. May need it later. try: tf = open("%s/%s" % (vars.tmpl_dir, tmpl),"r") except IOError,msg: common_err("open: %s"%msg) return l = [] for s in tf: a = s.split() if len(a) >= 2 and a[0] == '%depends_on': l += a[1:] tf.close() return l def replace_params(self,s,user_data): change = False for i in range(len(s)): word = s[i] for p in user_data: # is parameter in the word? pos = word.find('%' + p) if pos < 0: continue endpos = pos + len('%' + p) # and it isn't part of another word? if re.match("[A-Za-z0-9]", word[endpos:endpos+1]): continue # if the value contains a space or # it is a value of an attribute # put quotes around it if user_data[p].find(' ') >= 0 or word[pos-1:pos] == '=': v = '"' + user_data[p] + '"' else: v = user_data[p] word = word.replace('%' + p, v) change = True # we did replace something if change: s[i] = word if 'opt' in s: if not change: s = [] else: s.remove('opt') return s def generate(self,l,user_data): '''replace parameters (user_data) and generate output ''' l2 = [] for piece in l: piece2 = [] for s in piece: s = self.replace_params(s,user_data) if s: piece2.append(' '.join(s)) if piece2: l2.append(' \\\n\t'.join(piece2)) return '\n'.join(l2) def process(self,config = ''): '''Create a cli configuration from the current config''' try: f = open("%s/%s" % (vars.tmpl_conf_dir, config or self.curr_conf),'r') except IOError,msg: common_err("open: %s"%msg) return '' l = [] piece = [] user_data = {} # states START = 0; PFX = 1; DATA = 2; GENERATE = 3 state = START err_buf.start_tmp_lineno() rc = True for inp in f: err_buf.incr_lineno() if inp.startswith('#'): continue if type(inp) == type(u''): inp = inp.encode('ascii') inp = inp.strip() try: s = shlex.split(inp) except ValueError, msg: common_err(msg) continue while '\n' in s: s.remove('\n') if not s: if state == GENERATE and piece: l.append(piece) piece = [] elif s[0] in ("%name","%depends_on","%suggests"): continue elif s[0] == "%pfx": if check_transition(inp,state,(START,DATA)) and len(s) == 2: pfx = s[1] state = PFX elif s[0] == "%required": if check_transition(inp,state,(PFX,)): state = DATA data_reqd = True elif s[0] == "%optional": if check_transition(inp,state,(PFX,DATA)): state = DATA data_reqd = False elif s[0] == "%%": if state != DATA: common_warn("user data in wrong state %s" % state) if len(s) < 2: common_warn("parameter name missing") elif len(s) == 2: if data_reqd: common_err("required parameter %s not set" % s[1]) rc = False elif len(s) == 3: user_data["%s:%s" % (pfx,s[1])] = s[2] else: common_err("%s: syntax error" % inp) elif s[0] == "%generate": if check_transition(inp,state,(DATA,)): state = GENERATE piece = [] elif state == GENERATE: if s: piece.append(s) else: common_err("<%s> unexpected" % inp) if piece: l.append(piece) err_buf.stop_tmp_lineno() f.close() if not rc: return '' return self.generate(l,user_data) def new(self,cmd,name,*args): "usage: new