diff --git a/crmd/pengine.c b/crmd/pengine.c index 989601b553..ae60a59de8 100644 --- a/crmd/pengine.c +++ b/crmd/pengine.c @@ -1,323 +1,323 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include static GCHSource *pe_source = NULL; struct crm_subsystem_s *pe_subsystem = NULL; void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); static void save_cib_contents(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); CRM_CHECK(id != NULL, return); if (rc == cib_ok) { int len = 15; char *filename = NULL; len += strlen(id); len += strlen(PE_STATE_DIR); crm_malloc0(filename, len); CRM_CHECK(filename != NULL, return); sprintf(filename, PE_STATE_DIR "/pe-core-%s.bz2", id); if (write_xml_file(output, filename, TRUE) < 0) { crm_err("Could not save CIB contents after PE crash to %s", filename); } else { crm_notice("Saved CIB contents after PE crash to %s", filename); } crm_free(filename); } crm_free(id); } static void pe_connection_destroy(gpointer user_data) { pe_source = NULL; clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); if (is_set(fsa_input_register, pe_subsystem->flag_required)) { int rc = cib_ok; cl_uuid_t new_uuid; char uuid_str[UU_UNPARSE_SIZEOF]; cl_uuid_generate(&new_uuid); cl_uuid_unparse(&new_uuid, uuid_str); crm_crit("Connection to the Policy Engine failed (pid=%d, uuid=%s)", pe_subsystem->pid, uuid_str); /* *The PE died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_cib_conn->cmds->register_callback(fsa_cib_conn, rc, 5, FALSE, crm_strdup(uuid_str), "save_cib_contents", save_cib_contents); } else { crm_info("Connection to the Policy Engine released"); } pe_subsystem->pid = -1; pe_subsystem->ipc = NULL; pe_subsystem->client = NULL; mainloop_set_trigger(fsa_source); return; } static gboolean pe_msg_dispatch(IPC_Channel * client, gpointer user_data) { xmlNode *msg = NULL; gboolean stay_connected = TRUE; while (IPC_ISRCONN(client) && client->ops->is_message_pending(client)) { msg = xmlfromIPC(client, MAX_IPC_DELAY); if (msg != NULL) { route_message(C_IPC_MESSAGE, msg); free_xml(msg); } } if (client->ch_status != IPC_CONNECT) { crm_info("Received HUP from %s:[%d]", pe_subsystem->name, pe_subsystem->pid); stay_connected = FALSE; } mainloop_set_trigger(fsa_source); return stay_connected; } /* A_PE_START, A_PE_STOP, A_TE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct crm_subsystem_s *this_subsys = pe_subsystem; long long stop_actions = A_PE_STOP; long long start_actions = A_PE_START; if (action & stop_actions) { clear_bit_inplace(fsa_input_register, pe_subsystem->flag_required); if (is_heartbeat_cluster()) { stop_subsystem(this_subsys, FALSE); } else { if (pe_source) { G_main_del_IPC_Channel(pe_source); pe_source = NULL; } if (pe_subsystem->ipc) { pe_subsystem->ipc->ops->destroy(pe_subsystem->ipc); pe_subsystem->ipc = NULL; } clear_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); } } - if (action & start_actions) { + if ((action & start_actions) && (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE)) { if (cur_state != S_STOPPING) { if (is_openais_cluster()) { set_bit_inplace(fsa_input_register, pe_subsystem->flag_required); } else if (is_heartbeat_cluster()) { if (start_subsystem(this_subsys) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } sleep(4); } pe_subsystem->ipc = init_client_ipc_comms_nodispatch(CRM_SYSTEM_PENGINE); if (pe_subsystem->ipc == NULL) { crm_warn("Setup of client connection failed," " not adding channel to mainloop"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } if (is_openais_cluster()) { pe_subsystem->pid = pe_subsystem->ipc->farside_pid;; } set_bit_inplace(fsa_input_register, pe_subsystem->flag_connected); pe_source = G_main_add_IPC_Channel(G_PRIORITY_HIGH, pe_subsystem->ipc, FALSE, pe_msg_dispatch, NULL, pe_connection_destroy); } else { crm_info("Ignoring request to start %s while shutting down", this_subsys->name); } } } int fsa_pe_query = 0; char *fsa_pe_ref = NULL; /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE) { crm_err("Not DC: No need to invoke the PE (anymore): %s", fsa_action2string(action)); return; } if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the PE"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the PE to connect"); crmd_fsa_stall(NULL); register_fsa_action(A_PE_START); } return; } if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_err("Attempted to invoke the PE without a consistent" " copy of the CIB!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); /* Make sure any queued calculations are discarded */ crm_free(fsa_pe_ref); fsa_pe_ref = NULL; fsa_cib_conn->cmds->register_callback(fsa_cib_conn, fsa_pe_query, 60, FALSE, NULL, "do_pe_invoke_callback", do_pe_invoke_callback); } void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { xmlNode *cmd = NULL; if (rc != cib_ok) { crm_err("Cant retrieve the CIB: %s", cib_error2string(rc)); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superceeded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { crm_debug("No need to invoke the PE anymore"); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding PE request in state: %s", fsa_state2string(fsa_state)); return; } else if (last_peer_update != 0) { crm_debug("Re-asking for the CIB: peer update %d still pending", last_peer_update); sleep(1); register_fsa_action(A_PE_INVOKE); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_err("Invoking PE in state: %s", fsa_state2string(fsa_state)); return; } CRM_LOG_ASSERT(output != NULL); crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid); crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); if (ever_had_quorum && crm_have_quorum == FALSE) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); crm_free(fsa_pe_ref); fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE); if (send_ipc_message(pe_subsystem->ipc, cmd) == FALSE) { crm_err("Could not contact the pengine"); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } crm_debug("Invoking the PE: query=%d, ref=%s, seq=%llu, quorate=%d", fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum); free_xml(cmd); } diff --git a/cts/CTS.py b/cts/CTS.py index eb62ab8bbd..8eebfbc7ec 100644 --- a/cts/CTS.py +++ b/cts/CTS.py @@ -1,1452 +1,1463 @@ '''CTS: Cluster Testing System: Main module Classes related to testing high-availability clusters... ''' __copyright__=''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import types, string, select, sys, time, re, os, struct, signal import time, syslog, random, traceback, base64, pickle, binascii, fcntl from socket import gethostbyname_ex from UserDict import UserDict from subprocess import Popen,PIPE from cts.CTSvars import * class CtsLab(UserDict): '''This class defines the Lab Environment for the Cluster Test System. It defines those things which are expected to change from test environment to test environment for the same cluster manager. It is where you define the set of nodes that are in your test lab what kind of reset mechanism you use, etc. This class is derived from a UserDict because we hold many different parameters of different kinds, and this provides provide a uniform and extensible interface useful for any kind of communication between the user/administrator/tester and CTS. At this point in time, it is the intent of this class to model static configuration and/or environmental data about the environment which doesn't change as the tests proceed. Well-known names (keys) are an important concept in this class. The HasMinimalKeys member function knows the minimal set of well-known names for the class. The following names are standard (well-known) at this time: nodes An array of the nodes in the cluster reset A ResetMechanism object logger An array of objects that log strings... CMclass The type of ClusterManager we are running (This is a class object, not a class instance) RandSeed Random seed. It is a triple of bytes. (optional) The CTS code ignores names it doesn't know about/need. The individual tests have access to this information, and it is perfectly acceptable to provide hints, tweaks, fine-tuning directions or other information to the tests through this mechanism. ''' def __init__(self): self.data = {} self.rsh = RemoteExec(self) self.RandomGen = random.Random() self.Scenario = None # Get a random seed for the random number generator. self["LogWatcher"] = "any" self["LogFileName"] = "/var/log/messages" self["OutputFile"] = None self["SyslogFacility"] = "daemon" self["CMclass"] = None self["logger"] = ([StdErrLog(self)]) self.SeedRandom() def SeedRandom(self, seed=None): if not seed: seed = int(time.time()) if self.has_key("RandSeed"): self.log("New random seed is: " + str(seed)) else: self.log("Random seed is: " + str(seed)) self["RandSeed"] = seed self.RandomGen.seed(str(seed)) def HasMinimalKeys(self): 'Return TRUE if our object has the minimal set of keys/values in it' result = 1 for key in self.MinimalKeys: if not self.has_key(key): result = None return result def log(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: logfcn(string.strip(args)) def debug(self, args): "Log using each of the supplied logging methods" for logfcn in self._logfunctions: if logfcn.name() != "StdErrLog": logfcn("debug: %s" % string.strip(args)) def dump(self): keys = [] for key in self.keys(): keys.append(key) keys.sort() for key in keys: self.debug("Environment["+key+"]:\t"+str(self[key])) def run(self, Scenario, Iterations): if not Scenario: self.log("No scenario was defined") return 1 self.log("Cluster nodes: ") for node in self["nodes"]: self.log(" * %s" % (node)) if not Scenario.SetUp(): return 1 try : Scenario.run(Iterations) except : self.log("Exception by %s" % sys.exc_info()[0]) for logmethod in self["logger"]: traceback.print_exc(50, logmethod) Scenario.summarize() Scenario.TearDown() return 1 #ClusterManager.oprofileSave(Iterations) Scenario.TearDown() Scenario.summarize() if Scenario.Stats["failure"] > 0: return Scenario.Stats["failure"] elif Scenario.Stats["success"] != Iterations: self.log("No failure count but success != requested iterations") return 1 return 0 def __setitem__(self, key, value): '''Since this function gets called whenever we modify the dictionary (object), we can (and do) validate those keys that we know how to validate. For the most part, we know how to validate the "MinimalKeys" elements. ''' # # List of nodes in the system # if key == "nodes": self.Nodes = {} for node in value: # I don't think I need the IP address, etc. but this validates # the node name against /etc/hosts and/or DNS, so it's a # GoodThing(tm). try: self.Nodes[node] = gethostbyname_ex(node) except: print node+" not found in DNS... aborting" raise # # List of Logging Mechanism(s) # elif key == "logger": if len(value) < 1: raise ValueError("Must have at least one logging mechanism") for logger in value: if not callable(logger): raise ValueError("'logger' elements must be callable") self._logfunctions = value # # Cluster Manager Class # elif key == "CMclass": if value and not issubclass(value, ClusterManager): raise ValueError("'CMclass' must be a subclass of" " ClusterManager") # # Initial Random seed... # #elif key == "RandSeed": # if len(value) != 3: # raise ValueError("'Randseed' must be a 3-element list/tuple") # for elem in value: # if not isinstance(elem, types.IntType): # raise ValueError("'Randseed' list must all be ints") self.data[key] = value def IsValidNode(self, node): 'Return TRUE if the given node is valid' return self.Nodes.has_key(node) def __CheckNode(self, node): "Raise a ValueError if the given node isn't valid" if not self.IsValidNode(node): raise ValueError("Invalid node [%s] in CheckNode" % node) def RandomNode(self): '''Choose a random node from the cluster''' return self.RandomGen.choice(self["nodes"]) class Logger: TimeFormat = "%b %d %H:%M:%S\t" def __call__(self, lines): raise ValueError("Abstract class member (__call__)") def write(self, line): return self(line.rstrip()) def writelines(self, lines): for s in lines: self.write(s) return 1 def flush(self): return 1 def isatty(self): return None class SysLog(Logger): # http://docs.python.org/lib/module-syslog.html defaultsource="CTS" map = { "kernel": syslog.LOG_KERN, "user": syslog.LOG_USER, "mail": syslog.LOG_MAIL, "daemon": syslog.LOG_DAEMON, "auth": syslog.LOG_AUTH, "lpr": syslog.LOG_LPR, "news": syslog.LOG_NEWS, "uucp": syslog.LOG_UUCP, "cron": syslog.LOG_CRON, "local0": syslog.LOG_LOCAL0, "local1": syslog.LOG_LOCAL1, "local2": syslog.LOG_LOCAL2, "local3": syslog.LOG_LOCAL3, "local4": syslog.LOG_LOCAL4, "local5": syslog.LOG_LOCAL5, "local6": syslog.LOG_LOCAL6, "local7": syslog.LOG_LOCAL7, } def __init__(self, labinfo): if labinfo.has_key("syslogsource"): self.source=labinfo["syslogsource"] else: self.source=SysLog.defaultsource self.facility="daemon" if labinfo.has_key("SyslogFacility") and labinfo["SyslogFacility"]: if SysLog.map.has_key(labinfo["SyslogFacility"]): self.facility=labinfo["SyslogFacility"] else: raise ValueError("%s: bad syslog facility"%labinfo["SyslogFacility"]) self.facility=SysLog.map[self.facility] syslog.openlog(self.source, 0, self.facility) def setfacility(self, facility): self.facility = facility if SysLog.map.has_key(self.facility): self.facility=SysLog.map[self.facility] syslog.closelog() syslog.openlog(self.source, 0, self.facility) def __call__(self, lines): if isinstance(lines, types.StringType): syslog.syslog(lines) else: for line in lines: syslog.syslog(line) def name(self): return "Syslog" class StdErrLog(Logger): def __init__(self, labinfo): pass def __call__(self, lines): t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): sys.__stderr__.writelines([t, lines, "\n"]) else: for line in lines: sys.__stderr__.writelines([t, line, "\n"]) sys.__stderr__.flush() def name(self): return "StdErrLog" class FileLog(Logger): def __init__(self, labinfo, filename=None): if filename == None: filename=labinfo["LogFileName"] self.logfile=filename import os self.hostname = os.uname()[1]+" " self.source = "CTS: " def __call__(self, lines): fd = open(self.logfile, "a") t = time.strftime(Logger.TimeFormat, time.localtime(time.time())) if isinstance(lines, types.StringType): fd.writelines([t, self.hostname, self.source, lines, "\n"]) else: for line in lines: fd.writelines([t, self.hostname, self.source, line, "\n"]) fd.close() def name(self): return "FileLog" class RemoteExec: '''This is an abstract remote execution class. It runs a command on another machine - somehow. The somehow is up to us. This particular class uses ssh. Most of the work is done by fork/exec of ssh or scp. ''' def __init__(self, Env=None, silent=False): self.Env = Env self.silent = silent # -n: no stdin, -x: no X11, # -o ServerAliveInterval=5 disconnect after 3*5s if the server stops responding self.Command = "ssh -l root -n -x -o ServerAliveInterval=5 -o ConnectTimeout=10 -o TCPKeepAlive=yes -o ServerAliveCountMax=3 " # -B: batch mode, -q: no stats (quiet) self.CpCommand = "scp -B -q" self.OurNode=string.lower(os.uname()[1]) def enable_qarsh(self): # http://nstraz.wordpress.com/2008/12/03/introducing-qarsh/ self.log("Using QARSH for connections to cluster nodes") self.Command = "qarsh -t 300 -l root" self.CpCommand = "qacp -q" def _fixcmd(self, cmd): return re.sub("\'", "'\\''", cmd) def _cmd(self, *args): '''Compute the string that will run the given command on the given remote system''' args= args[0] sysname = args[0] command = args[1] #print "sysname: %s, us: %s" % (sysname, self.OurNode) if sysname == None or string.lower(sysname) == self.OurNode or sysname == "localhost": ret = command else: ret = self.Command + " " + sysname + " '" + self._fixcmd(command) + "'" #print ("About to run %s\n" % ret) return ret def log(self, args): if not self.silent: if not self.Env: print (args) else: self.Env.log(args) def debug(self, args): if not self.silent: if not self.Env: print (args) else: self.Env.debug(args) def __call__(self, node, command, stdout=0, synchronous=1, silent=False, blocking=True): '''Run the given command on the given remote system If you call this class like a function, this is the function that gets called. It just runs it roughly as though it were a system() call on the remote machine. The first argument is name of the machine to run it on. ''' rc = 0 result = None if not synchronous: proc = Popen(self._cmd([node, command]), stdout = PIPE, stderr = PIPE, close_fds = True, shell = True) if not silent: self.debug("cmd: async: target=%s, rc=%d: %s" % (node, proc.pid, command)) if proc.pid > 0: return 0 return -1 proc = Popen(self._cmd([node, command]), stdout = PIPE, stderr = PIPE, close_fds = True, shell = True) #if not blocking: # fcntl.fcntl(proc.stdout.fileno(), fcntl.F_SETFL, os.O_NONBLOCK) if proc.stdout: if stdout == 1: result = proc.stdout.readline() else: result = proc.stdout.readlines() proc.stdout.close() else: self.log("No stdout stream") rc = proc.wait() if not silent: self.debug("cmd: target=%s, rc=%d: %s" % (node, rc, command)) if stdout == 1: return result if proc.stderr: errors = proc.stderr.readlines() proc.stderr.close() if not silent: for err in errors: self.debug("cmd: stderr: %s" % err) if stdout == 0: if not silent and result: for line in result: self.debug("cmd: stdout: %s" % line) return rc return (rc, result) def cp(self, source, target, silent=False): '''Perform a remote copy''' cpstring = self.CpCommand + " \'" + source + "\'" + " \'" + target + "\'" rc = os.system(cpstring) if not silent: self.debug("cmd: rc=%d: %s" % (rc, cpstring)) return rc has_log_watcher = {} log_watcher_bin = "/tmp/cts_log_watcher.py" log_watcher = """ import sys, os, fcntl ''' Remote logfile reader for CTS Reads a specified number of lines from the supplied offset Returns the current offset Contains logic for handling truncation ''' limit = 0 offset = 0 prefix = '' filename = '/var/log/messages' skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == '-l' or args[i] == '--limit': skipthis=1 limit = int(args[i+1]) elif args[i] == '-f' or args[i] == '--filename': skipthis=1 filename = args[i+1] elif args[i] == '-o' or args[i] == '--offset': skipthis=1 offset = args[i+1] elif args[i] == '-p' or args[i] == '--prefix': skipthis=1 prefix = args[i+1] logfile=open(filename, 'r') logfile.seek(0, os.SEEK_END) newsize=logfile.tell() if offset != 'EOF': offset = int(offset) if newsize >= offset: logfile.seek(offset) else: print prefix + ('File truncated from %d to %d' % (offset, newsize)) if (newsize*1.05) < offset: logfile.seek(0) # else: we probably just lost a few logs after a fencing op # continue from the new end # TODO: accept a timestamp and discard all messages older than it # Don't block when we reach EOF fcntl.fcntl(logfile.fileno(), fcntl.F_SETFL, os.O_NONBLOCK) count = 0 while True: if logfile.tell() >= newsize: break elif limit and count >= limit: break line = logfile.readline() if not line: break print line.strip() count += 1 print prefix + 'Last read: %d, limit=%d, count=%d' % (logfile.tell(), limit, count) logfile.close() """ class SearchObj: def __init__(self, Env, filename, host=None): self.Env = Env self.host = host self.filename = filename self.cache = [] self.offset = "EOF" if host == None: host = "localhost" global has_log_watcher if not has_log_watcher.has_key(host): global log_watcher global log_watcher_bin self.debug("Installing %s on %s" % (log_watcher_bin, host)) self.Env.rsh(host, '''echo "%s" > %s''' % (log_watcher, log_watcher_bin), silent=True) has_log_watcher[host] = 1 self.next() def __str__(self): if self.host: return "%s:%s" % (self.host, self.filename) return self.filename def log(self, args): message = "lw: %s: %s" % (self, args) if not self.Env: print (message) else: self.Env.log(message) def debug(self, args): message = "lw: %s: %s" % (self, args) if not self.Env: print (message) else: self.Env.debug(message) def next(self): cache = [] if not len(self.cache): global log_watcher_bin (rc, lines) = self.Env.rsh( self.host, "python %s -p CTSwatcher: -f %s -o %s" % (log_watcher_bin, self.filename, self.offset), stdout=None, silent=True, blocking=False) for line in lines: match = re.search("^CTSwatcher:Last read: (\d+)", line) if match: last_offset = self.offset self.offset = match.group(1) #if last_offset == "EOF": self.debug("Got %d lines, new offset: %s" % (len(lines), self.offset)) elif re.search("^CTSwatcher:.*truncated", line): self.log(line) elif re.search("^CTSwatcher:", line): self.debug("Got control line: "+ line) else: cache.append(line) return cache class LogWatcher(RemoteExec): '''This class watches logs for messages that fit certain regular expressions. Watching logs for events isn't the ideal way to do business, but it's better than nothing :-) On the other hand, this class is really pretty cool ;-) The way you use this class is as follows: Construct a LogWatcher object Call setwatch() when you want to start watching the log Call look() to scan the log looking for the patterns ''' def __init__(self, Env, log, regexes, name="Anon", timeout=10, debug_level=None, silent=False): '''This is the constructor for the LogWatcher class. It takes a log name to watch, and a list of regular expressions to watch for." ''' RemoteExec.__init__(self, Env) # Validate our arguments. Better sooner than later ;-) for regex in regexes: assert re.compile(regex) self.name = name self.regexes = regexes self.filename = log self.debug_level = debug_level self.whichmatch = -1 self.unmatched = None self.file_list = [] self.line_cache = [] if not silent: for regex in self.regexes: self.debug("Looking for regex: "+regex) self.Timeout = int(timeout) self.returnonlymatch = None def debug(self, args): message = "lw: %s: %s" % (self.name, args) if not self.Env: print (message) else: self.Env.debug(message) def setwatch(self): '''Mark the place to start watching the log from. ''' if self.Env["LogWatcher"] == "remote": for node in self.Env["nodes"]: self.file_list.append(SearchObj(self.Env, self.filename, node)) else: self.file_list.append(SearchObj(self.Env, self.filename)) def __del__(self): if self.debug_level > 1: self.debug("Destroy") def ReturnOnlyMatch(self, onlymatch=1): '''Specify one or more subgroups of the match to return rather than the whole string http://www.python.org/doc/2.5.2/lib/match-objects.html ''' self.returnonlymatch = onlymatch def __get_lines(self): if not len(self.file_list): raise ValueError("No sources to read from") for f in self.file_list: lines = f.next() if len(lines): self.line_cache.extend(lines) def look(self, timeout=None, silent=False): '''Examine the log looking for the given patterns. It starts looking from the place marked by setwatch(). This function looks in the file in the fashion of tail -f. It properly recovers from log file truncation, but not from removing and recreating the log. It would be nice if it recovered from this as well :-) We return the first line which matches any of our patterns. ''' if timeout == None: timeout = self.Timeout lines=0 begin=time.time() end=begin+timeout+1 if self.debug_level > 2: self.debug("starting single search: timeout=%d, begin=%d, end=%d" % (timeout, begin, end)) self.__get_lines() while True: if len(self.line_cache): lines += 1 line = self.line_cache[0] self.line_cache.remove(line) which=-1 if re.search("CTS:", line): continue if self.debug_level > 2: self.debug("Processing: "+ line) for regex in self.regexes: which=which+1 if self.debug_level > 2: self.debug("Comparing line to: "+ regex) #matchobj = re.search(string.lower(regex), string.lower(line)) matchobj = re.search(regex, line) if matchobj: self.whichmatch=which if self.returnonlymatch: return matchobj.group(self.returnonlymatch) else: self.debug("Matched: "+line) if self.debug_level > 1: self.debug("With: "+ regex) return line elif timeout > 0 and end > time.time(): time.sleep(1) self.__get_lines() elif timeout > 0: # Grab any relevant messages that might have arrived since # the last time the buffer was populated self.__get_lines() # Don't come back here again timeout = 0 else: self.debug("Single search terminated: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), lines)) return None self.debug("How did we get here") return None def lookforall(self, timeout=None, allow_multiple_matches=None, silent=False): '''Examine the log looking for ALL of the given patterns. It starts looking from the place marked by setwatch(). We return when the timeout is reached, or when we have found ALL of the regexes that were part of the watch ''' if timeout == None: timeout = self.Timeout save_regexes = self.regexes returnresult = [] if not silent: self.debug("starting search: timeout=%d" % timeout) for regex in self.regexes: if self.debug_level > 2: self.debug("Looking for regex: "+regex) while (len(self.regexes) > 0): oneresult = self.look(timeout) if not oneresult: self.unmatched = self.regexes self.matched = returnresult self.regexes = save_regexes return None returnresult.append(oneresult) if not allow_multiple_matches: del self.regexes[self.whichmatch] else: # Allow multiple regexes to match a single line tmp_regexes = self.regexes self.regexes = [] which = 0 for regex in tmp_regexes: matchobj = re.search(regex, oneresult) if not matchobj: self.regexes.append(regex) self.unmatched = None self.matched = returnresult self.regexes = save_regexes return returnresult class NodeStatus: def __init__(self, Env): self.Env = Env def IsNodeBooted(self, node): '''Return TRUE if the given node is booted (responds to pings)''' return self.Env.rsh("localhost", "ping -nq -c1 -w1 %s" % node, silent=True) == 0 def IsSshdUp(self, node): rc = self.Env.rsh(node, "true", silent=True) return rc == 0 def WaitForNodeToComeUp(self, node, Timeout=300): '''Return TRUE when given node comes up, or None/FALSE if timeout''' timeout=Timeout anytimeouts=0 while timeout > 0: if self.IsNodeBooted(node) and self.IsSshdUp(node): if anytimeouts: # Fudge to wait for the system to finish coming up time.sleep(30) self.Env.debug("Node %s now up" % node) return 1 time.sleep(30) if (not anytimeouts): self.Env.debug("Waiting for node %s to come up" % node) anytimeouts=1 timeout = timeout - 1 self.Env.log("%s did not come up within %d tries" % (node, Timeout)) answer = raw_input('Continue? [nY]') if answer and answer == "n": raise ValueError("%s did not come up within %d tries" % (node, Timeout)) def WaitForAllNodesToComeUp(self, nodes, timeout=300): '''Return TRUE when all nodes come up, or FALSE if timeout''' for node in nodes: if not self.WaitForNodeToComeUp(node, timeout): return None return 1 class ClusterManager(UserDict): '''The Cluster Manager class. This is an subclass of the Python dictionary class. (this is because it contains lots of {name,value} pairs, not because it's behavior is that terribly similar to a dictionary in other ways.) This is an abstract class which class implements high-level operations on the cluster and/or its cluster managers. Actual cluster managers classes are subclassed from this type. One of the things we do is track the state we think every node should be in. ''' def __InitialConditions(self): #if os.geteuid() != 0: # raise ValueError("Must Be Root!") None def _finalConditions(self): for key in self.keys(): if self[key] == None: raise ValueError("Improper derivation: self[" + key + "] must be overridden by subclass.") def __init__(self, Environment, randseed=None): self.Env = Environment self.__InitialConditions() self.clear_cache = 0 self.TestLoggingLevel=0 self.data = { "up" : "up", # Status meaning up "down" : "down", # Status meaning down "StonithCmd" : "stonith -t baytech -p '10.10.10.100 admin admin' %s", "DeadTime" : 30, # Max time to detect dead node... "StartTime" : 90, # Max time to start up # # These next values need to be overridden in the derived class. # "Name" : None, "StartCmd" : None, "StopCmd" : None, "StatusCmd" : None, #"RereadCmd" : None, "BreakCommCmd" : None, "FixCommCmd" : None, #"TestConfigDir" : None, "LogFileName" : None, #"Pat:Master_started" : None, #"Pat:Slave_started" : None, "Pat:We_stopped" : None, "Pat:They_stopped" : None, "BadRegexes" : None, # A set of "bad news" regexes # to apply to the log } self.rsh = self.Env.rsh self.ShouldBeStatus={} self.ns = NodeStatus(self.Env) self.OurNode=string.lower(os.uname()[1]) + self.__instance_errorstoignore = [] def key_for_node(self, node): return node + def instance_errorstoignore_clear(self): + '''Allows the test scenario to reset instance errors to ignore on each iteration.''' + self.__instance_errorstoignore = [] + + def instance_errorstoignore(self): + '''Return list of errors which are 'normal' for a specific test instance''' + return self.__instance_errorstoignore + def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] def log(self, args): self.Env.log(args) def debug(self, args): self.Env.debug(args) def prepare(self): '''Finish the Initialization process. Prepare to test...''' for node in self.Env["nodes"]: if self.StataCM(node): self.ShouldBeStatus[node]="up" else: self.ShouldBeStatus[node]="down" self.unisolate_node(node) def upcount(self): '''How many nodes are up?''' count=0 for node in self.Env["nodes"]: if self.ShouldBeStatus[node]=="up": count=count+1 return count def install_helper(self, filename, nodes=None): file_with_path="%s/%s" % (CTSvars.CTS_home, filename) if not nodes: nodes = self.Env["nodes"] self.debug("Installing %s to %s on %s" % (filename, CTSvars.CTS_home, repr(self.Env["nodes"]))) for node in nodes: self.rsh(node, "mkdir -p %s" % CTSvars.CTS_home) self.rsh.cp(file_with_path, "root@%s:%s" % (node, file_with_path)) return file_with_path def install_config(self, node): return None def clear_all_caches(self): if self.clear_cache: for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "down": self.debug("Removing cache file on: "+node) self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache") else: self.debug("NOT Removing cache file on: "+node) def prepare_fencing_watcher(self, node): # If we don't have quorum now but get it as a result of starting this node, # then a bunch of nodes might get fenced if self.HasQuorum(None): return None if not self.has_key("Pat:They_fenced"): return None if not self.has_key("Pat:They_fenced_offset"): return None stonith = None stonithPats = [] for peer in self.Env["nodes"]: if peer != node and self.ShouldBeStatus[peer] != "up": stonithPats.append(self["Pat:They_fenced"] % peer) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonith = LogWatcher(self.Env, self["LogFileName"], stonithPats, "StartaCM", 0) stonith.setwatch() return stonith def fencing_cleanup(self, node, stonith): peer_list = [] # If we just started a node, we may now have quorum (and permission to fence) # Make sure everyone is online before continuing self.ns.WaitForAllNodesToComeUp(self.Env["nodes"]) if not stonith: return peer_list if not self.HasQuorum(None) and len(self.Env["nodes"]) > 2: # We didn't gain quorum - we shouldn't have shot anyone return peer_list # Now see if any states need to be updated self.debug("looking for: " + repr(stonith.regexes)) shot = stonith.look(0) while shot: line = repr(shot) self.debug("Found: "+ line) # Extract node name start = line.find(self["Pat:They_fenced_offset"]) + len(self["Pat:They_fenced_offset"]) peer = line[start:].split("' ")[0] self.debug("Found peer: "+ peer) peer_list.append(peer) self.ShouldBeStatus[peer]="down" - self.log(" Peer %s was fenced as a result of %s starting" % (peer, node)) + self.debug(" Peer %s was fenced as a result of %s starting" % (peer, node)) # Get the next one shot = stonith.look(60) # Poll until it comes up if self.Env["at-boot"]: if not self.StataCM(peer): time.sleep(self["StartTime"]) if not self.StataCM(peer): self.log("ERROR: Peer %s failed to restart after being fenced" % peer) return None self.ShouldBeStatus[peer]="up" return peer_list def StartaCM(self, node, verbose=False): '''Start up the cluster manager on a given node''' if verbose: self.log("Starting %s on node %s" %(self["Name"], node)) else: self.debug("Starting %s on node %s" %(self["Name"], node)) ret = 1 if not self.ShouldBeStatus.has_key(node): self.ShouldBeStatus[node] = "down" if self.ShouldBeStatus[node] != "down": return 1 patterns = [] # Technically we should always be able to notice ourselves starting patterns.append(self["Pat:Local_started"] % node) if self.upcount() == 0: patterns.append(self["Pat:Master_started"] % node) else: patterns.append(self["Pat:Slave_started"] % node) watch = LogWatcher( self.Env, self["LogFileName"], patterns, "StartaCM", self["StartTime"]+10) watch.setwatch() self.install_config(node) self.ShouldBeStatus[node] = "any" if self.StataCM(node) and self.cluster_stable(self["DeadTime"]): self.log ("%s was already started" %(node)) return 1 # Clear out the host cache so autojoin can be exercised if self.clear_cache: self.debug("Removing cache file on: "+node) self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache") if not(self.Env["valgrind-tests"]): startCmd = self["StartCmd"] else: if self.Env["valgrind-prefix"]: prefix = self.Env["valgrind-prefix"] else: prefix = "cts" startCmd = """G_SLICE=always-malloc HA_VALGRIND_ENABLED='%s' VALGRIND_OPTS='%s --log-file=/tmp/%s-%s.valgrind' %s""" % ( self.Env["valgrind-procs"], self.Env["valgrind-opts"], prefix, """%p""", self["StartCmd"]) stonith = self.prepare_fencing_watcher(node) if self.rsh(node, startCmd) != 0: self.log ("Warn: Start command failed on node %s" %(node)) return None self.ShouldBeStatus[node]="up" watch_result = watch.lookforall() - self.fencing_cleanup(node, stonith) - + peer_list = self.fencing_cleanup(node, stonith) + for peer in peer_list: + self.__instance_errorstoignore.append("te_fence_node: Executing .* fencing operation .* on %s" % (peer)); + if watch.unmatched: for regex in watch.unmatched: self.log ("Warn: Startup pattern not found: %s" %(regex)) if watch_result and self.cluster_stable(self["DeadTime"]): #self.debug("Found match: "+ repr(watch_result)) return 1 elif self.StataCM(node) and self.cluster_stable(self["DeadTime"]): return 1 self.log ("Warn: Start failed for node %s" %(node)) return None def StartaCMnoBlock(self, node, verbose=False): '''Start up the cluster manager on a given node with none-block mode''' if verbose: self.log("Starting %s on node %s" %(self["Name"], node)) else: self.debug("Starting %s on node %s" %(self["Name"], node)) # Clear out the host cache so autojoin can be exercised if self.clear_cache: self.debug("Removing cache file on: "+node) self.rsh(node, "rm -f "+CTSvars.HA_VARLIBHBDIR+"/hostcache") if not(self.Env["valgrind-tests"]): startCmd = self["StartCmd"] else: if self.Env["valgrind-prefix"]: prefix = self.Env["valgrind-prefix"] else: prefix = "cts" startCmd = """G_SLICE=always-malloc HA_VALGRIND_ENABLED='%s' VALGRIND_OPTS='%s --log-file=/tmp/%s-%s.valgrind' %s""" % ( self.Env["valgrind-procs"], self.Env["valgrind-opts"], prefix, """%p""", self["StartCmd"]) self.rsh(node, startCmd, synchronous=0) self.ShouldBeStatus[node]="up" return 1 def StopaCM(self, node, verbose=False): '''Stop the cluster manager on a given node''' if verbose: self.log("Stopping %s on node %s" %(self["Name"], node)) else: self.debug("Stopping %s on node %s" %(self["Name"], node)) if self.ShouldBeStatus[node] != "up": return 1 if self.rsh(node, self["StopCmd"]) == 0: # Make sure we can continue even if corosync leaks # fdata-* is the old name self.rsh(node, "rm -f /dev/shm/qb-* /dev/shm/fdata-*") self.ShouldBeStatus[node]="down" self.cluster_stable(self["DeadTime"]) return 1 else: self.log ("Could not stop %s on node %s" %(self["Name"], node)) return None def StopaCMnoBlock(self, node): '''Stop the cluster manager on a given node with none-block mode''' self.debug("Stopping %s on node %s" %(self["Name"], node)) self.rsh(node, self["StopCmd"], synchronous=0) self.ShouldBeStatus[node]="down" return 1 def cluster_stable(self, timeout = None): time.sleep(self["StableTime"]) return 1 def node_stable(self, node): return 1 def RereadCM(self, node): '''Force the cluster manager on a given node to reread its config This may be a no-op on certain cluster managers. ''' rc=self.rsh(node, self["RereadCmd"]) if rc == 0: return 1 else: self.log ("Could not force %s on node %s to reread its config" % (self["Name"], node)) return None def StataCM(self, node): '''Report the status of the cluster manager on a given node''' out=self.rsh(node, self["StatusCmd"], 1) ret= (string.find(out, 'stopped') == -1) try: if ret: if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s" % (node, "up", self.ShouldBeStatus[node])) else: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" % (node, "down", self.ShouldBeStatus[node])) except KeyError: pass if ret: self.ShouldBeStatus[node]="up" else: self.ShouldBeStatus[node]="down" return ret def startall(self, nodelist=None, verbose=False): '''Start the cluster manager on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' ret = 1 map = {} if not nodelist: nodelist=self.Env["nodes"] for node in nodelist: if self.ShouldBeStatus[node] == "down": if not self.StartaCM(node, verbose=verbose): ret = 0 return ret def stopall(self, nodelist=None, verbose=False): '''Stop the cluster managers on every node in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' ret = 1 map = {} if not nodelist: nodelist=self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": if not self.StopaCM(node, verbose=verbose): ret = 0 return ret def rereadall(self, nodelist=None): '''Force the cluster managers on every node in the cluster to reread their config files. We can do it on a subset of the cluster if nodelist is not None. ''' map = {} if not nodelist: nodelist=self.Env["nodes"] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": self.RereadCM(node) def statall(self, nodelist=None): '''Return the status of the cluster managers in the cluster. We can do it on a subset of the cluster if nodelist is not None. ''' result={} if not nodelist: nodelist=self.Env["nodes"] for node in nodelist: if self.StataCM(node): result[node] = "up" else: result[node] = "down" return result def isolate_node(self, target, nodes=None): '''isolate the communication between the nodes''' if not nodes: nodes = self.Env["nodes"] for node in nodes: if node != target: rc = self.rsh(target, self["BreakCommCmd"] % self.key_for_node(node)) if rc != 0: self.log("Could not break the communication between %s and %s: %d" % (target, node, rc)) return None else: self.debug("Communication cut between %s and %s" % (target, node)) return 1 def unisolate_node(self, target, nodes=None): '''fix the communication between the nodes''' if not nodes: nodes = self.Env["nodes"] for node in nodes: if node != target: restored = 0 # Limit the amount of time we have asynchronous connectivity for # Restore both sides as simultaneously as possible self.rsh(target, self["FixCommCmd"] % self.key_for_node(node), synchronous=0) self.rsh(node, self["FixCommCmd"] % self.key_for_node(target), synchronous=0) self.debug("Communication restored between %s and %s" % (target, node)) def reducecomm_node(self,node): '''reduce the communication between the nodes''' rc = self.rsh(node, self["ReduceCommCmd"]%(self.Env["XmitLoss"],self.Env["RecvLoss"])) if rc == 0: return 1 else: self.log("Could not reduce the communication between the nodes from node: %s" % node) return None def restorecomm_node(self,node): '''restore the saved communication between the nodes''' rc = 0 if float(self.Env["XmitLoss"])!=0 or float(self.Env["RecvLoss"])!=0 : rc = self.rsh(node, self["RestoreCommCmd"]); if rc == 0: return 1 else: self.log("Could not restore the communication between the nodes from node: %s" % node) return None def HasQuorum(self, node_list): "Return TRUE if the cluster currently has quorum" # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes raise ValueError("Abstract Class member (HasQuorum)") def Components(self): raise ValueError("Abstract Class member (Components)") def oprofileStart(self, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileStart(n) elif node in self.Env["oprofile"]: self.debug("Enabling oprofile on %s" % node) self.rsh(node, "opcontrol --init") self.rsh(node, "opcontrol --setup --no-vmlinux --separate=lib --callgraph=20 --image=all") self.rsh(node, "opcontrol --start") self.rsh(node, "opcontrol --reset") def oprofileSave(self, test, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileSave(test, n) elif node in self.Env["oprofile"]: self.rsh(node, "opcontrol --dump") self.rsh(node, "opcontrol --save=cts.%d" % test) # Read back with: opreport -l session:cts.0 image:/usr/lib/heartbeat/c* if None: self.rsh(node, "opcontrol --reset") else: self.oprofileStop(node) self.oprofileStart(node) def oprofileStop(self, node=None): if not node: for n in self.Env["oprofile"]: self.oprofileStop(n) elif node in self.Env["oprofile"]: self.debug("Stopping oprofile on %s" % node) self.rsh(node, "opcontrol --reset") self.rsh(node, "opcontrol --shutdown 2>&1 > /dev/null") class Resource: ''' This is an HA resource (not a resource group). A resource group is just an ordered list of Resource objects. ''' def __init__(self, cm, rsctype=None, instance=None): self.CM = cm self.ResourceType = rsctype self.Instance = instance self.needs_quorum = 1 def Type(self): return self.ResourceType def Instance(self, nodename): return self.Instance def IsRunningOn(self, nodename): ''' This member function returns true if our resource is running on the given node in the cluster. It is analagous to the "status" operation on SystemV init scripts and heartbeat scripts. FailSafe calls it the "exclusive" operation. ''' raise ValueError("Abstract Class member (IsRunningOn)") return None def IsWorkingCorrectly(self, nodename): ''' This member function returns true if our resource is operating correctly on the given node in the cluster. Heartbeat does not require this operation, but it might be called the Monitor operation, which is what FailSafe calls it. For remotely monitorable resources (like IP addresses), they *should* be monitored remotely for testing. ''' raise ValueError("Abstract Class member (IsWorkingCorrectly)") return None def Start(self, nodename): ''' This member function starts or activates the resource. ''' raise ValueError("Abstract Class member (Start)") return None def Stop(self, nodename): ''' This member function stops or deactivates the resource. ''' raise ValueError("Abstract Class member (Stop)") return None def __repr__(self): if (self.Instance and len(self.Instance) > 1): return "{" + self.ResourceType + "::" + self.Instance + "}" else: return "{" + self.ResourceType + "}" class Component: def kill(self, node): None class Process(Component): def __init__(self, cm, name, process=None, dc_only=0, pats=[], dc_pats=[], badnews_ignore=[], triggersreboot=0): self.name = str(name) self.dc_only = dc_only self.pats = pats self.dc_pats = dc_pats self.CM = cm self.badnews_ignore = badnews_ignore self.triggersreboot = triggersreboot if process: self.proc = str(process) else: self.proc = str(name) self.KillCmd = "killall -9 " + self.proc def kill(self, node): if self.CM.rsh(node, self.KillCmd) != 0: self.CM.log ("ERROR: Kill %s failed on node %s" %(self.name,node)) return None return 1 diff --git a/cts/CTSscenarios.py b/cts/CTSscenarios.py index f1dd9dea5c..9cf57db2d7 100644 --- a/cts/CTSscenarios.py +++ b/cts/CTSscenarios.py @@ -1,544 +1,546 @@ from CTS import * from CTStests import CTSTest from CTSaudits import ClusterAudit class ScenarioComponent: def __init__(self, Env): self.Env = Env def IsApplicable(self): '''Return TRUE if the current ScenarioComponent is applicable in the given LabEnvironment given to the constructor. ''' raise ValueError("Abstract Class member (IsApplicable)") def SetUp(self, CM): '''Set up the given ScenarioComponent''' raise ValueError("Abstract Class member (Setup)") def TearDown(self, CM): '''Tear down (undo) the given ScenarioComponent''' raise ValueError("Abstract Class member (Setup)") class Scenario: ( '''The basic idea of a scenario is that of an ordered list of ScenarioComponent objects. Each ScenarioComponent is SetUp() in turn, and then after the tests have been run, they are torn down using TearDown() (in reverse order). A Scenario is applicable to a particular cluster manager iff each ScenarioComponent is applicable. A partially set up scenario is torn down if it fails during setup. ''') def __init__(self, ClusterManager, Components, Audits, Tests): "Initialize the Scenario from the list of ScenarioComponents" self.ClusterManager = ClusterManager self.Components = Components self.Audits = Audits self.Tests = Tests self.BadNews = None self.TestSets = [] self.Stats = {"success":0, "failure":0, "BadNews":0, "skipped":0} self.Sets = [] #self.ns=CTS.NodeStatus(self.Env) for comp in Components: if not issubclass(comp.__class__, ScenarioComponent): raise ValueError("Init value must be subclass of ScenarioComponent") for audit in Audits: if not issubclass(audit.__class__, ClusterAudit): raise ValueError("Init value must be subclass of ClusterAudit") for test in Tests: if not issubclass(test.__class__, CTSTest): raise ValueError("Init value must be a subclass of CTSTest") def IsApplicable(self): ( '''A Scenario IsApplicable() iff each of its ScenarioComponents IsApplicable() ''' ) for comp in self.Components: if not comp.IsApplicable(): return None return 1 def SetUp(self): '''Set up the Scenario. Return TRUE on success.''' self.ClusterManager.prepare() self.ClusterManager.ns.WaitForAllNodesToComeUp(self.ClusterManager.Env["nodes"]) self.audit() if self.ClusterManager.Env["valgrind-tests"]: self.ClusterManager.install_helper("cts.supp") self.BadNews = LogWatcher(self.ClusterManager.Env, self.ClusterManager["LogFileName"], self.ClusterManager["BadRegexes"], "BadNews", 0) self.BadNews.setwatch() # Call after we've figured out what type of log watching to do in LogAudit j=0 while j < len(self.Components): if not self.Components[j].SetUp(self.ClusterManager): # OOPS! We failed. Tear partial setups down. self.audit() self.ClusterManager.log("Tearing down partial setup") self.TearDown(j) return None j=j+1 self.audit() return 1 def TearDown(self, max=None): '''Tear Down the Scenario - in reverse order.''' if max == None: max = len(self.Components)-1 j=max while j >= 0: self.Components[j].TearDown(self.ClusterManager) j=j-1 self.audit() def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not self.Stats.has_key(name): self.Stats[name]=0 self.Stats[name] = self.Stats[name]+1 def run(self, Iterations): self.ClusterManager.oprofileStart() try: self.run_loop(Iterations) self.ClusterManager.oprofileStop() except: self.ClusterManager.oprofileStop() raise def run_loop(self, Iterations): raise ValueError("Abstract Class member (run_loop)") def run_test(self, test, testcount): nodechoice = self.ClusterManager.Env.RandomNode() ret = 1 where = "" did_run = 0 - + + self.ClusterManager.instance_errorstoignore_clear() self.ClusterManager.log(("Running test %s" % test.name).ljust(35) + (" (%s) " % nodechoice).ljust(15) +"["+ ("%d" % testcount).rjust(3) +"]") starttime = test.set_timer() if not test.setup(nodechoice): self.ClusterManager.log("Setup failed") ret = 0 elif not test.canrunnow(nodechoice): self.ClusterManager.log("Skipped") test.skipped() else: did_run = 1 ret = test(nodechoice) if not test.teardown(nodechoice): self.ClusterManager.log("Teardown failed") answer = raw_input('Continue? [nY] ') if answer and answer == "n": raise ValueError("Teardown of %s on %s failed" % (test.name, nodechoice)) ret = 0 stoptime=time.time() self.ClusterManager.oprofileSave(testcount) elapsed_time = stoptime - starttime test_time = stoptime - test.get_timer() if not test.has_key("min_time"): test["elapsed_time"] = elapsed_time test["min_time"] = test_time test["max_time"] = test_time else: test["elapsed_time"] = test["elapsed_time"] + elapsed_time if test_time < test["min_time"]: test["min_time"] = test_time if test_time > test["max_time"]: test["max_time"] = test_time if ret: self.incr("success") test.log_timer() else: self.incr("failure") self.ClusterManager.statall() did_run = 1 # Force the test count to be incrimented anyway so test extraction works self.audit(test.errorstoignore()) return did_run def summarize(self): self.ClusterManager.log("****************") self.ClusterManager.log("Overall Results:" + repr(self.Stats)) self.ClusterManager.log("****************") stat_filter = { "calls":0, "failure":0, "skipped":0, "auditfail":0, } self.ClusterManager.log("Test Summary") for test in self.Tests: for key in stat_filter.keys(): stat_filter[key] = test.Stats[key] self.ClusterManager.log(("Test %s: "%test.name).ljust(25) + " %s"%repr(stat_filter)) self.ClusterManager.debug("Detailed Results") for test in self.Tests: self.ClusterManager.debug(("Test %s: "%test.name).ljust(25) + " %s"%repr(test.Stats)) self.ClusterManager.log("<<<<<<<<<<<<<<<< TESTS COMPLETED") def audit(self, LocalIgnore=[]): errcount=0 ignorelist = [] ignorelist.append("CTS:") ignorelist.extend(LocalIgnore) ignorelist.extend(self.ClusterManager.errorstoignore()) + ignorelist.extend(self.ClusterManager.instance_errorstoignore()) # This makes sure everything is stabilized before starting... failed = 0 for audit in self.Audits: if not audit(): self.ClusterManager.log("Audit " + audit.name() + " FAILED.") failed += 1 else: self.ClusterManager.debug("Audit " + audit.name() + " passed.") while errcount < 1000: match = None if self.BadNews: match=self.BadNews.look(0) if match: add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): add_err = 0 if add_err == 1: self.ClusterManager.log("BadNews: " + match) self.incr("BadNews") errcount=errcount+1 else: break else: answer = raw_input('Big problems. Continue? [nY]') if answer and answer == "n": self.ClusterManager.log("Shutting down.") self.summarize() self.TearDown() raise ValueError("Looks like we hit a BadNews jackpot!") return failed class AllOnce(Scenario): '''Every Test Once''' # Accessable as __doc__ def run_loop(self, Iterations): testcount=1 for test in self.Tests: self.run_test(test, testcount) testcount += 1 class RandomTests(Scenario): '''Random Test Execution''' def run_loop(self, Iterations): testcount=1 while testcount <= Iterations: test = self.ClusterManager.Env.RandomGen.choice(self.Tests) self.run_test(test, testcount) testcount += 1 class BasicSanity(Scenario): '''Basic Cluster Sanity''' def run_loop(self, Iterations): testcount=1 while testcount <= Iterations: test = self.Environment.RandomGen.choice(self.Tests) self.run_test(test, testcount) testcount += 1 class Sequence(Scenario): '''Named Tests in Sequence''' def run_loop(self, Iterations): testcount=1 while testcount <= Iterations: for test in self.Tests: self.run_test(test, testcount) testcount += 1 class InitClusterManager(ScenarioComponent): ( '''InitClusterManager is the most basic of ScenarioComponents. This ScenarioComponent simply starts the cluster manager on all the nodes. It is fairly robust as it waits for all nodes to come up before starting as they might have been rebooted or crashed for some reason beforehand. ''') def __init__(self, Env): pass def IsApplicable(self): '''InitClusterManager is so generic it is always Applicable''' return 1 def SetUp(self, CM): '''Basic Cluster Manager startup. Start everything''' CM.prepare() # Clear out the cobwebs ;-) self.TearDown(CM) # Now start the Cluster Manager on all the nodes. CM.log("Starting Cluster Manager on all nodes.") return CM.startall(verbose=True) def TearDown(self, CM): '''Set up the given ScenarioComponent''' # Stop the cluster manager everywhere CM.log("Stopping Cluster Manager on all nodes") return CM.stopall(verbose=True) class PingFest(ScenarioComponent): ( '''PingFest does a flood ping to each node in the cluster from the test machine. If the LabEnvironment Parameter PingSize is set, it will be used as the size of ping packet requested (via the -s option). If it is not set, it defaults to 1024 bytes. According to the manual page for ping: Outputs packets as fast as they come back or one hundred times per second, whichever is more. For every ECHO_REQUEST sent a period ``.'' is printed, while for every ECHO_REPLY received a backspace is printed. This provides a rapid display of how many packets are being dropped. Only the super-user may use this option. This can be very hard on a net- work and should be used with caution. ''' ) def __init__(self, Env): self.Env = Env def IsApplicable(self): '''PingFests are always applicable ;-) ''' return 1 def SetUp(self, CM): '''Start the PingFest!''' self.PingSize=1024 if CM.Env.has_key("PingSize"): self.PingSize=CM.Env["PingSize"] CM.log("Starting %d byte flood pings" % self.PingSize) self.PingPids=[] for node in CM.Env["nodes"]: self.PingPids.append(self._pingchild(node)) CM.log("Ping PIDs: " + repr(self.PingPids)) return 1 def TearDown(self, CM): '''Stop it right now! My ears are pinging!!''' for pid in self.PingPids: if pid != None: CM.log("Stopping ping process %d" % pid) os.kill(pid, signal.SIGKILL) def _pingchild(self, node): Args = ["ping", "-qfn", "-s", str(self.PingSize), node] sys.stdin.flush() sys.stdout.flush() sys.stderr.flush() pid = os.fork() if pid < 0: self.Env.log("Cannot fork ping child") return None if pid > 0: return pid # Otherwise, we're the child process. os.execvp("ping", Args) self.Env.log("Cannot execvp ping: " + repr(Args)) sys.exit(1) class PacketLoss(ScenarioComponent): ( ''' It would be useful to do some testing of CTS with a modest amount of packet loss enabled - so we could see that everything runs like it should with a certain amount of packet loss present. ''') def IsApplicable(self): '''always Applicable''' return 1 def SetUp(self, CM): '''Reduce the reliability of communications''' if float(CM.Env["XmitLoss"]) == 0 and float(CM.Env["RecvLoss"]) == 0 : return 1 for node in CM.Env["nodes"]: CM.reducecomm_node(node) CM.log("Reduce the reliability of communications") return 1 def TearDown(self, CM): '''Fix the reliability of communications''' if float(CM.Env["XmitLoss"]) == 0 and float(CM.Env["RecvLoss"]) == 0 : return 1 for node in CM.Env["nodes"]: CM.unisolate_node(node) CM.log("Fix the reliability of communications") class BasicSanityCheck(ScenarioComponent): ( ''' ''') def IsApplicable(self): return self.Env["DoBSC"] def SetUp(self, CM): CM.prepare() # Clear out the cobwebs self.TearDown(CM) # Now start the Cluster Manager on all the nodes. CM.log("Starting Cluster Manager on BSC node(s).") return CM.startall() def TearDown(self, CM): CM.log("Stopping Cluster Manager on BSC node(s).") return CM.stopall() class Benchmark(ScenarioComponent): ( ''' ''') def IsApplicable(self): return self.Env["benchmark"] def SetUp(self, CM): CM.prepare() # Clear out the cobwebs self.TearDown(CM) # Now start the Cluster Manager on all the nodes. CM.log("Starting Cluster Manager on all node(s).") return CM.startall() def TearDown(self, CM): CM.log("Stopping Cluster Manager on all node(s).") return CM.stopall() class RollingUpgrade(ScenarioComponent): ( ''' Test a rolling upgrade between two versions of the stack ''') def __init__(self, Env): self.Env = Env def IsApplicable(self): if not self.Env["rpm-dir"]: return None if not self.Env["current-version"]: return None if not self.Env["previous-version"]: return None return 1 def install(self, node, version): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.CM.Env["rpm-dir"], version) rc = self.CM.rsh(node, "mkdir -p %s" % target_dir) rc = self.CM.cp("%s/*.rpm %s:%s" % (src_dir, node, target_dir)) rc = self.CM.rsh(node, "rpm -Uvh --force %s/*.rpm" % (target_dir)) return self.success() def upgrade(self, node): return self.install(node, self.CM.Env["current-version"]) def downgrade(self, node): return self.install(node, self.CM.Env["previous-version"]) def SetUp(self, CM): CM.prepare() # Clear out the cobwebs CM.stopall() CM.log("Downgrading all nodes to %s." % self.Env["previous-version"]) for node in self.Env["nodes"]: if not self.downgrade(node): CM.log("Couldn't downgrade %s" % node) return None return 1 def TearDown(self, CM): # Stop everything CM.log("Stopping Cluster Manager on Upgrade nodes.") CM.stopall() CM.log("Upgrading all nodes to %s." % self.Env["current-version"]) for node in self.Env["nodes"]: if not self.upgrade(node): CM.log("Couldn't upgrade %s" % node) return None return 1 diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c index bfc0c8b7ad..241a6f4c99 100644 --- a/lib/cluster/cluster.c +++ b/lib/cluster/cluster.c @@ -1,518 +1,529 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "stack.h" CRM_TRACE_INIT_DATA(cluster); #if SUPPORT_HEARTBEAT void *hb_library = NULL; #endif static GHashTable *crm_uuid_cache = NULL; static GHashTable *crm_uname_cache = NULL; xmlNode *create_common_message(xmlNode * original_request, xmlNode * xml_response_data); static char * get_heartbeat_uuid(uint32_t unused, const char *uname) { char *uuid_calc = NULL; #if SUPPORT_HEARTBEAT cl_uuid_t uuid_raw; const char *unknown = "00000000-0000-0000-0000-000000000000"; if (heartbeat_cluster == NULL) { crm_warn("No connection to heartbeat, using uuid=uname"); return NULL; } if (heartbeat_cluster->llc_ops->get_uuid_by_name(heartbeat_cluster, uname, &uuid_raw) == HA_FAIL) { crm_err("get_uuid_by_name() call failed for host %s", uname); crm_free(uuid_calc); return NULL; } crm_malloc0(uuid_calc, 50); cl_uuid_unparse(&uuid_raw, uuid_calc); if (safe_str_eq(uuid_calc, unknown)) { crm_warn("Could not calculate UUID for %s", uname); crm_free(uuid_calc); return NULL; } #endif return uuid_calc; } static gboolean uname_is_uuid(void) { static const char *uuid_pref = NULL; if (uuid_pref == NULL) { uuid_pref = getenv("PCMK_uname_is_uuid"); } if (uuid_pref == NULL) { /* true is legacy mode */ uuid_pref = "false"; } return crm_is_true(uuid_pref); } int get_corosync_id(int id, const char *uuid) { if (id == 0 && !uname_is_uuid() && is_corosync_cluster()) { id = crm_atoi(uuid, "0"); } return id; } char * get_corosync_uuid(uint32_t id, const char *uname) { if (!uname_is_uuid() && is_corosync_cluster()) { if (id <= 0) { /* Try the membership cache... */ crm_node_t *node = g_hash_table_lookup(crm_peer_cache, uname); if (node != NULL) { id = node->id; } } if (id > 0) { return crm_itoa(id); } else { crm_warn("Node %s is not yet known by corosync", uname); } } else if (uname != NULL) { return crm_strdup(uname); } return NULL; } const char * get_node_uuid(uint32_t id, const char *uname) { char *uuid = NULL; enum cluster_type_e type = get_cluster_type(); if (crm_uuid_cache == NULL) { crm_uuid_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } /* avoid blocking heartbeat calls where possible */ if (uname) { uuid = g_hash_table_lookup(crm_uuid_cache, uname); } if (uuid != NULL) { return uuid; } switch (type) { case pcmk_cluster_corosync: uuid = get_corosync_uuid(id, uname); break; case pcmk_cluster_cman: case pcmk_cluster_classic_ais: if (uname) { uuid = crm_strdup(uname); } break; case pcmk_cluster_heartbeat: uuid = get_heartbeat_uuid(id, uname); break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: crm_err("Unsupported cluster type"); break; } if (uuid == NULL) { return NULL; } if (uname) { g_hash_table_insert(crm_uuid_cache, crm_strdup(uname), uuid); return g_hash_table_lookup(crm_uuid_cache, uname); } /* Memory leak! */ CRM_LOG_ASSERT(uname != NULL); return uuid; } gboolean crm_cluster_connect(char **our_uname, char **our_uuid, void *dispatch, void *destroy, #if SUPPORT_HEARTBEAT ll_cluster_t ** hb_conn #else void **hb_conn #endif ) { enum cluster_type_e type = get_cluster_type(); crm_notice("Connecting to cluster infrastructure: %s", name_for_cluster_type(type)); if (hb_conn != NULL) { *hb_conn = NULL; } #if SUPPORT_COROSYNC if (is_openais_cluster()) { crm_peer_init(); return init_ais_connection(dispatch, destroy, our_uuid, our_uname, NULL); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { int rv; CRM_ASSERT(hb_conn != NULL); /* coverity[var_deref_op] False positive */ if (*hb_conn == NULL) { /* No object passed in, create a new one. */ ll_cluster_t *(*new_cluster) (const char *llctype) = find_library_function(&hb_library, HEARTBEAT_LIBRARY, "ll_cluster_new"); *hb_conn = (*new_cluster) ("heartbeat"); /* dlclose(handle); */ } else { /* Object passed in. Disconnect first, then reconnect below. */ ll_cluster_t *conn = *hb_conn; conn->llc_ops->signoff(conn, FALSE); } /* make sure we are disconnected first with the old object, if any. */ if (heartbeat_cluster && heartbeat_cluster != *hb_conn) { heartbeat_cluster->llc_ops->signoff(heartbeat_cluster, FALSE); } CRM_ASSERT(*hb_conn != NULL); heartbeat_cluster = *hb_conn; rv = register_heartbeat_conn(heartbeat_cluster, our_uuid, our_uname, dispatch, destroy); if (rv) { /* we'll benefit from a bigger queue length on heartbeat side. * Otherwise, if peers send messages faster than we can consume * them right now, heartbeat messaging layer will kick us out once * it's (small) default queue fills up :( * If we fail to adjust the sendq length, that's not yet fatal, though. */ if (HA_OK != (*hb_conn)->llc_ops->set_sendq_len(*hb_conn, 1024)) { crm_warn("Cannot set sendq length: %s", (*hb_conn)->llc_ops->errmsg(*hb_conn)); } } return rv; } #endif crm_info("Unsupported cluster stack: %s", getenv("HA_cluster_type")); return FALSE; } gboolean send_cluster_message(const char *node, enum crm_ais_msg_types service, xmlNode * data, gboolean ordered) { #if SUPPORT_COROSYNC if (is_openais_cluster()) { return send_ais_message(data, FALSE, node, service); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { return send_ha_message(heartbeat_cluster, data, node, ordered); } #endif return FALSE; } void empty_uuid_cache(void) { if (crm_uuid_cache != NULL) { g_hash_table_destroy(crm_uuid_cache); crm_uuid_cache = NULL; } } void unget_uuid(const char *uname) { if (crm_uuid_cache == NULL) { return; } g_hash_table_remove(crm_uuid_cache, uname); } const char * get_uuid(const char *uname) { return get_node_uuid(0, uname); } const char * get_uname(const char *uuid) { - char *uname = NULL; + const char *uname = NULL; if (crm_uname_cache == NULL) { crm_uname_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } CRM_CHECK(uuid != NULL, return NULL); /* avoid blocking calls where possible */ uname = g_hash_table_lookup(crm_uname_cache, uuid); if (uname != NULL) { return uname; } #if SUPPORT_COROSYNC if (is_openais_cluster()) { - g_hash_table_insert(crm_uname_cache, crm_strdup(uuid), crm_strdup(uuid)); + if (!uname_is_uuid() && is_corosync_cluster()) { + uint32_t id = crm_int_helper(uuid, NULL); + crm_node_t *node = g_hash_table_lookup(crm_peer_id_cache, GUINT_TO_POINTER(id)); + + uname = node ? node->uname : NULL; + } else { + uname = uuid; + } + + if (uname) { + g_hash_table_insert(crm_uname_cache, crm_strdup(uuid), crm_strdup(uname)); + } } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { if (heartbeat_cluster != NULL && uuid != NULL) { cl_uuid_t uuid_raw; char *uuid_copy = crm_strdup(uuid); cl_uuid_parse(uuid_copy, &uuid_raw); crm_malloc(uname, MAX_NAME); if (heartbeat_cluster->llc_ops->get_name_by_uuid(heartbeat_cluster, &uuid_raw, uname, MAX_NAME) == HA_FAIL) { crm_err("Could not calculate uname for %s", uuid); crm_free(uuid_copy); crm_free(uname); } else { g_hash_table_insert(crm_uname_cache, uuid_copy, uname); } } } #endif return g_hash_table_lookup(crm_uname_cache, uuid); } void set_uuid(xmlNode * node, const char *attr, const char *uname) { const char *uuid_calc = get_uuid(uname); crm_xml_add(node, attr, uuid_calc); return; } xmlNode * createPingAnswerFragment(const char *from, const char *status) { xmlNode *ping = NULL; ping = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(ping, XML_PING_ATTR_STATUS, status); crm_xml_add(ping, XML_PING_ATTR_SYSFROM, from); return ping; } const char * name_for_cluster_type(enum cluster_type_e type) { switch (type) { case pcmk_cluster_classic_ais: return "classic openais (with plugin)"; case pcmk_cluster_cman: return "cman"; case pcmk_cluster_corosync: return "corosync"; case pcmk_cluster_heartbeat: return "heartbeat"; case pcmk_cluster_unknown: return "unknown"; case pcmk_cluster_invalid: return "invalid"; } crm_err("Invalid cluster type: %d", type); return "invalid"; } /* Do not expose these two */ int set_cluster_type(enum cluster_type_e type); static enum cluster_type_e cluster_type = pcmk_cluster_unknown; int set_cluster_type(enum cluster_type_e type) { if (cluster_type == pcmk_cluster_unknown) { crm_info("Cluster type set to: %s", name_for_cluster_type(type)); cluster_type = type; return 0; } else if (cluster_type == type) { return 0; } else if (pcmk_cluster_unknown == type) { cluster_type = type; return 0; } crm_err("Cluster type already set to %s, ignoring %s", name_for_cluster_type(cluster_type), name_for_cluster_type(type)); return -1; } enum cluster_type_e get_cluster_type(void) { if (cluster_type == pcmk_cluster_unknown) { const char *cluster = getenv("HA_cluster_type"); cluster_type = pcmk_cluster_invalid; if (cluster) { crm_info("Cluster type is: '%s'", cluster); } else { #if SUPPORT_COROSYNC cluster_type = find_corosync_variant(); if (cluster_type == pcmk_cluster_unknown) { cluster = "heartbeat"; crm_info("Assuming a 'heartbeat' based cluster"); } else { cluster = name_for_cluster_type(cluster_type); crm_info("Detected an active '%s' cluster", cluster); } #else cluster = "heartbeat"; #endif } if (safe_str_eq(cluster, "heartbeat")) { #if SUPPORT_HEARTBEAT cluster_type = pcmk_cluster_heartbeat; #else cluster_type = pcmk_cluster_invalid; #endif } else if (safe_str_eq(cluster, "openais") || safe_str_eq(cluster, "classic openais (with plugin)")) { #if SUPPORT_COROSYNC cluster_type = pcmk_cluster_classic_ais; #else cluster_type = pcmk_cluster_invalid; #endif } else if (safe_str_eq(cluster, "corosync")) { #if SUPPORT_COROSYNC cluster_type = pcmk_cluster_corosync; #else cluster_type = pcmk_cluster_invalid; #endif } else if (safe_str_eq(cluster, "cman")) { #if SUPPORT_CMAN cluster_type = pcmk_cluster_cman; #else cluster_type = pcmk_cluster_invalid; #endif } else { cluster_type = pcmk_cluster_invalid; } if (cluster_type == pcmk_cluster_invalid) { crm_crit ("This installation of Pacemaker does not support the '%s' cluster infrastructure. Terminating.", cluster); exit(100); } } return cluster_type; } gboolean is_cman_cluster(void) { return get_cluster_type() == pcmk_cluster_cman; } gboolean is_corosync_cluster(void) { return get_cluster_type() == pcmk_cluster_corosync; } gboolean is_classic_ais_cluster(void) { return get_cluster_type() == pcmk_cluster_classic_ais; } gboolean is_openais_cluster(void) { enum cluster_type_e type = get_cluster_type(); if (type == pcmk_cluster_classic_ais) { return TRUE; } else if (type == pcmk_cluster_corosync) { return TRUE; } else if (type == pcmk_cluster_cman) { return TRUE; } return FALSE; } gboolean is_heartbeat_cluster(void) { return get_cluster_type() == pcmk_cluster_heartbeat; } diff --git a/pengine/allocate.c b/pengine/allocate.c index 9ff0344d6d..be45cb31d8 100644 --- a/pengine/allocate.c +++ b/pengine/allocate.c @@ -1,2262 +1,2273 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_allocate); void set_alloc_actions(pe_working_set_t * data_set); void migrate_reload_madness(pe_working_set_t * data_set); resource_alloc_functions_t resource_class_alloc_functions[] = { { native_merge_weights, native_color, native_create_actions, native_create_probe, native_internal_constraints, native_rsc_colocation_lh, native_rsc_colocation_rh, native_rsc_location, native_action_flags, native_update_actions, native_expand, native_append_meta, }, { group_merge_weights, group_color, group_create_actions, native_create_probe, group_internal_constraints, group_rsc_colocation_lh, group_rsc_colocation_rh, group_rsc_location, group_action_flags, group_update_actions, group_expand, group_append_meta, }, { native_merge_weights, clone_color, clone_create_actions, clone_create_probe, clone_internal_constraints, clone_rsc_colocation_lh, clone_rsc_colocation_rh, clone_rsc_location, clone_action_flags, clone_update_actions, clone_expand, clone_append_meta, }, { native_merge_weights, master_color, master_create_actions, clone_create_probe, master_internal_constraints, clone_rsc_colocation_lh, master_rsc_colocation_rh, clone_rsc_location, clone_action_flags, clone_update_actions, clone_expand, master_append_meta, } }; static gboolean check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { int attr_lpc = 0; gboolean force_restart = FALSE; gboolean delete_resource = FALSE; const char *value = NULL; const char *old_value = NULL; const char *attr_list[] = { XML_ATTR_TYPE, XML_AGENT_ATTR_CLASS, XML_AGENT_ATTR_PROVIDER }; for (; attr_lpc < DIMOF(attr_list); attr_lpc++) { value = crm_element_value(rsc->xml, attr_list[attr_lpc]); old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]); if (value == old_value /* ie. NULL */ || crm_str_eq(value, old_value, TRUE)) { continue; } force_restart = TRUE; crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s", rsc->id, node->details->uname, attr_list[attr_lpc], crm_str(old_value), crm_str(value)); } if (force_restart) { /* make sure the restart happens */ stop_action(rsc, node, FALSE); set_bit(rsc->flags, pe_rsc_start_pending); delete_resource = TRUE; } return delete_resource; } static void CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node, const char *reason, pe_working_set_t * data_set) { int interval = 0; action_t *cancel = NULL; char *key = NULL; const char *task = NULL; const char *call_id = NULL; const char *interval_s = NULL; CRM_CHECK(xml_op != NULL, return); CRM_CHECK(active_node != NULL, return); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); crm_info("Action %s on %s will be stopped: %s", key, active_node->details->uname, reason ? reason : "unknown"); cancel = custom_action(rsc, crm_strdup(key), RSC_CANCEL, active_node, FALSE, TRUE, data_set); crm_free(cancel->task); cancel->task = crm_strdup(RSC_CANCEL); add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task); add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id); add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s); custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set); crm_free(key); key = NULL; } static gboolean check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; int interval = 0; const char *interval_s = NULL; gboolean did_change = FALSE; xmlNode *params_all = NULL; xmlNode *params_restart = NULL; GHashTable *local_rsc_params = NULL; char *digest_all_calc = NULL; const char *digest_all = NULL; const char *restart_list = NULL; const char *digest_restart = NULL; char *digest_restart_calc = NULL; action_t *action = NULL; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); CRM_CHECK(active_node != NULL, return FALSE); if (safe_str_eq(task, RSC_STOP)) { return FALSE; } interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); /* we need to reconstruct the key because of the way we used to construct resource IDs */ key = generate_op_key(rsc->id, task, interval); if (interval > 0) { xmlNode *op_match = NULL; crm_trace("Checking parameters for %s", key); op_match = find_rsc_op_entry(rsc, key); if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) { CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set); crm_free(key); key = NULL; return TRUE; } else if (op_match == NULL) { crm_debug("Orphan action detected: %s on %s", key, active_node->details->uname); crm_free(key); key = NULL; return TRUE; } } action = custom_action(rsc, key, task, active_node, TRUE, FALSE, data_set); local_rsc_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(local_rsc_params, rsc, active_node, data_set); params_all = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(local_rsc_params, hash2field, params_all); g_hash_table_foreach(action->extra, hash2field, params_all); g_hash_table_foreach(rsc->parameters, hash2field, params_all); g_hash_table_foreach(action->meta, hash2metafield, params_all); filter_action_parameters(params_all, op_version); digest_all_calc = calculate_operation_digest(params_all, op_version); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); if (interval == 0 && safe_str_eq(task, RSC_STATUS)) { /* Reload based on the start action not a probe */ task = RSC_START; } else if (interval == 0 && safe_str_eq(task, RSC_MIGRATED)) { /* Reload based on the start action not a migrate */ task = RSC_START; } if (digest_restart) { /* Changes that force a restart */ params_restart = copy_xml(params_all); if (restart_list) { filter_reload_parameters(params_restart, restart_list); } digest_restart_calc = calculate_operation_digest(params_restart, op_version); if (safe_str_neq(digest_restart_calc, digest_restart)) { did_change = TRUE; crm_log_xml_info(params_restart, "params:restart"); crm_info("Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s", key, active_node->details->uname, crm_str(digest_restart), digest_restart_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); key = generate_op_key(rsc->id, task, interval); custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); goto cleanup; } } if (safe_str_neq(digest_all_calc, digest_all)) { /* Changes that can potentially be handled by a reload */ did_change = TRUE; crm_log_xml_info(params_all, "params:reload"); crm_info("Parameters to %s on %s changed: was %s vs. now %s (reload:%s) %s", key, active_node->details->uname, crm_str(digest_all), digest_all_calc, op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC)); if (interval > 0) { action_t *op = NULL; #if 0 /* Always reload/restart the entire resource */ op = custom_action(rsc, start_key(rsc), RSC_START, NULL, FALSE, TRUE, data_set); update_action_flags(op, pe_action_allow_reload_conversion); #else /* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */ key = generate_op_key(rsc->id, task, interval); op = custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); custom_action_order(rsc, start_key(rsc), NULL, NULL, NULL, op, pe_order_runnable_left, data_set); #endif } else if (digest_restart) { crm_trace("Reloading '%s' action for resource %s", task, rsc->id); /* Allow this resource to reload - unless something else causes a full restart */ set_bit(rsc->flags, pe_rsc_try_reload); /* Create these for now, it keeps the action IDs the same in the regression outputs */ key = generate_op_key(rsc->id, task, interval); custom_action(rsc, key, task, NULL, TRUE, TRUE, data_set); } else { crm_trace("Resource %s doesn't know how to reload", rsc->id); /* Re-send the start/demote/promote op * Recurring ops will be detected independantly */ key = generate_op_key(rsc->id, task, interval); custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set); } } cleanup: free_xml(params_all); free_xml(params_restart); crm_free(digest_all_calc); crm_free(digest_restart_calc); g_hash_table_destroy(local_rsc_params); pe_free_action(action); return did_change; } extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); static void check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set) { GListPtr gIter = NULL; int offset = -1; int interval = 0; int stop_index = 0; int start_index = 0; const char *task = NULL; const char *interval_s = NULL; xmlNode *rsc_op = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; gboolean is_probe = FALSE; + gboolean did_change = FALSE; CRM_CHECK(node != NULL, return); if (is_set(rsc->flags, pe_rsc_orphan)) { crm_trace("Skipping param check for %s: orphan", rsc->id); return; } else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { crm_trace("Skipping param check for %s: no longer active on %s", rsc->id, node->details->uname); return; } crm_trace("Processing %s on %s", rsc->id, node->details->uname); if (check_rsc_parameters(rsc, node, rsc_entry, data_set)) { DeleteRsc(rsc, node, FALSE, data_set); } for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; offset++; if (start_index < stop_index) { /* stopped */ continue; } else if (offset < start_index) { /* action occurred prior to a start */ continue; } is_probe = FALSE; + did_change = FALSE; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0 && safe_str_eq(task, RSC_STATUS)) { is_probe = TRUE; } if (interval > 0 && is_set(data_set->flags, pe_flag_maintenance_mode)) { CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); } else if (is_probe || safe_str_eq(task, RSC_START) || interval > 0 || safe_str_eq(task, RSC_MIGRATED)) { - check_action_definition(rsc, node, rsc_op, data_set); + did_change = check_action_definition(rsc, node, rsc_op, data_set); + } + + if (did_change && get_failcount(node, rsc, NULL, data_set)) { + char *key = NULL; + action_t *action_clear = NULL; + + key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); + action_clear = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); + set_bit_inplace(action_clear->flags, pe_action_runnable); } } g_list_free(sorted_op_list); } static GListPtr find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones, gboolean partial, pe_working_set_t * data_set) { GListPtr gIter = NULL; gboolean match = FALSE; if (id == NULL) { return NULL; } else if (rsc == NULL && data_set) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } return result; } else if (rsc == NULL) { return NULL; } if (partial) { if (strstr(rsc->id, id)) { match = TRUE; } else if (rsc->long_name && strstr(rsc->long_name, id)) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) { match = TRUE; } } else { if (strcmp(rsc->id, id) == 0) { match = TRUE; } else if (rsc->long_name && strcmp(rsc->long_name, id) == 0) { match = TRUE; } else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) { match = TRUE; } } if (match) { result = g_list_prepend(result, rsc); } if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; result = find_rsc_list(result, child, id, renamed_clones, partial, NULL); } } return result; } static void check_actions(pe_working_set_t * data_set) { const char *id = NULL; node_t *node = NULL; xmlNode *lrm_rscs = NULL; xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { id = crm_element_value(node_state, XML_ATTR_ID); lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE); node = pe_find_node_id(data_set->nodes, id); if (node == NULL) { continue; } else if (can_run_resources(node) == FALSE) { crm_trace("Skipping param check for %s: cant run resources", node->details->uname); continue; } crm_trace("Processing node %s", node->details->uname); if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { xmlNode *rsc_entry = NULL; for (rsc_entry = __xml_first_child(lrm_rscs); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { if (xml_has_children(rsc_entry)) { GListPtr gIter = NULL; GListPtr result = NULL; const char *rsc_id = ID(rsc_entry); CRM_CHECK(rsc_id != NULL, return); result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set); for (gIter = result; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; check_actions_for(rsc_entry, rsc, node, data_set); } g_list_free(result); } } } } } } } static gboolean apply_placement_constraints(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying constraints..."); for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { rsc_to_node_t *cons = (rsc_to_node_t *) gIter->data; cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); } return TRUE; } static void common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { int fail_count = 0; resource_t *failed = rsc; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; common_apply_stickiness(child_rsc, node, data_set); } return; } if (is_set(rsc->flags, pe_rsc_managed) && rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) { node_t *current = pe_find_node_id(rsc->running_on, node->details->id); node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (current == NULL) { } else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) { resource_t *sticky_rsc = rsc; resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set); crm_debug("Resource %s: preferring current location" " (node=%s, weight=%d)", sticky_rsc->id, node->details->uname, rsc->stickiness); } else { GHashTableIter iter; node_t *nIter = NULL; crm_debug("Ignoring stickiness for %s: the cluster is asymmetric" " and node %s is not explicitly allowed", rsc->id, node->details->uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) { crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight); } } } if (is_not_set(rsc->flags, pe_rsc_unique)) { failed = uber_parent(rsc); } fail_count = get_failcount(node, rsc, NULL, data_set); if (fail_count > 0 && rsc->migration_threshold != 0) { if (rsc->migration_threshold <= fail_count) { resource_location(failed, node, -INFINITY, "__fail_limit__", data_set); crm_warn("Forcing %s away from %s after %d failures (max=%d)", failed->id, node->details->uname, fail_count, rsc->migration_threshold); } else { crm_notice("%s can fail %d more times on %s before being forced off", failed->id, rsc->migration_threshold - fail_count, node->details->uname); } } } static void complex_set_cmds(resource_t * rsc) { GListPtr gIter = rsc->children; rsc->cmds = &resource_class_alloc_functions[rsc->variant]; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; complex_set_cmds(child_rsc); } } void set_alloc_actions(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; complex_set_cmds(rsc); } } static void calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data) { const char *key = (const char *)gKey; const char *value = (const char *)gValue; int *system_health = (int *)user_data; if (!gKey || !gValue || !user_data) { return; } /* Does it start with #health? */ if (0 == strncmp(key, "#health", 7)) { int score; /* Convert the value into an integer */ score = char2score(value); /* Add it to the running total */ *system_health = merge_weights(score, *system_health); } } static gboolean apply_system_health(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy"); if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) { /* Prevent any accidental health -> score translation */ node_score_red = 0; node_score_yellow = 0; node_score_green = 0; return TRUE; } else if (safe_str_eq(health_strategy, "migrate-on-red")) { /* Resources on nodes which have health values of red are * weighted away from that node. */ node_score_red = -INFINITY; node_score_yellow = 0; node_score_green = 0; } else if (safe_str_eq(health_strategy, "only-green")) { /* Resources on nodes which have health values of red or yellow * are forced away from that node. */ node_score_red = -INFINITY; node_score_yellow = -INFINITY; node_score_green = 0; } else if (safe_str_eq(health_strategy, "progressive")) { /* Same as the above, but use the r/y/g scores provided by the user * Defaults are provided by the pe_prefs table */ } else if (safe_str_eq(health_strategy, "custom")) { /* Requires the admin to configure the rsc_location constaints for * processing the stored health scores */ /* TODO: Check for the existance of appropriate node health constraints */ return TRUE; } else { crm_err("Unknown node health strategy: %s", health_strategy); return FALSE; } crm_info("Applying automated node health strategy: %s", health_strategy); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { int system_health = 0; node_t *node = (node_t *) gIter->data; /* Search through the node hash table for system health entries. */ g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health); crm_info(" Node %s has an combined system health of %d", node->details->uname, system_health); /* If the health is non-zero, then create a new rsc2node so that the * weight will be added later on. */ if (system_health != 0) { GListPtr gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; rsc2node_new(health_strategy, rsc, system_health, node, data_set); } } } return TRUE; } gboolean stage0(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); if (data_set->input == NULL) { return FALSE; } if (is_set(data_set->flags, pe_flag_have_status) == FALSE) { crm_trace("Calculating status"); cluster_status(data_set); } set_alloc_actions(data_set); apply_system_health(data_set); unpack_constraints(cib_constraints, data_set); return TRUE; } static void wait_for_probe(resource_t * rsc, const char *action, action_t * probe_complete, pe_working_set_t * data_set) { if (probe_complete == NULL) { return; } if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; wait_for_probe(child, action, probe_complete, data_set); } } else { char *key = generate_op_key(rsc->id, action, 0); custom_action_order(NULL, NULL, probe_complete, rsc, key, NULL, pe_order_optional, data_set); } } /* * Check nodes for resources started outside of the LRM */ gboolean probe_resources(pe_working_set_t * data_set) { action_t *probe_complete = NULL; action_t *probe_node_complete = NULL; GListPtr gIter = NULL; GListPtr gIter2 = NULL; gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *probed = g_hash_table_lookup(node->details->attrs, CRM_OP_PROBED); if (node->details->online == FALSE) { continue; } else if (node->details->unclean) { continue; } else if (probe_complete == NULL) { probe_complete = get_pseudo_op(CRM_OP_PROBED, data_set); } if (probed != NULL && crm_is_true(probed) == FALSE) { action_t *probe_op = custom_action(NULL, crm_strdup(CRM_OP_REPROBE), CRM_OP_REPROBE, node, FALSE, TRUE, data_set); add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); continue; } probe_node_complete = custom_action(NULL, crm_strdup(CRM_OP_PROBED), CRM_OP_PROBED, node, FALSE, TRUE, data_set); if (crm_is_true(probed)) { crm_trace("unset"); update_action_flags(probe_node_complete, pe_action_optional); } else { crm_trace("set"); update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear); } crm_trace("%s - %d", node->details->uname, probe_node_complete->flags & pe_action_optional); probe_node_complete->priority = INFINITY; add_hash_param(probe_node_complete->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); if (node->details->pending) { update_action_flags(probe_node_complete, pe_action_runnable | pe_action_clear); crm_info("Action %s on %s is unrunnable (pending)", probe_node_complete->uuid, probe_node_complete->node->details->uname); } order_actions(probe_node_complete, probe_complete, pe_order_runnable_left /*|pe_order_implies_then */ ); gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; if (rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set)) { update_action_flags(probe_complete, pe_action_optional | pe_action_clear); update_action_flags(probe_node_complete, pe_action_optional | pe_action_clear); wait_for_probe(rsc, RSC_START, probe_complete, data_set); } } } gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; wait_for_probe(rsc, RSC_STOP, probe_complete, data_set); } return TRUE; } /* * Count how many valid nodes we have (so we know the maximum number of * colors we can resolve). * * Apply node constraints (ie. filter the "allowed_nodes" part of resources */ gboolean stage2(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying placement constraints"); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node == NULL) { /* error */ } else if (node->weight >= 0.0 /* global weight */ && node->details->online && node->details->type == node_member) { data_set->max_valid_nodes++; } } apply_placement_constraints(data_set); gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { GListPtr gIter2 = NULL; node_t *node = (node_t *) gIter->data; gIter2 = data_set->resources; for (; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; common_apply_stickiness(rsc, node, data_set); } } return TRUE; } /* * Create internal resource constraints before allocation */ gboolean stage3(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->internal_constraints(rsc, data_set); } return TRUE; } /* * Check for orphaned or redefined actions */ gboolean stage4(pe_working_set_t * data_set) { check_actions(data_set); return TRUE; } static gint sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data) { int rc = 0; int r1_weight = -INFINITY; int r2_weight = -INFINITY; const char *reason = "existance"; const GListPtr nodes = (GListPtr) data; resource_t *resource1 = (resource_t *) convert_const_pointer(a); resource_t *resource2 = (resource_t *) convert_const_pointer(b); node_t *node = NULL; GListPtr gIter = NULL; GHashTable *r1_nodes = NULL; GHashTable *r2_nodes = NULL; if (a == NULL && b == NULL) { goto done; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } reason = "priority"; r1_weight = resource1->priority; r2_weight = resource2->priority; if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "no node list"; if (nodes == NULL) { goto done; } r1_nodes = rsc_merge_weights(resource1, resource1->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource1->id, r1_nodes); r2_nodes = rsc_merge_weights(resource2, resource2->id, NULL, NULL, 1, pe_weights_forward | pe_weights_init); dump_node_scores(LOG_TRACE, NULL, resource2->id, r2_nodes); /* Current location score */ reason = "current location"; r1_weight = -INFINITY; r2_weight = -INFINITY; if (resource1->running_on) { node = g_list_nth_data(resource1->running_on, 0); node = g_hash_table_lookup(r1_nodes, node->details->id); r1_weight = node->weight; } if (resource2->running_on) { node = g_list_nth_data(resource2->running_on, 0); node = g_hash_table_lookup(r2_nodes, node->details->id); r2_weight = node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } reason = "score"; for (gIter = nodes; gIter != NULL; gIter = gIter->next) { node_t *r1_node = NULL; node_t *r2_node = NULL; node = (node_t *) gIter->data; r1_weight = -INFINITY; if (r1_nodes) { r1_node = g_hash_table_lookup(r1_nodes, node->details->id); } if (r1_node) { r1_weight = r1_node->weight; } r2_weight = -INFINITY; if (r2_nodes) { r2_node = g_hash_table_lookup(r2_nodes, node->details->id); } if (r2_node) { r2_weight = r2_node->weight; } if (r1_weight > r2_weight) { rc = -1; goto done; } if (r1_weight < r2_weight) { rc = 1; goto done; } } done: if (r1_nodes) { g_hash_table_destroy(r1_nodes); } if (r2_nodes) { g_hash_table_destroy(r2_nodes); } crm_trace( "%s (%d) %c %s (%d) on %s: %s", resource1->id, r1_weight, rc < 0 ? '>' : rc > 0 ? '<' : '=', resource2->id, r2_weight, node ? node->details->id : "n/a", reason); return rc; } gboolean stage5(pe_working_set_t * data_set) { GListPtr gIter = NULL; if (safe_str_neq(data_set->placement_strategy, "default")) { GListPtr nodes = g_list_copy(data_set->nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL); data_set->resources = g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes); g_list_free(nodes); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node); } crm_trace("Allocating services"); /* Take (next) highest resource, assign it and create its actions */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; crm_trace("Allocating: %s", rsc->id); rsc->cmds->allocate(rsc, NULL, data_set); } gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node); } if (is_set(data_set->flags, pe_flag_startup_probes)) { crm_trace("Calculating needed probes"); /* This code probably needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=100: With probes: ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints 36s ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph Without probes: ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph */ probe_resources(data_set); } crm_trace("Creating actions"); gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc->cmds->create_actions(rsc, data_set); } crm_trace("Creating done"); return TRUE; } static gboolean is_managed(const resource_t * rsc) { GListPtr gIter = rsc->children; if (is_set(rsc->flags, pe_rsc_managed)) { return TRUE; } for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; if (is_managed(child_rsc)) { return TRUE; } } return FALSE; } static gboolean any_managed_resouces(pe_working_set_t * data_set) { GListPtr gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; if (is_managed(rsc)) { return TRUE; } } return FALSE; } /* * Create dependancies for stonith and shutdown operations */ gboolean stage6(pe_working_set_t * data_set) { action_t *dc_down = NULL; action_t *dc_fence = NULL; action_t *stonith_op = NULL; action_t *last_stonith = NULL; gboolean integrity_lost = FALSE; action_t *ready = get_pseudo_op(STONITH_UP, data_set); action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *done = get_pseudo_op(STONITH_DONE, data_set); gboolean need_stonith = FALSE; GListPtr gIter = data_set->nodes; crm_trace("Processing fencing and shutdown cases"); if (is_set(data_set->flags, pe_flag_stonith_enabled) && (is_set(data_set->flags, pe_flag_have_quorum) || data_set->no_quorum_policy == no_quorum_ignore || data_set->no_quorum_policy == no_quorum_suicide)) { need_stonith = TRUE; } if (need_stonith && any_managed_resouces(data_set) == FALSE) { crm_notice("Delaying fencing operations until there are resources to manage"); need_stonith = FALSE; } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; stonith_op = NULL; if (node->details->unclean && need_stonith) { pe_warn("Scheduling Node %s for STONITH", node->details->uname); stonith_op = custom_action(NULL, crm_strdup(CRM_OP_FENCE), CRM_OP_FENCE, node, FALSE, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", data_set->stonith_action); stonith_constraints(node, stonith_op, data_set); order_actions(ready, stonith_op, pe_order_runnable_left); order_actions(stonith_op, all_stopped, pe_order_implies_then); clear_bit_inplace(ready->flags, pe_action_optional); if (node->details->is_dc) { dc_down = stonith_op; dc_fence = stonith_op; } else { if (last_stonith) { order_actions(last_stonith, stonith_op, pe_order_optional); } last_stonith = stonith_op; } } else if (node->details->online && node->details->shutdown) { action_t *down_op = NULL; crm_notice("Scheduling Node %s for shutdown", node->details->uname); down_op = custom_action(NULL, crm_strdup(CRM_OP_SHUTDOWN), CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set); shutdown_constraints(node, down_op, data_set); add_hash_param(down_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); if (node->details->is_dc) { dc_down = down_op; } } if (node->details->unclean && stonith_op == NULL) { integrity_lost = TRUE; pe_warn("Node %s is unclean!", node->details->uname); } } if (integrity_lost) { if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED"); pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE"); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) { crm_notice("Cannot fence unclean nodes until quorum is" " attained (or no-quorum-policy is set to ignore)"); } } if (dc_down != NULL) { GListPtr shutdown_matches = find_actions(data_set->actions, CRM_OP_SHUTDOWN, NULL); crm_trace("Ordering shutdowns before %s on %s (DC)", dc_down->task, dc_down->node->details->uname); add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); gIter = shutdown_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *node_stop = (action_t *) gIter->data; if (node_stop->node->details->is_dc) { continue; } crm_debug("Ordering shutdown on %s before %s on %s", node_stop->node->details->uname, dc_down->task, dc_down->node->details->uname); order_actions(node_stop, dc_down, pe_order_optional); } if (last_stonith && dc_down != last_stonith) { order_actions(last_stonith, dc_down, pe_order_optional); } g_list_free(shutdown_matches); } if (last_stonith) { order_actions(last_stonith, done, pe_order_implies_then); } else if (dc_fence) { order_actions(dc_down, done, pe_order_implies_then); } order_actions(ready, done, pe_order_optional); return TRUE; } /* * Determin the sets of independant actions and the correct order for the * actions in each set. * * Mark dependencies of un-runnable actions un-runnable * */ static GListPtr find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key) { GListPtr list = NULL; list = find_actions(actions, original_key, NULL); if (list == NULL) { /* we're potentially searching a child of the original resource */ char *key = NULL; char *tmp = NULL; char *task = NULL; int interval = 0; if (parse_op_key(original_key, &tmp, &task, &interval)) { key = generate_op_key(rsc->id, task, interval); /* crm_err("looking up %s instead of %s", key, original_key); */ /* slist_iter(action, action_t, actions, lpc, */ /* crm_err(" - %s", action->uuid)); */ list = find_actions(actions, key, NULL); } else { crm_err("search key: %s", original_key); } crm_free(key); crm_free(tmp); crm_free(task); } return list; } static void rsc_order_then(action_t * lh_action, resource_t * rsc, order_constraint_t * order) { GListPtr gIter = NULL; GListPtr rh_actions = NULL; action_t *rh_action = NULL; enum pe_ordering type = order->type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); rh_action = order->rh_action; crm_trace("Processing RH of ordering constraint %d", order->id); if (rh_action != NULL) { rh_actions = g_list_prepend(NULL, rh_action); } else if (rsc != NULL) { rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task); } if (rh_actions == NULL) { crm_trace("No RH-Side (%s/%s) found for constraint..." " ignoring", rsc->id, order->rh_action_task); if (lh_action) { crm_trace("LH-Side was: %s", lh_action->uuid); } return; } if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) { crm_trace("Detected dangling operation %s -> %s", lh_action->uuid, order->rh_action_task); clear_bit_inplace(type, pe_order_implies_then); } gIter = rh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *rh_action_iter = (action_t *) gIter->data; if (lh_action) { order_actions(lh_action, rh_action_iter, type); } else if (type & pe_order_implies_then) { update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear); crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type); } } g_list_free(rh_actions); } static void rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_t * data_set) { GListPtr gIter = NULL; GListPtr lh_actions = NULL; action_t *lh_action = order->lh_action; resource_t *rh_rsc = order->rh_rsc; crm_trace("Processing LH of ordering constraint %d", order->id); CRM_ASSERT(lh_rsc != NULL); if (lh_action != NULL) { lh_actions = g_list_prepend(NULL, lh_action); } else if (lh_action == NULL) { lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task); } if (lh_actions == NULL && lh_rsc != rh_rsc) { char *key = NULL; char *rsc_id = NULL; char *op_type = NULL; int interval = 0; parse_op_key(order->lh_action_task, &rsc_id, &op_type, &interval); key = generate_op_key(lh_rsc->id, op_type, interval); if (lh_rsc->fns->state(lh_rsc, TRUE) != RSC_ROLE_STOPPED || safe_str_neq(op_type, RSC_STOP)) { crm_trace("No LH-Side (%s/%s) found for constraint %d with %s - creating", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); lh_actions = g_list_prepend(NULL, lh_action); } else { crm_free(key); crm_trace("No LH-Side (%s/%s) found for constraint %d with %s - ignoring", lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task); } crm_free(op_type); crm_free(rsc_id); } gIter = lh_actions; for (; gIter != NULL; gIter = gIter->next) { action_t *lh_action_iter = (action_t *) gIter->data; if (rh_rsc == NULL && order->rh_action) { rh_rsc = order->rh_action->rsc; } if (rh_rsc) { rsc_order_then(lh_action_iter, rh_rsc, order); } else if (order->rh_action) { order_actions(lh_action_iter, order->rh_action, order->type); } } g_list_free(lh_actions); } extern gboolean update_action(action_t * action); gboolean stage7(pe_working_set_t * data_set) { GListPtr gIter = NULL; crm_trace("Applying ordering constraints"); /* Don't ask me why, but apparently they need to be processed in * the order they were created in... go figure * * Also g_list_prepend() has horrendous performance characteristics * So we need to use g_list_prepend() and then reverse the list here */ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); gIter = data_set->ordering_constraints; for (; gIter != NULL; gIter = gIter->next) { order_constraint_t *order = (order_constraint_t *) gIter->data; resource_t *rsc = order->lh_rsc; crm_trace("Applying ordering constraint: %d", order->id); if (rsc != NULL) { crm_trace("rsc_action-to-*"); rsc_order_first(rsc, order, data_set); continue; } rsc = order->rh_rsc; if (rsc != NULL) { crm_trace("action-to-rsc_action"); rsc_order_then(order->lh_action, rsc, order); } else { crm_trace("action-to-action"); order_actions(order->lh_action, order->rh_action, order->type); } } crm_trace("Updating %d actions", g_list_length(data_set->actions)); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; update_action(action); } crm_trace("Processing migrations"); gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; rsc_migrate_reload(rsc, data_set); LogActions(rsc, data_set, FALSE); } return TRUE; } static gint sort_notify_entries(gconstpointer a, gconstpointer b) { int tmp; const notify_entry_t *entry_a = a; const notify_entry_t *entry_b = b; if (entry_a == NULL && entry_b == NULL) { return 0; } if (entry_a == NULL) { return 1; } if (entry_b == NULL) { return -1; } if (entry_a->rsc == NULL && entry_b->rsc == NULL) { return 0; } if (entry_a->rsc == NULL) { return 1; } if (entry_b->rsc == NULL) { return -1; } tmp = strcmp(entry_a->rsc->id, entry_b->rsc->id); if (tmp != 0) { return tmp; } if (entry_a->node == NULL && entry_b->node == NULL) { return 0; } if (entry_a->node == NULL) { return 1; } if (entry_b->node == NULL) { return -1; } return strcmp(entry_a->node->details->id, entry_b->node->details->id); } static void expand_list(GListPtr list, char **rsc_list, char **node_list) { GListPtr gIter = list; const char *uname = NULL; const char *rsc_id = NULL; const char *last_rsc_id = NULL; if (rsc_list) { *rsc_list = NULL; } if (list == NULL) { if (rsc_list) { *rsc_list = crm_strdup(" "); } if (node_list) { *node_list = crm_strdup(" "); } return; } if (node_list) { *node_list = NULL; } for (; gIter != NULL; gIter = gIter->next) { notify_entry_t *entry = (notify_entry_t *) gIter->data; CRM_CHECK(entry != NULL, continue); CRM_CHECK(entry->rsc != NULL, continue); CRM_CHECK(node_list == NULL || entry->node != NULL, continue); uname = NULL; rsc_id = entry->rsc->id; CRM_ASSERT(rsc_id != NULL); /* filter dups */ if (safe_str_eq(rsc_id, last_rsc_id)) { continue; } last_rsc_id = rsc_id; if (rsc_list != NULL) { int existing_len = 0; int len = 2 + strlen(rsc_id); /* +1 space, +1 EOS */ if (rsc_list && *rsc_list) { existing_len = strlen(*rsc_list); } crm_trace("Adding %s (%dc) at offset %d", rsc_id, len - 2, existing_len); crm_realloc(*rsc_list, len + existing_len); sprintf(*rsc_list + existing_len, "%s ", rsc_id); } if (entry->node != NULL) { uname = entry->node->details->uname; } if (node_list != NULL && uname) { int existing_len = 0; int len = 2 + strlen(uname); if (node_list && *node_list) { existing_len = strlen(*node_list); } crm_trace("Adding %s (%dc) at offset %d", uname, len - 2, existing_len); crm_realloc(*node_list, len + existing_len); sprintf(*node_list + existing_len, "%s ", uname); } } } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { add_hash_param(user_data, key, value); } static action_t * pe_notify(resource_t * rsc, node_t * node, action_t * op, action_t * confirm, notify_data_t * n_data, pe_working_set_t * data_set) { char *key = NULL; action_t *trigger = NULL; const char *value = NULL; const char *task = NULL; if (op == NULL || confirm == NULL) { crm_trace("Op=%p confirm=%p", op, confirm); return NULL; } CRM_CHECK(node != NULL, return NULL); if (node->details->online == FALSE) { crm_trace("Skipping notification for %s: node offline", rsc->id); return NULL; } else if (is_set(op->flags, pe_action_runnable) == FALSE) { crm_trace("Skipping notification for %s: not runnable", op->uuid); return NULL; } value = g_hash_table_lookup(op->meta, "notify_type"); task = g_hash_table_lookup(op->meta, "notify_operation"); crm_trace("Creating notify actions for %s: %s (%s-%s)", op->uuid, rsc->id, value, task); key = generate_notify_key(rsc->id, value, task); trigger = custom_action(rsc, key, op->task, node, is_set(op->flags, pe_action_optional), TRUE, data_set); g_hash_table_foreach(op->meta, dup_attr, trigger->meta); g_hash_table_foreach(n_data->keys, dup_attr, trigger->meta); /* pseudo_notify before notify */ crm_trace("Ordering %s before %s (%d->%d)", op->uuid, trigger->uuid, trigger->id, op->id); order_actions(op, trigger, pe_order_optional); order_actions(trigger, confirm, pe_order_optional); return trigger; } static void pe_post_notify(resource_t * rsc, node_t * node, notify_data_t * n_data, pe_working_set_t * data_set) { action_t *notify = NULL; CRM_CHECK(rsc != NULL, return); if (n_data->post == NULL) { return; /* Nothing to do */ } notify = pe_notify(rsc, node, n_data->post, n_data->post_done, n_data, data_set); if (notify != NULL) { notify->priority = INFINITY; } if (n_data->post_done) { GListPtr gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *mon = (action_t *) gIter->data; const char *interval = g_hash_table_lookup(mon->meta, "interval"); if (interval == NULL || safe_str_eq(interval, "0")) { crm_trace("Skipping %s: interval", mon->uuid); continue; } else if (safe_str_eq(mon->task, "cancel")) { crm_trace("Skipping %s: cancel", mon->uuid); continue; } order_actions(n_data->post_done, mon, pe_order_optional); } } } notify_data_t * create_notification_boundaries(resource_t * rsc, const char *action, action_t * start, action_t * end, pe_working_set_t * data_set) { /* Create the pseudo ops that preceed and follow the actual notifications */ /* * Creates two sequences (conditional on start and end being supplied): * pre_notify -> pre_notify_complete -> start, and * end -> post_notify -> post_notify_complete * * 'start' and 'end' may be the same event or ${X} and ${X}ed as per clones */ char *key = NULL; notify_data_t *n_data = NULL; if (is_not_set(rsc->flags, pe_rsc_notify)) { return NULL; } crm_malloc0(n_data, sizeof(notify_data_t)); n_data->action = action; n_data->keys = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (start) { /* create pre-event notification wrappers */ key = generate_notify_key(rsc->id, "pre", start->task); n_data->pre = custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(start->flags, pe_action_optional), TRUE, data_set); update_action_flags(n_data->pre, pe_action_pseudo); update_action_flags(n_data->pre, pe_action_runnable); add_hash_param(n_data->pre->meta, "notify_type", "pre"); add_hash_param(n_data->pre->meta, "notify_operation", n_data->action); /* create pre_notify_complete */ key = generate_notify_key(rsc->id, "confirmed-pre", start->task); n_data->pre_done = custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(start->flags, pe_action_optional), TRUE, data_set); update_action_flags(n_data->pre_done, pe_action_pseudo); update_action_flags(n_data->pre_done, pe_action_runnable); add_hash_param(n_data->pre_done->meta, "notify_type", "pre"); add_hash_param(n_data->pre_done->meta, "notify_operation", n_data->action); order_actions(n_data->pre_done, start, pe_order_optional); order_actions(n_data->pre, n_data->pre_done, pe_order_optional); } if (end) { /* create post-event notification wrappers */ key = generate_notify_key(rsc->id, "post", end->task); n_data->post = custom_action(rsc, key, RSC_NOTIFY, NULL, is_set(end->flags, pe_action_optional), TRUE, data_set); n_data->post->priority = INFINITY; update_action_flags(n_data->post, pe_action_pseudo); if (is_set(end->flags, pe_action_runnable)) { update_action_flags(n_data->post, pe_action_runnable); } else { update_action_flags(n_data->post, pe_action_runnable | pe_action_clear); } add_hash_param(n_data->post->meta, "notify_type", "post"); add_hash_param(n_data->post->meta, "notify_operation", n_data->action); /* create post_notify_complete */ key = generate_notify_key(rsc->id, "confirmed-post", end->task); n_data->post_done = custom_action(rsc, key, RSC_NOTIFIED, NULL, is_set(end->flags, pe_action_optional), TRUE, data_set); n_data->post_done->priority = INFINITY; update_action_flags(n_data->post_done, pe_action_pseudo); if (is_set(end->flags, pe_action_runnable)) { update_action_flags(n_data->post_done, pe_action_runnable); } else { update_action_flags(n_data->post_done, pe_action_runnable | pe_action_clear); } add_hash_param(n_data->post_done->meta, "notify_type", "pre"); add_hash_param(n_data->post_done->meta, "notify_operation", n_data->action); order_actions(end, n_data->post, pe_order_implies_then); order_actions(n_data->post, n_data->post_done, pe_order_implies_then); } if (start && end) { order_actions(n_data->pre_done, n_data->post, pe_order_optional); } if (safe_str_eq(action, RSC_STOP)) { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); order_actions(n_data->post_done, all_stopped, pe_order_optional); } return n_data; } void collect_notification_data(resource_t * rsc, gboolean state, gboolean activity, notify_data_t * n_data) { if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; collect_notification_data(child, state, activity, n_data); } return; } if (state) { notify_entry_t *entry = NULL; crm_malloc0(entry, sizeof(notify_entry_t)); entry->rsc = rsc; if (rsc->running_on) { /* we only take the first one */ entry->node = rsc->running_on->data; } crm_trace("%s state: %s", rsc->id, role2text(rsc->role)); switch (rsc->role) { case RSC_ROLE_STOPPED: n_data->inactive = g_list_prepend(n_data->inactive, entry); break; case RSC_ROLE_STARTED: n_data->active = g_list_prepend(n_data->active, entry); break; case RSC_ROLE_SLAVE: n_data->slave = g_list_prepend(n_data->slave, entry); break; case RSC_ROLE_MASTER: n_data->master = g_list_prepend(n_data->master, entry); break; default: crm_err("Unsupported notify role"); crm_free(entry); break; } } if (activity) { notify_entry_t *entry = NULL; enum action_tasks task; GListPtr gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { crm_malloc0(entry, sizeof(notify_entry_t)); entry->node = op->node; entry->rsc = rsc; task = text2task(op->task); switch (task) { case start_rsc: n_data->start = g_list_prepend(n_data->start, entry); break; case stop_rsc: n_data->stop = g_list_prepend(n_data->stop, entry); break; case action_promote: n_data->promote = g_list_prepend(n_data->promote, entry); break; case action_demote: n_data->demote = g_list_prepend(n_data->demote, entry); break; default: crm_free(entry); break; } } } } } gboolean expand_notification_data(notify_data_t * n_data) { /* Expand the notification entries into a key=value hashtable * This hashtable is later used in action2xml() */ gboolean required = FALSE; char *rsc_list = NULL; char *node_list = NULL; if (n_data->stop) { n_data->stop = g_list_sort(n_data->stop, sort_notify_entries); } expand_list(n_data->stop, &rsc_list, &node_list); if (rsc_list != NULL && safe_str_neq(" ", rsc_list)) { if (safe_str_eq(n_data->action, RSC_STOP)) { required = TRUE; } } g_hash_table_insert(n_data->keys, crm_strdup("notify_stop_resource"), rsc_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_stop_uname"), node_list); if (n_data->start) { n_data->start = g_list_sort(n_data->start, sort_notify_entries); if (rsc_list && safe_str_eq(n_data->action, RSC_START)) { required = TRUE; } } expand_list(n_data->start, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_start_resource"), rsc_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_start_uname"), node_list); if (n_data->demote) { n_data->demote = g_list_sort(n_data->demote, sort_notify_entries); if (safe_str_eq(n_data->action, RSC_DEMOTE)) { required = TRUE; } } expand_list(n_data->demote, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_demote_resource"), rsc_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_demote_uname"), node_list); if (n_data->promote) { n_data->promote = g_list_sort(n_data->promote, sort_notify_entries); if (safe_str_eq(n_data->action, RSC_PROMOTE)) { required = TRUE; } } expand_list(n_data->promote, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_promote_resource"), rsc_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_promote_uname"), node_list); if (n_data->active) { n_data->active = g_list_sort(n_data->active, sort_notify_entries); } expand_list(n_data->active, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_active_resource"), rsc_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_active_uname"), node_list); if (n_data->slave) { n_data->slave = g_list_sort(n_data->slave, sort_notify_entries); } expand_list(n_data->slave, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_slave_resource"), rsc_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_slave_uname"), node_list); if (n_data->master) { n_data->master = g_list_sort(n_data->master, sort_notify_entries); } expand_list(n_data->master, &rsc_list, &node_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_master_resource"), rsc_list); g_hash_table_insert(n_data->keys, crm_strdup("notify_master_uname"), node_list); if (n_data->inactive) { n_data->inactive = g_list_sort(n_data->inactive, sort_notify_entries); } expand_list(n_data->inactive, &rsc_list, NULL); g_hash_table_insert(n_data->keys, crm_strdup("notify_inactive_resource"), rsc_list); if (required && n_data->pre) { update_action_flags(n_data->pre, pe_action_optional | pe_action_clear); update_action_flags(n_data->pre_done, pe_action_optional | pe_action_clear); } if (required && n_data->post) { update_action_flags(n_data->post, pe_action_optional | pe_action_clear); update_action_flags(n_data->post_done, pe_action_optional | pe_action_clear); } return required; } void create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t * data_set) { GListPtr gIter = NULL; action_t *stop = NULL; action_t *start = NULL; enum action_tasks task = text2task(n_data->action); if (rsc->children) { gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; create_notifications(child, n_data, data_set); } return; } /* Copy notification details into standard ops */ gIter = rsc->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_optional) == FALSE && op->node != NULL) { enum action_tasks t = text2task(op->task); switch (t) { case start_rsc: case stop_rsc: case action_promote: case action_demote: g_hash_table_foreach(n_data->keys, dup_attr, op->meta); break; default: break; } } } crm_trace("Creating notificaitons for: %s.%s (%s->%s)", n_data->action, rsc->id, role2text(rsc->role), role2text(rsc->next_role)); stop = find_first_action(rsc->actions, NULL, RSC_STOP, NULL); start = find_first_action(rsc->actions, NULL, RSC_START, NULL); /* stop / demote */ if (rsc->role != RSC_ROLE_STOPPED) { if (task == stop_rsc || task == action_demote) { gIter = rsc->running_on; for (; gIter != NULL; gIter = gIter->next) { node_t *current_node = (node_t *) gIter->data; pe_notify(rsc, current_node, n_data->pre, n_data->pre_done, n_data, data_set); if (task == action_demote || stop == NULL || is_set(stop->flags, pe_action_optional)) { pe_post_notify(rsc, current_node, n_data, data_set); } } } } /* start / promote */ if (rsc->next_role != RSC_ROLE_STOPPED) { if (rsc->allocated_to == NULL) { pe_proc_err("Next role '%s' but %s is not allocated", role2text(rsc->next_role), rsc->id); } else if (task == start_rsc || task == action_promote) { if (task != start_rsc || start == NULL || is_set(start->flags, pe_action_optional)) { pe_notify(rsc, rsc->allocated_to, n_data->pre, n_data->pre_done, n_data, data_set); } pe_post_notify(rsc, rsc->allocated_to, n_data, data_set); } } } void free_notification_data(notify_data_t * n_data) { if (n_data == NULL) { return; } slist_basic_destroy(n_data->stop); slist_basic_destroy(n_data->start); slist_basic_destroy(n_data->demote); slist_basic_destroy(n_data->promote); slist_basic_destroy(n_data->master); slist_basic_destroy(n_data->slave); slist_basic_destroy(n_data->active); slist_basic_destroy(n_data->inactive); g_hash_table_destroy(n_data->keys); crm_free(n_data); } int transition_id = -1; /* * Create a dependency graph to send to the transitioner (via the CRMd) */ gboolean stage8(pe_working_set_t * data_set) { GListPtr gIter = NULL; const char *value = NULL; transition_id++; crm_trace("Creating transition graph %d.", transition_id); data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); value = pe_pref(data_set->config_hash, "cluster-delay"); crm_xml_add(data_set->graph, "cluster-delay", value); value = pe_pref(data_set->config_hash, "stonith-timeout"); crm_xml_add(data_set->graph, "stonith-timeout", value); crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY"); if (is_set(data_set->flags, pe_flag_start_failure_fatal)) { crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY"); } else { crm_xml_add(data_set->graph, "failed-start-offset", "1"); } value = pe_pref(data_set->config_hash, "batch-limit"); crm_xml_add(data_set->graph, "batch-limit", value); crm_xml_add_int(data_set->graph, "transition_id", transition_id); value = pe_pref(data_set->config_hash, "migration-limit"); if (crm_int_helper(value, NULL) > 0) { crm_xml_add(data_set->graph, "migration-limit", value); } /* errors... slist_iter(action, action_t, action_list, lpc, if(action->optional == FALSE && action->runnable == FALSE) { print_action("Ignoring", action, TRUE); } ); */ gIter = data_set->resources; for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; crm_trace("processing actions for rsc=%s", rsc->id); rsc->cmds->expand(rsc, data_set); } crm_log_xml_trace(data_set->graph, "created resource-driven action list"); /* catch any non-resource specific actions */ crm_trace("processing non-resource actions"); gIter = data_set->actions; for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->rsc && action->node && action->node->details->shutdown && is_not_set(data_set->flags, pe_flag_maintenance_mode) && is_not_set(action->flags, pe_action_optional) && is_not_set(action->flags, pe_action_runnable) && crm_str_eq(action->task, RSC_STOP, TRUE) ) { crm_crit("Cannot shut down node '%s' because of %s:%s%s", action->node->details->uname, action->rsc->id, is_not_set(action->rsc->flags, pe_rsc_managed)?" unmanaged":" blocked", is_set(action->rsc->flags, pe_rsc_failed)?" failed":""); } graph_element_from_action(action, data_set); } crm_log_xml_trace(data_set->graph, "created generic action list"); crm_trace("Created transition graph %d.", transition_id); return TRUE; } void cleanup_alloc_calculations(pe_working_set_t * data_set) { if (data_set == NULL) { return; } crm_trace("deleting %d order cons: %p", g_list_length(data_set->ordering_constraints), data_set->ordering_constraints); pe_free_ordering(data_set->ordering_constraints); data_set->ordering_constraints = NULL; crm_trace("deleting %d node cons: %p", g_list_length(data_set->placement_constraints), data_set->placement_constraints); pe_free_rsc_to_node(data_set->placement_constraints); data_set->placement_constraints = NULL; crm_trace("deleting %d inter-resource cons: %p", g_list_length(data_set->colocation_constraints), data_set->colocation_constraints); slist_basic_destroy(data_set->colocation_constraints); data_set->colocation_constraints = NULL; crm_trace("deleting %d ticket deps: %p", g_list_length(data_set->ticket_constraints), data_set->ticket_constraints); slist_basic_destroy(data_set->ticket_constraints); data_set->ticket_constraints = NULL; cleanup_calculations(data_set); } diff --git a/pengine/regression.sh b/pengine/regression.sh index 3306f45f5a..344044434e 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -1,630 +1,633 @@ #!/bin/bash # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # core=`dirname $0` . $core/regression.core.sh create_mode="true" info Generating test outputs for these tests... # do_test file description info Done. echo "" info Performing the following tests from $io_dir create_mode="false" echo "" do_test simple1 "Offline " do_test simple2 "Start " do_test simple3 "Start 2 " do_test simple4 "Start Failed" do_test simple6 "Stop Start " do_test simple7 "Shutdown " #do_test simple8 "Stonith " #do_test simple9 "Lower version" #do_test simple10 "Higher version" do_test simple11 "Priority (ne)" do_test simple12 "Priority (eq)" do_test simple8 "Stickiness" echo "" do_test group1 "Group " do_test group2 "Group + Native " do_test group3 "Group + Group " do_test group4 "Group + Native (nothing)" do_test group5 "Group + Native (move) " do_test group6 "Group + Group (move) " do_test group7 "Group colocation" do_test group13 "Group colocation (cant run)" do_test group8 "Group anti-colocation" do_test group9 "Group recovery" do_test group10 "Group partial recovery" do_test group11 "Group target_role" do_test group14 "Group stop (graph terminated)" do_test group15 "-ve group colocation" do_test bug-1573 "Partial stop of a group with two children" do_test bug-1718 "Mandatory group ordering - Stop group_FUN" do_test bug-lf-2613 "Move group on failure" do_test bug-lf-2619 "Move group on clone failure" echo "" do_test rsc_dep1 "Must not " do_test rsc_dep3 "Must " do_test rsc_dep5 "Must not 3 " do_test rsc_dep7 "Must 3 " do_test rsc_dep10 "Must (but cant)" do_test rsc_dep2 "Must (running) " do_test rsc_dep8 "Must (running : alt) " do_test rsc_dep4 "Must (running + move)" do_test asymmetric "Asymmetric - require explicit location constraints" echo "" do_test orphan-0 "Orphan ignore" do_test orphan-1 "Orphan stop" do_test orphan-2 "Orphan stop, remove failcount" echo "" do_test params-0 "Params: No change" do_test params-1 "Params: Changed" do_test params-2 "Params: Resource definition" do_test params-4 "Params: Reload" do_test params-5 "Params: Restart based on probe digest" do_test novell-251689 "Resource definition change + target_role=stopped" do_test bug-lf-2106 "Restart all anonymous clone instances after config change" do_test params-6 "Params: Detect reload in previously migrated resource" echo "" do_test target-0 "Target Role : baseline" do_test target-1 "Target Role : master" do_test target-2 "Target Role : invalid" echo "" do_test domain "Failover domains" do_test base-score "Set a node's default score for all nodes" echo "" do_test date-1 "Dates" -t "2005-020" do_test date-2 "Date Spec - Pass" -t "2005-020T12:30" do_test date-3 "Date Spec - Fail" -t "2005-020T11:30" do_test probe-0 "Probe (anon clone)" do_test probe-1 "Pending Probe" do_test probe-2 "Correctly re-probe cloned groups" do_test probe-3 "Probe (pending node)" do_test probe-4 "Probe (pending node + stopped resource)" --rc 4 do_test standby "Standby" do_test comments "Comments" echo "" do_test one-or-more-0 "Everything starts" do_test one-or-more-1 "Nothing starts because of A" do_test one-or-more-2 "D can start because of C" do_test one-or-more-3 "D cannot start because of B and C" do_test one-or-more-4 "D cannot start because of target-role" do_test one-or-more-5 "Start A and F even though C and D are stopped" do_test one-or-more-6 "Leave A running even though B is stopped" do_test one-or-more-7 "Leave A running even though C is stopped" echo "" do_test order1 "Order start 1 " do_test order2 "Order start 2 " do_test order3 "Order stop " do_test order4 "Order (multiple) " do_test order5 "Order (move) " do_test order6 "Order (move w/ restart) " do_test order7 "Order (manditory) " do_test order-optional "Order (score=0) " do_test order-required "Order (score=INFINITY) " do_test bug-lf-2171 "Prevent group start when clone is stopped" do_test order-clone "Clone ordering should be able to prevent startup of dependant clones" do_test order-sets "Ordering for resource sets" do_test order-serialize "Serialize resources without inhibiting migration" do_test order-serialize-set "Serialize a set of resources without inhibiting migration" do_test clone-order-primitive "Order clone start after a primitive" do_test order-optional-keyword "Order (optional keyword)" do_test order-mandatory "Order (mandatory keyword)" do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones" do_test ordered-set-basic-startup "Constraint set with default order settings." # This test emits an error log and thus upsets the test suite; even # though it explicitly aims to test an error leg. FIXME # do_test order-wrong-kind "Order (error)" echo "" do_test coloc-loop "Colocation - loop" do_test coloc-many-one "Colocation - many-to-one" do_test coloc-list "Colocation - many-to-one with list" do_test coloc-group "Colocation - groups" do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation" do_test coloc-attr "Colocation based on node attributes" do_test coloc-negative-group "Negative colocation with a group" do_test coloc-intra-set "Intra-set colocation" do_test bug-lf-2435 "Colocation sets with a negative score" do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependant must stop" echo "" do_test rsc-sets-seq-true "Resource Sets - sequential=false" do_test rsc-sets-seq-false "Resource Sets - sequential=true" do_test rsc-sets-clone "Resource Sets - Clone" do_test rsc-sets-master "Resource Sets - Master" do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)" #echo "" #do_test agent1 "version: lt (empty)" #do_test agent2 "version: eq " #do_test agent3 "version: gt " echo "" do_test attrs1 "string: eq (and) " do_test attrs2 "string: lt / gt (and)" do_test attrs3 "string: ne (or) " do_test attrs4 "string: exists " do_test attrs5 "string: not_exists " do_test attrs6 "is_dc: true " do_test attrs7 "is_dc: false " do_test attrs8 "score_attribute " echo "" do_test mon-rsc-1 "Schedule Monitor - start" do_test mon-rsc-2 "Schedule Monitor - move " do_test mon-rsc-3 "Schedule Monitor - pending start " do_test mon-rsc-4 "Schedule Monitor - move/pending start" echo "" do_test rec-rsc-0 "Resource Recover - no start " do_test rec-rsc-1 "Resource Recover - start " do_test rec-rsc-2 "Resource Recover - monitor " do_test rec-rsc-3 "Resource Recover - stop - ignore" do_test rec-rsc-4 "Resource Recover - stop - block " do_test rec-rsc-5 "Resource Recover - stop - fence " do_test rec-rsc-6 "Resource Recover - multiple - restart" do_test rec-rsc-7 "Resource Recover - multiple - stop " do_test rec-rsc-8 "Resource Recover - multiple - block " do_test rec-rsc-9 "Resource Recover - group/group" echo "" do_test quorum-1 "No quorum - ignore" do_test quorum-2 "No quorum - freeze" do_test quorum-3 "No quorum - stop " do_test quorum-4 "No quorum - start anyway" do_test quorum-5 "No quorum - start anyway (group)" do_test quorum-6 "No quorum - start anyway (clone)" echo "" do_test rec-node-1 "Node Recover - Startup - no fence" do_test rec-node-2 "Node Recover - Startup - fence " do_test rec-node-3 "Node Recover - HA down - no fence" do_test rec-node-4 "Node Recover - HA down - fence " do_test rec-node-5 "Node Recover - CRM down - no fence" do_test rec-node-6 "Node Recover - CRM down - fence " do_test rec-node-7 "Node Recover - no quorum - ignore " do_test rec-node-8 "Node Recover - no quorum - freeze " do_test rec-node-9 "Node Recover - no quorum - stop " do_test rec-node-10 "Node Recover - no quorum - stop w/fence" do_test rec-node-11 "Node Recover - CRM down w/ group - fence " do_test rec-node-12 "Node Recover - nothing active - fence " do_test rec-node-13 "Node Recover - failed resource + shutdown - fence " do_test rec-node-15 "Node Recover - unknown lrm section" do_test rec-node-14 "Serialize all stonith's" echo "" do_test multi1 "Multiple Active (stop/start)" echo "" do_test migrate-begin "Normal migration" do_test migrate-success "Completed migration" do_test migrate-partial-1 "Completed migration, missing stop on source" do_test migrate-partial-2 "Successful migrate_to only" do_test migrate-partial-3 "Successful migrate_to only, target down" do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from" do_test migrate-fail-2 "Failed migrate_from" do_test migrate-fail-3 "Failed migrate_from + stop on source" do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-5 "Failed migrate_from + stop on source and target" do_test migrate-fail-6 "Failed migrate_to" do_test migrate-fail-7 "Failed migrate_to + stop on source" do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target" do_test migrate-fail-9 "Failed migrate_to + stop on source and target" do_test migrate-stop "Migration in a stopping stack" do_test migrate-start "Migration in a starting stack" do_test migrate-stop_start "Migration in a restarting stack" do_test migrate-stop-complex "Migration in a complex stopping stack" do_test migrate-start-complex "Migration in a complex starting stack" do_test migrate-stop-start-complex "Migration in a complex moving stack" do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown" do_test migrate-1 "Migrate (migrate)" do_test migrate-2 "Migrate (stable)" do_test migrate-3 "Migrate (failed migrate_to)" do_test migrate-4 "Migrate (failed migrate_from)" do_test novell-252693 "Migration in a stopping stack" do_test novell-252693-2 "Migration in a starting stack" do_test novell-252693-3 "Non-Migration in a starting and stopping stack" do_test bug-1820 "Migration in a group" do_test bug-1820-1 "Non-migration in a group" do_test migrate-5 "Primitive migration with a clone" do_test migrate-fencing "Migration after Fencing" #echo "" #do_test complex1 "Complex " do_test bug-lf-2422 "Dependancy on partially active group - stop ocfs:*" echo "" do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node" do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones" do_test clone-anon-failcount "Merge failcounts for anonymous clones" do_test inc0 "Incarnation start" do_test inc1 "Incarnation start order" do_test inc2 "Incarnation silent restart, stop, move" do_test inc3 "Inter-incarnation ordering, silent restart, stop, move" do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)" do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)" do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)" do_test inc7 "Clone colocation" do_test inc8 "Clone anti-colocation" do_test inc9 "Non-unique clone" do_test inc10 "Non-unique clone (stop)" do_test inc11 "Primitive colocation with clones" do_test inc12 "Clone shutdown" do_test cloned-group "Make sure only the correct number of cloned groups are started" do_test clone-no-shuffle "Dont prioritize allocation of instances that must be moved" do_test clone-max-zero "Orphan processing with clone-max=0" do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" do_test bug-lf-2160 "Dont shuffle clones due to colocation" do_test bug-lf-2213 "clone-node-max enforcement for cloned groups" do_test bug-lf-2153 "Clone ordering constraints" do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable" do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone" do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)" do_test clone-colocate-instance-2 "Colocation with a specific clone instance" do_test clone-order-instance "Ordering with specific clone instances" do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation" do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups" do_test bug-lf-2544 "Balanced clone placement" do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0" do_test bug-lf-2574 "Avoid clone shuffle" do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start" echo "" do_test master-0 "Stopped -> Slave" do_test master-1 "Stopped -> Promote" do_test master-2 "Stopped -> Promote : notify" do_test master-3 "Stopped -> Promote : master location" do_test master-4 "Started -> Promote : master location" do_test master-5 "Promoted -> Promoted" do_test master-6 "Promoted -> Promoted (2)" do_test master-7 "Promoted -> Fenced" do_test master-8 "Promoted -> Fenced -> Moved" do_test master-9 "Stopped + Promotable + No quorum" do_test master-10 "Stopped -> Promotable : notify with monitor" do_test master-11 "Stopped -> Promote : colocation" do_test novell-239082 "Demote/Promote ordering" do_test novell-239087 "Stable master placement" do_test master-12 "Promotion based solely on rsc_location constraints" do_test master-13 "Include preferences of colocated resources when placing master" do_test master-demote "Ordering when actions depends on demoting a slave resource" do_test master-ordering "Prevent resources from starting that need a master" do_test bug-1765 "Master-Master Colocation (dont stop the slaves)" do_test master-group "Promotion of cloned groups" do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily" do_test master-failed-demote "Dont retry failed demote actions" do_test master-failed-demote-2 "Dont retry failed demote actions (notify=false)" do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does" do_test master-reattach "Re-attach to a running master" do_test master-allow-start "Don't include master score if it would prevent allocation" do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints" do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly" do_test master-role "Prevent target-role from promoting more than master-max instances" do_test bug-lf-2358 "Master-Master anti-colocation" do_test master-promotion-constraint "Mandatory master colocation constraints" do_test unmanaged-master "Ensure role is preserved for unmanaged resources" do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters" do_test master-demote-2 "Demote does not clear past failure" do_test master-move "Move master based on failure of colocated group" echo "" do_test history-1 "Correctly parse stateful-1 resource state" echo "" do_test managed-0 "Managed (reference)" do_test managed-1 "Not managed - down " do_test managed-2 "Not managed - up " do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource" do_test bug-5028-detach "Ensure detach still works" do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack" echo "" do_test interleave-0 "Interleave (reference)" do_test interleave-1 "coloc - not interleaved" do_test interleave-2 "coloc - interleaved " do_test interleave-3 "coloc - interleaved (2)" do_test interleave-pseudo-stop "Interleaved clone during stonith" do_test interleave-stop "Interleaved clone during stop" do_test interleave-restart "Interleaved clone during dependancy restart" echo "" do_test notify-0 "Notify reference" do_test notify-1 "Notify simple" do_test notify-2 "Notify simple, confirm" do_test notify-3 "Notify move, confirm" do_test novell-239079 "Notification priority" #do_test notify-2 "Notify - 764" echo "" do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition" do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1" do_test 696 "OSDL #696 - CRM starts stonith RA without monitor" do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop" do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3" do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1" do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id" do_test 829 "OSDL #829" do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted" do_test 994-2 "OSDL #994 - with a dependant resource" do_test 1360 "OSDL #1360 - Clone stickiness" do_test 1484 "OSDL #1484 - on_fail=stop" do_test 1494 "OSDL #1494 - Clone stability" do_test unrunnable-1 "Unrunnable" do_test stonith-0 "Stonith loop - 1" do_test stonith-1 "Stonith loop - 2" do_test stonith-2 "Stonith loop - 3" do_test stonith-3 "Stonith startup" do_test bug-1572-1 "Recovery of groups depending on master/slave" do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted" do_test bug-1685 "Depends-on-master ordering" do_test bug-1822 "Dont promote partially active groups" do_test bug-pm-11 "New resource added to a m/s group" do_test bug-pm-12 "Recover only the failed portion of a cloned group" do_test bug-n-387749 "Don't shuffle clone instances" do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped" do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node" do_test bug-lf-1920 "Correctly handle probes that find active resources" do_test bnc-515172 "Location constraint with multiple expressions" do_test colocate-primitive-with-clone "Optional colocation with a clone" do_test use-after-free-merge "Use-after-free in native_merge_weights" do_test bug-lf-2551 "STONITH ordering for stop" do_test bug-lf-2606 "Stonith implies demote" do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults" do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering" do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false" do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false" do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false" do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts." do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false" do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false." do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false." do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false." do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false" do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true" do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources." do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases" +do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload" +do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change." +do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart" echo "" do_test systemhealth1 "System Health () #1" do_test systemhealth2 "System Health () #2" do_test systemhealth3 "System Health () #3" do_test systemhealthn1 "System Health (None) #1" do_test systemhealthn2 "System Health (None) #2" do_test systemhealthn3 "System Health (None) #3" do_test systemhealthm1 "System Health (Migrate On Red) #1" do_test systemhealthm2 "System Health (Migrate On Red) #2" do_test systemhealthm3 "System Health (Migrate On Red) #3" do_test systemhealtho1 "System Health (Only Green) #1" do_test systemhealtho2 "System Health (Only Green) #2" do_test systemhealtho3 "System Health (Only Green) #3" do_test systemhealthp1 "System Health (Progessive) #1" do_test systemhealthp2 "System Health (Progessive) #2" do_test systemhealthp3 "System Health (Progessive) #3" echo "" do_test utilization "Placement Strategy - utilization" do_test minimal "Placement Strategy - minimal" do_test balanced "Placement Strategy - balanced" echo "" do_test placement-stickiness "Optimized Placement Strategy - stickiness" do_test placement-priority "Optimized Placement Strategy - priority" do_test placement-location "Optimized Placement Strategy - location" do_test placement-capacity "Optimized Placement Strategy - capacity" echo "" do_test utilization-order1 "Utilization Order - Simple" do_test utilization-order2 "Utilization Order - Complex" do_test utilization-order3 "Utilization Order - Migrate" do_test utilization-order4 "Utilization Order - Live Mirgration (bnc#695440)" do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3" echo "" do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources" echo "" do_test stopped-monitor-00 "Stopped Monitor - initial start" do_test stopped-monitor-01 "Stopped Monitor - failed started" do_test stopped-monitor-02 "Stopped Monitor - started multi-up" do_test stopped-monitor-03 "Stopped Monitor - stop started" do_test stopped-monitor-04 "Stopped Monitor - failed stop" do_test stopped-monitor-05 "Stopped Monitor - start unmanaged" do_test stopped-monitor-06 "Stopped Monitor - unmanaged multi-up" do_test stopped-monitor-07 "Stopped Monitor - start unmanaged multi-up" do_test stopped-monitor-08 "Stopped Monitor - migrate" do_test stopped-monitor-09 "Stopped Monitor - unmanage started" do_test stopped-monitor-10 "Stopped Monitor - unmanaged started multi-up" do_test stopped-monitor-11 "Stopped Monitor - stop unmanaged started" do_test stopped-monitor-12 "Stopped Monitor - unmanaged started multi-up (targer-role="Stopped")" do_test stopped-monitor-20 "Stopped Monitor - initial stop" do_test stopped-monitor-21 "Stopped Monitor - stopped single-up" do_test stopped-monitor-22 "Stopped Monitor - stopped multi-up" do_test stopped-monitor-23 "Stopped Monitor - start stopped" do_test stopped-monitor-24 "Stopped Monitor - unmanage stopped" do_test stopped-monitor-25 "Stopped Monitor - unmanaged stopped multi-up" do_test stopped-monitor-26 "Stopped Monitor - start unmanaged stopped" do_test stopped-monitor-27 "Stopped Monitor - unmanaged stopped multi-up (target-role="Started")" do_test stopped-monitor-30 "Stopped Monitor - new node started" do_test stopped-monitor-31 "Stopped Monitor - new node stopped" echo"" do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)" do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)" do_test ticket-primitive-3 "Ticket - Primitive (loss-policy-stop, revoked)" do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)" do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)" do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)" do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)" do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)" do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)" do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)" do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)" do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)" do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)" do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)" do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)" do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)" do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)" do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)" do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)" do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)" do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)" do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)" do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)" do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)" do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)" do_test ticket-group-3 "Ticket - Group (loss-policy-stop, revoked)" do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)" do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)" do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)" do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)" do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)" do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)" do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)" do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)" do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)" do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)" do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)" do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)" do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)" do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)" do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)" do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)" do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)" do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)" do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)" do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)" do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)" do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)" do_test ticket-clone-3 "Ticket - Clone (loss-policy-stop, revoked)" do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)" do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)" do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)" do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)" do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)" do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)" do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)" do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)" do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)" do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)" do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)" do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)" do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)" do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)" do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)" do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)" do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)" do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)" do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)" do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)" do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)" echo"" do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)" do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)" do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)" do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)" do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)" do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)" do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)" do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)" do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)" do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)" do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)" do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)" do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)" do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)" do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)" do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)" do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)" do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)" do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)" do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)" do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)" do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)" do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)" do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)" echo "" do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)" do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)" do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)" do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)" do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)" do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)" do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)" do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)" do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)" do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)" do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)" do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)" echo "" do_test template-1 "Template - 1" do_test template-2 "Template - 2" do_test template-3 "Template - 3 (merge operations)" do_test template-coloc-1 "Template - Colocation 1" do_test template-coloc-2 "Template - Colocation 2" do_test template-coloc-3 "Template - Colocation 3" do_test template-order-1 "Template - Order 1" do_test template-order-2 "Template - Order 2" do_test template-order-3 "Template - Order 3" do_test template-ticket "Template - Ticket" do_test template-rsc-sets-1 "Template - Resource Sets 1" do_test template-rsc-sets-2 "Template - Resource Sets 2" do_test template-rsc-sets-3 "Template - Resource Sets 3" do_test template-rsc-sets-4 "Template - Resource Sets 4" echo "" test_results diff --git a/pengine/test10/bug-5025-1.dot b/pengine/test10/bug-5025-1.dot new file mode 100644 index 0000000000..8cae3a30e0 --- /dev/null +++ b/pengine/test10/bug-5025-1.dot @@ -0,0 +1,6 @@ + digraph "g" { +"A_clear_failcount_0 fc16-builder" [ style=bold color="green" fontcolor="black"] +"A_monitor_30000 fc16-builder" [ style=bold color="green" fontcolor="black"] +"A_reload_0 fc16-builder" -> "A_monitor_30000 fc16-builder" [ style = bold] +"A_reload_0 fc16-builder" [ style=bold color="green" fontcolor="black"] +} diff --git a/pengine/test10/bug-5025-1.exp b/pengine/test10/bug-5025-1.exp new file mode 100644 index 0000000000..de5732d98b --- /dev/null +++ b/pengine/test10/bug-5025-1.exp @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/bug-5025-1.scores b/pengine/test10/bug-5025-1.scores new file mode 100644 index 0000000000..8ff812ccbb --- /dev/null +++ b/pengine/test10/bug-5025-1.scores @@ -0,0 +1,7 @@ +Allocation scores: +native_color: A allocation score on fc16-builder2: 0 +native_color: A allocation score on fc16-builder3: 0 +native_color: A allocation score on fc16-builder: 0 +native_color: virt-fencing allocation score on fc16-builder2: 0 +native_color: virt-fencing allocation score on fc16-builder3: 0 +native_color: virt-fencing allocation score on fc16-builder: INFINITY diff --git a/pengine/test10/bug-5025-1.xml b/pengine/test10/bug-5025-1.xml new file mode 100644 index 0000000000..f13fe714b3 --- /dev/null +++ b/pengine/test10/bug-5025-1.xml @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/bug-5025-2.dot b/pengine/test10/bug-5025-2.dot new file mode 100644 index 0000000000..d8f1c9f22b --- /dev/null +++ b/pengine/test10/bug-5025-2.dot @@ -0,0 +1,2 @@ + digraph "g" { +} diff --git a/pengine/test10/bug-5025-2.exp b/pengine/test10/bug-5025-2.exp new file mode 100644 index 0000000000..b0e6e77e3b --- /dev/null +++ b/pengine/test10/bug-5025-2.exp @@ -0,0 +1,2 @@ + + diff --git a/pengine/test10/bug-5025-2.scores b/pengine/test10/bug-5025-2.scores new file mode 100644 index 0000000000..22e4e6d65b --- /dev/null +++ b/pengine/test10/bug-5025-2.scores @@ -0,0 +1,10 @@ +Allocation scores: +native_color: A allocation score on fc16-builder2: 0 +native_color: A allocation score on fc16-builder3: 0 +native_color: A allocation score on fc16-builder: 0 +native_color: B allocation score on fc16-builder2: 0 +native_color: B allocation score on fc16-builder3: 0 +native_color: B allocation score on fc16-builder: 0 +native_color: virt-fencing allocation score on fc16-builder2: 0 +native_color: virt-fencing allocation score on fc16-builder3: 0 +native_color: virt-fencing allocation score on fc16-builder: -INFINITY diff --git a/pengine/test10/bug-5025-2.xml b/pengine/test10/bug-5025-2.xml new file mode 100644 index 0000000000..a695cfc45e --- /dev/null +++ b/pengine/test10/bug-5025-2.xml @@ -0,0 +1,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/bug-5025-3.dot b/pengine/test10/bug-5025-3.dot new file mode 100644 index 0000000000..783192c891 --- /dev/null +++ b/pengine/test10/bug-5025-3.dot @@ -0,0 +1,10 @@ + digraph "g" { +"A_clear_failcount_0 fc16-builder" [ style=bold color="green" fontcolor="black"] +"A_monitor_30000 fc16-builder" [ style=bold color="green" fontcolor="black"] +"A_start_0 fc16-builder" -> "A_monitor_30000 fc16-builder" [ style = bold] +"A_start_0 fc16-builder" [ style=bold color="green" fontcolor="black"] +"A_stop_0 fc16-builder" -> "A_start_0 fc16-builder" [ style = bold] +"A_stop_0 fc16-builder" -> "all_stopped" [ style = bold] +"A_stop_0 fc16-builder" [ style=bold color="green" fontcolor="black"] +"all_stopped" [ style=bold color="green" fontcolor="orange"] +} diff --git a/pengine/test10/bug-5025-3.exp b/pengine/test10/bug-5025-3.exp new file mode 100644 index 0000000000..9e9deb43a5 --- /dev/null +++ b/pengine/test10/bug-5025-3.exp @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/bug-5025-3.scores b/pengine/test10/bug-5025-3.scores new file mode 100644 index 0000000000..22e4e6d65b --- /dev/null +++ b/pengine/test10/bug-5025-3.scores @@ -0,0 +1,10 @@ +Allocation scores: +native_color: A allocation score on fc16-builder2: 0 +native_color: A allocation score on fc16-builder3: 0 +native_color: A allocation score on fc16-builder: 0 +native_color: B allocation score on fc16-builder2: 0 +native_color: B allocation score on fc16-builder3: 0 +native_color: B allocation score on fc16-builder: 0 +native_color: virt-fencing allocation score on fc16-builder2: 0 +native_color: virt-fencing allocation score on fc16-builder3: 0 +native_color: virt-fencing allocation score on fc16-builder: -INFINITY diff --git a/pengine/test10/bug-5025-3.xml b/pengine/test10/bug-5025-3.xml new file mode 100644 index 0000000000..ffd17058a7 --- /dev/null +++ b/pengine/test10/bug-5025-3.xml @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 5152fa5dfa..45b591f419 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2289 +1,2290 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include <../lib/pengine/unpack.h> #include <../pengine/pengine.h> /* GMainLoop *mainloop = NULL; */ void wait_for_refresh(int offset, const char *prefix, int msec); void clean_up(int rc); void crm_diff_update(const char *event, xmlNode * msg); gboolean mon_refresh_display(gpointer user_data); int cib_connect(gboolean full); char *xml_file = NULL; char *as_html_file = NULL; int as_xml = 0; char *pid_file = NULL; char *snmp_target = NULL; char *snmp_community = NULL; gboolean as_console = TRUE;; gboolean simple_status = FALSE; gboolean group_by_node = FALSE; gboolean inactive_resources = FALSE; gboolean web_cgi = FALSE; int reconnect_msec = 5000; gboolean daemonize = FALSE; GMainLoop *mainloop = NULL; guint timer_id = 0; GList *attr_list = NULL; const char *crm_mail_host = NULL; const char *crm_mail_prefix = NULL; const char *crm_mail_from = NULL; const char *crm_mail_to = NULL; const char *external_agent = NULL; const char *external_recipient = NULL; cib_t *cib = NULL; xmlNode *current_cib = NULL; gboolean one_shot = FALSE; gboolean has_warnings = FALSE; gboolean print_failcount = FALSE; gboolean print_operations = FALSE; gboolean print_timing = FALSE; gboolean print_nodes_attr = FALSE; gboolean print_last_updated = TRUE; gboolean print_last_change = TRUE; gboolean print_tickets = FALSE; #define FILTER_STR {"shutdown", "terminate", "standby", "fail-count", \ "last-failure", "probe_complete", "#id", "#uname", \ "#is_dc", NULL} gboolean log_diffs = FALSE; gboolean log_updates = FALSE; long last_refresh = 0; crm_trigger_t *refresh_trigger = NULL; /* * 1.3.6.1.4.1.32723 has been assigned to the project by IANA * http://www.iana.org/assignments/enterprise-numbers */ #define PACEMAKER_PREFIX "1.3.6.1.4.1.32723" #define PACEMAKER_TRAP_PREFIX PACEMAKER_PREFIX ".1" #define snmp_crm_trap_oid PACEMAKER_TRAP_PREFIX #define snmp_crm_oid_node PACEMAKER_TRAP_PREFIX ".1" #define snmp_crm_oid_rsc PACEMAKER_TRAP_PREFIX ".2" #define snmp_crm_oid_task PACEMAKER_TRAP_PREFIX ".3" #define snmp_crm_oid_desc PACEMAKER_TRAP_PREFIX ".4" #define snmp_crm_oid_status PACEMAKER_TRAP_PREFIX ".5" #define snmp_crm_oid_rc PACEMAKER_TRAP_PREFIX ".6" #define snmp_crm_oid_trc PACEMAKER_TRAP_PREFIX ".7" #if CURSES_ENABLED # define print_dot() if(as_console) { \ printw("."); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, "."); \ } #else # define print_dot() fprintf(stdout, "."); #endif #if CURSES_ENABLED # define print_as(fmt, args...) if(as_console) { \ printw(fmt, ##args); \ clrtoeol(); \ refresh(); \ } else { \ fprintf(stdout, fmt, ##args); \ } #else # define print_as(fmt, args...) fprintf(stdout, fmt, ##args); #endif static void blank_screen(void) { #if CURSES_ENABLED int lpc = 0; for (lpc = 0; lpc < LINES; lpc++) { move(lpc, 0); clrtoeol(); } move(0, 0); refresh(); #endif } static gboolean mon_timer_popped(gpointer data) { int rc = cib_ok; if (timer_id > 0) { g_source_remove(timer_id); } rc = cib_connect(TRUE); if (rc != cib_ok) { print_dot(); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return FALSE; } static void mon_cib_connection_destroy(gpointer user_data) { print_as("Connection to the CIB terminated\n"); if (cib) { print_as("Reconnecting..."); cib->cmds->signoff(cib); timer_id = g_timeout_add(reconnect_msec, mon_timer_popped, NULL); } return; } /* * Mainloop signal handler. */ static void mon_shutdown(int nsig) { clean_up(LSB_EXIT_OK); } #if ON_DARWIN # define sighandler_t sig_t #endif #if CURSES_ENABLED static sighandler_t ncurses_winch_handler; static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); mainloop_set_trigger(refresh_trigger); } not_done--; } #endif int cib_connect(gboolean full) { int rc = cib_ok; static gboolean need_pass = TRUE; CRM_CHECK(cib != NULL, return cib_missing); if (getenv("CIB_passwd") != NULL) { need_pass = FALSE; } if (cib->state != cib_connected_query && cib->state != cib_connected_command) { crm_trace("Connecting to the CIB"); if (as_console && need_pass && cib->variant == cib_remote) { need_pass = FALSE; print_as("Password:"); } rc = cib->cmds->signon(cib, crm_system_name, cib_query); if (rc != cib_ok) { return rc; } current_cib = get_cib_copy(cib); mon_refresh_display(NULL); if (full) { if (rc == cib_ok) { rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy); if (rc == cib_NOTSUPPORTED) { print_as("Notification setup failed, won't be able to reconnect after failure"); if (as_console) { sleep(2); } rc = cib_ok; } } if (rc == cib_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if (rc != cib_ok) { print_as("Notification setup failed, could not monitor CIB actions"); if (as_console) { sleep(2); } clean_up(-rc); } } } return rc; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"quiet", 0, 0, 'Q', "\tDisplay only essential output" }, {"-spacer-", 1, 0, '-', "\nModes:"}, {"as-html", 1, 0, 'h', "Write cluster status to the named html file"}, - {"as-xml", 0, 0, 'X', "\tWrite cluster status as xml to stdout."}, + {"as-xml", 0, 0, 'X', "\tWrite cluster status as xml to stdout. This will enable one-shot mode."}, {"web-cgi", 0, 0, 'w', "\tWeb mode with output suitable for cgi"}, {"simple-status", 0, 0, 's', "Display the cluster status once as a simple one line output (suitable for nagios)"}, {"snmp-traps", 1, 0, 'S', "Send SNMP traps to this station", !ENABLE_SNMP}, {"snmp-community", 1, 0, 'C', "Specify community for SNMP traps(default is NULL)", !ENABLE_SNMP}, {"mail-to", 1, 0, 'T', "Send Mail alerts to this user. See also --mail-from, --mail-host, --mail-prefix", !ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', "\nDisplay Options:"}, {"group-by-node", 0, 0, 'n', "\tGroup resources by node" }, {"inactive", 0, 0, 'r', "\tDisplay inactive resources" }, {"failcounts", 0, 0, 'f', "\tDisplay resource fail counts"}, {"operations", 0, 0, 'o', "\tDisplay resource operation history" }, {"timing-details", 0, 0, 't', "\tDisplay resource operation history with timing details" }, {"show-node-attributes", 0, 0, 'A', "Display node attributes" }, {"tickets", 0, 0, 'c', "\t\tDisplay cluster tickets"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"interval", 1, 0, 'i', "\tUpdate frequency in seconds" }, {"one-shot", 0, 0, '1', "\tDisplay the cluster status once on the console and exit"}, {"disable-ncurses",0, 0, 'N', "\tDisable the use of ncurses", !CURSES_ENABLED}, {"daemonize", 0, 0, 'd', "\tRun in the background as a daemon"}, {"pid-file", 1, 0, 'p', "\t(Advanced) Daemon pid file location"}, {"mail-from", 1, 0, 'F', "\tMail alerts should come from the named user", !ENABLE_ESMTP}, {"mail-host", 1, 0, 'H', "\tMail alerts should be sent via the named host", !ENABLE_ESMTP}, {"mail-prefix", 1, 0, 'P', "Subjects for mail alerts should start with this string", !ENABLE_ESMTP}, {"external-agent", 1, 0, 'E', "A program to run when resource operations take place."}, {"external-recipient",1, 0, 'e', "A recipient for your program (assuming you want the program to send something to someone)."}, {"xml-file", 1, 0, 'x', NULL, 1}, {"-spacer-", 1, 0, '-', "\nExamples:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', "Display the cluster status on the console with updates as they occur:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display the cluster status on the console just once then exit:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon -1", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Display your cluster status, group resources by node, and include inactive resources in the list:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --group-by-node --inactive", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:", pcmk_option_paragraph}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --as-html /path/to/docroot/filename.html", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon and export the current cluster status as xml to stdout, then exit.:", pcmk_option_paragraph}, - {"-spacer-", 1, 0, '-', " crm_mon --one-shot --as-xml", pcmk_option_example}, + {"-spacer-", 1, 0, '-', " crm_mon --as-xml", pcmk_option_example}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send email alerts:", pcmk_option_paragraph|!ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --mail-to user@example.com --mail-host mail.example.com", pcmk_option_example|!ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', "Start crm_mon as a background daemon and have it send SNMP alerts:", pcmk_option_paragraph|!ENABLE_SNMP}, {"-spacer-", 1, 0, '-', " crm_mon --daemonize --snmp-traps snmptrapd.example.com", pcmk_option_example|!ENABLE_SNMP}, {NULL, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag; int argerr = 0; int exit_code = 0; int option_index = 0; pid_file = crm_strdup("/tmp/ClusterMon.pid"); crm_log_init_quiet(NULL, LOG_CRIT, FALSE, FALSE, argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); #ifndef ON_DARWIN /* prevent zombies */ signal(SIGCLD, SIG_IGN); #endif if (strcmp(crm_system_name, "crm_mon.cgi") == 0) { web_cgi = TRUE; one_shot = TRUE; } while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(); break; case 'Q': print_last_updated = FALSE; print_last_change = FALSE; break; case 'i': reconnect_msec = crm_get_msec(optarg); break; case 'n': group_by_node = TRUE; break; case 'r': inactive_resources = TRUE; break; case 'd': daemonize = TRUE; break; case 't': print_timing = TRUE; print_operations = TRUE; break; case 'o': print_operations = TRUE; break; case 'f': print_failcount = TRUE; break; case 'A': print_nodes_attr = TRUE; break; case 'c': print_tickets = TRUE; break; case 'p': crm_free(pid_file); pid_file = crm_strdup(optarg); break; case 'x': xml_file = crm_strdup(optarg); one_shot = TRUE; break; case 'h': as_html_file = crm_strdup(optarg); break; case 'X': as_xml = TRUE; + one_shot = TRUE; break; case 'w': web_cgi = TRUE; one_shot = TRUE; break; case 's': simple_status = TRUE; one_shot = TRUE; break; case 'S': snmp_target = optarg; break; case 'T': crm_mail_to = optarg; break; case 'F': crm_mail_from = optarg; break; case 'H': crm_mail_host = optarg; break; case 'P': crm_mail_prefix = optarg; break; case 'E': external_agent = optarg; break; case 'e': external_recipient = optarg; break; case '1': one_shot = TRUE; break; case 'N': as_console = FALSE; break; case 'C': snmp_community = optarg; break; case '$': case '?': crm_help(flag, LSB_EXIT_OK); break; default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', LSB_EXIT_GENERIC); } if (one_shot) { as_console = FALSE; } else if (daemonize) { as_console = FALSE; crm_enable_stderr(FALSE); if (!as_html_file && !snmp_target && !crm_mail_to && !external_agent && !as_xml) { printf ("Looks like you forgot to specify one or more of: --as-html, --as-xml, --mail-to, --snmp-target, --external-agent\n"); crm_help('?', LSB_EXIT_GENERIC); } crm_make_daemon(crm_system_name, TRUE, pid_file); } else if (as_console) { #if CURSES_ENABLED initscr(); cbreak(); noecho(); crm_enable_stderr(FALSE); #else one_shot = TRUE; as_console = FALSE; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } crm_info("Starting %s", crm_system_name); if (xml_file != NULL) { current_cib = filename2xml(xml_file); mon_refresh_display(NULL); return exit_code; } if (current_cib == NULL) { cib = cib_new(); if (!one_shot) { print_as("Attempting connection to the cluster..."); } do { exit_code = cib_connect(!one_shot); if (one_shot) { break; } else if (exit_code != cib_ok) { print_dot(); sleep(reconnect_msec / 1000); } } while (exit_code == cib_connection); if (exit_code != cib_ok) { print_as("\nConnection to cluster failed: %s\n", cib_error2string(exit_code)); if (as_console) { sleep(2); } clean_up(-exit_code); } } if (one_shot) { return exit_code; } mainloop = g_main_new(FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (as_console) { ncurses_winch_handler = signal(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; } #endif refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_run(mainloop); g_main_destroy(mainloop); crm_info("Exiting %s", crm_system_name); clean_up(0); return 0; /* never reached */ } void wait_for_refresh(int offset, const char *prefix, int msec) { int lpc = msec / 1000; struct timespec sleept = { 1, 0 }; if (as_console == FALSE) { timer_id = g_timeout_add(msec, mon_timer_popped, NULL); return; } crm_notice("%sRefresh in %ds...", prefix ? prefix : "", lpc); while (lpc > 0) { #if CURSES_ENABLED move(offset, 0); /* printw("%sRefresh in \033[01;32m%ds\033[00m...", prefix?prefix:"", lpc); */ printw("%sRefresh in %ds...\n", prefix ? prefix : "", lpc); clrtoeol(); refresh(); #endif lpc--; if (lpc == 0) { timer_id = g_timeout_add(1000, mon_timer_popped, NULL); } else { if (nanosleep(&sleept, NULL) != 0) { return; } } } } #define mon_warn(fmt...) do { \ if (!has_warnings) { \ print_as("Warning:"); \ } else { \ print_as(","); \ } \ print_as(fmt); \ has_warnings = TRUE; \ } while(0) static int count_resources(pe_working_set_t * data_set, resource_t * rsc) { int count = 0; GListPtr gIter = NULL; if (rsc == NULL) { gIter = data_set->resources; } else if (rsc->children) { gIter = rsc->children; } else { return is_not_set(rsc->flags, pe_rsc_orphan); } for (; gIter != NULL; gIter = gIter->next) { count += count_resources(data_set, gIter->data); } return count; } static int print_simple_status(pe_working_set_t * data_set) { node_t *dc = NULL; GListPtr gIter = NULL; int nodes_online = 0; int nodes_standby = 0; dc = data_set->dc_node; if (dc == NULL) { mon_warn("No DC "); } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node->details->standby && node->details->online) { nodes_standby++; } else if (node->details->online) { nodes_online++; } else { mon_warn("offline node: %s", node->details->uname); } } if (!has_warnings) { print_as("Ok: %d nodes online", nodes_online); if (nodes_standby > 0) { print_as(", %d standby nodes", nodes_standby); } print_as(", %d resources configured", count_resources(data_set, NULL)); } print_as("\n"); return 0; } extern int get_failcount(node_t * node, resource_t * rsc, int *last_failure, pe_working_set_t * data_set); static void print_date(time_t time) { int lpc = 0; char date_str[26]; asctime_r(localtime(&time), date_str); for (; lpc < 26; lpc++) { if (date_str[lpc] == '\n') { date_str[lpc] = 0; } } print_as("'%s'", date_str); } static void print_rsc_summary(pe_working_set_t * data_set, node_t * node, resource_t * rsc, gboolean all) { gboolean printed = FALSE; time_t last_failure = 0; char *fail_attr = crm_concat("fail-count", rsc->id, '-'); const char *value = g_hash_table_lookup(node->details->attrs, fail_attr); int failcount = char2score(value); /* Get the true value, not the effective one from get_failcount() */ get_failcount(node, rsc, (int *)&last_failure, data_set); crm_free(fail_attr); if (all || failcount || last_failure > 0) { printed = TRUE; print_as(" %s: migration-threshold=%d", rsc->id, rsc->migration_threshold); } if (failcount > 0) { printed = TRUE; print_as(" fail-count=%d", failcount); } if (last_failure > 0) { printed = TRUE; print_as(" last-failure="); print_date(last_failure); } if (printed) { print_as("\n"); } } static void print_rsc_history(pe_working_set_t * data_set, node_t * node, xmlNode * rsc_entry) { GListPtr gIter = NULL; GListPtr op_list = NULL; gboolean print_name = TRUE; GListPtr sorted_op_list = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); xmlNode *rsc_op = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_append(op_list, rsc_op); } } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *value = NULL; const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); int rc = crm_parse_int(op_rc, "0"); if (safe_str_eq(task, CRMD_ACTION_STATUS) && safe_str_eq(interval, "0")) { task = "probe"; } if (rc == 7 && safe_str_eq(task, "probe")) { continue; } else if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { continue; } if (print_name) { print_name = FALSE; if (rsc == NULL) { print_as("Orphan resource: %s", rsc_id); } else { print_rsc_summary(data_set, node, rsc, TRUE); } } print_as(" + (%s) %s:", call, task); if (safe_str_neq(interval, "0")) { print_as(" interval=%sms", interval); } if (print_timing) { int int_value; const char *attr = "last-rc-change"; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=", attr); print_date(int_value); } attr = "last-run"; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=", attr); print_date(int_value); } attr = "exec-time"; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } attr = "queue-time"; value = crm_element_value(xml_op, attr); if (value) { int_value = crm_parse_int(value, NULL); print_as(" %s=%dms", attr, int_value); } } print_as(" rc=%s (%s)\n", op_rc, execra_code2string(rc)); } /* no need to free the contents */ g_list_free(sorted_op_list); } static void print_attr_msg(node_t * node, GListPtr rsc_list, const char *attrname, const char *attrvalue) { GListPtr gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, "type"); if (rsc->children != NULL) { print_attr_msg(node, rsc->children, attrname, attrvalue); } if (safe_str_eq(type, "ping") || safe_str_eq(type, "pingd")) { const char *name = "pingd"; const char *multiplier = NULL; char **host_list = NULL; int host_list_num = 0; int expected_score = 0; if (g_hash_table_lookup(rsc->meta, "name") != NULL) { name = g_hash_table_lookup(rsc->meta, "name"); } /* To identify the resource with the attribute name. */ if (safe_str_eq(name, attrname)) { int value = crm_parse_int(attrvalue, "0"); multiplier = g_hash_table_lookup(rsc->meta, "multiplier"); host_list = g_strsplit(g_hash_table_lookup(rsc->meta, "host_list"), " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); /* pingd multiplier is the same as the default value. */ expected_score = host_list_num * crm_parse_int(multiplier, "1"); /* pingd is abnormal score. */ if (value <= 0) { print_as("\t: Connectivity is lost"); } else if (value < expected_score) { print_as("\t: Connectivity is degraded (Expected=%d)", expected_score); } } } } } static int compare_attribute(gconstpointer a, gconstpointer b) { int rc; rc = strcmp((const char *)a, (const char *)b); return rc; } static void create_attr_list(gpointer name, gpointer value, gpointer data) { int i; const char *filt_str[] = FILTER_STR; CRM_CHECK(name != NULL, return); /* filtering automatic attributes */ for (i = 0; filt_str[i] != NULL; i++) { if (g_str_has_prefix(name, filt_str[i])) { return; } } attr_list = g_list_insert_sorted(attr_list, name, compare_attribute); } static void print_node_attribute(gpointer name, gpointer node_data) { const char *value = NULL; node_t *node = (node_t *) node_data; value = g_hash_table_lookup(node->details->attrs, name); print_as(" + %-32s\t: %-10s", (char *)name, value); print_attr_msg(node, node->details->running_rsc, name, value); print_as("\n"); } static void print_node_summary(pe_working_set_t * data_set, gboolean operations) { xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; xmlNode *node_state = NULL; xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); if (operations) { print_as("\nOperations:\n"); } else { print_as("\nMigration summary:\n"); } for (node_state = __xml_first_child(cib_status); node_state != NULL; node_state = __xml_next(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); if (node == NULL || node->details->online == FALSE) { continue; } print_as("* Node %s: ", crm_element_value(node_state, XML_ATTR_UNAME)); print_as("\n"); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); for (rsc_entry = __xml_first_child(lrm_rsc); rsc_entry != NULL; rsc_entry = __xml_next(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { if (operations) { print_rsc_history(data_set, node, rsc_entry); } else { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc) { print_rsc_summary(data_set, node, rsc, FALSE); } else { print_as(" %s: orphan\n", rsc_id); } } } } } } } static void print_ticket(gpointer name, gpointer value, gpointer data) { ticket_t *ticket = (ticket_t *) value; print_as(" %s\t%s%10s", ticket->id, ticket->granted ? "granted":"revoked", ticket->standby ? " [standby]":""); if (ticket->last_granted > -1) { print_as(" last-granted="); print_date(ticket->last_granted); } print_as("\n"); return; } static void print_cluster_tickets(pe_working_set_t * data_set) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); /* For recording the tickets that are referenced in rsc_ticket constraints * but have never been granted yet. */ unpack_constraints(cib_constraints, data_set); print_as("\nTickets:\n"); g_hash_table_foreach(data_set->tickets, print_ticket, NULL); return; } static char * add_list_element(char *list, const char *value) { int len = 0; int last = 0; if (value == NULL) { return list; } if (list) { last = strlen(list); } len = last + 2; /* +1 space, +1 EOS */ len += strlen(value); crm_realloc(list, len); sprintf(list + last, " %s", value); return list; } static int print_status(pe_working_set_t * data_set) { static int updates = 0; GListPtr gIter = NULL; node_t *dc = NULL; char *since_epoch = NULL; char *online_nodes = NULL; char *offline_nodes = NULL; xmlNode *dc_version = NULL; xmlNode *quorum_node = NULL; xmlNode *stack = NULL; time_t a_time = time(NULL); int print_opts = pe_print_ncurses; const char *quorum_votes = "unknown"; if (as_console) { blank_screen(); } else { print_opts = pe_print_printf; } updates++; dc = data_set->dc_node; print_as("============\n"); if (a_time == (time_t) - 1) { crm_perror(LOG_ERR, "set_node_tstamp(): Invalid time returned"); return 1; } since_epoch = ctime(&a_time); if (since_epoch != NULL && print_last_updated) { print_as("Last updated: %s", since_epoch); } if (print_last_change) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); print_as("Last change: %s", last_written ? last_written : ""); if (user) { print_as(" by %s", user); } if (client) { print_as(" via %s", client); } if (origin) { print_as(" on %s", origin); } print_as("\n"); } stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); if (stack) { print_as("Stack: %s\n", crm_element_value(stack, XML_NVPAIR_ATTR_VALUE)); } dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); if (dc == NULL) { print_as("Current DC: NONE\n"); } else { const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); if (safe_str_neq(dc->details->uname, dc->details->id)) { print_as("Current DC: %s (%s)", dc->details->uname, dc->details->id); } else { print_as("Current DC: %s", dc->details->uname); } print_as(" - partition %s quorum\n", crm_is_true(quorum) ? "with" : "WITHOUT"); if (dc_version) { print_as("Version: %s\n", crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE)); } } quorum_node = get_xpath_object("//nvpair[@name='" XML_ATTR_EXPECTED_VOTES "']", data_set->input, LOG_DEBUG); if (quorum_node) { quorum_votes = crm_element_value(quorum_node, XML_NVPAIR_ATTR_VALUE); } print_as("%d Nodes configured, %s expected votes\n", g_list_length(data_set->nodes), quorum_votes); print_as("%d Resources configured.\n", count_resources(data_set, NULL)); print_as("============\n\n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_mode = NULL; if (node->details->unclean) { if (node->details->online && node->details->unclean) { node_mode = "UNCLEAN (online)"; } else if (node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if (node->details->pending) { node_mode = "pending"; } else if (node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { node_mode = "standby"; } else { node_mode = "OFFLINE (standby)"; } } else if (node->details->online) { node_mode = "online"; if (group_by_node == FALSE) { online_nodes = add_list_element(online_nodes, node->details->uname); continue; } } else { node_mode = "OFFLINE"; if (group_by_node == FALSE) { offline_nodes = add_list_element(offline_nodes, node->details->uname); continue; } } if (safe_str_eq(node->details->uname, node->details->id)) { print_as("Node %s: %s\n", node->details->uname, node_mode); } else { print_as("Node %s (%s): %s\n", node->details->uname, node->details->id, node_mode); } if (group_by_node) { GListPtr gIter2 = NULL; for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { resource_t *rsc = (resource_t *) gIter2->data; rsc->fns->print(rsc, "\t", print_opts | pe_print_rsconly, stdout); } } } if (online_nodes) { print_as("Online: [%s ]\n", online_nodes); crm_free(online_nodes); } if (offline_nodes) { print_as("OFFLINE: [%s ]\n", offline_nodes); crm_free(offline_nodes); } if (group_by_node == FALSE && inactive_resources) { print_as("\nFull list of resources:\n"); } else if (inactive_resources) { print_as("\nInactive resources:\n"); } if (group_by_node == FALSE || inactive_resources) { print_as("\n"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if (is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if (group_by_node == FALSE) { if (partially_active || inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } } else if (is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, NULL, print_opts, stdout); } } } if (print_nodes_attr) { print_as("\nNode Attributes:\n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node == NULL || node->details->online == FALSE) { continue; } attr_list = NULL; print_as("* Node %s:\n", node->details->uname); g_hash_table_foreach(node->details->attrs, create_attr_list, NULL); g_list_foreach(attr_list, print_node_attribute, node); } } if (print_operations || print_failcount) { print_node_summary(data_set, print_operations); } if (xml_has_children(data_set->failed)) { xmlNode *xml_op = NULL; print_as("\nFailed actions:\n"); for (xml_op = __xml_first_child(data_set->failed); xml_op != NULL; xml_op = __xml_next(xml_op)) { int val = 0; const char *id = ID(xml_op); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); const char *last = crm_element_value(xml_op, "last_run"); const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); const char *status = crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS); val = crm_parse_int(status, "0"); print_as(" %s (node=%s, call=%s, rc=%s, status=%s", op_key ? op_key : id, node, call, rc, op_status2text(val)); if (last) { time_t run_at = crm_parse_int(last, "0"); print_as(", last-run=%s, queued=%sms, exec=%sms\n", ctime(&run_at), crm_element_value(xml_op, "exec_time"), crm_element_value(xml_op, "queue_time")); } val = crm_parse_int(rc, "0"); print_as("): %s\n", execra_code2string(val)); } } if (print_tickets) { print_cluster_tickets(data_set); } #if CURSES_ENABLED if (as_console) { refresh(); } #endif return 0; } static int print_xml_status(pe_working_set_t * data_set) { FILE *stream = stdout; GListPtr gIter = NULL; node_t *dc = NULL; xmlNode *stack = NULL; xmlNode *quorum_node = NULL; const char *quorum_votes = "unknown"; dc = data_set->dc_node; fprintf(stream, "\n"); fprintf(stream, "\n", VERSION); /*** SUMMARY ***/ fprintf(stream, " \n"); if (print_last_updated) { time_t now = time(NULL); char *now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ fprintf(stream, " \n", now_str); } if (print_last_change) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); fprintf(stream, " \n", last_written ? last_written : "", user ? user : "", client ? client : "", origin ? origin : ""); } stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); if (stack) { fprintf(stream, " \n", crm_element_value(stack, XML_NVPAIR_ATTR_VALUE)); } if (!dc) { fprintf(stream, " \n"); } else { const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); const char *uname = dc->details->uname; const char *id = dc->details->id; xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); fprintf(stream, " \n", dc_version ? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : "", uname, id, quorum ? (crm_is_true(quorum) ? "true" : "false") : "false"); } quorum_node = get_xpath_object("//nvpair[@name='" XML_ATTR_EXPECTED_VOTES "']", data_set->input, LOG_DEBUG); if (quorum_node) { quorum_votes = crm_element_value(quorum_node, XML_NVPAIR_ATTR_VALUE); } fprintf(stream, " \n", g_list_length(data_set->nodes), quorum_votes); fprintf(stream, " \n", count_resources(data_set, NULL)); fprintf(stream, " \n"); /*** NODES ***/ fprintf(stream, " \n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; const char *node_type = "unknown"; switch (node->details->type) { case node_member: node_type = "member"; break; case node_ping: node_type = "ping"; break; } fprintf(stream, " details->uname); fprintf(stream, "id=\"%s\" ", node->details->id); fprintf(stream, "online=\"%s\" ", node->details->online ? "true" : "false"); fprintf(stream, "standby=\"%s\" ", node->details->standby ? "true" : "false"); fprintf(stream, "standby_onfail=\"%s\" ", node->details->standby_onfail ? "true" : "false"); fprintf(stream, "pending=\"%s\" ", node->details->pending ? "true" : "false"); fprintf(stream, "unclean=\"%s\" ", node->details->unclean ? "true" : "false"); fprintf(stream, "shutdown=\"%s\" ", node->details->shutdown ? "true" : "false"); fprintf(stream, "expected_up=\"%s\" ", node->details->expected_up ? "true" : "false"); fprintf(stream, "is_dc=\"%s\" ", node->details->is_dc ? "true" : "false"); fprintf(stream, "resources_running=\"%d\" ", g_list_length(node->details->running_rsc)); fprintf(stream, "type=\"%s\" ", node_type); if (group_by_node) { GListPtr lpc2 = NULL; fprintf(stream, ">\n"); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { resource_t *rsc = (resource_t *) lpc2->data; rsc->fns->print(rsc, " ", pe_print_xml | pe_print_rsconly, stream); } fprintf(stream, " \n"); } else { fprintf(stream, "/>\n"); } } fprintf(stream, " \n"); /*** RESOURCES ***/ if (group_by_node == FALSE || inactive_resources) { fprintf(stream, " \n"); for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if (is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if (group_by_node == FALSE) { if (partially_active || inactive_resources) { rsc->fns->print(rsc, " ", pe_print_xml, stream); } } else if (is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, " ", pe_print_xml, stream); } } fprintf(stream, " \n"); } fprintf(stream, "\n"); fflush(stream); fclose(stream); return 0; } static int print_html_status(pe_working_set_t * data_set, const char *filename, gboolean web_cgi) { FILE *stream; GListPtr gIter = NULL; node_t *dc = NULL; static int updates = 0; char *filename_tmp = NULL; if (web_cgi) { stream = stdout; fprintf(stream, "Content-type: text/html\n\n"); } else { filename_tmp = crm_concat(filename, "tmp", '.'); stream = fopen(filename_tmp, "w"); if (stream == NULL) { crm_perror(LOG_ERR, "Cannot open %s for writing", filename_tmp); crm_free(filename_tmp); return -1; } } updates++; dc = data_set->dc_node; fprintf(stream, ""); fprintf(stream, ""); fprintf(stream, "Cluster status"); /* content="%d;url=http://webdesign.about.com" */ fprintf(stream, "", reconnect_msec / 1000); fprintf(stream, ""); /*** SUMMARY ***/ fprintf(stream, "

Cluster summary

"); { char *now_str = NULL; time_t now = time(NULL); now_str = ctime(&now); now_str[24] = EOS; /* replace the newline */ fprintf(stream, "Last updated: %s
\n", now_str); } if (dc == NULL) { fprintf(stream, "Current DC: NONE
"); } else { fprintf(stream, "Current DC: %s (%s)
", dc->details->uname, dc->details->id); } fprintf(stream, "%d Nodes configured.
", g_list_length(data_set->nodes)); fprintf(stream, "%d Resources configured.
", count_resources(data_set, NULL)); /*** CONFIG ***/ fprintf(stream, "

Config Options

\n"); fprintf(stream, "\n"); fprintf(stream, "\n", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); fprintf(stream, "\n", is_set(data_set->flags, pe_flag_symmetric_cluster) ? "" : "a-"); fprintf(stream, "\n
STONITH of failed nodes:%s
Cluster is:%ssymmetric
No Quorum Policy:"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: fprintf(stream, "Freeze resources"); break; case no_quorum_stop: fprintf(stream, "Stop ALL resources"); break; case no_quorum_ignore: fprintf(stream, "Ignore"); break; case no_quorum_suicide: fprintf(stream, "Suicide"); break; } fprintf(stream, "\n
\n"); /*** NODE LIST ***/ fprintf(stream, "

Node List

\n"); fprintf(stream, "
    \n"); for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; fprintf(stream, "
  • "); if (node->details->standby_onfail && node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "standby (on-fail)\n"); } else if (node->details->standby && node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "standby\n"); } else if (node->details->standby) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "OFFLINE (standby)\n"); } else if (node->details->online) { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "online\n"); } else { fprintf(stream, "Node: %s (%s): %s", node->details->uname, node->details->id, "OFFLINE\n"); } if (group_by_node) { GListPtr lpc2 = NULL; fprintf(stream, "
      \n"); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { resource_t *rsc = (resource_t *) lpc2->data; fprintf(stream, "
    • "); rsc->fns->print(rsc, NULL, pe_print_html | pe_print_rsconly, stream); fprintf(stream, "
    • \n"); } fprintf(stream, "
    \n"); } fprintf(stream, "
  • \n"); } fprintf(stream, "
\n"); if (group_by_node && inactive_resources) { fprintf(stream, "

Inactive Resources

\n"); } else if (group_by_node == FALSE) { fprintf(stream, "

Resource List

\n"); } if (group_by_node == FALSE || inactive_resources) { for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); if (is_set(rsc->flags, pe_rsc_orphan) && is_active == FALSE) { continue; } else if (group_by_node == FALSE) { if (partially_active || inactive_resources) { rsc->fns->print(rsc, NULL, pe_print_html, stream); } } else if (is_active == FALSE && inactive_resources) { rsc->fns->print(rsc, NULL, pe_print_html, stream); } } } fprintf(stream, ""); fflush(stream); fclose(stream); if (!web_cgi) { if (rename(filename_tmp, filename) != 0) { crm_perror(LOG_ERR, "Unable to rename %s->%s", filename_tmp, filename); } crm_free(filename_tmp); } return 0; } #if ENABLE_SNMP # include # include # include # include # include # include # define add_snmp_field(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ int s_rc = snmp_add_var(list, name, name_length, 's', (value)); \ if(s_rc != 0) { \ crm_err("Could not add %s=%s rc=%d", oid_string, value, s_rc); \ } else { \ crm_trace("Added %s=%s", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ # define add_snmp_field_int(list, oid_string, value) do { \ oid name[MAX_OID_LEN]; \ size_t name_length = MAX_OID_LEN; \ if (snmp_parse_oid(oid_string, name, &name_length)) { \ if(NULL == snmp_pdu_add_variable( \ list, name, name_length, ASN_INTEGER, \ (u_char *) & value, sizeof(value))) { \ crm_err("Could not add %s=%d", oid_string, value); \ } else { \ crm_trace("Added %s=%d", oid_string, value); \ } \ } else { \ crm_err("Could not parse OID: %s", oid_string); \ } \ } while(0) \ static int snmp_input(int operation, netsnmp_session * session, int reqid, netsnmp_pdu * pdu, void *magic) { return 1; } static netsnmp_session * crm_snmp_init(const char *target, char *community) { static netsnmp_session *session = NULL; # ifdef NETSNMPV53 char target53[128]; snprintf(target53, sizeof(target53), "%s:162", target); # endif if (session) { return session; } if (target == NULL) { return NULL; } if (get_crm_log_level() > LOG_INFO) { char *debug_tokens = crm_strdup("run:shell,snmptrap,tdomain"); debug_register_tokens(debug_tokens); snmp_set_do_debugging(1); } crm_malloc0(session, sizeof(netsnmp_session)); snmp_sess_init(session); session->version = SNMP_VERSION_2c; session->callback = snmp_input; session->callback_magic = NULL; if (community) { session->community_len = strlen(community); session->community = (unsigned char *)community; } session = snmp_add(session, # ifdef NETSNMPV53 netsnmp_tdomain_transport(target53, 0, "udp"), # else netsnmp_transport_open_client("snmptrap", target), # endif NULL, NULL); if (session == NULL) { snmp_sess_perror("Could not create snmp transport", session); } return session; } #endif static int send_snmp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { int ret = 1; #if ENABLE_SNMP static oid snmptrap_oid[] = { 1, 3, 6, 1, 6, 3, 1, 1, 4, 1, 0 }; static oid sysuptime_oid[] = { 1, 3, 6, 1, 2, 1, 1, 3, 0 }; netsnmp_pdu *trap_pdu; netsnmp_session *session = crm_snmp_init(snmp_target, snmp_community); trap_pdu = snmp_pdu_create(SNMP_MSG_TRAP2); if (!trap_pdu) { crm_err("Failed to create SNMP notification"); return SNMPERR_GENERR; } if (1) { /* send uptime */ char csysuptime[20]; time_t now = time(NULL); sprintf(csysuptime, "%ld", now); snmp_add_var(trap_pdu, sysuptime_oid, sizeof(sysuptime_oid) / sizeof(oid), 't', csysuptime); } /* Indicate what the trap is by setting snmpTrapOid.0 */ ret = snmp_add_var(trap_pdu, snmptrap_oid, sizeof(snmptrap_oid) / sizeof(oid), 'o', snmp_crm_trap_oid); if (ret != 0) { crm_err("Failed set snmpTrapOid.0=%s", snmp_crm_trap_oid); return ret; } /* Add extries to the trap */ add_snmp_field(trap_pdu, snmp_crm_oid_rsc, rsc); add_snmp_field(trap_pdu, snmp_crm_oid_node, node); add_snmp_field(trap_pdu, snmp_crm_oid_task, task); add_snmp_field(trap_pdu, snmp_crm_oid_desc, desc); add_snmp_field_int(trap_pdu, snmp_crm_oid_rc, rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_trc, target_rc); add_snmp_field_int(trap_pdu, snmp_crm_oid_status, status); /* Send and cleanup */ ret = snmp_send(session, trap_pdu); if (ret == 0) { /* error */ snmp_sess_perror("Could not send SNMP trap", session); snmp_free_pdu(trap_pdu); ret = SNMPERR_GENERR; } else { ret = SNMPERR_SUCCESS; } #else crm_err("Sending SNMP traps is not supported by this installation"); #endif return ret; } #if ENABLE_ESMTP # include # include static void print_recipient_status(smtp_recipient_t recipient, const char *mailbox, void *arg) { const smtp_status_t *status; status = smtp_recipient_status(recipient); printf("%s: %d %s", mailbox, status->code, status->text); } static void event_cb(smtp_session_t session, int event_no, void *arg, ...) { int *ok; va_list alist; va_start(alist, arg); switch (event_no) { case SMTP_EV_CONNECT: case SMTP_EV_MAILSTATUS: case SMTP_EV_RCPTSTATUS: case SMTP_EV_MESSAGEDATA: case SMTP_EV_MESSAGESENT: case SMTP_EV_DISCONNECT: break; case SMTP_EV_WEAK_CIPHER:{ int bits = va_arg(alist, long); ok = va_arg(alist, int *); crm_debug("SMTP_EV_WEAK_CIPHER, bits=%d - accepted.", bits); *ok = 1; break; } case SMTP_EV_STARTTLS_OK: crm_debug("SMTP_EV_STARTTLS_OK - TLS started here."); break; case SMTP_EV_INVALID_PEER_CERTIFICATE:{ long vfy_result = va_arg(alist, long); ok = va_arg(alist, int *); /* There is a table in handle_invalid_peer_certificate() of mail-file.c */ crm_err("SMTP_EV_INVALID_PEER_CERTIFICATE: %ld", vfy_result); *ok = 1; break; } case SMTP_EV_NO_PEER_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_NO_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_WRONG_PEER_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_WRONG_PEER_CERTIFICATE - accepted."); *ok = 1; break; case SMTP_EV_NO_CLIENT_CERTIFICATE: ok = va_arg(alist, int *); crm_debug("SMTP_EV_NO_CLIENT_CERTIFICATE - accepted."); *ok = 1; break; default: crm_debug("Got event: %d - ignored.\n", event_no); } va_end(alist); } #endif #define BODY_MAX 2048 #if ENABLE_ESMTP static void crm_smtp_debug(const char *buf, int buflen, int writing, void *arg) { char type = 0; int lpc = 0, last = 0, level = *(int *)arg; if (writing == SMTP_CB_HEADERS) { type = 'H'; } else if (writing) { type = 'C'; } else { type = 'S'; } for (; lpc < buflen; lpc++) { switch (buf[lpc]) { case 0: case '\n': if (last > 0) { do_crm_log(level, " %.*s", lpc - last, buf + last); } else { do_crm_log(level, "%c: %.*s", type, lpc - last, buf + last); } last = lpc + 1; break; } } } #endif static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = crm_itoa(rc); char *status_s = crm_itoa(status); char *target_rc_s = crm_itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", external_recipient, external_agent); setenv("CRM_notify_recipient", external_recipient, 1); setenv("CRM_notify_node", node, 1); setenv("CRM_notify_rsc", rsc, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { cl_perror("notification fork() failed."); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(external_agent, external_agent, NULL); } crm_trace("Finished running custom notification program '%s'.", external_agent); crm_free(target_rc_s); crm_free(status_s); crm_free(rc_s); return 0; } static int send_smtp_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { #if ENABLE_ESMTP smtp_session_t session; smtp_message_t message; auth_context_t authctx; struct sigaction sa; int len = 20; int noauth = 1; int smtp_debug = LOG_DEBUG; char crm_mail_body[BODY_MAX]; char *crm_mail_subject = NULL; memset(&sa, 0, sizeof(struct sigaction)); if (node == NULL) { node = "-"; } if (rsc == NULL) { rsc = "-"; } if (desc == NULL) { desc = "-"; } if (crm_mail_to == NULL) { return 1; } if (crm_mail_host == NULL) { crm_mail_host = "localhost:25"; } if (crm_mail_prefix == NULL) { crm_mail_prefix = "Cluster notification"; } crm_debug("Sending '%s' mail to %s via %s", crm_mail_prefix, crm_mail_to, crm_mail_host); len += strlen(crm_mail_prefix); len += strlen(task); len += strlen(rsc); len += strlen(node); len += strlen(desc); len++; crm_malloc0(crm_mail_subject, len); snprintf(crm_mail_subject, len, "%s - %s event for %s on %s: %s\r\n", crm_mail_prefix, task, rsc, node, desc); len = 0; len += snprintf(crm_mail_body + len, BODY_MAX - len, "\r\n%s\r\n", crm_mail_prefix); len += snprintf(crm_mail_body + len, BODY_MAX - len, "====\r\n\r\n"); if (rc == target_rc) { len += snprintf(crm_mail_body + len, BODY_MAX - len, "Completed operation %s for resource %s on %s\r\n", task, rsc, node); } else { len += snprintf(crm_mail_body + len, BODY_MAX - len, "Operation %s for resource %s on %s failed: %s\r\n", task, rsc, node, desc); } len += snprintf(crm_mail_body + len, BODY_MAX - len, "\r\nDetails:\r\n"); len += snprintf(crm_mail_body + len, BODY_MAX - len, "\toperation status: (%d) %s\r\n", status, op_status2text(status)); if (status == LRM_OP_DONE) { len += snprintf(crm_mail_body + len, BODY_MAX - len, "\tscript returned: (%d) %s\r\n", rc, execra_code2string(rc)); len += snprintf(crm_mail_body + len, BODY_MAX - len, "\texpected return value: (%d) %s\r\n", target_rc, execra_code2string(target_rc)); } auth_client_init(); session = smtp_create_session(); message = smtp_add_message(session); smtp_starttls_enable(session, Starttls_ENABLED); sa.sa_handler = SIG_IGN; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sigaction(SIGPIPE, &sa, NULL); smtp_set_server(session, crm_mail_host); authctx = auth_create_context(); auth_set_mechanism_flags(authctx, AUTH_PLUGIN_PLAIN, 0); smtp_set_eventcb(session, event_cb, NULL); /* Now tell libESMTP it can use the SMTP AUTH extension. */ if (!noauth) { crm_debug("Adding authentication context"); smtp_auth_set_context(session, authctx); } if (crm_mail_from == NULL) { struct utsname us; char auto_from[BODY_MAX]; CRM_ASSERT(uname(&us) == 0); snprintf(auto_from, BODY_MAX, "crm_mon@%s", us.nodename); smtp_set_reverse_path(message, auto_from); } else { /* NULL is ok */ smtp_set_reverse_path(message, crm_mail_from); } smtp_set_header(message, "To", NULL /*phrase */ , NULL /*addr */ ); /* "Phrase" */ smtp_add_recipient(message, crm_mail_to); /* Set the Subject: header and override any subject line in the message headers. */ smtp_set_header(message, "Subject", crm_mail_subject); smtp_set_header_option(message, "Subject", Hdr_OVERRIDE, 1); smtp_set_message_str(message, crm_mail_body); smtp_set_monitorcb(session, crm_smtp_debug, &smtp_debug, 1); if (smtp_start_session(session)) { char buf[128]; int rc = smtp_errno(); crm_err("SMTP server problem: %s (%d)", smtp_strerror(rc, buf, sizeof buf), rc); } else { char buf[128]; int rc = smtp_errno(); const smtp_status_t *smtp_status = smtp_message_transfer_status(message); if (rc != 0) { crm_err("SMTP server problem: %s (%d)", smtp_strerror(rc, buf, sizeof buf), rc); } crm_info("Send status: %d %s", smtp_status->code, crm_str(smtp_status->text)); smtp_enumerate_recipients(message, print_recipient_status, NULL); } smtp_destroy_session(session); auth_destroy_context(authctx); auth_client_exit(); #endif return 0; } static void handle_rsc_op(xmlNode * rsc_op) { int rc = -1; int status = -1; int action = -1; int interval = 0; int target_rc = -1; int transition_num = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *node = NULL; const char *magic = NULL; const char *id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY); char *update_te_uuid = NULL; xmlNode *n = rsc_op; if (id == NULL) { /* Compatability with <= 1.1.5 */ id = ID(rsc_op); } magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return; } if (FALSE == decode_transition_magic(magic, &update_te_uuid, &transition_num, &action, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return; } if (parse_op_key(id, &rsc, &task, &interval) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } while (n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) { n = n->parent; } node = crm_element_value(n, XML_ATTR_UNAME); if (node == NULL) { node = ID(n); } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? */ desc = cib_error2string(cib_ok); if (status == LRM_OP_DONE && target_rc == rc) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == EXECRA_NOT_RUNNING) { notify = FALSE; } } else if (status == LRM_OP_DONE) { desc = execra_code2string(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = op_status2text(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && snmp_target) { send_snmp_trap(node, rsc, task, target_rc, rc, status, desc); } if (notify && crm_mail_to) { send_smtp_trap(node, rsc, task, target_rc, rc, status, desc); } if (notify && external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: crm_free(update_te_uuid); crm_free(rsc); crm_free(task); } void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; long now = time(NULL); const char *op = NULL; unsigned int log_level = LOG_INFO; xmlNode *diff = NULL; xmlNode *cib_last = NULL; xmlNode *update = get_message_xml(msg, F_CIB_UPDATE); print_dot(); if (msg == NULL) { crm_err("NULL update"); return; } crm_element_value_int(msg, F_CIB_RC, &rc); op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); if (rc < cib_ok) { log_level = LOG_WARNING; do_crm_log(log_level, "[%s] %s ABORTED: %s", event, op, cib_error2string(rc)); return; } if (current_cib != NULL) { cib_last = current_cib; current_cib = NULL; rc = cib_process_diff(op, cib_force_diff, NULL, NULL, diff, cib_last, ¤t_cib, NULL); if (rc != cib_ok) { crm_debug("Update didn't apply, requesting full copy: %s", cib_error2string(rc)); free_xml(current_cib); current_cib = NULL; } } if (current_cib == NULL) { current_cib = get_cib_copy(cib); } if (log_diffs && diff) { log_cib_diff(LOG_DEBUG, diff, op); } if (log_updates && update != NULL) { crm_log_xml_debug(update, "raw_update"); } if (diff && (crm_mail_to || snmp_target || external_agent)) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); if (xpathObj && xpathObj->nodesetval->nodeNr > 0) { int lpc = 0, max = xpathObj->nodesetval->nodeNr; for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op); } } if (xpathObj) { xmlXPathFreeObject(xpathObj); } } if ((now - last_refresh) > (reconnect_msec / 1000)) { /* Force a refresh */ mon_refresh_display(NULL); } else { mainloop_set_trigger(refresh_trigger); } free_xml(cib_last); } gboolean mon_refresh_display(gpointer user_data) { xmlNode *cib_copy = copy_xml(current_cib); pe_working_set_t data_set; last_refresh = time(NULL); if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { if (cib) { cib->cmds->signoff(cib); } print_as("Upgrade failed: %s", cib_error2string(cib_dtd_validation)); if (as_console) { sleep(2); } clean_up(LSB_EXIT_GENERIC); return FALSE; } set_working_set_defaults(&data_set); data_set.input = cib_copy; cluster_status(&data_set); if (as_html_file || web_cgi) { if (print_html_status(&data_set, as_html_file, web_cgi) != 0) { fprintf(stderr, "Critical: Unable to output html file\n"); clean_up(LSB_EXIT_GENERIC); } } else if (as_xml) { if (print_xml_status(&data_set) != 0) { fprintf(stderr, "Critical: Unable to output xml file\n"); clean_up(LSB_EXIT_GENERIC); } } else if (daemonize) { /* do nothing */ } else if (simple_status) { print_simple_status(&data_set); if (has_warnings) { clean_up(LSB_EXIT_GENERIC); } } else { print_status(&data_set); } cleanup_calculations(&data_set); return TRUE; } /* * De-init ncurses, signoff from the CIB and deallocate memory. */ void clean_up(int rc) { #if ENABLE_SNMP netsnmp_session *session = crm_snmp_init(NULL, NULL); if (session) { snmp_close(session); snmp_shutdown("snmpapp"); } #endif #if CURSES_ENABLED if (as_console) { as_console = FALSE; echo(); nocbreak(); endwin(); } #endif if (cib != NULL) { cib->cmds->signoff(cib); cib_delete(cib); cib = NULL; } crm_free(as_html_file); crm_free(xml_file); crm_free(pid_file); if (rc >= 0) { exit(rc); } return; }