diff --git a/cts/CM_lha.py b/cts/CM_lha.py index 77c3bb593c..90adb09de2 100755 --- a/cts/CM_lha.py +++ b/cts/CM_lha.py @@ -1,628 +1,600 @@ '''CTS: Cluster Testing System: LinuxHA v2 dependent modules... ''' __copyright__=''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Additional Audits, Revised Start action, Default Configuration: Copyright (C) 2004 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import os, sys, warnings from cts import CTS from cts.CTSvars import * from cts.CTS import * from cts.CIB import * from cts.CTStests import AuditResource try: from xml.dom.minidom import * except ImportError: sys.__stdout__.write("Python module xml.dom.minidom not found\n") sys.__stdout__.write("Please install python-xml or similar before continuing\n") sys.__stdout__.flush() sys.exit(1) ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class crm_lha(ClusterManager): ''' The linux-ha version 2 cluster manager class. It implements the things we need to talk to and manipulate linux-ha version 2 clusters ''' def __init__(self, Environment, randseed=None): ClusterManager.__init__(self, Environment, randseed=randseed) #HeartbeatCM.__init__(self, Environment, randseed=randseed) self.fastfail = 0 self.clear_cache = 0 self.cib_installed = 0 self.config = None self.cluster_monitor = 0 self.use_short_names = 1 self.update({ "Name" : "crm-lha", "DeadTime" : 300, "StartTime" : 300, # Max time to start up "StableTime" : 30, "StartCmd" : CTSvars.INITDIR+"/heartbeat start > /dev/null 2>&1", "StopCmd" : CTSvars.INITDIR+"/heartbeat stop > /dev/null 2>&1", "ElectionCmd" : "crmadmin -E %s", "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null", "EpocheCmd" : "crm_node -H -e", "QuorumCmd" : "crm_node -H -q", "ParitionCmd" : "crm_node -H -p", "CibQuery" : "cibadmin -Ql", # 300,000 == 5 minutes - "ExecuteRscOp" : "lrmadmin -n %s -E %s %s 300000 %d EVERYTIME 2>&1", + "RscRunning" : CTSvars.CTS_home + "/lrmd_test -R -r %s", "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null "ReduceCommCmd" : "", "RestoreCommCmd" : "tc qdisc del dev lo root", "LogFileName" : Environment["LogFileName"], "UUIDQueryCmd" : "crmadmin -N", "StandbyCmd" : "crm_attribute -Q -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_attribute -GQ -U %s -n standby -l forever -d off 2>/dev/null", # Patterns to look for in the log files for various occasions... "Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions "Pat:Local_started" : "%s .*The local CRM is operational", "Pat:Slave_started" : "%s .*State transition.*-> S_NOT_DC", "Pat:Master_started" : "%s .* State transition.*-> S_IDLE", "Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s logd:.*Exiting write process", "Pat:They_stopped" : "%s .*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", "Pat:ChildKilled" : "%s heartbeat.*%s.*killed by signal 9", "Pat:ChildRespawn" : "%s heartbeat.*Respawning client.*%s", "Pat:ChildExit" : "(ERROR|error): Client .* exited with return code", "Pat:We_fenced" : "crmd.* Executing .* fencing operation .* on %s", "Pat:They_fenced" : "stonith.* log_operation: Operation .* for host '%s' with device .* returned: 0", "Pat:They_fenced_offset" : "for host '", # Bad news Regexes. Should never occur. "BadRegexes" : ( r" trace:", r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r"No need to invoke the TE", r"global_timer_callback:", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", ), }) if self.Env["DoBSC"]: del self["Pat:They_stopped"] del self["Pat:Logd_stopped"] self.Env["use_logd"] = 0 self._finalConditions() self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} self.CibFactory = ConfigFactory(self) self.cib = self.CibFactory.createConfig(self.Env["Schema"]) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' if 1: return [ "(ERROR|error): crm_abort: crm_glib_handler: ", "(ERROR|error): Message hist queue is filling up", "stonithd.*CRIT: external_hostlist: 'vmware gethosts' returned an empty hostlist", "stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.", "pengine.*Preventing .* from re-starting", ] return [] def install_config(self, node): if not self.ns.WaitForNodeToComeUp(node): self.log("Node %s is not up." % node) return None if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") # Only install the CIB on the first node, all the other ones will pick it up from there if self.cib_installed == 1: return None self.cib_installed = 1 if self.Env["CIBfilename"] == None: self.log("Installing Generated CIB on node %s" %(node)) self.cib.install(node) else: self.log("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node)) if 0 != self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)): raise ValueError("Can not scp file to %s %d"%(node)) self.rsh(node, "chown "+CTSvars.CRM_DAEMON_USER+" "+CTSvars.CRM_CONFIG_DIR+"/cib.xml") def prepare(self): '''Finish the Initialization process. Prepare to test...''' self.partitions_expected = 1 for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.unisolate_node(node) self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' watchpats = [ ] watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)") watchpats.append(self["Pat:Slave_started"]%node) watchpats.append(self["Pat:Master_started"]%node) idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterIdle") idle_watch.setwatch() out = self.rsh(node, self["StatusCmd"]%node, 1) self.debug("Node %s status: '%s'" %(node, out)) if not out or string.find(out, 'ok') < 0: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" %(node, "down", self.ShouldBeStatus[node])) self.ShouldBeStatus[node]="down" return 0 if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s: %s" %(node, "up", self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node]="up" # check the output first - because syslog-ng looses messages if string.find(out, 'S_NOT_DC') != -1: # Up and stable return 2 if string.find(out, 'S_IDLE') != -1: # Up and stable return 2 # fall back to syslog-ng and wait if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" %(node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... def node_stable(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) == 2: return 1 self.log("Warn: Node %s not stable" %(node)) return None def partition_stable(self, nodes, timeout=None): watchpats = [ ] watchpats.append("Current ping state: S_IDLE") watchpats.append(self["Pat:DC_IDLE"]) self.debug("Waiting for cluster stability...") if timeout == None: timeout = self["DeadTime"] idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterStable", timeout) idle_watch.setwatch() any_up = 0 for node in self.Env["nodes"]: # have each node dump its current state if self.ShouldBeStatus[node] == "up": self.rsh(node, self["StatusCmd"] %node, 1) any_up = 1 if any_up == 0: self.debug("Cluster is inactive") return 1 ret = idle_watch.look() while ret: self.debug(ret) for node in nodes: if re.search(node, ret): return 1 ret = idle_watch.look() self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout)) return None def cluster_stable(self, timeout=None, double_check=False): partitions = self.find_partitions() for partition in partitions: if not self.partition_stable(partition, timeout): return None if double_check: # Make sure we are really stable and that all resources, # including those that depend on transient node attributes, # are started if they were going to be time.sleep(5) for partition in partitions: if not self.partition_stable(partition, timeout): return None return 1 def is_node_dc(self, node, status_line=None): rc = 0 if not status_line: status_line = self.rsh(node, self["StatusCmd"]%node, 1) if not status_line: rc = 0 elif string.find(status_line, 'S_IDLE') != -1: rc = 1 elif string.find(status_line, 'S_INTEGRATION') != -1: rc = 1 elif string.find(status_line, 'S_FINALIZE_JOIN') != -1: rc = 1 elif string.find(status_line, 'S_POLICY_ENGINE') != -1: rc = 1 elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1: rc = 1 return rc def active_resources(self, node): # [SM].* {node} matches Started, Slave, Master # Stopped wont be matched as it wont include {node} (rc, output) = self.rsh(node, """crm_resource -c""", None) resources = [] for line in output: if re.search("^Resource", line): tmp = AuditResource(self, line) if tmp.type == "primitive" and tmp.host == node: resources.append(tmp.id) return resources - def ResourceOp(self, resource, op, node, interval=0, app="lrmadmin"): - ''' - Execute an operation on a resource - ''' - cmd = self["ExecuteRscOp"] % (app, resource, op, interval) - (rc, lines) = self.rsh(node, cmd, None) - - if rc == 127: - self.log("Command '%s' failed. Binary not installed?" % cmd) - for line in lines: - self.log("Output: "+line) - - return rc - def ResourceLocation(self, rid): ResourceNodes = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": - dummy = 0 - rc = self.ResourceOp(rid, "monitor", node) - # Strange error codes from remote_py - # 65024 == not installed - # 2048 == 8 - # 1792 == 7 - # 0 == 0 - if rc == 127: - dummy = 1 - elif rc == 254 or rc == 65024: - dummy = 1 - #self.debug("%s is not installed on %s: %d" % (rid, node, rc)) + cmd = self["RscRunning"] % (rid) + (rc, lines) = self.rsh(node, cmd, None) - elif rc == 0 or rc == 2048 or rc == 8: + if rc == 127: + self.log("Command '%s' failed. Binary not installed?" % cmd) + for line in lines: + self.log("Output: "+line) + elif rc == 0: ResourceNodes.append(node) - elif rc == 7 or rc == 1792: - dummy = 1 - #self.debug("%s is not running on %s: %d" % (rid, node, rc)) - - else: - # not active on this node? - self.log("Unknown rc code for %s on %s: %d" % (rid, node, rc)) - return ResourceNodes def find_partitions(self): ccm_partitions = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": partition = self.rsh(node, self["ParitionCmd"], 1) if not partition: self.log("no partition details for %s" %node) elif len(partition) > 2: partition = partition[:-1] found=0 for a_partition in ccm_partitions: if partition == a_partition: found = 1 if found == 0: self.debug("Adding partition from %s: %s" %(node, partition)) ccm_partitions.append(partition) else: self.debug("Partition '%s' from %s is consistent with existing entries" %(partition, node)) else: self.log("bad partition details for %s" %node) else: self.debug("Node %s is down... skipping" %node) return ccm_partitions def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes if not node_list: node_list = self.Env["nodes"] for node in node_list: if self.ShouldBeStatus[node] == "up": quorum = self.rsh(node, self["QuorumCmd"], 1) if string.find(quorum, "1") != -1: return 1 elif string.find(quorum, "0") != -1: return 0 else: self.debug("WARN: Unexpected quorum test result from "+ node +":"+ quorum) return 0 def Components(self): complist = [] common_ignore = [ "Pending action:", "(ERROR|error): crm_log_message_adv:", "(ERROR|error): MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "Action A_RECOVER .* not supported", "(ERROR|error): stonithd_op_result_ready: not signed on", "pingd.*(ERROR|error): send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", "do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", ] stonith_ignore = [ "(ERROR|error): stonithd_signon: ", "update_failcount: Updating failcount for child_DoFencing", "(ERROR|error): te_connect_stonith: Sign-in failed: triggered a retry", "lrmd.*(ERROR|error): cl_get_value: wrong argument (reply)", "lrmd.*(ERROR|error): is_expected_msg:.* null message", "lrmd.*(ERROR|error): stonithd_receive_ops_result failed.", ] stonith_ignore.extend(common_ignore) ccm_ignore = [ "(ERROR|error): get_channel_token: No reply message - disconnected" ] ccm_ignore.extend(common_ignore) ccm = Process(self, "ccm", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "CCM connection appears to have failed", "crmd.*Action A_RECOVER .* not supported", "crmd.*Input I_TERMINATE from do_recover", "Exiting to recover from CCM connection failure", "crmd.*do_exit: Could not recover from internal error", "crmd.*I_ERROR.*(ccm_dispatch|crmd_cib_connection_destroy)", "crmd.*exited with return code 2.", "attrd.*exited with return code 1.", "cib.*exited with return code 2.", # Not if it was fenced # "A new node joined the cluster", # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", # "Processing I_NODE_JOIN:.* cause=C_HA_MESSAGE", # "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN", "State transition S_STARTING -> S_PENDING", ], badnews_ignore = ccm_ignore) cib = Process(self, "cib", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "Lost connection to the CIB service", "Connection to the CIB terminated...", "crmd.*Input I_TERMINATE from do_recover", "crmd.*I_ERROR.*crmd_cib_connection_destroy", "crmd.*do_exit: Could not recover from internal error", "crmd.*exited with return code 2.", "attrd.*exited with return code 1.", ], badnews_ignore = common_ignore) lrmd = Process(self, "lrmd", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "LRM Connection failed", "crmd.*I_ERROR.*lrm_connection_destroy", "State transition S_STARTING -> S_PENDING", "crmd.*Input I_TERMINATE from do_recover", "crmd.*do_exit: Could not recover from internal error", "crmd.*exited with return code 2.", ], badnews_ignore = common_ignore) crmd = Process(self, "crmd", triggersreboot=self.fastfail, pats = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", "State transition .* S_IDLE", "State transition S_STARTING -> S_PENDING", ], badnews_ignore = common_ignore) pengine = Process(self, "pengine", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "crmd.*exited with return code 2.", "crmd.*Input I_TERMINATE from do_recover", "crmd.*do_exit: Could not recover from internal error", "crmd.*CRIT: pe_connection_destroy: Connection to the Policy Engine failed", "crmd.*I_ERROR.*save_cib_contents", "crmd.*exited with return code 2.", ], badnews_ignore = common_ignore, dc_only=1) if self.Env["DoFencing"] == 1 : complist.append(Process(self, "stoniths", triggersreboot=self.fastfail, dc_pats = [ "crmd.*CRIT: tengine_stonith_connection_destroy: Fencing daemon connection failed", "Attempting connection to fencing daemon", "te_connect_stonith: Connected", ], badnews_ignore = stonith_ignore)) if self.fastfail == 0: ccm.pats.extend([ "attrd .* exited with return code 1", "(ERROR|error): Respawning client .*attrd", "cib.* exited with return code 2", "(ERROR|error): Respawning client .*cib", "crmd.* exited with return code 2", "(ERROR|error): Respawning client .*crmd" ]) cib.pats.extend([ "attrd.* exited with return code 1", "(ERROR|error): Respawning client .*attrd", "crmd.* exited with return code 2", "(ERROR|error): Respawning client .*crmd" ]) lrmd.pats.extend([ "crmd.* exited with return code 2", "(ERROR|error): Respawning client .*crmd" ]) pengine.pats.extend([ "(ERROR|error): Respawning client .*crmd" ]) complist.append(ccm) complist.append(cib) complist.append(lrmd) complist.append(crmd) complist.append(pengine) return complist def NodeUUID(self, node): lines = self.rsh(node, self["UUIDQueryCmd"], 1) for line in lines: self.debug("UUIDLine:"+ line) m = re.search(r'%s.+\((.+)\)' % node, line) if m: return m.group(1) return "" def StandbyStatus(self, node): out=self.rsh(node, self["StandbyQueryCmd"]%node, 1) if not out: return "off" out = out[:-1] self.debug("Standby result: "+out) return out # status == "on" : Enter Standby mode # status == "off": Enter Active mode def SetStandbyMode(self, node, status): current_status = self.StandbyStatus(node) cmd = self["StandbyCmd"] % (node, status) ret = self.rsh(node, cmd) return True ####################################################################### # # A little test code... # # Which you are advised to completely ignore... # ####################################################################### if __name__ == '__main__': pass diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h index 1b6b7005e1..c42da62224 100644 --- a/include/crm/lrmd.h +++ b/include/crm/lrmd.h @@ -1,413 +1,415 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #ifndef LRMD__H #define LRMD__H typedef struct lrmd_s lrmd_t; typedef struct lrmd_key_value_s lrmd_key_value_t; struct lrmd_key_value_t; /* *INDENT-OFF* */ #define F_LRMD_OPERATION "lrmd_op" #define F_LRMD_CLIENTNAME "lrmd_clientname" #define F_LRMD_CLIENTID "lrmd_clientid" #define F_LRMD_CALLBACK_TOKEN "lrmd_async_id" #define F_LRMD_CALLID "lrmd_callid" #define F_LRMD_CANCEL_CALLID "lrmd_cancel_callid" #define F_LRMD_CALLOPTS "lrmd_callopt" #define F_LRMD_CALLDATA "lrmd_calldata" #define F_LRMD_RC "lrmd_rc" #define F_LRMD_EXEC_RC "lrmd_exec_rc" #define F_LRMD_OP_STATUS "lrmd_exec_op_status" #define F_LRMD_TIMEOUT "lrmd_timeout" #define F_LRMD_CLASS "lrmd_class" #define F_LRMD_PROVIDER "lrmd_provider" #define F_LRMD_TYPE "lrmd_type" #define F_LRMD_ORIGIN "lrmd_origin" #define F_LRMD_RSC_RUN_TIME "lrmd_run_time" #define F_LRMD_RSC_RCCHANGE_TIME "lrmd_rcchange_time" #define F_LRMD_RSC_EXEC_TIME "lrmd_exec_time" #define F_LRMD_RSC_QUEUE_TIME "lrmd_queue_time" #define F_LRMD_RSC_ID "lrmd_rsc_id" #define F_LRMD_RSC_ACTION "lrmd_rsc_action" #define F_LRMD_RSC_USERDATA_STR "lrmd_rsc_userdata_str" #define F_LRMD_RSC_OUTPUT "lrmd_rsc_output" #define F_LRMD_RSC_START_DELAY "lrmd_rsc_start_delay" #define F_LRMD_RSC_INTERVAL "lrmd_rsc_interval" #define F_LRMD_RSC_METADATA "lrmd_rsc_metadata_res" #define F_LRMD_RSC_DELETED "lrmd_rsc_deleted" #define F_LRMD_RSC "lrmd_rsc" #define LRMD_OP_RSC_CHK_REG "lrmd_rsc_check_register" #define LRMD_OP_RSC_REG "lrmd_rsc_register" #define LRMD_OP_RSC_EXEC "lrmd_rsc_exec" #define LRMD_OP_RSC_CANCEL "lrmd_rsc_cancel" #define LRMD_OP_RSC_UNREG "lrmd_rsc_unregister" #define LRMD_OP_RSC_INFO "lrmd_rsc_info" #define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata" #define T_LRMD "lrmd" #define T_LRMD_REPLY "lrmd_reply" #define T_LRMD_NOTIFY "lrmd_notify" /* *INDENT-ON* */ lrmd_t *lrmd_api_new(void); bool lrmd_dispatch(lrmd_t *lrmd); void lrmd_api_delete(lrmd_t * lrmd); lrmd_key_value_t *lrmd_key_value_add(lrmd_key_value_t *kvp, const char *key, const char *value); /* *INDENT-OFF* */ /* Reserved for future use */ enum lrmd_call_options { lrmd_opt_none = 0x00000000, /* lrmd_opt_sync_call = 0x00000001, //Not implemented, patches welcome. */ + /*! Only notify the client originating a exec() the results */ + lrmd_opt_notify_orig_only = 0x00000002, }; enum lrmd_errors { lrmd_ok = 0, lrmd_pending = -1, lrmd_err_generic = -2, lrmd_err_internal = -3, lrmd_err_not_supported = -4, lrmd_err_connection = -5, lrmd_err_missing = -6, lrmd_err_exists = -7, lrmd_err_timeout = -8, lrmd_err_ipc = -9, lrmd_err_peer = -10, lrmd_err_unknown_operation = -11, lrmd_err_unknown_rsc = -12, lrmd_err_none_available = -13, lrmd_err_authentication = -14, lrmd_err_signal = -15, lrmd_err_exec_failed = -16, lrmd_err_no_metadata = -17, lrmd_err_stonith_connection = -18, lrmd_err_provider_required = -19, }; enum lrmd_callback_event { lrmd_event_register, lrmd_event_unregister, lrmd_event_exec_complete, lrmd_event_disconnect, }; enum lrmd_exec_rc { PCMK_EXECRA_OK = 0, PCMK_EXECRA_UNKNOWN_ERROR = 1, PCMK_EXECRA_INVALID_PARAM = 2, PCMK_EXECRA_UNIMPLEMENT_FEATURE = 3, PCMK_EXECRA_INSUFFICIENT_PRIV = 4, PCMK_EXECRA_NOT_INSTALLED = 5, PCMK_EXECRA_NOT_CONFIGURED = 6, PCMK_EXECRA_NOT_RUNNING = 7, PCMK_EXECRA_RUNNING_MASTER = 8, PCMK_EXECRA_FAILED_MASTER = 9, /* For status command only */ PCMK_EXECRA_STATUS_UNKNOWN = 14, }; /* *INDENT-ON* */ typedef struct lrmd_event_data_s { /*! Type of event, register, unregister, call_completed... */ enum lrmd_callback_event type; /*! The resource this event occurred on. */ const char *rsc_id; /*! The action performed, start, stop, monitor... */ const char *op_type; /*! The userdata string given do exec() api function */ const char *user_data; /*! The client api call id associated with this event */ int call_id; /*! The operation's timeout period in ms. */ int timeout; /*! The operation's recurring interval in ms. */ int interval; /*! The operation's start delay value in ms. */ int start_delay; /*! This operation that just completed is on a deleted rsc. */ int rsc_deleted; /*! The executed ra return code */ enum lrmd_exec_rc rc; /*! The lrmd status returned for exec_complete events */ int op_status; /*! stdout from resource agent operation */ const char *output; /*! Timestamp of when op ran */ unsigned int t_run; /*! Timestamp of last rc change */ unsigned int t_rcchange; /*! Time in length op took to execute */ unsigned int exec_time; /*! Time in length spent in queue */ unsigned int queue_time; /* This is a GHashTable containing the * parameters given to the operation */ void *params; } lrmd_event_data_t; lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t *event); void lrmd_free_event(lrmd_event_data_t *event); typedef struct lrmd_rsc_info_s { char *id; char *type; char *class; char *provider; } lrmd_rsc_info_t; lrmd_rsc_info_t *lrmd_copy_rsc_info(lrmd_rsc_info_t *rsc_info); void lrmd_free_rsc_info(lrmd_rsc_info_t *rsc_info); typedef void (*lrmd_event_callback)(lrmd_event_data_t *event); typedef struct lrmd_list_s { const char *val; struct lrmd_list_s *next; } lrmd_list_t; void lrmd_list_freeall(lrmd_list_t *head); typedef struct lrmd_api_operations_s { /*! * \brief Connect from the lrmd. * * \retval 0, success * \retval negative error code on failure */ int (*connect) (lrmd_t *lrmd, const char *client_name, int *fd); /*! * \brief Disconnect from the lrmd. * * \retval 0, success * \retval negative error code on failure */ int (*disconnect)(lrmd_t *lrmd); /*! * \brief Register a resource with the lrmd. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \retval 0, success * \retval negative error code on failure */ int (*register_rsc) (lrmd_t *lrmd, const char *rsc_id, const char *class, const char *provider, const char *agent, enum lrmd_call_options options); /*! * \brief Retrieve registration info for a rsc * * \retval info on success * \retval NULL on failure */ lrmd_rsc_info_t *(*get_rsc_info) (lrmd_t *lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Unregister a resource from the lrmd. * * \note All pending and recurring operations will be cancelled * automatically. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \retval 0, success * \retval -1, success, but operations are currently executing on the rsc which will * return once they are completed. * \retval negative error code on failure * */ int (*unregister_rsc) (lrmd_t *lrmd, const char *rsc_id, enum lrmd_call_options options); /*! * \brief Sets the callback to receive lrmd events on. */ void (*set_callback) (lrmd_t *lrmd, lrmd_event_callback callback); /*! * \brief Issue a command on a resource * * \note Asynchronous, command is queued in daemon on function return, but * execution of command is not synced. * * \note Operations on individual resources are guaranteed to occur * in the order the client api calls them in. * * \note Operations between different resources are not guaranteed * to occur in any specific order in relation to one another * regardless of what order the client api is called in. * \retval call_id to track async event result on success * \retval negative error code on failure */ int (*exec)(lrmd_t *lrmd, const char *rsc_id, const char *action, const char *userdata, /* userdata string given back in event notification */ int interval, /* ms */ int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t *params); /* ownership of params is given up to api here */ /*! * \brief Cancel a recurring command. * * \note Synchronous, guaranteed to occur in daemon before function returns. * * \note The cancel is completed async from this call. * We can be guaranteed the cancel has completed once * the callback receives an exec_complete event with * the lrmd_op_status signifying that the operation is * cancelled. * \note For each resource, cancel operations and exec operations * are processed in the order they are received. * It is safe to assume that for a single resource, a cancel * will occur in the lrmd before an exec if the client's cancel * api call occurs before the exec api call. * * It is not however safe to assume any operation on one resource will * occur before an operation on another resource regardless of * the order the client api is called in. * * \retval 0, cancel command sent. * \retval negative error code on failure */ int (*cancel)(lrmd_t *lrmd, const char *rsc_id, const char *action, int interval); /*! * \brief Get the metadata documentation for a resource. * * \note Value is returned in output. Output must be freed when set * * \retval lrmd_ok success * \retval negative error code on failure */ int (*get_metadata) (lrmd_t *lrmd, const char *class, const char *provider, const char *agent, char **output, enum lrmd_call_options options); /*! * \brief Retrieve a list of installed resource agents. * * \note if class is not provided, all known agents will be returned * \note list must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_agents)(lrmd_t *lrmd, lrmd_list_t **agents, const char *class, const char *provider); /*! * \brief Retrieve a list of resource agent providers * * \note When the agent is provided, only the agent's provider will be returned * \note When no agent is supplied, all providers will be returned. * \note List must be freed using lrmd_list_freeall() * * \retval num items in list on success * \retval negative error code on failure */ int (*list_ocf_providers)(lrmd_t *lrmd, const char *agent, lrmd_list_t **providers); } lrmd_api_operations_t; struct lrmd_s { lrmd_api_operations_t *cmds; void *private; }; static inline const char * lrmd_event_rc2str(enum lrmd_exec_rc rc) { switch(rc) { case PCMK_EXECRA_OK: return "ok"; case PCMK_EXECRA_UNKNOWN_ERROR: return "unknown error"; case PCMK_EXECRA_INVALID_PARAM: return "invalid parameter"; case PCMK_EXECRA_UNIMPLEMENT_FEATURE: return "unimplemented feature"; case PCMK_EXECRA_INSUFFICIENT_PRIV: return "insufficient privileges"; case PCMK_EXECRA_NOT_INSTALLED: return "not installed"; case PCMK_EXECRA_NOT_CONFIGURED: return "not configured"; case PCMK_EXECRA_NOT_RUNNING: return "not running"; case PCMK_EXECRA_RUNNING_MASTER: return "master"; case PCMK_EXECRA_FAILED_MASTER: return "master (failed)"; case PCMK_EXECRA_STATUS_UNKNOWN: return "status: unknown"; default: break; } return ""; } static inline const char * lrmd_event_type2str(enum lrmd_callback_event type) { switch (type) { case lrmd_event_register: return "register"; case lrmd_event_unregister: return "unregister"; case lrmd_event_exec_complete: return "exec_complete"; case lrmd_event_disconnect: return "disconnect"; } return "unknown"; } #endif diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am index 0be956e459..a7b1a81264 100644 --- a/lrmd/Makefile.am +++ b/lrmd/Makefile.am @@ -1,35 +1,40 @@ # Copyright (c) 2012 David Vossel # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # MAINTAINERCLEANFILES = Makefile.in lrmdlibdir = $(CRM_DAEMON_DIR) +lrmdctsdir = $(datadir)/$(PACKAGE)/tests/cts + ## binary progs -lrmdlib_PROGRAMS = lrmd -noinst_PROGRAMS = lrmd_test +lrmdlib_PROGRAMS = lrmd +lrmdcts_PROGRAMS = lrmd_test lrmd_SOURCES = main.c lrmd.c lrmd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/fencing/libstonithd.la lrmd_test_SOURCES = test.c lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ - $(top_builddir)/lib/services/libcrmservice.la + $(top_builddir)/lib/cib/libcib.la \ + $(top_builddir)/lib/services/libcrmservice.la \ + $(top_builddir)/lib/pengine/libpe_status.la \ + $(top_builddir)/pengine/libpengine.la diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c index d336ece2de..b66a4e857d 100644 --- a/lrmd/lrmd.c +++ b/lrmd/lrmd.c @@ -1,968 +1,983 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #ifdef HAVE_SYS_TIMEB_H #include #endif GHashTable *rsc_list = NULL; GHashTable *client_list = NULL; typedef struct lrmd_cmd_s { int timeout; int interval; int start_delay; int call_id; int exec_rc; int lrmd_op_status; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; + char *only_notify_client; char *origin; char *rsc_id; char *action; char *output; char *userdata_str; #ifdef HAVE_SYS_TIMEB_H /* Timestamp of when op ran */ struct timeb t_run; /* Timestamp of when op was queued */ struct timeb t_queue; /* Timestamp of last rc change */ struct timeb t_rcchange; #endif GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t *cmd, lrmd_rsc_t *rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static lrmd_rsc_t * build_rsc_from_xml(xmlNode *msg) { xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); return rsc; } static lrmd_cmd_t * -create_lrmd_cmd(xmlNode *msg) +create_lrmd_cmd(xmlNode *msg, lrmd_client_t *client) { + int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); + crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); + + if (call_options & lrmd_opt_notify_orig_only) { + cmd->only_notify_client = crm_strdup(client->id); + } + crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); return cmd; } static void free_lrmd_cmd(lrmd_cmd_t *cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } free(cmd->origin); free(cmd->action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->output); + free(cmd->only_notify_client); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->stonith_recurring_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); #endif mainloop_set_trigger(rsc->work); return FALSE; } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } static void schedule_lrmd_cmd(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static void send_reply(lrmd_client_t *client, int rc, int call_id) { int send_rc = 0; xmlNode *reply = NULL; reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); send_rc = crm_ipcs_send(client->channel, reply, FALSE); free_xml(reply); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; lrmd_client_t *client = value; if (client == NULL) { crm_err("Asked to send event to NULL client"); return; } else if (client->channel == NULL) { crm_trace("Asked to send event to disconnected client"); return; } else if (client->name == NULL) { crm_trace("Asked to send event to client with no name"); return; } if (crm_ipcs_send(client->channel, update_msg, TRUE) <= 0) { crm_warn("Notification of client %s/%s failed", client->name, client->id); } } #ifdef HAVE_SYS_TIMEB_H static int time_diff_ms(struct timeb *now, struct timeb *old) { int sec = difftime(now->time, old->time); int ms = now->millitm - old->millitm; if (old->time == 0) { return 0; } return (sec * 1000) + ms; } #endif static void send_cmd_complete_notify(lrmd_cmd_t *cmd) { #ifdef HAVE_SYS_TIMEB_H struct timeb now = { 0, }; #endif xmlNode *notify = NULL; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); #ifdef HAVE_SYS_TIMEB_H ftime(&now); crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time); crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time); crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, time_diff_ms(&now, &cmd->t_run)); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, time_diff_ms(&cmd->t_run, &cmd->t_queue)); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2field((gpointer) key, (gpointer) value, args); } } - g_hash_table_foreach(client_list, send_client_notify, notify); + if (cmd->only_notify_client) { + lrmd_client_t *client = g_hash_table_lookup(client_list, cmd->only_notify_client); + + if (client) { + send_client_notify(client->id, client, notify); + } + } else { + g_hash_table_foreach(client_list, send_client_notify, notify); + } free_xml(notify); } static void send_generic_notify(int rc, xmlNode *request) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); g_hash_table_foreach(client_list, send_client_notify, notify); free_xml(notify); } static void cmd_finalize(lrmd_cmd_t *cmd, lrmd_rsc_t *rsc) { crm_trace("Resource operation rsc:%s action:%s completed", cmd->rsc_id, cmd->action); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } send_cmd_complete_notify(cmd); if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd->lrmd_op_status = 0; memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); free(cmd->output); cmd->output = NULL; } } static int lsb2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_EXECRA_UNKNOWN_ERROR; } /* status has different return codes that everything else. */ if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) { if (rc > PCMK_LSB_NOT_RUNNING) { return PCMK_EXECRA_UNKNOWN_ERROR; } return rc; } switch (rc) { case PCMK_LSB_STATUS_OK: return PCMK_EXECRA_OK; case PCMK_LSB_STATUS_NOT_INSTALLED: return PCMK_EXECRA_NOT_INSTALLED; case PCMK_LSB_STATUS_VAR_PID: case PCMK_LSB_STATUS_VAR_LOCK: case PCMK_LSB_STATUS_NOT_RUNNING: return PCMK_EXECRA_NOT_RUNNING; default: return PCMK_EXECRA_UNKNOWN_ERROR; } return PCMK_EXECRA_UNKNOWN_ERROR; } static int ocf2uniform_rc(int rc) { if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) { return PCMK_EXECRA_UNKNOWN_ERROR; } return rc; } static int stonith2uniform_rc(const char *action, int rc) { if (rc == st_err_unknown_device) { if (safe_str_eq(action, "stop")) { rc = PCMK_EXECRA_OK; } else if (safe_str_eq(action, "start")) { rc = PCMK_EXECRA_NOT_INSTALLED; } else { rc = PCMK_EXECRA_NOT_RUNNING; } } else if (rc != 0) { rc = PCMK_EXECRA_UNKNOWN_ERROR; } return rc; } static int get_uniform_rc(const char *standard, const char *action, int rc) { if (safe_str_eq(standard, "ocf")) { return ocf2uniform_rc(rc); } else if (safe_str_eq(standard, "stonith")) { return stonith2uniform_rc(action, rc); } else { return lsb2uniform_rc(action, rc); } } static void action_complete(svc_action_t *action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; if (!cmd) { crm_err("LRMD action (%s) completed does not match any known operations.", action->id); return; } #ifdef HAVE_SYS_TIMEB_H if (cmd->exec_rc != action->rc) { ftime(&cmd->t_rcchange); } #endif cmd->exec_rc = get_uniform_rc(action->standard, cmd->action, action->rc); cmd->lrmd_op_status = action->status; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (action->stdout_data) { cmd->output = crm_strdup(action->stdout_data); } cmd_finalize(cmd, rsc); } static int lrmd_rsc_execute_stonith(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = 0; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { cmd->exec_rc = get_uniform_rc("stonith", cmd->action, st_err_connection); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; cmd_finalize(cmd, rsc); return lrmd_err_stonith_connection; } if (safe_str_eq(cmd->action, "start")) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); if (rc == 0) { rc = stonith_api->cmds->call(stonith_api, st_opt_sync_call, cmd->rsc_id, "monitor", NULL, cmd->timeout); } } else if (safe_str_eq(cmd->action, "stop")) { rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id); } else if (safe_str_eq(cmd->action, "monitor")) { rc = stonith_api->cmds->call(stonith_api, st_opt_sync_call, cmd->rsc_id, cmd->action, NULL, cmd->timeout); } cmd->exec_rc = get_uniform_rc("stonith", cmd->action, rc); /* Attempt to map return codes to op status if possible */ if (rc) { switch (rc) { case st_err_not_supported: cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED; break; case st_err_timeout: cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; break; default: cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } } else { cmd->lrmd_op_status = PCMK_LRM_OP_DONE; } if (cmd->interval > 0) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd); } cmd_finalize(cmd, rsc); return rc; } static const char * normalize_action_name(lrmd_rsc_t *rsc, const char *action) { if (safe_str_eq(action, "monitor") && (safe_str_eq(rsc->class, "lsb") || safe_str_eq(rsc->class, "service") || safe_str_eq(rsc->class, "systemd"))) { return "status"; } return action; } static void dup_attr(gpointer key, gpointer value, gpointer user_data) { g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value)); } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); if (cmd->params) { params_copy = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (params_copy != NULL) { g_hash_table_foreach(cmd->params, dup_attr, params_copy); } } action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval, cmd->timeout, params_copy); if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } action->cb_data = cmd; if (!services_action_async(action, action_complete)) { services_action_free(action); action = NULL; cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } if (cmd->interval) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } /* The cmd will be finalized by the action_complete callback after * the service library is done with it */ rsc->active = cmd; /* only one op at a time for a rsc */ cmd = NULL; exec_done: if (cmd) { cmd_finalize(cmd, rsc); } return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t *rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_run); } #endif if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } if (safe_str_eq(rsc->class, "stonith")) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = safe_str_eq(rsc->class, "stonith"); for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, cmd->action, cmd->interval); } } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(lrmd_client_t *client, xmlNode *request) { xmlNode *reply = create_xml_node(NULL, "reply"); crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_LRMD_CLIENTID, client->id); crm_ipcs_send(client->channel, reply, FALSE); free_xml(reply); return lrmd_ok; } static int process_lrmd_rsc_register(lrmd_client_t *client, xmlNode *request) { int rc = lrmd_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && safe_str_eq(rsc->class, dup->class) && safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) { crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Added '%s' to the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); return rc; } static void process_lrmd_get_rsc_info(lrmd_client_t *client, xmlNode *request) { int rc = lrmd_ok; int send_rc = 0; int call_id = 0; xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (!rsc_id) { rc = lrmd_err_unknown_rsc; goto get_rsc_done; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = lrmd_err_unknown_rsc; goto get_rsc_done; } get_rsc_done: reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } send_rc = crm_ipcs_send(client->channel, reply, FALSE); if (send_rc < 0) { crm_warn("LRMD reply to %s failed: %d", client->name, send_rc); } free_xml(reply); } static int process_lrmd_rsc_unregister(lrmd_client_t *client, xmlNode *request) { int rc = lrmd_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return lrmd_err_unknown_rsc; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return lrmd_err_unknown_rsc; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ rc = lrmd_pending; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(lrmd_client_t *client, xmlNode *request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return lrmd_err_missing; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { return lrmd_err_unknown_rsc; } - cmd = create_lrmd_cmd(request); + cmd = create_lrmd_cmd(request, client); schedule_lrmd_cmd(rsc, cmd); return cmd->call_id; } static int cancel_op(const char *rsc_id, const char *action, int interval) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then its either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return lrmd_err_unknown_rsc; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return lrmd_ok; } } if (safe_str_eq(rsc->class, "stonith")) { /* The service library does not handle stonith operations. * We have to handle recurring stonith opereations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && cmd->interval == interval) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return lrmd_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return lrmd_ok; } return lrmd_err_unknown_operation; } static int process_lrmd_rsc_cancel(lrmd_client_t *client, xmlNode *request) { xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); int interval = 0; crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval); if (!rsc_id || !action) { return lrmd_err_missing; } return cancel_op(rsc_id, action, interval); } void process_lrmd_message(lrmd_client_t *client, xmlNode *request) { int rc = lrmd_ok; - int call_options = 0; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; int exit = 0; - crm_element_value_int(request, F_LRMD_CALLOPTS, &call_options); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { rc = process_lrmd_signon(client, request); } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { rc = process_lrmd_rsc_register(client, request); do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) { process_lrmd_get_rsc_info(client, request); } else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) { rc = process_lrmd_rsc_unregister(client, request); /* don't notify anyone about failed un-registers */ if (rc == lrmd_ok || rc == lrmd_pending) { do_notify = 1; } do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) { rc = process_lrmd_rsc_exec(client, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) { rc = process_lrmd_rsc_cancel(client, request); do_reply = 1; } else if (crm_str_eq(op, CRM_OP_QUIT, TRUE)) { do_reply = 1; exit = 1; } else { rc = lrmd_err_unknown_operation; do_reply = 1; crm_err("Unknown %s from %s", op, client->name); crm_log_xml_warn(request, "UnknownOp"); } if (do_reply) { send_reply(client, rc, call_id); } if (do_notify) { send_generic_notify(rc, request); } if (exit) { lrmd_shutdown(0); } } diff --git a/lrmd/test.c b/lrmd/test.c index 6d1d7646e3..f43217cec2 100644 --- a/lrmd/test.c +++ b/lrmd/test.c @@ -1,420 +1,561 @@ /* * Copyright (c) 2012 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include +#include +#include #include /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"help", 0, 0, '?'}, {"verbose", 0, 0, 'V', "\t\tPrint out logs and events to screen"}, {"quiet", 0, 0, 'Q', "\t\tSuppress all output to screen"}, /* just incase we have to add data to events, * we don't want break a billion regression tests. Instead * we'll create different versions */ {"listen", 1, 0, 'l', "\tListen for a specific event string"}, {"event-ver", 1, 0, 'e', "\tVersion of event to listen to"}, {"api-call", 1, 0, 'c', "\tDirectly relates to lrmd api functions"}, {"no-wait", 0, 0, 'w', "\tMake api call and do not wait for result."}, + {"is-running", 0, 0, 'R', "\tDetermine if a resource is registered and running."}, {"-spacer-", 1, 0, '-', "\nParameters for api-call option"}, {"action", 1, 0, 'a'}, {"rsc-id", 1, 0, 'r'}, {"cancel-call-id", 1, 0, 'x'}, {"provider", 1, 0, 'P'}, {"class", 1, 0, 'C'}, {"type", 1, 0, 'T'}, {"interval", 1, 0, 'i'}, {"timeout", 1, 0, 't'}, {"start-delay", 1, 0, 's'}, {"param-key", 1, 0, 'k'}, {"param-val", 1, 0, 'v'}, {"-spacer-", 1, 0, '-'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ +cib_t *cib_conn = NULL; static int exec_call_id = 0; +static int exec_call_opts = 0; +extern void cleanup_alloc_calculations(pe_working_set_t * data_set); static struct { int verbose; int quiet; int print; int interval; int timeout; int start_delay; int cancel_call_id; int event_version; int no_wait; + int is_running; int no_connect; const char *api_call; const char *rsc_id; const char *provider; const char *class; const char *type; const char *action; const char *listen; lrmd_key_value_t *params; } options; GMainLoop *mainloop = NULL; lrmd_t *lrmd_conn = NULL; static char event_buf_v0[1024]; #define print_result(result) \ if (!options.quiet) { \ result; \ } \ #define report_event(event) \ snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \ lrmd_event_type2str(event->type), \ event->rsc_id, \ event->op_type ? event->op_type : "none", \ lrmd_event_rc2str(event->rc), \ services_lrm_status_str(event->op_status)); \ crm_info("%s", event_buf_v0);; static void test_shutdown(int nsig) { lrmd_api_delete(lrmd_conn); } static void read_events(lrmd_event_data_t *event) { report_event(event); if (options.listen) { if (safe_str_eq(options.listen, event_buf_v0)) { print_result(printf("LISTEN EVENT SUCCESSFUL\n")); exit(0); } } if (exec_call_id && (event->call_id == exec_call_id)) { - if (event->op_status == 0) { + if (event->op_status == 0 && event->rc == 0) { print_result(printf("API-CALL SUCCESSFUL for 'exec'\n")); } else { print_result(printf("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s\n", event->rc, services_lrm_status_str(event->op_status))); exit(-1); } if (!options.listen) { exit(0); } } } static gboolean timeout_err(gpointer data) { print_result(printf("LISTEN EVENT FAILURE - timeout occurred, never found.\n")); exit(-1); return FALSE; } static void try_connect(void) { int tries = 10; int i = 0; int rc = 0; for (i = 0; i < tries; i++) { rc = lrmd_conn->cmds->connect(lrmd_conn, "lrmd", NULL); if (!rc) { crm_info("lrmd client connection established"); return; } else { crm_info("lrmd client connection failed"); } sleep(1); } print_result(printf("API CONNECTION FAILURE\n")); exit(-1); } static gboolean start_test(gpointer user_data) { int rc = 0; if (!options.no_connect) { try_connect(); } lrmd_conn->cmds->set_callback(lrmd_conn, read_events); if (options.timeout) { g_timeout_add(options.timeout, timeout_err, NULL); } if (!options.api_call) { return 0; } if (safe_str_eq(options.api_call, "exec")) { rc = lrmd_conn->cmds->exec(lrmd_conn, options.rsc_id, options.action, NULL, options.interval, options.timeout, options.start_delay, - 0, + exec_call_opts, options.params); if (rc > 0) { exec_call_id = rc; print_result(printf("API-CALL 'exec' action pending, waiting on response\n")); } } else if (safe_str_eq(options.api_call, "register_rsc")) { rc = lrmd_conn->cmds->register_rsc(lrmd_conn, options.rsc_id, options.class, options.provider, options.type, 0); } else if (safe_str_eq(options.api_call, "get_rsc_info")) { lrmd_rsc_info_t *rsc_info; rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0); if (rsc_info) { print_result(printf("RSC_INFO: id:%s class:%s provider:%s type:%s\n", rsc_info->id, rsc_info->class, rsc_info->provider ? rsc_info->provider : "", rsc_info->type)); lrmd_free_rsc_info(rsc_info); rc = lrmd_ok; } else { rc = -1; } } else if (safe_str_eq(options.api_call, "unregister_rsc")) { rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0); } else if (safe_str_eq(options.api_call, "cancel")) { rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action, options.interval); } else if (safe_str_eq(options.api_call, "metadata")) { char *output = NULL; rc = lrmd_conn->cmds->get_metadata(lrmd_conn, options.class, options.provider, options.type, &output, 0); if (rc == lrmd_ok) { print_result(printf("%s", output)); free(output); } } else if (safe_str_eq(options.api_call, "list_agents")) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider); if (rc > 0) { print_result(printf("%d agents found\n", rc)); for (iter = list; iter != NULL; iter = iter->next) { print_result(printf("%s\n", iter->val)); } lrmd_list_freeall(list); rc = 0; } else { print_result(printf("API_CALL FAILURE - no agents found\n")); rc = -1; } } else if (safe_str_eq(options.api_call, "list_ocf_providers")) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list); if (rc > 0) { print_result(printf("%d providers found\n", rc)); for (iter = list; iter != NULL; iter = iter->next) { print_result(printf("%s\n", iter->val)); } lrmd_list_freeall(list); rc = 0; } else { print_result(printf("API_CALL FAILURE - no providers found\n")); rc = -1; } } else if (options.api_call) { print_result(printf("API-CALL FAILURE unknown action '%s'\n", options.action)); exit(-1); } if (rc < 0) { print_result(printf("API-CALL FAILURE for '%s' api_rc:%d\n", options.api_call, rc)); exit(-1); } if (options.api_call && rc == lrmd_ok) { print_result(printf("API-CALL SUCCESSFUL for '%s'\n", options.api_call)); if (!options.listen) { exit(0); } } if (options.no_wait) { /* just make the call and exit regardless of anything else. */ exit(0); } return 0; } +static resource_t * +find_rsc_or_clone(const char *rsc, pe_working_set_t * data_set) +{ + resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); + + if (the_rsc == NULL) { + char *as_clone = crm_concat(rsc, "0", ':'); + + the_rsc = pe_find_resource(data_set->resources, as_clone); + free(as_clone); + } + return the_rsc; +} + +static int +generate_params(void) +{ + int rc = 0; + pe_working_set_t data_set; + xmlNode *cib_xml_copy = NULL; + resource_t *rsc = NULL; + GHashTable *params = NULL; + GHashTable *meta = NULL; + GHashTableIter iter; + + if (options.params) { + return 0; + } + + set_working_set_defaults(&data_set); + + cib_conn = cib_new(); + rc = cib_conn->cmds->signon(cib_conn, "lrmd_test", cib_query); + if (rc != cib_ok) { + crm_err("Error signing on to the CIB service: %s\n", cib_error2string(rc)); + rc = -1; + goto param_gen_bail; + } + + cib_xml_copy = get_cib_copy(cib_conn); + + if (!cib_xml_copy) { + crm_err("Error retrieving cib copy."); + rc = -1; + goto param_gen_bail; + } + + if (cli_config_update(&cib_xml_copy, NULL, FALSE) == FALSE) { + crm_err("Error updating cib configuration"); + rc = -1; + goto param_gen_bail; + } + + data_set.input = cib_xml_copy; + data_set.now = new_ha_date(TRUE); + + cluster_status(&data_set); + if (options.rsc_id) { + rsc = find_rsc_or_clone(options.rsc_id, &data_set); + } + + if (!rsc) { + crm_err("Resource does not exist in config"); + rc = -1; + goto param_gen_bail; + } + + params = g_hash_table_new_full(crm_str_hash, + g_str_equal, g_hash_destroy_str, g_hash_destroy_str); + meta = g_hash_table_new_full(crm_str_hash, + g_str_equal, g_hash_destroy_str, g_hash_destroy_str); + + get_rsc_attributes(params, rsc, NULL, &data_set); + get_meta_attributes(meta, rsc, NULL, &data_set); + + if (params) { + char *key = NULL; + char *value = NULL; + + g_hash_table_iter_init(&iter, params); + while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { + options.params = lrmd_key_value_add(options.params, key, value); + } + g_hash_table_destroy(params); + } + + if (meta) { + char *key = NULL; + char *value = NULL; + + g_hash_table_iter_init(&iter, meta); + while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { + char *crm_name = crm_meta_name(key); + options.params = lrmd_key_value_add(options.params, crm_name, value); + free(crm_name); + } + g_hash_table_destroy(meta); + } + +param_gen_bail: + + cleanup_alloc_calculations(&data_set); + return rc; +} + int main(int argc, char ** argv) { int option_index = 0; int argerr = 0; int flag; char *key = NULL; char *val = NULL; crm_trigger_t *trig; crm_set_options(NULL, "mode [options]", long_options, "Inject commands into the lrmd and watch for events\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch(flag) { case '?': crm_help(flag, LSB_EXIT_OK); break; case 'V': options.verbose = 1; break; case 'Q': options.quiet = 1; options.verbose = 0; break; case 'e': options.event_version = atoi(optarg); break; case 'l': options.listen = optarg; break; case 'w': options.no_wait = 1; break; + case 'R': + options.is_running = 1; + break; case 'c': options.api_call = optarg; break; case 'a': options.action = optarg; break; case 'r': options.rsc_id = optarg; break; case 'x': options.cancel_call_id = atoi(optarg); break; case 'P': options.provider = optarg; break; case 'C': options.class = optarg; break; case 'T': options.type = optarg; break; case 'i': options.interval = atoi(optarg); break; case 't': options.timeout = atoi(optarg); break; case 's': options.start_delay = atoi(optarg); break; case 'k': key = optarg; if (key && val) { options.params = lrmd_key_value_add(options.params, key, val); key = val = NULL; } break; case 'v': val = optarg; if (key && val) { options.params = lrmd_key_value_add(options.params, key, val); key = val = NULL; } break; default: ++argerr; break; } } if (argerr) { crm_help('?', LSB_EXIT_GENERIC); } if (optind > argc) { ++argerr; } if (!options.listen && (safe_str_eq(options.api_call, "metadata") || safe_str_eq(options.api_call, "list_agents") || safe_str_eq(options.api_call, "list_ocf_providers"))) { options.no_connect = 1; } crm_log_init("lrmd_ctest", LOG_INFO, TRUE, options.verbose ? TRUE : FALSE, argc, argv, FALSE); + if (options.is_running) { + if (!options.timeout) { + options.timeout = 30000; + } + options.interval = 0; + if (!options.rsc_id) { + crm_err("rsc-id must be given when is-running is used"); + exit(-1); + } + + if (generate_params()) { + print_result(printf("Failed to retrieve rsc parameters from cib, can not determine if rsc is running.\n")); + exit(-1); + } + options.api_call = "exec"; + options.action = "monitor"; + exec_call_opts = lrmd_opt_notify_orig_only; + } + + /* if we can't perform an api_call or listen for events, * there is nothing to do */ if (!options.api_call && !options.listen) { crm_err("Nothing to be done. Please specify 'api-call' and/or 'listen'"); return 0; } lrmd_conn = lrmd_api_new(); trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL); mainloop_set_trigger(trig); mainloop_add_signal(SIGTERM, test_shutdown); crm_info("Starting"); mainloop = g_main_new(FALSE); g_main_run(mainloop); lrmd_api_delete(lrmd_conn); + + if (cib_conn != NULL) { + cib_conn->cmds->signoff(cib_conn); + cib_delete(cib_conn); + } + return 0; }