Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/cts/CM_lha.py b/cts/CM_lha.py
index 77c3bb593c..90adb09de2 100755
--- a/cts/CM_lha.py
+++ b/cts/CM_lha.py
@@ -1,628 +1,600 @@
'''CTS: Cluster Testing System: LinuxHA v2 dependent modules...
'''
__copyright__='''
Author: Huang Zhen <zhenhltc@cn.ibm.com>
Copyright (C) 2004 International Business Machines
Additional Audits, Revised Start action, Default Configuration:
Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
import os, sys, warnings
from cts import CTS
from cts.CTSvars import *
from cts.CTS import *
from cts.CIB import *
from cts.CTStests import AuditResource
try:
from xml.dom.minidom import *
except ImportError:
sys.__stdout__.write("Python module xml.dom.minidom not found\n")
sys.__stdout__.write("Please install python-xml or similar before continuing\n")
sys.__stdout__.flush()
sys.exit(1)
#######################################################################
#
# LinuxHA v2 dependent modules
#
#######################################################################
class crm_lha(ClusterManager):
'''
The linux-ha version 2 cluster manager class.
It implements the things we need to talk to and manipulate
linux-ha version 2 clusters
'''
def __init__(self, Environment, randseed=None):
ClusterManager.__init__(self, Environment, randseed=randseed)
#HeartbeatCM.__init__(self, Environment, randseed=randseed)
self.fastfail = 0
self.clear_cache = 0
self.cib_installed = 0
self.config = None
self.cluster_monitor = 0
self.use_short_names = 1
self.update({
"Name" : "crm-lha",
"DeadTime" : 300,
"StartTime" : 300, # Max time to start up
"StableTime" : 30,
"StartCmd" : CTSvars.INITDIR+"/heartbeat start > /dev/null 2>&1",
"StopCmd" : CTSvars.INITDIR+"/heartbeat stop > /dev/null 2>&1",
"ElectionCmd" : "crmadmin -E %s",
"StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null",
"EpocheCmd" : "crm_node -H -e",
"QuorumCmd" : "crm_node -H -q",
"ParitionCmd" : "crm_node -H -p",
"CibQuery" : "cibadmin -Ql",
# 300,000 == 5 minutes
- "ExecuteRscOp" : "lrmadmin -n %s -E %s %s 300000 %d EVERYTIME 2>&1",
+ "RscRunning" : CTSvars.CTS_home + "/lrmd_test -R -r %s",
"CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml",
"TmpDir" : "/tmp",
"BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
"FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",
# tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit
# tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated
# tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1
# tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null
"ReduceCommCmd" : "",
"RestoreCommCmd" : "tc qdisc del dev lo root",
"LogFileName" : Environment["LogFileName"],
"UUIDQueryCmd" : "crmadmin -N",
"StandbyCmd" : "crm_attribute -Q -U %s -n standby -l forever -v %s 2>/dev/null",
"StandbyQueryCmd" : "crm_attribute -GQ -U %s -n standby -l forever -d off 2>/dev/null",
# Patterns to look for in the log files for various occasions...
"Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE",
# This wont work if we have multiple partitions
"Pat:Local_started" : "%s .*The local CRM is operational",
"Pat:Slave_started" : "%s .*State transition.*-> S_NOT_DC",
"Pat:Master_started" : "%s .* State transition.*-> S_IDLE",
"Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete",
"Pat:Logd_stopped" : "%s logd:.*Exiting write process",
"Pat:They_stopped" : "%s .*LOST:.* %s ",
"Pat:They_dead" : "node %s.*: is dead",
"Pat:TransitionComplete" : "Transition status: Complete: complete",
"Pat:ChildKilled" : "%s heartbeat.*%s.*killed by signal 9",
"Pat:ChildRespawn" : "%s heartbeat.*Respawning client.*%s",
"Pat:ChildExit" : "(ERROR|error): Client .* exited with return code",
"Pat:We_fenced" : "crmd.* Executing .* fencing operation .* on %s",
"Pat:They_fenced" : "stonith.* log_operation: Operation .* for host '%s' with device .* returned: 0",
"Pat:They_fenced_offset" : "for host '",
# Bad news Regexes. Should never occur.
"BadRegexes" : (
r" trace:",
r"error:",
r"crit:",
r"ERROR:",
r"CRIT:",
r"Shutting down...NOW",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r", exiting\.",
r"WARN.*Ignoring HA message.*vote.*not in our membership list",
r"pengine.*Attempting recovery of resource",
r"is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"Attempting to schedule .* after a stop",
r"Resource .* was active at shutdown",
r"duplicate entries for call_id",
r"Search terminated:",
r"No need to invoke the TE",
r"global_timer_callback:",
r"Faking parameter digest creation",
r"Parameters to .* action changed:",
r"Parameters to .* changed",
),
})
if self.Env["DoBSC"]:
del self["Pat:They_stopped"]
del self["Pat:Logd_stopped"]
self.Env["use_logd"] = 0
self._finalConditions()
self.check_transitions = 0
self.check_elections = 0
self.CIBsync = {}
self.CibFactory = ConfigFactory(self)
self.cib = self.CibFactory.createConfig(self.Env["Schema"])
def errorstoignore(self):
# At some point implement a more elegant solution that
# also produces a report at the end
'''Return list of errors which are known and very noisey should be ignored'''
if 1:
return [
"(ERROR|error): crm_abort: crm_glib_handler: ",
"(ERROR|error): Message hist queue is filling up",
"stonithd.*CRIT: external_hostlist: 'vmware gethosts' returned an empty hostlist",
"stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.",
"pengine.*Preventing .* from re-starting",
]
return []
def install_config(self, node):
if not self.ns.WaitForNodeToComeUp(node):
self.log("Node %s is not up." % node)
return None
if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1:
self.CIBsync[node] = 1
self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*")
# Only install the CIB on the first node, all the other ones will pick it up from there
if self.cib_installed == 1:
return None
self.cib_installed = 1
if self.Env["CIBfilename"] == None:
self.log("Installing Generated CIB on node %s" %(node))
self.cib.install(node)
else:
self.log("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node))
if 0 != self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)):
raise ValueError("Can not scp file to %s %d"%(node))
self.rsh(node, "chown "+CTSvars.CRM_DAEMON_USER+" "+CTSvars.CRM_CONFIG_DIR+"/cib.xml")
def prepare(self):
'''Finish the Initialization process. Prepare to test...'''
self.partitions_expected = 1
for node in self.Env["nodes"]:
self.ShouldBeStatus[node] = ""
self.unisolate_node(node)
self.StataCM(node)
def test_node_CM(self, node):
'''Report the status of the cluster manager on a given node'''
watchpats = [ ]
watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)")
watchpats.append(self["Pat:Slave_started"]%node)
watchpats.append(self["Pat:Master_started"]%node)
idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterIdle")
idle_watch.setwatch()
out = self.rsh(node, self["StatusCmd"]%node, 1)
self.debug("Node %s status: '%s'" %(node, out))
if not out or string.find(out, 'ok') < 0:
if self.ShouldBeStatus[node] == "up":
self.log(
"Node status for %s is %s but we think it should be %s"
%(node, "down", self.ShouldBeStatus[node]))
self.ShouldBeStatus[node]="down"
return 0
if self.ShouldBeStatus[node] == "down":
self.log(
"Node status for %s is %s but we think it should be %s: %s"
%(node, "up", self.ShouldBeStatus[node], out))
self.ShouldBeStatus[node]="up"
# check the output first - because syslog-ng looses messages
if string.find(out, 'S_NOT_DC') != -1:
# Up and stable
return 2
if string.find(out, 'S_IDLE') != -1:
# Up and stable
return 2
# fall back to syslog-ng and wait
if not idle_watch.look():
# just up
self.debug("Warn: Node %s is unstable: %s" %(node, out))
return 1
# Up and stable
return 2
# Is the node up or is the node down
def StataCM(self, node):
'''Report the status of the cluster manager on a given node'''
if self.test_node_CM(node) > 0:
return 1
return None
# Being up and being stable is not the same question...
def node_stable(self, node):
'''Report the status of the cluster manager on a given node'''
if self.test_node_CM(node) == 2:
return 1
self.log("Warn: Node %s not stable" %(node))
return None
def partition_stable(self, nodes, timeout=None):
watchpats = [ ]
watchpats.append("Current ping state: S_IDLE")
watchpats.append(self["Pat:DC_IDLE"])
self.debug("Waiting for cluster stability...")
if timeout == None:
timeout = self["DeadTime"]
idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterStable", timeout)
idle_watch.setwatch()
any_up = 0
for node in self.Env["nodes"]:
# have each node dump its current state
if self.ShouldBeStatus[node] == "up":
self.rsh(node, self["StatusCmd"] %node, 1)
any_up = 1
if any_up == 0:
self.debug("Cluster is inactive")
return 1
ret = idle_watch.look()
while ret:
self.debug(ret)
for node in nodes:
if re.search(node, ret):
return 1
ret = idle_watch.look()
self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout))
return None
def cluster_stable(self, timeout=None, double_check=False):
partitions = self.find_partitions()
for partition in partitions:
if not self.partition_stable(partition, timeout):
return None
if double_check:
# Make sure we are really stable and that all resources,
# including those that depend on transient node attributes,
# are started if they were going to be
time.sleep(5)
for partition in partitions:
if not self.partition_stable(partition, timeout):
return None
return 1
def is_node_dc(self, node, status_line=None):
rc = 0
if not status_line:
status_line = self.rsh(node, self["StatusCmd"]%node, 1)
if not status_line:
rc = 0
elif string.find(status_line, 'S_IDLE') != -1:
rc = 1
elif string.find(status_line, 'S_INTEGRATION') != -1:
rc = 1
elif string.find(status_line, 'S_FINALIZE_JOIN') != -1:
rc = 1
elif string.find(status_line, 'S_POLICY_ENGINE') != -1:
rc = 1
elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1:
rc = 1
return rc
def active_resources(self, node):
# [SM].* {node} matches Started, Slave, Master
# Stopped wont be matched as it wont include {node}
(rc, output) = self.rsh(node, """crm_resource -c""", None)
resources = []
for line in output:
if re.search("^Resource", line):
tmp = AuditResource(self, line)
if tmp.type == "primitive" and tmp.host == node:
resources.append(tmp.id)
return resources
- def ResourceOp(self, resource, op, node, interval=0, app="lrmadmin"):
- '''
- Execute an operation on a resource
- '''
- cmd = self["ExecuteRscOp"] % (app, resource, op, interval)
- (rc, lines) = self.rsh(node, cmd, None)
-
- if rc == 127:
- self.log("Command '%s' failed. Binary not installed?" % cmd)
- for line in lines:
- self.log("Output: "+line)
-
- return rc
-
def ResourceLocation(self, rid):
ResourceNodes = []
for node in self.Env["nodes"]:
if self.ShouldBeStatus[node] == "up":
- dummy = 0
- rc = self.ResourceOp(rid, "monitor", node)
- # Strange error codes from remote_py
- # 65024 == not installed
- # 2048 == 8
- # 1792 == 7
- # 0 == 0
- if rc == 127:
- dummy = 1
- elif rc == 254 or rc == 65024:
- dummy = 1
- #self.debug("%s is not installed on %s: %d" % (rid, node, rc))
+ cmd = self["RscRunning"] % (rid)
+ (rc, lines) = self.rsh(node, cmd, None)
- elif rc == 0 or rc == 2048 or rc == 8:
+ if rc == 127:
+ self.log("Command '%s' failed. Binary not installed?" % cmd)
+ for line in lines:
+ self.log("Output: "+line)
+ elif rc == 0:
ResourceNodes.append(node)
- elif rc == 7 or rc == 1792:
- dummy = 1
- #self.debug("%s is not running on %s: %d" % (rid, node, rc))
-
- else:
- # not active on this node?
- self.log("Unknown rc code for %s on %s: %d" % (rid, node, rc))
-
return ResourceNodes
def find_partitions(self):
ccm_partitions = []
for node in self.Env["nodes"]:
if self.ShouldBeStatus[node] == "up":
partition = self.rsh(node, self["ParitionCmd"], 1)
if not partition:
self.log("no partition details for %s" %node)
elif len(partition) > 2:
partition = partition[:-1]
found=0
for a_partition in ccm_partitions:
if partition == a_partition:
found = 1
if found == 0:
self.debug("Adding partition from %s: %s" %(node, partition))
ccm_partitions.append(partition)
else:
self.debug("Partition '%s' from %s is consistent with existing entries" %(partition, node))
else:
self.log("bad partition details for %s" %node)
else:
self.debug("Node %s is down... skipping" %node)
return ccm_partitions
def HasQuorum(self, node_list):
# If we are auditing a partition, then one side will
# have quorum and the other not.
# So the caller needs to tell us which we are checking
# If no value for node_list is specified... assume all nodes
if not node_list:
node_list = self.Env["nodes"]
for node in node_list:
if self.ShouldBeStatus[node] == "up":
quorum = self.rsh(node, self["QuorumCmd"], 1)
if string.find(quorum, "1") != -1:
return 1
elif string.find(quorum, "0") != -1:
return 0
else:
self.debug("WARN: Unexpected quorum test result from "+ node +":"+ quorum)
return 0
def Components(self):
complist = []
common_ignore = [
"Pending action:",
"(ERROR|error): crm_log_message_adv:",
"(ERROR|error): MSG: No message to dump",
"pending LRM operations at shutdown",
"Lost connection to the CIB service",
"Connection to the CIB terminated...",
"Sending message to CIB service FAILED",
"Action A_RECOVER .* not supported",
"(ERROR|error): stonithd_op_result_ready: not signed on",
"pingd.*(ERROR|error): send_update: Could not send update",
"send_ipc_message: IPC Channel to .* is not connected",
"unconfirmed_actions: Waiting on .* unconfirmed actions",
"cib_native_msgready: Message pending on command channel",
"do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd",
"verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.",
]
stonith_ignore = [
"(ERROR|error): stonithd_signon: ",
"update_failcount: Updating failcount for child_DoFencing",
"(ERROR|error): te_connect_stonith: Sign-in failed: triggered a retry",
"lrmd.*(ERROR|error): cl_get_value: wrong argument (reply)",
"lrmd.*(ERROR|error): is_expected_msg:.* null message",
"lrmd.*(ERROR|error): stonithd_receive_ops_result failed.",
]
stonith_ignore.extend(common_ignore)
ccm_ignore = [
"(ERROR|error): get_channel_token: No reply message - disconnected"
]
ccm_ignore.extend(common_ignore)
ccm = Process(self, "ccm", triggersreboot=self.fastfail, pats = [
"State transition .* S_RECOVERY",
"CCM connection appears to have failed",
"crmd.*Action A_RECOVER .* not supported",
"crmd.*Input I_TERMINATE from do_recover",
"Exiting to recover from CCM connection failure",
"crmd.*do_exit: Could not recover from internal error",
"crmd.*I_ERROR.*(ccm_dispatch|crmd_cib_connection_destroy)",
"crmd.*exited with return code 2.",
"attrd.*exited with return code 1.",
"cib.*exited with return code 2.",
# Not if it was fenced
# "A new node joined the cluster",
# "WARN: determine_online_status: Node .* is unclean",
# "Scheduling Node .* for STONITH",
# "Executing .* fencing operation",
# "tengine_stonith_callback: .*result=0",
# "Processing I_NODE_JOIN:.* cause=C_HA_MESSAGE",
# "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN",
"State transition S_STARTING -> S_PENDING",
], badnews_ignore = ccm_ignore)
cib = Process(self, "cib", triggersreboot=self.fastfail, pats = [
"State transition .* S_RECOVERY",
"Lost connection to the CIB service",
"Connection to the CIB terminated...",
"crmd.*Input I_TERMINATE from do_recover",
"crmd.*I_ERROR.*crmd_cib_connection_destroy",
"crmd.*do_exit: Could not recover from internal error",
"crmd.*exited with return code 2.",
"attrd.*exited with return code 1.",
], badnews_ignore = common_ignore)
lrmd = Process(self, "lrmd", triggersreboot=self.fastfail, pats = [
"State transition .* S_RECOVERY",
"LRM Connection failed",
"crmd.*I_ERROR.*lrm_connection_destroy",
"State transition S_STARTING -> S_PENDING",
"crmd.*Input I_TERMINATE from do_recover",
"crmd.*do_exit: Could not recover from internal error",
"crmd.*exited with return code 2.",
], badnews_ignore = common_ignore)
crmd = Process(self, "crmd", triggersreboot=self.fastfail, pats = [
# "WARN: determine_online_status: Node .* is unclean",
# "Scheduling Node .* for STONITH",
# "Executing .* fencing operation",
# "tengine_stonith_callback: .*result=0",
"State transition .* S_IDLE",
"State transition S_STARTING -> S_PENDING",
], badnews_ignore = common_ignore)
pengine = Process(self, "pengine", triggersreboot=self.fastfail, pats = [
"State transition .* S_RECOVERY",
"crmd.*exited with return code 2.",
"crmd.*Input I_TERMINATE from do_recover",
"crmd.*do_exit: Could not recover from internal error",
"crmd.*CRIT: pe_connection_destroy: Connection to the Policy Engine failed",
"crmd.*I_ERROR.*save_cib_contents",
"crmd.*exited with return code 2.",
], badnews_ignore = common_ignore, dc_only=1)
if self.Env["DoFencing"] == 1 :
complist.append(Process(self, "stoniths", triggersreboot=self.fastfail, dc_pats = [
"crmd.*CRIT: tengine_stonith_connection_destroy: Fencing daemon connection failed",
"Attempting connection to fencing daemon",
"te_connect_stonith: Connected",
], badnews_ignore = stonith_ignore))
if self.fastfail == 0:
ccm.pats.extend([
"attrd .* exited with return code 1",
"(ERROR|error): Respawning client .*attrd",
"cib.* exited with return code 2",
"(ERROR|error): Respawning client .*cib",
"crmd.* exited with return code 2",
"(ERROR|error): Respawning client .*crmd"
])
cib.pats.extend([
"attrd.* exited with return code 1",
"(ERROR|error): Respawning client .*attrd",
"crmd.* exited with return code 2",
"(ERROR|error): Respawning client .*crmd"
])
lrmd.pats.extend([
"crmd.* exited with return code 2",
"(ERROR|error): Respawning client .*crmd"
])
pengine.pats.extend([
"(ERROR|error): Respawning client .*crmd"
])
complist.append(ccm)
complist.append(cib)
complist.append(lrmd)
complist.append(crmd)
complist.append(pengine)
return complist
def NodeUUID(self, node):
lines = self.rsh(node, self["UUIDQueryCmd"], 1)
for line in lines:
self.debug("UUIDLine:"+ line)
m = re.search(r'%s.+\((.+)\)' % node, line)
if m:
return m.group(1)
return ""
def StandbyStatus(self, node):
out=self.rsh(node, self["StandbyQueryCmd"]%node, 1)
if not out:
return "off"
out = out[:-1]
self.debug("Standby result: "+out)
return out
# status == "on" : Enter Standby mode
# status == "off": Enter Active mode
def SetStandbyMode(self, node, status):
current_status = self.StandbyStatus(node)
cmd = self["StandbyCmd"] % (node, status)
ret = self.rsh(node, cmd)
return True
#######################################################################
#
# A little test code...
#
# Which you are advised to completely ignore...
#
#######################################################################
if __name__ == '__main__':
pass
diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h
index 1b6b7005e1..c42da62224 100644
--- a/include/crm/lrmd.h
+++ b/include/crm/lrmd.h
@@ -1,413 +1,415 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef LRMD__H
#define LRMD__H
typedef struct lrmd_s lrmd_t;
typedef struct lrmd_key_value_s lrmd_key_value_t;
struct lrmd_key_value_t;
/* *INDENT-OFF* */
#define F_LRMD_OPERATION "lrmd_op"
#define F_LRMD_CLIENTNAME "lrmd_clientname"
#define F_LRMD_CLIENTID "lrmd_clientid"
#define F_LRMD_CALLBACK_TOKEN "lrmd_async_id"
#define F_LRMD_CALLID "lrmd_callid"
#define F_LRMD_CANCEL_CALLID "lrmd_cancel_callid"
#define F_LRMD_CALLOPTS "lrmd_callopt"
#define F_LRMD_CALLDATA "lrmd_calldata"
#define F_LRMD_RC "lrmd_rc"
#define F_LRMD_EXEC_RC "lrmd_exec_rc"
#define F_LRMD_OP_STATUS "lrmd_exec_op_status"
#define F_LRMD_TIMEOUT "lrmd_timeout"
#define F_LRMD_CLASS "lrmd_class"
#define F_LRMD_PROVIDER "lrmd_provider"
#define F_LRMD_TYPE "lrmd_type"
#define F_LRMD_ORIGIN "lrmd_origin"
#define F_LRMD_RSC_RUN_TIME "lrmd_run_time"
#define F_LRMD_RSC_RCCHANGE_TIME "lrmd_rcchange_time"
#define F_LRMD_RSC_EXEC_TIME "lrmd_exec_time"
#define F_LRMD_RSC_QUEUE_TIME "lrmd_queue_time"
#define F_LRMD_RSC_ID "lrmd_rsc_id"
#define F_LRMD_RSC_ACTION "lrmd_rsc_action"
#define F_LRMD_RSC_USERDATA_STR "lrmd_rsc_userdata_str"
#define F_LRMD_RSC_OUTPUT "lrmd_rsc_output"
#define F_LRMD_RSC_START_DELAY "lrmd_rsc_start_delay"
#define F_LRMD_RSC_INTERVAL "lrmd_rsc_interval"
#define F_LRMD_RSC_METADATA "lrmd_rsc_metadata_res"
#define F_LRMD_RSC_DELETED "lrmd_rsc_deleted"
#define F_LRMD_RSC "lrmd_rsc"
#define LRMD_OP_RSC_CHK_REG "lrmd_rsc_check_register"
#define LRMD_OP_RSC_REG "lrmd_rsc_register"
#define LRMD_OP_RSC_EXEC "lrmd_rsc_exec"
#define LRMD_OP_RSC_CANCEL "lrmd_rsc_cancel"
#define LRMD_OP_RSC_UNREG "lrmd_rsc_unregister"
#define LRMD_OP_RSC_INFO "lrmd_rsc_info"
#define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata"
#define T_LRMD "lrmd"
#define T_LRMD_REPLY "lrmd_reply"
#define T_LRMD_NOTIFY "lrmd_notify"
/* *INDENT-ON* */
lrmd_t *lrmd_api_new(void);
bool lrmd_dispatch(lrmd_t *lrmd);
void lrmd_api_delete(lrmd_t * lrmd);
lrmd_key_value_t *lrmd_key_value_add(lrmd_key_value_t *kvp,
const char *key,
const char *value);
/* *INDENT-OFF* */
/* Reserved for future use */
enum lrmd_call_options {
lrmd_opt_none = 0x00000000,
/* lrmd_opt_sync_call = 0x00000001, //Not implemented, patches welcome. */
+ /*! Only notify the client originating a exec() the results */
+ lrmd_opt_notify_orig_only = 0x00000002,
};
enum lrmd_errors {
lrmd_ok = 0,
lrmd_pending = -1,
lrmd_err_generic = -2,
lrmd_err_internal = -3,
lrmd_err_not_supported = -4,
lrmd_err_connection = -5,
lrmd_err_missing = -6,
lrmd_err_exists = -7,
lrmd_err_timeout = -8,
lrmd_err_ipc = -9,
lrmd_err_peer = -10,
lrmd_err_unknown_operation = -11,
lrmd_err_unknown_rsc = -12,
lrmd_err_none_available = -13,
lrmd_err_authentication = -14,
lrmd_err_signal = -15,
lrmd_err_exec_failed = -16,
lrmd_err_no_metadata = -17,
lrmd_err_stonith_connection = -18,
lrmd_err_provider_required = -19,
};
enum lrmd_callback_event {
lrmd_event_register,
lrmd_event_unregister,
lrmd_event_exec_complete,
lrmd_event_disconnect,
};
enum lrmd_exec_rc {
PCMK_EXECRA_OK = 0,
PCMK_EXECRA_UNKNOWN_ERROR = 1,
PCMK_EXECRA_INVALID_PARAM = 2,
PCMK_EXECRA_UNIMPLEMENT_FEATURE = 3,
PCMK_EXECRA_INSUFFICIENT_PRIV = 4,
PCMK_EXECRA_NOT_INSTALLED = 5,
PCMK_EXECRA_NOT_CONFIGURED = 6,
PCMK_EXECRA_NOT_RUNNING = 7,
PCMK_EXECRA_RUNNING_MASTER = 8,
PCMK_EXECRA_FAILED_MASTER = 9,
/* For status command only */
PCMK_EXECRA_STATUS_UNKNOWN = 14,
};
/* *INDENT-ON* */
typedef struct lrmd_event_data_s {
/*! Type of event, register, unregister, call_completed... */
enum lrmd_callback_event type;
/*! The resource this event occurred on. */
const char *rsc_id;
/*! The action performed, start, stop, monitor... */
const char *op_type;
/*! The userdata string given do exec() api function */
const char *user_data;
/*! The client api call id associated with this event */
int call_id;
/*! The operation's timeout period in ms. */
int timeout;
/*! The operation's recurring interval in ms. */
int interval;
/*! The operation's start delay value in ms. */
int start_delay;
/*! This operation that just completed is on a deleted rsc. */
int rsc_deleted;
/*! The executed ra return code */
enum lrmd_exec_rc rc;
/*! The lrmd status returned for exec_complete events */
int op_status;
/*! stdout from resource agent operation */
const char *output;
/*! Timestamp of when op ran */
unsigned int t_run;
/*! Timestamp of last rc change */
unsigned int t_rcchange;
/*! Time in length op took to execute */
unsigned int exec_time;
/*! Time in length spent in queue */
unsigned int queue_time;
/* This is a GHashTable containing the
* parameters given to the operation */
void *params;
} lrmd_event_data_t;
lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t *event);
void lrmd_free_event(lrmd_event_data_t *event);
typedef struct lrmd_rsc_info_s {
char *id;
char *type;
char *class;
char *provider;
} lrmd_rsc_info_t;
lrmd_rsc_info_t *lrmd_copy_rsc_info(lrmd_rsc_info_t *rsc_info);
void lrmd_free_rsc_info(lrmd_rsc_info_t *rsc_info);
typedef void (*lrmd_event_callback)(lrmd_event_data_t *event);
typedef struct lrmd_list_s {
const char *val;
struct lrmd_list_s *next;
} lrmd_list_t;
void lrmd_list_freeall(lrmd_list_t *head);
typedef struct lrmd_api_operations_s
{
/*!
* \brief Connect from the lrmd.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*connect) (lrmd_t *lrmd, const char *client_name, int *fd);
/*!
* \brief Disconnect from the lrmd.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*disconnect)(lrmd_t *lrmd);
/*!
* \brief Register a resource with the lrmd.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*register_rsc) (lrmd_t *lrmd,
const char *rsc_id,
const char *class,
const char *provider,
const char *agent,
enum lrmd_call_options options);
/*!
* \brief Retrieve registration info for a rsc
*
* \retval info on success
* \retval NULL on failure
*/
lrmd_rsc_info_t *(*get_rsc_info) (lrmd_t *lrmd,
const char *rsc_id,
enum lrmd_call_options options);
/*!
* \brief Unregister a resource from the lrmd.
*
* \note All pending and recurring operations will be cancelled
* automatically.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval -1, success, but operations are currently executing on the rsc which will
* return once they are completed.
* \retval negative error code on failure
*
*/
int (*unregister_rsc) (lrmd_t *lrmd,
const char *rsc_id,
enum lrmd_call_options options);
/*!
* \brief Sets the callback to receive lrmd events on.
*/
void (*set_callback) (lrmd_t *lrmd,
lrmd_event_callback callback);
/*!
* \brief Issue a command on a resource
*
* \note Asynchronous, command is queued in daemon on function return, but
* execution of command is not synced.
*
* \note Operations on individual resources are guaranteed to occur
* in the order the client api calls them in.
*
* \note Operations between different resources are not guaranteed
* to occur in any specific order in relation to one another
* regardless of what order the client api is called in.
* \retval call_id to track async event result on success
* \retval negative error code on failure
*/
int (*exec)(lrmd_t *lrmd,
const char *rsc_id,
const char *action,
const char *userdata, /* userdata string given back in event notification */
int interval, /* ms */
int timeout, /* ms */
int start_delay, /* ms */
enum lrmd_call_options options,
lrmd_key_value_t *params); /* ownership of params is given up to api here */
/*!
* \brief Cancel a recurring command.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \note The cancel is completed async from this call.
* We can be guaranteed the cancel has completed once
* the callback receives an exec_complete event with
* the lrmd_op_status signifying that the operation is
* cancelled.
* \note For each resource, cancel operations and exec operations
* are processed in the order they are received.
* It is safe to assume that for a single resource, a cancel
* will occur in the lrmd before an exec if the client's cancel
* api call occurs before the exec api call.
*
* It is not however safe to assume any operation on one resource will
* occur before an operation on another resource regardless of
* the order the client api is called in.
*
* \retval 0, cancel command sent.
* \retval negative error code on failure
*/
int (*cancel)(lrmd_t *lrmd,
const char *rsc_id,
const char *action,
int interval);
/*!
* \brief Get the metadata documentation for a resource.
*
* \note Value is returned in output. Output must be freed when set
*
* \retval lrmd_ok success
* \retval negative error code on failure
*/
int (*get_metadata) (lrmd_t *lrmd,
const char *class,
const char *provider,
const char *agent,
char **output,
enum lrmd_call_options options);
/*!
* \brief Retrieve a list of installed resource agents.
*
* \note if class is not provided, all known agents will be returned
* \note list must be freed using lrmd_list_freeall()
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_agents)(lrmd_t *lrmd, lrmd_list_t **agents, const char *class, const char *provider);
/*!
* \brief Retrieve a list of resource agent providers
*
* \note When the agent is provided, only the agent's provider will be returned
* \note When no agent is supplied, all providers will be returned.
* \note List must be freed using lrmd_list_freeall()
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_ocf_providers)(lrmd_t *lrmd,
const char *agent,
lrmd_list_t **providers);
} lrmd_api_operations_t;
struct lrmd_s {
lrmd_api_operations_t *cmds;
void *private;
};
static inline const char *
lrmd_event_rc2str(enum lrmd_exec_rc rc)
{
switch(rc) {
case PCMK_EXECRA_OK:
return "ok";
case PCMK_EXECRA_UNKNOWN_ERROR:
return "unknown error";
case PCMK_EXECRA_INVALID_PARAM:
return "invalid parameter";
case PCMK_EXECRA_UNIMPLEMENT_FEATURE:
return "unimplemented feature";
case PCMK_EXECRA_INSUFFICIENT_PRIV:
return "insufficient privileges";
case PCMK_EXECRA_NOT_INSTALLED:
return "not installed";
case PCMK_EXECRA_NOT_CONFIGURED:
return "not configured";
case PCMK_EXECRA_NOT_RUNNING:
return "not running";
case PCMK_EXECRA_RUNNING_MASTER:
return "master";
case PCMK_EXECRA_FAILED_MASTER:
return "master (failed)";
case PCMK_EXECRA_STATUS_UNKNOWN:
return "status: unknown";
default:
break;
}
return "<unknown>";
}
static inline const char *
lrmd_event_type2str(enum lrmd_callback_event type)
{
switch (type) {
case lrmd_event_register:
return "register";
case lrmd_event_unregister:
return "unregister";
case lrmd_event_exec_complete:
return "exec_complete";
case lrmd_event_disconnect:
return "disconnect";
}
return "unknown";
}
#endif
diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am
index 0be956e459..a7b1a81264 100644
--- a/lrmd/Makefile.am
+++ b/lrmd/Makefile.am
@@ -1,35 +1,40 @@
# Copyright (c) 2012 David Vossel <dvossel@redhat.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
MAINTAINERCLEANFILES = Makefile.in
lrmdlibdir = $(CRM_DAEMON_DIR)
+lrmdctsdir = $(datadir)/$(PACKAGE)/tests/cts
+
## binary progs
-lrmdlib_PROGRAMS = lrmd
-noinst_PROGRAMS = lrmd_test
+lrmdlib_PROGRAMS = lrmd
+lrmdcts_PROGRAMS = lrmd_test
lrmd_SOURCES = main.c lrmd.c
lrmd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
$(top_builddir)/lib/services/libcrmservice.la \
$(top_builddir)/lib/lrmd/liblrmd.la \
$(top_builddir)/lib/fencing/libstonithd.la
lrmd_test_SOURCES = test.c
lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
$(top_builddir)/lib/lrmd/liblrmd.la \
- $(top_builddir)/lib/services/libcrmservice.la
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/services/libcrmservice.la \
+ $(top_builddir)/lib/pengine/libpe_status.la \
+ $(top_builddir)/pengine/libpengine.la
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index d336ece2de..b66a4e857d 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -1,968 +1,983 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/msg_xml.h>
#include <lrmd_private.h>
#ifdef HAVE_SYS_TIMEB_H
#include <sys/timeb.h>
#endif
GHashTable *rsc_list = NULL;
GHashTable *client_list = NULL;
typedef struct lrmd_cmd_s {
int timeout;
int interval;
int start_delay;
int call_id;
int exec_rc;
int lrmd_op_status;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
+ char *only_notify_client;
char *origin;
char *rsc_id;
char *action;
char *output;
char *userdata_str;
#ifdef HAVE_SYS_TIMEB_H
/* Timestamp of when op ran */
struct timeb t_run;
/* Timestamp of when op was queued */
struct timeb t_queue;
/* Timestamp of last rc change */
struct timeb t_rcchange;
#endif
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t *cmd, lrmd_rsc_t *rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode *msg)
{
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = calloc(1, sizeof(lrmd_rsc_t));
rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
return rsc;
}
static lrmd_cmd_t *
-create_lrmd_cmd(xmlNode *msg)
+create_lrmd_cmd(xmlNode *msg, lrmd_client_t *client)
{
+ int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = calloc(1, sizeof(lrmd_cmd_t));
+ crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
+
+ if (call_options & lrmd_opt_notify_orig_only) {
+ cmd->only_notify_client = crm_strdup(client->id);
+ }
+
crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval);
crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
return cmd;
}
static void
free_lrmd_cmd(lrmd_cmd_t *cmd)
{
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
free(cmd->origin);
free(cmd->action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->output);
+ free(cmd->only_notify_client);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->stonith_recurring_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
CRM_ASSERT(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static void
send_reply(lrmd_client_t *client, int rc, int call_id)
{
int send_rc = 0;
xmlNode *reply = NULL;
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
send_rc = crm_ipcs_send(client->channel, reply, FALSE);
free_xml(reply);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
lrmd_client_t *client = value;
if (client == NULL) {
crm_err("Asked to send event to NULL client");
return;
} else if (client->channel == NULL) {
crm_trace("Asked to send event to disconnected client");
return;
} else if (client->name == NULL) {
crm_trace("Asked to send event to client with no name");
return;
}
if (crm_ipcs_send(client->channel, update_msg, TRUE) <= 0) {
crm_warn("Notification of client %s/%s failed",
client->name, client->id);
}
}
#ifdef HAVE_SYS_TIMEB_H
static int
time_diff_ms(struct timeb *now, struct timeb *old)
{
int sec = difftime(now->time, old->time);
int ms = now->millitm - old->millitm;
if (old->time == 0) {
return 0;
}
return (sec * 1000) + ms;
}
#endif
static void
send_cmd_complete_notify(lrmd_cmd_t *cmd)
{
#ifdef HAVE_SYS_TIMEB_H
struct timeb now = { 0, };
#endif
xmlNode *notify = NULL;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval);
crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
#ifdef HAVE_SYS_TIMEB_H
ftime(&now);
crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time);
crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time);
crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, time_diff_ms(&now, &cmd->t_run));
crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, time_diff_ms(&cmd->t_run, &cmd->t_queue));
#endif
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2field((gpointer) key, (gpointer) value, args);
}
}
- g_hash_table_foreach(client_list, send_client_notify, notify);
+ if (cmd->only_notify_client) {
+ lrmd_client_t *client = g_hash_table_lookup(client_list, cmd->only_notify_client);
+
+ if (client) {
+ send_client_notify(client->id, client, notify);
+ }
+ } else {
+ g_hash_table_foreach(client_list, send_client_notify, notify);
+ }
free_xml(notify);
}
static void
send_generic_notify(int rc, xmlNode *request)
{
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *op = crm_element_value(request, F_LRMD_OPERATION);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_RC, rc);
crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
crm_xml_add(notify, F_LRMD_OPERATION, op);
crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
g_hash_table_foreach(client_list, send_client_notify, notify);
free_xml(notify);
}
static void
cmd_finalize(lrmd_cmd_t *cmd, lrmd_rsc_t *rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed", cmd->rsc_id, cmd->action);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
send_cmd_complete_notify(cmd);
if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd->lrmd_op_status = 0;
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
free(cmd->output);
cmd->output = NULL;
}
}
static int
lsb2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_EXECRA_UNKNOWN_ERROR;
}
/* status has different return codes that everything else. */
if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) {
if (rc > PCMK_LSB_NOT_RUNNING) {
return PCMK_EXECRA_UNKNOWN_ERROR;
}
return rc;
}
switch (rc) {
case PCMK_LSB_STATUS_OK:
return PCMK_EXECRA_OK;
case PCMK_LSB_STATUS_NOT_INSTALLED:
return PCMK_EXECRA_NOT_INSTALLED;
case PCMK_LSB_STATUS_VAR_PID:
case PCMK_LSB_STATUS_VAR_LOCK:
case PCMK_LSB_STATUS_NOT_RUNNING:
return PCMK_EXECRA_NOT_RUNNING;
default:
return PCMK_EXECRA_UNKNOWN_ERROR;
}
return PCMK_EXECRA_UNKNOWN_ERROR;
}
static int
ocf2uniform_rc(int rc)
{
if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) {
return PCMK_EXECRA_UNKNOWN_ERROR;
}
return rc;
}
static int
stonith2uniform_rc(const char *action, int rc)
{
if (rc == st_err_unknown_device) {
if (safe_str_eq(action, "stop")) {
rc = PCMK_EXECRA_OK;
} else if (safe_str_eq(action, "start")) {
rc = PCMK_EXECRA_NOT_INSTALLED;
} else {
rc = PCMK_EXECRA_NOT_RUNNING;
}
} else if (rc != 0) {
rc = PCMK_EXECRA_UNKNOWN_ERROR;
}
return rc;
}
static int
get_uniform_rc(const char *standard, const char *action, int rc)
{
if (safe_str_eq(standard, "ocf")) {
return ocf2uniform_rc(rc);
} else if (safe_str_eq(standard, "stonith")) {
return stonith2uniform_rc(action, rc);
} else {
return lsb2uniform_rc(action, rc);
}
}
static void
action_complete(svc_action_t *action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
if (!cmd) {
crm_err("LRMD action (%s) completed does not match any known operations.", action->id);
return;
}
#ifdef HAVE_SYS_TIMEB_H
if (cmd->exec_rc != action->rc) {
ftime(&cmd->t_rcchange);
}
#endif
cmd->exec_rc = get_uniform_rc(action->standard, cmd->action, action->rc);
cmd->lrmd_op_status = action->status;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (action->stdout_data) {
cmd->output = crm_strdup(action->stdout_data);
}
cmd_finalize(cmd, rsc);
}
static int
lrmd_rsc_execute_stonith(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
int rc = 0;
stonith_t *stonith_api = get_stonith_connection();
if (!stonith_api) {
cmd->exec_rc = get_uniform_rc("stonith", cmd->action, st_err_connection);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
cmd_finalize(cmd, rsc);
return lrmd_err_stonith_connection;
}
if (safe_str_eq(cmd->action, "start")) {
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
device_params = stonith_key_value_add(device_params, key, value);
}
}
rc = stonith_api->cmds->register_device(stonith_api,
st_opt_sync_call,
cmd->rsc_id,
rsc->provider,
rsc->type,
device_params);
stonith_key_value_freeall(device_params, 1, 1);
if (rc == 0) {
rc = stonith_api->cmds->call(stonith_api,
st_opt_sync_call,
cmd->rsc_id,
"monitor",
NULL,
cmd->timeout);
}
} else if (safe_str_eq(cmd->action, "stop")) {
rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id);
} else if (safe_str_eq(cmd->action, "monitor")) {
rc = stonith_api->cmds->call(stonith_api,
st_opt_sync_call,
cmd->rsc_id,
cmd->action,
NULL,
cmd->timeout);
}
cmd->exec_rc = get_uniform_rc("stonith", cmd->action, rc);
/* Attempt to map return codes to op status if possible */
if (rc) {
switch (rc) {
case st_err_not_supported:
cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED;
break;
case st_err_timeout:
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
break;
default:
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
} else {
cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
}
if (cmd->interval > 0) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd);
}
cmd_finalize(cmd, rsc);
return rc;
}
static const char *
normalize_action_name(lrmd_rsc_t *rsc, const char *action)
{
if (safe_str_eq(action, "monitor") &&
(safe_str_eq(rsc->class, "lsb") ||
safe_str_eq(rsc->class, "service") ||
safe_str_eq(rsc->class, "systemd"))) {
return "status";
}
return action;
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value));
}
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id,
cmd->action,
rsc->class,
rsc->provider,
rsc->type);
if (cmd->params) {
params_copy = g_hash_table_new_full(crm_str_hash,
g_str_equal,
g_hash_destroy_str,
g_hash_destroy_str);
if (params_copy != NULL) {
g_hash_table_foreach(cmd->params, dup_attr, params_copy);
}
}
action = resources_action_create(rsc->rsc_id,
rsc->class,
rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval,
cmd->timeout,
params_copy);
if (!action) {
crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
action->cb_data = cmd;
if (!services_action_async(action, action_complete)) {
services_action_free(action);
action = NULL;
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
if (cmd->interval) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
/* The cmd will be finalized by the action_complete callback after
* the service library is done with it */
rsc->active = cmd; /* only one op at a time for a rsc */
cmd = NULL;
exec_done:
if (cmd) {
cmd_finalize(cmd, rsc);
}
return TRUE;
}
static gboolean
lrmd_rsc_execute(lrmd_rsc_t *rsc)
{
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_run);
}
#endif
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
if (safe_str_eq(rsc->class, "stonith")) {
lrmd_rsc_execute_stonith(rsc, cmd);
} else {
lrmd_rsc_execute_service_lib(rsc, cmd);
}
return TRUE;
}
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
return lrmd_rsc_execute(user_data);
}
void
free_rsc(gpointer data)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = safe_str_eq(rsc->class, "stonith");
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id, cmd->action, cmd->interval);
}
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
static int
process_lrmd_signon(lrmd_client_t *client, xmlNode *request)
{
xmlNode *reply = create_xml_node(NULL, "reply");
crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_LRMD_CLIENTID, client->id);
crm_ipcs_send(client->channel, reply, FALSE);
free_xml(reply);
return lrmd_ok;
}
static int
process_lrmd_rsc_register(lrmd_client_t *client, xmlNode *request)
{
int rc = lrmd_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
safe_str_eq(rsc->class, dup->class) &&
safe_str_eq(rsc->provider, dup->provider) &&
safe_str_eq(rsc->type, dup->type)) {
crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Added '%s' to the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
return rc;
}
static void
process_lrmd_get_rsc_info(lrmd_client_t *client, xmlNode *request)
{
int rc = lrmd_ok;
int send_rc = 0;
int call_id = 0;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (!rsc_id) {
rc = lrmd_err_unknown_rsc;
goto get_rsc_done;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = lrmd_err_unknown_rsc;
goto get_rsc_done;
}
get_rsc_done:
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
if (rsc) {
crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
}
send_rc = crm_ipcs_send(client->channel, reply, FALSE);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
free_xml(reply);
}
static int
process_lrmd_rsc_unregister(lrmd_client_t *client, xmlNode *request)
{
int rc = lrmd_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return lrmd_err_unknown_rsc;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return lrmd_err_unknown_rsc;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
rc = lrmd_pending;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(lrmd_client_t *client, xmlNode *request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return lrmd_err_missing;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
return lrmd_err_unknown_rsc;
}
- cmd = create_lrmd_cmd(request);
+ cmd = create_lrmd_cmd(request, client);
schedule_lrmd_cmd(rsc, cmd);
return cmd->call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, int interval)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action.
* 1. Check pending ops list, if it hasn't been handed off
* to the service library or stonith recurring list remove
* it there and that will stop it.
* 2. If it isn't in the pending ops list, then its either a
* recurring op in the stonith recurring list, or the service
* library's recurring list. Stop it there
* 3. If not found in any lists, then this operation has either
* been executed already and is not a recurring operation, or
* never existed.
*/
if (!rsc) {
return lrmd_err_unknown_rsc;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return lrmd_ok;
}
}
if (safe_str_eq(rsc->class, "stonith")) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith opereations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return lrmd_ok;
}
}
} else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return lrmd_ok;
}
return lrmd_err_unknown_operation;
}
static int
process_lrmd_rsc_cancel(lrmd_client_t *client, xmlNode *request)
{
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
int interval = 0;
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval);
if (!rsc_id || !action) {
return lrmd_err_missing;
}
return cancel_op(rsc_id, action, interval);
}
void
process_lrmd_message(lrmd_client_t *client, xmlNode *request)
{
int rc = lrmd_ok;
- int call_options = 0;
int call_id = 0;
const char *op = crm_element_value(request, F_LRMD_OPERATION);
int do_reply = 0;
int do_notify = 0;
int exit = 0;
- crm_element_value_int(request, F_LRMD_CALLOPTS, &call_options);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
rc = process_lrmd_signon(client, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) {
rc = process_lrmd_rsc_register(client, request);
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) {
process_lrmd_get_rsc_info(client, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) {
rc = process_lrmd_rsc_unregister(client, request);
/* don't notify anyone about failed un-registers */
if (rc == lrmd_ok || rc == lrmd_pending) {
do_notify = 1;
}
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) {
rc = process_lrmd_rsc_exec(client, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) {
rc = process_lrmd_rsc_cancel(client, request);
do_reply = 1;
} else if (crm_str_eq(op, CRM_OP_QUIT, TRUE)) {
do_reply = 1;
exit = 1;
} else {
rc = lrmd_err_unknown_operation;
do_reply = 1;
crm_err("Unknown %s from %s", op, client->name);
crm_log_xml_warn(request, "UnknownOp");
}
if (do_reply) {
send_reply(client, rc, call_id);
}
if (do_notify) {
send_generic_notify(rc, request);
}
if (exit) {
lrmd_shutdown(0);
}
}
diff --git a/lrmd/test.c b/lrmd/test.c
index 6d1d7646e3..f43217cec2 100644
--- a/lrmd/test.c
+++ b/lrmd/test.c
@@ -1,420 +1,561 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
+#include <crm/pengine/status.h>
+#include <crm/cib.h>
#include <crm/lrmd.h>
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
{"help", 0, 0, '?'},
{"verbose", 0, 0, 'V', "\t\tPrint out logs and events to screen"},
{"quiet", 0, 0, 'Q', "\t\tSuppress all output to screen"},
/* just incase we have to add data to events,
* we don't want break a billion regression tests. Instead
* we'll create different versions */
{"listen", 1, 0, 'l', "\tListen for a specific event string"},
{"event-ver", 1, 0, 'e', "\tVersion of event to listen to"},
{"api-call", 1, 0, 'c', "\tDirectly relates to lrmd api functions"},
{"no-wait", 0, 0, 'w', "\tMake api call and do not wait for result."},
+ {"is-running", 0, 0, 'R', "\tDetermine if a resource is registered and running."},
{"-spacer-", 1, 0, '-', "\nParameters for api-call option"},
{"action", 1, 0, 'a'},
{"rsc-id", 1, 0, 'r'},
{"cancel-call-id", 1, 0, 'x'},
{"provider", 1, 0, 'P'},
{"class", 1, 0, 'C'},
{"type", 1, 0, 'T'},
{"interval", 1, 0, 'i'},
{"timeout", 1, 0, 't'},
{"start-delay", 1, 0, 's'},
{"param-key", 1, 0, 'k'},
{"param-val", 1, 0, 'v'},
{"-spacer-", 1, 0, '-'},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
+cib_t *cib_conn = NULL;
static int exec_call_id = 0;
+static int exec_call_opts = 0;
+extern void cleanup_alloc_calculations(pe_working_set_t * data_set);
static struct {
int verbose;
int quiet;
int print;
int interval;
int timeout;
int start_delay;
int cancel_call_id;
int event_version;
int no_wait;
+ int is_running;
int no_connect;
const char *api_call;
const char *rsc_id;
const char *provider;
const char *class;
const char *type;
const char *action;
const char *listen;
lrmd_key_value_t *params;
} options;
GMainLoop *mainloop = NULL;
lrmd_t *lrmd_conn = NULL;
static char event_buf_v0[1024];
#define print_result(result) \
if (!options.quiet) { \
result; \
} \
#define report_event(event) \
snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \
lrmd_event_type2str(event->type), \
event->rsc_id, \
event->op_type ? event->op_type : "none", \
lrmd_event_rc2str(event->rc), \
services_lrm_status_str(event->op_status)); \
crm_info("%s", event_buf_v0);;
static void
test_shutdown(int nsig)
{
lrmd_api_delete(lrmd_conn);
}
static void
read_events(lrmd_event_data_t *event)
{
report_event(event);
if (options.listen) {
if (safe_str_eq(options.listen, event_buf_v0)) {
print_result(printf("LISTEN EVENT SUCCESSFUL\n"));
exit(0);
}
}
if (exec_call_id && (event->call_id == exec_call_id)) {
- if (event->op_status == 0) {
+ if (event->op_status == 0 && event->rc == 0) {
print_result(printf("API-CALL SUCCESSFUL for 'exec'\n"));
} else {
print_result(printf("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s\n",
event->rc,
services_lrm_status_str(event->op_status)));
exit(-1);
}
if (!options.listen) {
exit(0);
}
}
}
static gboolean
timeout_err(gpointer data)
{
print_result(printf("LISTEN EVENT FAILURE - timeout occurred, never found.\n"));
exit(-1);
return FALSE;
}
static void
try_connect(void)
{
int tries = 10;
int i = 0;
int rc = 0;
for (i = 0; i < tries; i++) {
rc = lrmd_conn->cmds->connect(lrmd_conn, "lrmd", NULL);
if (!rc) {
crm_info("lrmd client connection established");
return;
} else {
crm_info("lrmd client connection failed");
}
sleep(1);
}
print_result(printf("API CONNECTION FAILURE\n"));
exit(-1);
}
static gboolean
start_test(gpointer user_data)
{
int rc = 0;
if (!options.no_connect) {
try_connect();
}
lrmd_conn->cmds->set_callback(lrmd_conn, read_events);
if (options.timeout) {
g_timeout_add(options.timeout, timeout_err, NULL);
}
if (!options.api_call) {
return 0;
}
if (safe_str_eq(options.api_call, "exec")) {
rc = lrmd_conn->cmds->exec(lrmd_conn,
options.rsc_id,
options.action,
NULL,
options.interval,
options.timeout,
options.start_delay,
- 0,
+ exec_call_opts,
options.params);
if (rc > 0) {
exec_call_id = rc;
print_result(printf("API-CALL 'exec' action pending, waiting on response\n"));
}
} else if (safe_str_eq(options.api_call, "register_rsc")) {
rc = lrmd_conn->cmds->register_rsc(lrmd_conn,
options.rsc_id,
options.class,
options.provider,
options.type,
0);
} else if (safe_str_eq(options.api_call, "get_rsc_info")) {
lrmd_rsc_info_t *rsc_info;
rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0);
if (rsc_info) {
print_result(printf("RSC_INFO: id:%s class:%s provider:%s type:%s\n",
rsc_info->id, rsc_info->class, rsc_info->provider ? rsc_info->provider : "<none>", rsc_info->type));
lrmd_free_rsc_info(rsc_info);
rc = lrmd_ok;
} else {
rc = -1;
}
} else if (safe_str_eq(options.api_call, "unregister_rsc")) {
rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn,
options.rsc_id,
0);
} else if (safe_str_eq(options.api_call, "cancel")) {
rc = lrmd_conn->cmds->cancel(lrmd_conn,
options.rsc_id,
options.action,
options.interval);
} else if (safe_str_eq(options.api_call, "metadata")) {
char *output = NULL;
rc = lrmd_conn->cmds->get_metadata(lrmd_conn,
options.class,
options.provider,
options.type, &output, 0);
if (rc == lrmd_ok) {
print_result(printf("%s", output));
free(output);
}
} else if (safe_str_eq(options.api_call, "list_agents")) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider);
if (rc > 0) {
print_result(printf("%d agents found\n", rc));
for (iter = list; iter != NULL; iter = iter->next) {
print_result(printf("%s\n", iter->val));
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result(printf("API_CALL FAILURE - no agents found\n"));
rc = -1;
}
} else if (safe_str_eq(options.api_call, "list_ocf_providers")) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list);
if (rc > 0) {
print_result(printf("%d providers found\n", rc));
for (iter = list; iter != NULL; iter = iter->next) {
print_result(printf("%s\n", iter->val));
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result(printf("API_CALL FAILURE - no providers found\n"));
rc = -1;
}
} else if (options.api_call) {
print_result(printf("API-CALL FAILURE unknown action '%s'\n", options.action));
exit(-1);
}
if (rc < 0) {
print_result(printf("API-CALL FAILURE for '%s' api_rc:%d\n", options.api_call, rc));
exit(-1);
}
if (options.api_call && rc == lrmd_ok) {
print_result(printf("API-CALL SUCCESSFUL for '%s'\n", options.api_call));
if (!options.listen) {
exit(0);
}
}
if (options.no_wait) {
/* just make the call and exit regardless of anything else. */
exit(0);
}
return 0;
}
+static resource_t *
+find_rsc_or_clone(const char *rsc, pe_working_set_t * data_set)
+{
+ resource_t *the_rsc = pe_find_resource(data_set->resources, rsc);
+
+ if (the_rsc == NULL) {
+ char *as_clone = crm_concat(rsc, "0", ':');
+
+ the_rsc = pe_find_resource(data_set->resources, as_clone);
+ free(as_clone);
+ }
+ return the_rsc;
+}
+
+static int
+generate_params(void)
+{
+ int rc = 0;
+ pe_working_set_t data_set;
+ xmlNode *cib_xml_copy = NULL;
+ resource_t *rsc = NULL;
+ GHashTable *params = NULL;
+ GHashTable *meta = NULL;
+ GHashTableIter iter;
+
+ if (options.params) {
+ return 0;
+ }
+
+ set_working_set_defaults(&data_set);
+
+ cib_conn = cib_new();
+ rc = cib_conn->cmds->signon(cib_conn, "lrmd_test", cib_query);
+ if (rc != cib_ok) {
+ crm_err("Error signing on to the CIB service: %s\n", cib_error2string(rc));
+ rc = -1;
+ goto param_gen_bail;
+ }
+
+ cib_xml_copy = get_cib_copy(cib_conn);
+
+ if (!cib_xml_copy) {
+ crm_err("Error retrieving cib copy.");
+ rc = -1;
+ goto param_gen_bail;
+ }
+
+ if (cli_config_update(&cib_xml_copy, NULL, FALSE) == FALSE) {
+ crm_err("Error updating cib configuration");
+ rc = -1;
+ goto param_gen_bail;
+ }
+
+ data_set.input = cib_xml_copy;
+ data_set.now = new_ha_date(TRUE);
+
+ cluster_status(&data_set);
+ if (options.rsc_id) {
+ rsc = find_rsc_or_clone(options.rsc_id, &data_set);
+ }
+
+ if (!rsc) {
+ crm_err("Resource does not exist in config");
+ rc = -1;
+ goto param_gen_bail;
+ }
+
+ params = g_hash_table_new_full(crm_str_hash,
+ g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
+ meta = g_hash_table_new_full(crm_str_hash,
+ g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
+
+ get_rsc_attributes(params, rsc, NULL, &data_set);
+ get_meta_attributes(meta, rsc, NULL, &data_set);
+
+ if (params) {
+ char *key = NULL;
+ char *value = NULL;
+
+ g_hash_table_iter_init(&iter, params);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
+ options.params = lrmd_key_value_add(options.params, key, value);
+ }
+ g_hash_table_destroy(params);
+ }
+
+ if (meta) {
+ char *key = NULL;
+ char *value = NULL;
+
+ g_hash_table_iter_init(&iter, meta);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
+ char *crm_name = crm_meta_name(key);
+ options.params = lrmd_key_value_add(options.params, crm_name, value);
+ free(crm_name);
+ }
+ g_hash_table_destroy(meta);
+ }
+
+param_gen_bail:
+
+ cleanup_alloc_calculations(&data_set);
+ return rc;
+}
+
int main(int argc, char ** argv)
{
int option_index = 0;
int argerr = 0;
int flag;
char *key = NULL;
char *val = NULL;
crm_trigger_t *trig;
crm_set_options(NULL, "mode [options]", long_options,
"Inject commands into the lrmd and watch for events\n");
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch(flag) {
case '?':
crm_help(flag, LSB_EXIT_OK);
break;
case 'V':
options.verbose = 1;
break;
case 'Q':
options.quiet = 1;
options.verbose = 0;
break;
case 'e':
options.event_version = atoi(optarg);
break;
case 'l':
options.listen = optarg;
break;
case 'w':
options.no_wait = 1;
break;
+ case 'R':
+ options.is_running = 1;
+ break;
case 'c':
options.api_call = optarg;
break;
case 'a':
options.action = optarg;
break;
case 'r':
options.rsc_id = optarg;
break;
case 'x':
options.cancel_call_id = atoi(optarg);
break;
case 'P':
options.provider = optarg;
break;
case 'C':
options.class = optarg;
break;
case 'T':
options.type = optarg;
break;
case 'i':
options.interval = atoi(optarg);
break;
case 't':
options.timeout = atoi(optarg);
break;
case 's':
options.start_delay = atoi(optarg);
break;
case 'k':
key = optarg;
if (key && val) {
options.params = lrmd_key_value_add(options.params, key, val);
key = val = NULL;
}
break;
case 'v':
val = optarg;
if (key && val) {
options.params = lrmd_key_value_add(options.params, key, val);
key = val = NULL;
}
break;
default:
++argerr;
break;
}
}
if (argerr) {
crm_help('?', LSB_EXIT_GENERIC);
}
if (optind > argc) {
++argerr;
}
if (!options.listen &&
(safe_str_eq(options.api_call, "metadata") ||
safe_str_eq(options.api_call, "list_agents") ||
safe_str_eq(options.api_call, "list_ocf_providers"))) {
options.no_connect = 1;
}
crm_log_init("lrmd_ctest", LOG_INFO, TRUE, options.verbose ? TRUE : FALSE, argc, argv, FALSE);
+ if (options.is_running) {
+ if (!options.timeout) {
+ options.timeout = 30000;
+ }
+ options.interval = 0;
+ if (!options.rsc_id) {
+ crm_err("rsc-id must be given when is-running is used");
+ exit(-1);
+ }
+
+ if (generate_params()) {
+ print_result(printf("Failed to retrieve rsc parameters from cib, can not determine if rsc is running.\n"));
+ exit(-1);
+ }
+ options.api_call = "exec";
+ options.action = "monitor";
+ exec_call_opts = lrmd_opt_notify_orig_only;
+ }
+
+
/* if we can't perform an api_call or listen for events,
* there is nothing to do */
if (!options.api_call && !options.listen) {
crm_err("Nothing to be done. Please specify 'api-call' and/or 'listen'");
return 0;
}
lrmd_conn = lrmd_api_new();
trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL);
mainloop_set_trigger(trig);
mainloop_add_signal(SIGTERM, test_shutdown);
crm_info("Starting");
mainloop = g_main_new(FALSE);
g_main_run(mainloop);
lrmd_api_delete(lrmd_conn);
+
+ if (cib_conn != NULL) {
+ cib_conn->cmds->signoff(cib_conn);
+ cib_delete(cib_conn);
+ }
+
return 0;
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:27 PM (7 h, 9 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2002646
Default Alt Text
(80 KB)

Event Timeline