diff --git a/cts/CM_lha.py b/cts/CM_lha.py
index 77c3bb593c..90adb09de2 100755
--- a/cts/CM_lha.py
+++ b/cts/CM_lha.py
@@ -1,628 +1,600 @@
'''CTS: Cluster Testing System: LinuxHA v2 dependent modules...
'''
__copyright__='''
Author: Huang Zhen <zhenhltc@cn.ibm.com>
Copyright (C) 2004 International Business Machines
Additional Audits, Revised Start action, Default Configuration:
Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
import os, sys, warnings
from cts import CTS
from cts.CTSvars import *
from cts.CTS import *
from cts.CIB import *
from cts.CTStests import AuditResource
try:
from xml.dom.minidom import *
except ImportError:
sys.__stdout__.write("Python module xml.dom.minidom not found\n")
sys.__stdout__.write("Please install python-xml or similar before continuing\n")
sys.__stdout__.flush()
sys.exit(1)
#######################################################################
#
# LinuxHA v2 dependent modules
#
#######################################################################
class crm_lha(ClusterManager):
'''
The linux-ha version 2 cluster manager class.
It implements the things we need to talk to and manipulate
linux-ha version 2 clusters
'''
def __init__(self, Environment, randseed=None):
ClusterManager.__init__(self, Environment, randseed=randseed)
#HeartbeatCM.__init__(self, Environment, randseed=randseed)
self.fastfail = 0
self.clear_cache = 0
self.cib_installed = 0
self.config = None
self.cluster_monitor = 0
self.use_short_names = 1
self.update({
"Name" : "crm-lha",
"DeadTime" : 300,
"StartTime" : 300, # Max time to start up
"StableTime" : 30,
"StartCmd" : CTSvars.INITDIR+"/heartbeat start > /dev/null 2>&1",
"StopCmd" : CTSvars.INITDIR+"/heartbeat stop > /dev/null 2>&1",
"ElectionCmd" : "crmadmin -E %s",
"StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null",
"EpocheCmd" : "crm_node -H -e",
"QuorumCmd" : "crm_node -H -q",
"ParitionCmd" : "crm_node -H -p",
"CibQuery" : "cibadmin -Ql",
# 300,000 == 5 minutes
- "ExecuteRscOp" : "lrmadmin -n %s -E %s %s 300000 %d EVERYTIME 2>&1",
+ "RscRunning" : CTSvars.CTS_home + "/lrmd_test -R -r %s",
"CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml",
"TmpDir" : "/tmp",
"BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1",
"FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1",
# tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit
# tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated
# tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1
# tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null
"ReduceCommCmd" : "",
"RestoreCommCmd" : "tc qdisc del dev lo root",
"LogFileName" : Environment["LogFileName"],
"UUIDQueryCmd" : "crmadmin -N",
"StandbyCmd" : "crm_attribute -Q -U %s -n standby -l forever -v %s 2>/dev/null",
"StandbyQueryCmd" : "crm_attribute -GQ -U %s -n standby -l forever -d off 2>/dev/null",
# Patterns to look for in the log files for various occasions...
"Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE",
# This wont work if we have multiple partitions
"Pat:Local_started" : "%s .*The local CRM is operational",
"Pat:Slave_started" : "%s .*State transition.*-> S_NOT_DC",
"Pat:Master_started" : "%s .* State transition.*-> S_IDLE",
"Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete",
"Pat:Logd_stopped" : "%s logd:.*Exiting write process",
"Pat:They_stopped" : "%s .*LOST:.* %s ",
"Pat:They_dead" : "node %s.*: is dead",
"Pat:TransitionComplete" : "Transition status: Complete: complete",
"Pat:ChildKilled" : "%s heartbeat.*%s.*killed by signal 9",
"Pat:ChildRespawn" : "%s heartbeat.*Respawning client.*%s",
"Pat:ChildExit" : "(ERROR|error): Client .* exited with return code",
"Pat:We_fenced" : "crmd.* Executing .* fencing operation .* on %s",
"Pat:They_fenced" : "stonith.* log_operation: Operation .* for host '%s' with device .* returned: 0",
"Pat:They_fenced_offset" : "for host '",
# Bad news Regexes. Should never occur.
"BadRegexes" : (
r" trace:",
r"error:",
r"crit:",
r"ERROR:",
r"CRIT:",
r"Shutting down...NOW",
r"Timer I_TERMINATE just popped",
r"input=I_ERROR",
r"input=I_FAIL",
r"input=I_INTEGRATED cause=C_TIMER_POPPED",
r"input=I_FINALIZED cause=C_TIMER_POPPED",
r"input=I_ERROR",
r", exiting\.",
r"WARN.*Ignoring HA message.*vote.*not in our membership list",
r"pengine.*Attempting recovery of resource",
r"is taking more than 2x its timeout",
r"Confirm not received from",
r"Welcome reply not received from",
r"Attempting to schedule .* after a stop",
r"Resource .* was active at shutdown",
r"duplicate entries for call_id",
r"Search terminated:",
r"No need to invoke the TE",
r"global_timer_callback:",
r"Faking parameter digest creation",
r"Parameters to .* action changed:",
r"Parameters to .* changed",
),
})
if self.Env["DoBSC"]:
del self["Pat:They_stopped"]
del self["Pat:Logd_stopped"]
self.Env["use_logd"] = 0
self._finalConditions()
self.check_transitions = 0
self.check_elections = 0
self.CIBsync = {}
self.CibFactory = ConfigFactory(self)
self.cib = self.CibFactory.createConfig(self.Env["Schema"])
def errorstoignore(self):
# At some point implement a more elegant solution that
# also produces a report at the end
'''Return list of errors which are known and very noisey should be ignored'''
if 1:
return [
"(ERROR|error): crm_abort: crm_glib_handler: ",
"(ERROR|error): Message hist queue is filling up",
"stonithd.*CRIT: external_hostlist: 'vmware gethosts' returned an empty hostlist",
"stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.",
"pengine.*Preventing .* from re-starting",
]
return []
def install_config(self, node):
if not self.ns.WaitForNodeToComeUp(node):
self.log("Node %s is not up." % node)
return None
if not self.CIBsync.has_key(node) and self.Env["ClobberCIB"] == 1:
self.CIBsync[node] = 1
self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*")
# Only install the CIB on the first node, all the other ones will pick it up from there
if self.cib_installed == 1:
return None
self.cib_installed = 1
if self.Env["CIBfilename"] == None:
self.log("Installing Generated CIB on node %s" %(node))
self.cib.install(node)
else:
self.log("Installing CIB (%s) on node %s" %(self.Env["CIBfilename"], node))
if 0 != self.rsh.cp(self.Env["CIBfilename"], "root@" + (self["CIBfile"]%node)):
raise ValueError("Can not scp file to %s %d"%(node))
self.rsh(node, "chown "+CTSvars.CRM_DAEMON_USER+" "+CTSvars.CRM_CONFIG_DIR+"/cib.xml")
def prepare(self):
'''Finish the Initialization process. Prepare to test...'''
self.partitions_expected = 1
for node in self.Env["nodes"]:
self.ShouldBeStatus[node] = ""
self.unisolate_node(node)
self.StataCM(node)
def test_node_CM(self, node):
'''Report the status of the cluster manager on a given node'''
watchpats = [ ]
watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)")
watchpats.append(self["Pat:Slave_started"]%node)
watchpats.append(self["Pat:Master_started"]%node)
idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterIdle")
idle_watch.setwatch()
out = self.rsh(node, self["StatusCmd"]%node, 1)
self.debug("Node %s status: '%s'" %(node, out))
if not out or string.find(out, 'ok') < 0:
if self.ShouldBeStatus[node] == "up":
self.log(
"Node status for %s is %s but we think it should be %s"
%(node, "down", self.ShouldBeStatus[node]))
self.ShouldBeStatus[node]="down"
return 0
if self.ShouldBeStatus[node] == "down":
self.log(
"Node status for %s is %s but we think it should be %s: %s"
%(node, "up", self.ShouldBeStatus[node], out))
self.ShouldBeStatus[node]="up"
# check the output first - because syslog-ng looses messages
if string.find(out, 'S_NOT_DC') != -1:
# Up and stable
return 2
if string.find(out, 'S_IDLE') != -1:
# Up and stable
return 2
# fall back to syslog-ng and wait
if not idle_watch.look():
# just up
self.debug("Warn: Node %s is unstable: %s" %(node, out))
return 1
# Up and stable
return 2
# Is the node up or is the node down
def StataCM(self, node):
'''Report the status of the cluster manager on a given node'''
if self.test_node_CM(node) > 0:
return 1
return None
# Being up and being stable is not the same question...
def node_stable(self, node):
'''Report the status of the cluster manager on a given node'''
if self.test_node_CM(node) == 2:
return 1
self.log("Warn: Node %s not stable" %(node))
return None
def partition_stable(self, nodes, timeout=None):
watchpats = [ ]
watchpats.append("Current ping state: S_IDLE")
watchpats.append(self["Pat:DC_IDLE"])
self.debug("Waiting for cluster stability...")
if timeout == None:
timeout = self["DeadTime"]
idle_watch = CTS.LogWatcher(self.Env, self["LogFileName"], watchpats, "ClusterStable", timeout)
idle_watch.setwatch()
any_up = 0
for node in self.Env["nodes"]:
# have each node dump its current state
if self.ShouldBeStatus[node] == "up":
self.rsh(node, self["StatusCmd"] %node, 1)
any_up = 1
if any_up == 0:
self.debug("Cluster is inactive")
return 1
ret = idle_watch.look()
while ret:
self.debug(ret)
for node in nodes:
if re.search(node, ret):
return 1
ret = idle_watch.look()
self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout))
return None
def cluster_stable(self, timeout=None, double_check=False):
partitions = self.find_partitions()
for partition in partitions:
if not self.partition_stable(partition, timeout):
return None
if double_check:
# Make sure we are really stable and that all resources,
# including those that depend on transient node attributes,
# are started if they were going to be
time.sleep(5)
for partition in partitions:
if not self.partition_stable(partition, timeout):
return None
return 1
def is_node_dc(self, node, status_line=None):
rc = 0
if not status_line:
status_line = self.rsh(node, self["StatusCmd"]%node, 1)
if not status_line:
rc = 0
elif string.find(status_line, 'S_IDLE') != -1:
rc = 1
elif string.find(status_line, 'S_INTEGRATION') != -1:
rc = 1
elif string.find(status_line, 'S_FINALIZE_JOIN') != -1:
rc = 1
elif string.find(status_line, 'S_POLICY_ENGINE') != -1:
rc = 1
elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1:
rc = 1
return rc
def active_resources(self, node):
# [SM].* {node} matches Started, Slave, Master
# Stopped wont be matched as it wont include {node}
(rc, output) = self.rsh(node, """crm_resource -c""", None)
resources = []
for line in output:
if re.search("^Resource", line):
tmp = AuditResource(self, line)
if tmp.type == "primitive" and tmp.host == node:
resources.append(tmp.id)
return resources
- def ResourceOp(self, resource, op, node, interval=0, app="lrmadmin"):
- '''
- Execute an operation on a resource
- '''
- cmd = self["ExecuteRscOp"] % (app, resource, op, interval)
- (rc, lines) = self.rsh(node, cmd, None)
-
- if rc == 127:
- self.log("Command '%s' failed. Binary not installed?" % cmd)
- for line in lines:
- self.log("Output: "+line)
-
- return rc
-
def ResourceLocation(self, rid):
ResourceNodes = []
for node in self.Env["nodes"]:
if self.ShouldBeStatus[node] == "up":
- dummy = 0
- rc = self.ResourceOp(rid, "monitor", node)
- # Strange error codes from remote_py
- # 65024 == not installed
- # 2048 == 8
- # 1792 == 7
- # 0 == 0
- if rc == 127:
- dummy = 1
- elif rc == 254 or rc == 65024:
- dummy = 1
- #self.debug("%s is not installed on %s: %d" % (rid, node, rc))
+ cmd = self["RscRunning"] % (rid)
+ (rc, lines) = self.rsh(node, cmd, None)
- elif rc == 0 or rc == 2048 or rc == 8:
+ if rc == 127:
+ self.log("Command '%s' failed. Binary not installed?" % cmd)
+ for line in lines:
+ self.log("Output: "+line)
+ elif rc == 0:
ResourceNodes.append(node)
- elif rc == 7 or rc == 1792:
- dummy = 1
- #self.debug("%s is not running on %s: %d" % (rid, node, rc))
-
- else:
- # not active on this node?
- self.log("Unknown rc code for %s on %s: %d" % (rid, node, rc))
-
return ResourceNodes
def find_partitions(self):
ccm_partitions = []
for node in self.Env["nodes"]:
if self.ShouldBeStatus[node] == "up":
partition = self.rsh(node, self["ParitionCmd"], 1)
if not partition:
self.log("no partition details for %s" %node)
elif len(partition) > 2:
partition = partition[:-1]
found=0
for a_partition in ccm_partitions:
if partition == a_partition:
found = 1
if found == 0:
self.debug("Adding partition from %s: %s" %(node, partition))
ccm_partitions.append(partition)
else:
self.debug("Partition '%s' from %s is consistent with existing entries" %(partition, node))
else:
self.log("bad partition details for %s" %node)
else:
self.debug("Node %s is down... skipping" %node)
return ccm_partitions
def HasQuorum(self, node_list):
# If we are auditing a partition, then one side will
# have quorum and the other not.
# So the caller needs to tell us which we are checking
# If no value for node_list is specified... assume all nodes
if not node_list:
node_list = self.Env["nodes"]
for node in node_list:
if self.ShouldBeStatus[node] == "up":
quorum = self.rsh(node, self["QuorumCmd"], 1)
if string.find(quorum, "1") != -1:
return 1
elif string.find(quorum, "0") != -1:
return 0
else:
self.debug("WARN: Unexpected quorum test result from "+ node +":"+ quorum)
return 0
def Components(self):
complist = []
common_ignore = [
"Pending action:",
"(ERROR|error): crm_log_message_adv:",
"(ERROR|error): MSG: No message to dump",
"pending LRM operations at shutdown",
"Lost connection to the CIB service",
"Connection to the CIB terminated...",
"Sending message to CIB service FAILED",
"Action A_RECOVER .* not supported",
"(ERROR|error): stonithd_op_result_ready: not signed on",
"pingd.*(ERROR|error): send_update: Could not send update",
"send_ipc_message: IPC Channel to .* is not connected",
"unconfirmed_actions: Waiting on .* unconfirmed actions",
"cib_native_msgready: Message pending on command channel",
"do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd",
"verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.",
]
stonith_ignore = [
"(ERROR|error): stonithd_signon: ",
"update_failcount: Updating failcount for child_DoFencing",
"(ERROR|error): te_connect_stonith: Sign-in failed: triggered a retry",
"lrmd.*(ERROR|error): cl_get_value: wrong argument (reply)",
"lrmd.*(ERROR|error): is_expected_msg:.* null message",
"lrmd.*(ERROR|error): stonithd_receive_ops_result failed.",
]
stonith_ignore.extend(common_ignore)
ccm_ignore = [
"(ERROR|error): get_channel_token: No reply message - disconnected"
]
ccm_ignore.extend(common_ignore)
ccm = Process(self, "ccm", triggersreboot=self.fastfail, pats = [
"State transition .* S_RECOVERY",
"CCM connection appears to have failed",
"crmd.*Action A_RECOVER .* not supported",
"crmd.*Input I_TERMINATE from do_recover",
"Exiting to recover from CCM connection failure",
"crmd.*do_exit: Could not recover from internal error",
"crmd.*I_ERROR.*(ccm_dispatch|crmd_cib_connection_destroy)",
"crmd.*exited with return code 2.",
"attrd.*exited with return code 1.",
"cib.*exited with return code 2.",
# Not if it was fenced
# "A new node joined the cluster",
# "WARN: determine_online_status: Node .* is unclean",
# "Scheduling Node .* for STONITH",
# "Executing .* fencing operation",
# "tengine_stonith_callback: .*result=0",
# "Processing I_NODE_JOIN:.* cause=C_HA_MESSAGE",
# "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN",
"State transition S_STARTING -> S_PENDING",
], badnews_ignore = ccm_ignore)
cib = Process(self, "cib", triggersreboot=self.fastfail, pats = [
"State transition .* S_RECOVERY",
"Lost connection to the CIB service",
"Connection to the CIB terminated...",
"crmd.*Input I_TERMINATE from do_recover",
"crmd.*I_ERROR.*crmd_cib_connection_destroy",
"crmd.*do_exit: Could not recover from internal error",
"crmd.*exited with return code 2.",
"attrd.*exited with return code 1.",
], badnews_ignore = common_ignore)
lrmd = Process(self, "lrmd", triggersreboot=self.fastfail, pats = [
"State transition .* S_RECOVERY",
"LRM Connection failed",
"crmd.*I_ERROR.*lrm_connection_destroy",
"State transition S_STARTING -> S_PENDING",
"crmd.*Input I_TERMINATE from do_recover",
"crmd.*do_exit: Could not recover from internal error",
"crmd.*exited with return code 2.",
], badnews_ignore = common_ignore)
crmd = Process(self, "crmd", triggersreboot=self.fastfail, pats = [
# "WARN: determine_online_status: Node .* is unclean",
# "Scheduling Node .* for STONITH",
# "Executing .* fencing operation",
# "tengine_stonith_callback: .*result=0",
"State transition .* S_IDLE",
"State transition S_STARTING -> S_PENDING",
], badnews_ignore = common_ignore)
pengine = Process(self, "pengine", triggersreboot=self.fastfail, pats = [
"State transition .* S_RECOVERY",
"crmd.*exited with return code 2.",
"crmd.*Input I_TERMINATE from do_recover",
"crmd.*do_exit: Could not recover from internal error",
"crmd.*CRIT: pe_connection_destroy: Connection to the Policy Engine failed",
"crmd.*I_ERROR.*save_cib_contents",
"crmd.*exited with return code 2.",
], badnews_ignore = common_ignore, dc_only=1)
if self.Env["DoFencing"] == 1 :
complist.append(Process(self, "stoniths", triggersreboot=self.fastfail, dc_pats = [
"crmd.*CRIT: tengine_stonith_connection_destroy: Fencing daemon connection failed",
"Attempting connection to fencing daemon",
"te_connect_stonith: Connected",
], badnews_ignore = stonith_ignore))
if self.fastfail == 0:
ccm.pats.extend([
"attrd .* exited with return code 1",
"(ERROR|error): Respawning client .*attrd",
"cib.* exited with return code 2",
"(ERROR|error): Respawning client .*cib",
"crmd.* exited with return code 2",
"(ERROR|error): Respawning client .*crmd"
])
cib.pats.extend([
"attrd.* exited with return code 1",
"(ERROR|error): Respawning client .*attrd",
"crmd.* exited with return code 2",
"(ERROR|error): Respawning client .*crmd"
])
lrmd.pats.extend([
"crmd.* exited with return code 2",
"(ERROR|error): Respawning client .*crmd"
])
pengine.pats.extend([
"(ERROR|error): Respawning client .*crmd"
])
complist.append(ccm)
complist.append(cib)
complist.append(lrmd)
complist.append(crmd)
complist.append(pengine)
return complist
def NodeUUID(self, node):
lines = self.rsh(node, self["UUIDQueryCmd"], 1)
for line in lines:
self.debug("UUIDLine:"+ line)
m = re.search(r'%s.+\((.+)\)' % node, line)
if m:
return m.group(1)
return ""
def StandbyStatus(self, node):
out=self.rsh(node, self["StandbyQueryCmd"]%node, 1)
if not out:
return "off"
out = out[:-1]
self.debug("Standby result: "+out)
return out
# status == "on" : Enter Standby mode
# status == "off": Enter Active mode
def SetStandbyMode(self, node, status):
current_status = self.StandbyStatus(node)
cmd = self["StandbyCmd"] % (node, status)
ret = self.rsh(node, cmd)
return True
#######################################################################
#
# A little test code...
#
# Which you are advised to completely ignore...
#
#######################################################################
if __name__ == '__main__':
pass
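The CTS change above drops the lrmadmin-based ResourceOp probe and instead runs a single "lrmd_test -R -r <rsc-id>" per node, keying only on the exit status: 0 means the resource is active there, 127 means the binary is missing, and anything else is treated as not running. Below is a minimal sketch of that exit-code contract, not the real lrmd_test (which lives in lrmd/test.c further down); resource_is_running() is a placeholder, and the non-zero value 7 simply mirrors the OCF/old-CTS "not running" code.

/* Sketch only: a hypothetical "is-running" helper honouring the exit codes
 * the revised ResourceLocation() expects. */
#include <stdio.h>

static int resource_is_running(const char *rsc_id)
{
    (void)rsc_id;   /* a real check would query the lrmd/CIB */
    return 1;
}

int main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <rsc-id>\n", argv[0]);
        return 2;
    }
    /* 0 = running (CTS records the node); any other value = not running.
     * Exit code 127 is left to the shell to signal a missing binary. */
    return resource_is_running(argv[1]) ? 0 : 7;
}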
diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h
index 1b6b7005e1..c42da62224 100644
--- a/include/crm/lrmd.h
+++ b/include/crm/lrmd.h
@@ -1,413 +1,415 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef LRMD__H
#define LRMD__H
typedef struct lrmd_s lrmd_t;
typedef struct lrmd_key_value_s lrmd_key_value_t;
struct lrmd_key_value_t;
/* *INDENT-OFF* */
#define F_LRMD_OPERATION "lrmd_op"
#define F_LRMD_CLIENTNAME "lrmd_clientname"
#define F_LRMD_CLIENTID "lrmd_clientid"
#define F_LRMD_CALLBACK_TOKEN "lrmd_async_id"
#define F_LRMD_CALLID "lrmd_callid"
#define F_LRMD_CANCEL_CALLID "lrmd_cancel_callid"
#define F_LRMD_CALLOPTS "lrmd_callopt"
#define F_LRMD_CALLDATA "lrmd_calldata"
#define F_LRMD_RC "lrmd_rc"
#define F_LRMD_EXEC_RC "lrmd_exec_rc"
#define F_LRMD_OP_STATUS "lrmd_exec_op_status"
#define F_LRMD_TIMEOUT "lrmd_timeout"
#define F_LRMD_CLASS "lrmd_class"
#define F_LRMD_PROVIDER "lrmd_provider"
#define F_LRMD_TYPE "lrmd_type"
#define F_LRMD_ORIGIN "lrmd_origin"
#define F_LRMD_RSC_RUN_TIME "lrmd_run_time"
#define F_LRMD_RSC_RCCHANGE_TIME "lrmd_rcchange_time"
#define F_LRMD_RSC_EXEC_TIME "lrmd_exec_time"
#define F_LRMD_RSC_QUEUE_TIME "lrmd_queue_time"
#define F_LRMD_RSC_ID "lrmd_rsc_id"
#define F_LRMD_RSC_ACTION "lrmd_rsc_action"
#define F_LRMD_RSC_USERDATA_STR "lrmd_rsc_userdata_str"
#define F_LRMD_RSC_OUTPUT "lrmd_rsc_output"
#define F_LRMD_RSC_START_DELAY "lrmd_rsc_start_delay"
#define F_LRMD_RSC_INTERVAL "lrmd_rsc_interval"
#define F_LRMD_RSC_METADATA "lrmd_rsc_metadata_res"
#define F_LRMD_RSC_DELETED "lrmd_rsc_deleted"
#define F_LRMD_RSC "lrmd_rsc"
#define LRMD_OP_RSC_CHK_REG "lrmd_rsc_check_register"
#define LRMD_OP_RSC_REG "lrmd_rsc_register"
#define LRMD_OP_RSC_EXEC "lrmd_rsc_exec"
#define LRMD_OP_RSC_CANCEL "lrmd_rsc_cancel"
#define LRMD_OP_RSC_UNREG "lrmd_rsc_unregister"
#define LRMD_OP_RSC_INFO "lrmd_rsc_info"
#define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata"
#define T_LRMD "lrmd"
#define T_LRMD_REPLY "lrmd_reply"
#define T_LRMD_NOTIFY "lrmd_notify"
/* *INDENT-ON* */
lrmd_t *lrmd_api_new(void);
bool lrmd_dispatch(lrmd_t *lrmd);
void lrmd_api_delete(lrmd_t * lrmd);
lrmd_key_value_t *lrmd_key_value_add(lrmd_key_value_t *kvp,
const char *key,
const char *value);
/* *INDENT-OFF* */
/* Reserved for future use */
enum lrmd_call_options {
lrmd_opt_none = 0x00000000,
/* lrmd_opt_sync_call = 0x00000001, //Not implemented, patches welcome. */
+ /*! Only notify the client originating a exec() the results */
+ lrmd_opt_notify_orig_only = 0x00000002,
};
enum lrmd_errors {
lrmd_ok = 0,
lrmd_pending = -1,
lrmd_err_generic = -2,
lrmd_err_internal = -3,
lrmd_err_not_supported = -4,
lrmd_err_connection = -5,
lrmd_err_missing = -6,
lrmd_err_exists = -7,
lrmd_err_timeout = -8,
lrmd_err_ipc = -9,
lrmd_err_peer = -10,
lrmd_err_unknown_operation = -11,
lrmd_err_unknown_rsc = -12,
lrmd_err_none_available = -13,
lrmd_err_authentication = -14,
lrmd_err_signal = -15,
lrmd_err_exec_failed = -16,
lrmd_err_no_metadata = -17,
lrmd_err_stonith_connection = -18,
lrmd_err_provider_required = -19,
};
enum lrmd_callback_event {
lrmd_event_register,
lrmd_event_unregister,
lrmd_event_exec_complete,
lrmd_event_disconnect,
};
enum lrmd_exec_rc {
PCMK_EXECRA_OK = 0,
PCMK_EXECRA_UNKNOWN_ERROR = 1,
PCMK_EXECRA_INVALID_PARAM = 2,
PCMK_EXECRA_UNIMPLEMENT_FEATURE = 3,
PCMK_EXECRA_INSUFFICIENT_PRIV = 4,
PCMK_EXECRA_NOT_INSTALLED = 5,
PCMK_EXECRA_NOT_CONFIGURED = 6,
PCMK_EXECRA_NOT_RUNNING = 7,
PCMK_EXECRA_RUNNING_MASTER = 8,
PCMK_EXECRA_FAILED_MASTER = 9,
/* For status command only */
PCMK_EXECRA_STATUS_UNKNOWN = 14,
};
/* *INDENT-ON* */
typedef struct lrmd_event_data_s {
/*! Type of event, register, unregister, call_completed... */
enum lrmd_callback_event type;
/*! The resource this event occurred on. */
const char *rsc_id;
/*! The action performed, start, stop, monitor... */
const char *op_type;
/*! The userdata string given do exec() api function */
const char *user_data;
/*! The client api call id associated with this event */
int call_id;
/*! The operation's timeout period in ms. */
int timeout;
/*! The operation's recurring interval in ms. */
int interval;
/*! The operation's start delay value in ms. */
int start_delay;
/*! This operation that just completed is on a deleted rsc. */
int rsc_deleted;
/*! The executed ra return code */
enum lrmd_exec_rc rc;
/*! The lrmd status returned for exec_complete events */
int op_status;
/*! stdout from resource agent operation */
const char *output;
/*! Timestamp of when op ran */
unsigned int t_run;
/*! Timestamp of last rc change */
unsigned int t_rcchange;
/*! Time in length op took to execute */
unsigned int exec_time;
/*! Time in length spent in queue */
unsigned int queue_time;
/* This is a GHashTable containing the
* parameters given to the operation */
void *params;
} lrmd_event_data_t;
lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t *event);
void lrmd_free_event(lrmd_event_data_t *event);
typedef struct lrmd_rsc_info_s {
char *id;
char *type;
char *class;
char *provider;
} lrmd_rsc_info_t;
lrmd_rsc_info_t *lrmd_copy_rsc_info(lrmd_rsc_info_t *rsc_info);
void lrmd_free_rsc_info(lrmd_rsc_info_t *rsc_info);
typedef void (*lrmd_event_callback)(lrmd_event_data_t *event);
typedef struct lrmd_list_s {
const char *val;
struct lrmd_list_s *next;
} lrmd_list_t;
void lrmd_list_freeall(lrmd_list_t *head);
typedef struct lrmd_api_operations_s
{
/*!
* \brief Connect from the lrmd.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*connect) (lrmd_t *lrmd, const char *client_name, int *fd);
/*!
* \brief Disconnect from the lrmd.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*disconnect)(lrmd_t *lrmd);
/*!
* \brief Register a resource with the lrmd.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*register_rsc) (lrmd_t *lrmd,
const char *rsc_id,
const char *class,
const char *provider,
const char *agent,
enum lrmd_call_options options);
/*!
* \brief Retrieve registration info for a rsc
*
* \retval info on success
* \retval NULL on failure
*/
lrmd_rsc_info_t *(*get_rsc_info) (lrmd_t *lrmd,
const char *rsc_id,
enum lrmd_call_options options);
/*!
* \brief Unregister a resource from the lrmd.
*
* \note All pending and recurring operations will be cancelled
* automatically.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval -1, success, but operations are currently executing on the rsc which will
* return once they are completed.
* \retval negative error code on failure
*
*/
int (*unregister_rsc) (lrmd_t *lrmd,
const char *rsc_id,
enum lrmd_call_options options);
/*!
* \brief Sets the callback to receive lrmd events on.
*/
void (*set_callback) (lrmd_t *lrmd,
lrmd_event_callback callback);
/*!
* \brief Issue a command on a resource
*
* \note Asynchronous, command is queued in daemon on function return, but
* execution of command is not synced.
*
* \note Operations on individual resources are guaranteed to occur
* in the order the client api calls them in.
*
* \note Operations between different resources are not guaranteed
* to occur in any specific order in relation to one another
* regardless of what order the client api is called in.
* \retval call_id to track async event result on success
* \retval negative error code on failure
*/
int (*exec)(lrmd_t *lrmd,
const char *rsc_id,
const char *action,
const char *userdata, /* userdata string given back in event notification */
int interval, /* ms */
int timeout, /* ms */
int start_delay, /* ms */
enum lrmd_call_options options,
lrmd_key_value_t *params); /* ownership of params is given up to api here */
/*!
* \brief Cancel a recurring command.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \note The cancel is completed async from this call.
* We can be guaranteed the cancel has completed once
* the callback receives an exec_complete event with
* the lrmd_op_status signifying that the operation is
* cancelled.
* \note For each resource, cancel operations and exec operations
* are processed in the order they are received.
* It is safe to assume that for a single resource, a cancel
* will occur in the lrmd before an exec if the client's cancel
* api call occurs before the exec api call.
*
* It is not however safe to assume any operation on one resource will
* occur before an operation on another resource regardless of
* the order the client api is called in.
*
* \retval 0, cancel command sent.
* \retval negative error code on failure
*/
int (*cancel)(lrmd_t *lrmd,
const char *rsc_id,
const char *action,
int interval);
/*!
* \brief Get the metadata documentation for a resource.
*
* \note Value is returned in output. Output must be freed when set
*
* \retval lrmd_ok success
* \retval negative error code on failure
*/
int (*get_metadata) (lrmd_t *lrmd,
const char *class,
const char *provider,
const char *agent,
char **output,
enum lrmd_call_options options);
/*!
* \brief Retrieve a list of installed resource agents.
*
* \note if class is not provided, all known agents will be returned
* \note list must be freed using lrmd_list_freeall()
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_agents)(lrmd_t *lrmd, lrmd_list_t **agents, const char *class, const char *provider);
/*!
* \brief Retrieve a list of resource agent providers
*
* \note When the agent is provided, only the agent's provider will be returned
* \note When no agent is supplied, all providers will be returned.
* \note List must be freed using lrmd_list_freeall()
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_ocf_providers)(lrmd_t *lrmd,
const char *agent,
lrmd_list_t **providers);
} lrmd_api_operations_t;
struct lrmd_s {
lrmd_api_operations_t *cmds;
void *private;
};
static inline const char *
lrmd_event_rc2str(enum lrmd_exec_rc rc)
{
switch(rc) {
case PCMK_EXECRA_OK:
return "ok";
case PCMK_EXECRA_UNKNOWN_ERROR:
return "unknown error";
case PCMK_EXECRA_INVALID_PARAM:
return "invalid parameter";
case PCMK_EXECRA_UNIMPLEMENT_FEATURE:
return "unimplemented feature";
case PCMK_EXECRA_INSUFFICIENT_PRIV:
return "insufficient privileges";
case PCMK_EXECRA_NOT_INSTALLED:
return "not installed";
case PCMK_EXECRA_NOT_CONFIGURED:
return "not configured";
case PCMK_EXECRA_NOT_RUNNING:
return "not running";
case PCMK_EXECRA_RUNNING_MASTER:
return "master";
case PCMK_EXECRA_FAILED_MASTER:
return "master (failed)";
case PCMK_EXECRA_STATUS_UNKNOWN:
return "status: unknown";
default:
break;
}
return "<unknown>";
}
static inline const char *
lrmd_event_type2str(enum lrmd_callback_event type)
{
switch (type) {
case lrmd_event_register:
return "register";
case lrmd_event_unregister:
return "unregister";
case lrmd_event_exec_complete:
return "exec_complete";
case lrmd_event_disconnect:
return "disconnect";
}
return "unknown";
}
#endif
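A minimal client sketch built only from the declarations above, mainly to show where the new lrmd_opt_notify_orig_only flag is passed. The client name, the ocf:heartbeat:Dummy resource, and the timeouts are illustrative, error handling is omitted, and a real client would drive lrmd_dispatch() from a mainloop rather than calling it directly.

#include <stdio.h>
#include <crm/lrmd.h>

static void event_cb(lrmd_event_data_t *event)
{
    if (event->type == lrmd_event_exec_complete) {
        printf("%s on %s: %s\n", event->op_type, event->rsc_id,
               lrmd_event_rc2str(event->rc));
    }
}

int main(void)
{
    int fd = 0;   /* presumably for mainloop integration */
    lrmd_t *lrmd = lrmd_api_new();

    lrmd->cmds->connect(lrmd, "example-client", &fd);
    lrmd->cmds->set_callback(lrmd, event_cb);

    /* register an (illustrative) ocf:heartbeat:Dummy resource */
    lrmd->cmds->register_rsc(lrmd, "test-rsc", "ocf", "heartbeat", "Dummy",
                             lrmd_opt_none);

    /* with lrmd_opt_notify_orig_only only this client sees the result */
    lrmd->cmds->exec(lrmd, "test-rsc", "start", NULL,
                     0 /* interval */, 30000 /* timeout ms */,
                     0 /* start delay */, lrmd_opt_notify_orig_only, NULL);

    /* normally called when fd becomes readable in the mainloop */
    lrmd_dispatch(lrmd);

    lrmd->cmds->unregister_rsc(lrmd, "test-rsc", lrmd_opt_none);
    lrmd->cmds->disconnect(lrmd);
    lrmd_api_delete(lrmd);
    return 0;
}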
diff --git a/lrmd/Makefile.am b/lrmd/Makefile.am
index 0be956e459..a7b1a81264 100644
--- a/lrmd/Makefile.am
+++ b/lrmd/Makefile.am
@@ -1,35 +1,40 @@
# Copyright (c) 2012 David Vossel <dvossel@redhat.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
MAINTAINERCLEANFILES = Makefile.in
lrmdlibdir = $(CRM_DAEMON_DIR)
+lrmdctsdir = $(datadir)/$(PACKAGE)/tests/cts
+
## binary progs
-lrmdlib_PROGRAMS = lrmd
-noinst_PROGRAMS = lrmd_test
+lrmdlib_PROGRAMS = lrmd
+lrmdcts_PROGRAMS = lrmd_test
lrmd_SOURCES = main.c lrmd.c
lrmd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
$(top_builddir)/lib/services/libcrmservice.la \
$(top_builddir)/lib/lrmd/liblrmd.la \
$(top_builddir)/lib/fencing/libstonithd.la
lrmd_test_SOURCES = test.c
lrmd_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \
$(top_builddir)/lib/lrmd/liblrmd.la \
- $(top_builddir)/lib/services/libcrmservice.la
+ $(top_builddir)/lib/cib/libcib.la \
+ $(top_builddir)/lib/services/libcrmservice.la \
+ $(top_builddir)/lib/pengine/libpe_status.la \
+ $(top_builddir)/pengine/libpengine.la
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index d336ece2de..b66a4e857d 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -1,968 +1,983 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/msg_xml.h>
#include <lrmd_private.h>
#ifdef HAVE_SYS_TIMEB_H
#include <sys/timeb.h>
#endif
GHashTable *rsc_list = NULL;
GHashTable *client_list = NULL;
typedef struct lrmd_cmd_s {
int timeout;
int interval;
int start_delay;
int call_id;
int exec_rc;
int lrmd_op_status;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
+ char *only_notify_client;
char *origin;
char *rsc_id;
char *action;
char *output;
char *userdata_str;
#ifdef HAVE_SYS_TIMEB_H
/* Timestamp of when op ran */
struct timeb t_run;
/* Timestamp of when op was queued */
struct timeb t_queue;
/* Timestamp of last rc change */
struct timeb t_rcchange;
#endif
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t *cmd, lrmd_rsc_t *rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode *msg)
{
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = calloc(1, sizeof(lrmd_rsc_t));
rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
return rsc;
}
static lrmd_cmd_t *
-create_lrmd_cmd(xmlNode *msg)
+create_lrmd_cmd(xmlNode *msg, lrmd_client_t *client)
{
+ int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = calloc(1, sizeof(lrmd_cmd_t));
+ crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
+
+ if (call_options & lrmd_opt_notify_orig_only) {
+ cmd->only_notify_client = crm_strdup(client->id);
+ }
+
crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval);
crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
return cmd;
}
static void
free_lrmd_cmd(lrmd_cmd_t *cmd)
{
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
free(cmd->origin);
free(cmd->action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->output);
+ free(cmd->only_notify_client);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->stonith_recurring_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
CRM_ASSERT(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static void
send_reply(lrmd_client_t *client, int rc, int call_id)
{
int send_rc = 0;
xmlNode *reply = NULL;
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
send_rc = crm_ipcs_send(client->channel, reply, FALSE);
free_xml(reply);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
lrmd_client_t *client = value;
if (client == NULL) {
crm_err("Asked to send event to NULL client");
return;
} else if (client->channel == NULL) {
crm_trace("Asked to send event to disconnected client");
return;
} else if (client->name == NULL) {
crm_trace("Asked to send event to client with no name");
return;
}
if (crm_ipcs_send(client->channel, update_msg, TRUE) <= 0) {
crm_warn("Notification of client %s/%s failed",
client->name, client->id);
}
}
#ifdef HAVE_SYS_TIMEB_H
static int
time_diff_ms(struct timeb *now, struct timeb *old)
{
int sec = difftime(now->time, old->time);
int ms = now->millitm - old->millitm;
if (old->time == 0) {
return 0;
}
return (sec * 1000) + ms;
}
#endif
static void
send_cmd_complete_notify(lrmd_cmd_t *cmd)
{
#ifdef HAVE_SYS_TIMEB_H
struct timeb now = { 0, };
#endif
xmlNode *notify = NULL;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval);
crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
#ifdef HAVE_SYS_TIMEB_H
ftime(&now);
crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time);
crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time);
crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, time_diff_ms(&now, &cmd->t_run));
crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, time_diff_ms(&cmd->t_run, &cmd->t_queue));
#endif
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2field((gpointer) key, (gpointer) value, args);
}
}
- g_hash_table_foreach(client_list, send_client_notify, notify);
+ if (cmd->only_notify_client) {
+ lrmd_client_t *client = g_hash_table_lookup(client_list, cmd->only_notify_client);
+
+ if (client) {
+ send_client_notify(client->id, client, notify);
+ }
+ } else {
+ g_hash_table_foreach(client_list, send_client_notify, notify);
+ }
free_xml(notify);
}
static void
send_generic_notify(int rc, xmlNode *request)
{
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *op = crm_element_value(request, F_LRMD_OPERATION);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_RC, rc);
crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
crm_xml_add(notify, F_LRMD_OPERATION, op);
crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
g_hash_table_foreach(client_list, send_client_notify, notify);
free_xml(notify);
}
static void
cmd_finalize(lrmd_cmd_t *cmd, lrmd_rsc_t *rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed", cmd->rsc_id, cmd->action);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
send_cmd_complete_notify(cmd);
if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd->lrmd_op_status = 0;
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
free(cmd->output);
cmd->output = NULL;
}
}
static int
lsb2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_EXECRA_UNKNOWN_ERROR;
}
/* status has different return codes that everything else. */
if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) {
if (rc > PCMK_LSB_NOT_RUNNING) {
return PCMK_EXECRA_UNKNOWN_ERROR;
}
return rc;
}
switch (rc) {
case PCMK_LSB_STATUS_OK:
return PCMK_EXECRA_OK;
case PCMK_LSB_STATUS_NOT_INSTALLED:
return PCMK_EXECRA_NOT_INSTALLED;
case PCMK_LSB_STATUS_VAR_PID:
case PCMK_LSB_STATUS_VAR_LOCK:
case PCMK_LSB_STATUS_NOT_RUNNING:
return PCMK_EXECRA_NOT_RUNNING;
default:
return PCMK_EXECRA_UNKNOWN_ERROR;
}
return PCMK_EXECRA_UNKNOWN_ERROR;
}
static int
ocf2uniform_rc(int rc)
{
if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) {
return PCMK_EXECRA_UNKNOWN_ERROR;
}
return rc;
}
static int
stonith2uniform_rc(const char *action, int rc)
{
if (rc == st_err_unknown_device) {
if (safe_str_eq(action, "stop")) {
rc = PCMK_EXECRA_OK;
} else if (safe_str_eq(action, "start")) {
rc = PCMK_EXECRA_NOT_INSTALLED;
} else {
rc = PCMK_EXECRA_NOT_RUNNING;
}
} else if (rc != 0) {
rc = PCMK_EXECRA_UNKNOWN_ERROR;
}
return rc;
}
static int
get_uniform_rc(const char *standard, const char *action, int rc)
{
if (safe_str_eq(standard, "ocf")) {
return ocf2uniform_rc(rc);
} else if (safe_str_eq(standard, "stonith")) {
return stonith2uniform_rc(action, rc);
} else {
return lsb2uniform_rc(action, rc);
}
}
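/* Worked examples of the normalization above (symbolic names only, to avoid
 * assuming exact numeric values of the PCMK_LSB_* constants):
 *   lsb     "status"  PCMK_LSB_STATUS_NOT_RUNNING  -> PCMK_EXECRA_NOT_RUNNING
 *   lsb     "start"   rc <= PCMK_LSB_NOT_RUNNING   -> rc passed through
 *   ocf     any       0 <= rc <= PCMK_OCF_FAILED_MASTER -> rc passed through
 *   stonith "stop"    st_err_unknown_device        -> PCMK_EXECRA_OK
 *   stonith "start"   st_err_unknown_device        -> PCMK_EXECRA_NOT_INSTALLED
 *   any     any       rc < 0                       -> PCMK_EXECRA_UNKNOWN_ERROR */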
static void
action_complete(svc_action_t *action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
if (!cmd) {
crm_err("LRMD action (%s) completed does not match any known operations.", action->id);
return;
}
#ifdef HAVE_SYS_TIMEB_H
if (cmd->exec_rc != action->rc) {
ftime(&cmd->t_rcchange);
}
#endif
cmd->exec_rc = get_uniform_rc(action->standard, cmd->action, action->rc);
cmd->lrmd_op_status = action->status;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (action->stdout_data) {
cmd->output = crm_strdup(action->stdout_data);
}
cmd_finalize(cmd, rsc);
}
static int
lrmd_rsc_execute_stonith(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
int rc = 0;
stonith_t *stonith_api = get_stonith_connection();
if (!stonith_api) {
cmd->exec_rc = get_uniform_rc("stonith", cmd->action, st_err_connection);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
cmd_finalize(cmd, rsc);
return lrmd_err_stonith_connection;
}
if (safe_str_eq(cmd->action, "start")) {
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
device_params = stonith_key_value_add(device_params, key, value);
}
}
rc = stonith_api->cmds->register_device(stonith_api,
st_opt_sync_call,
cmd->rsc_id,
rsc->provider,
rsc->type,
device_params);
stonith_key_value_freeall(device_params, 1, 1);
if (rc == 0) {
rc = stonith_api->cmds->call(stonith_api,
st_opt_sync_call,
cmd->rsc_id,
"monitor",
NULL,
cmd->timeout);
}
} else if (safe_str_eq(cmd->action, "stop")) {
rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id);
} else if (safe_str_eq(cmd->action, "monitor")) {
rc = stonith_api->cmds->call(stonith_api,
st_opt_sync_call,
cmd->rsc_id,
cmd->action,
NULL,
cmd->timeout);
}
cmd->exec_rc = get_uniform_rc("stonith", cmd->action, rc);
/* Attempt to map return codes to op status if possible */
if (rc) {
switch (rc) {
case st_err_not_supported:
cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED;
break;
case st_err_timeout:
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
break;
default:
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
} else {
cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
}
if (cmd->interval > 0) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd);
}
cmd_finalize(cmd, rsc);
return rc;
}
static const char *
normalize_action_name(lrmd_rsc_t *rsc, const char *action)
{
if (safe_str_eq(action, "monitor") &&
(safe_str_eq(rsc->class, "lsb") ||
safe_str_eq(rsc->class, "service") ||
safe_str_eq(rsc->class, "systemd"))) {
return "status";
}
return action;
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
g_hash_table_replace(user_data, crm_strdup(key), crm_strdup(value));
}
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id,
cmd->action,
rsc->class,
rsc->provider,
rsc->type);
if (cmd->params) {
params_copy = g_hash_table_new_full(crm_str_hash,
g_str_equal,
g_hash_destroy_str,
g_hash_destroy_str);
if (params_copy != NULL) {
g_hash_table_foreach(cmd->params, dup_attr, params_copy);
}
}
action = resources_action_create(rsc->rsc_id,
rsc->class,
rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval,
cmd->timeout,
params_copy);
if (!action) {
crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
action->cb_data = cmd;
if (!services_action_async(action, action_complete)) {
services_action_free(action);
action = NULL;
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
if (cmd->interval) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
/* The cmd will be finalized by the action_complete callback after
* the service library is done with it */
rsc->active = cmd; /* only one op at a time for a rsc */
cmd = NULL;
exec_done:
if (cmd) {
cmd_finalize(cmd, rsc);
}
return TRUE;
}
static gboolean
lrmd_rsc_execute(lrmd_rsc_t *rsc)
{
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_run);
}
#endif
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
if (safe_str_eq(rsc->class, "stonith")) {
lrmd_rsc_execute_stonith(rsc, cmd);
} else {
lrmd_rsc_execute_service_lib(rsc, cmd);
}
return TRUE;
}
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
return lrmd_rsc_execute(user_data);
}
void
free_rsc(gpointer data)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = safe_str_eq(rsc->class, "stonith");
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id, cmd->action, cmd->interval);
}
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
static int
process_lrmd_signon(lrmd_client_t *client, xmlNode *request)
{
xmlNode *reply = create_xml_node(NULL, "reply");
crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_LRMD_CLIENTID, client->id);
crm_ipcs_send(client->channel, reply, FALSE);
free_xml(reply);
return lrmd_ok;
}
static int
process_lrmd_rsc_register(lrmd_client_t *client, xmlNode *request)
{
int rc = lrmd_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
safe_str_eq(rsc->class, dup->class) &&
safe_str_eq(rsc->provider, dup->provider) &&
safe_str_eq(rsc->type, dup->type)) {
crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Added '%s' to the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
return rc;
}
static void
process_lrmd_get_rsc_info(lrmd_client_t *client, xmlNode *request)
{
int rc = lrmd_ok;
int send_rc = 0;
int call_id = 0;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (!rsc_id) {
rc = lrmd_err_unknown_rsc;
goto get_rsc_done;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = lrmd_err_unknown_rsc;
goto get_rsc_done;
}
get_rsc_done:
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
if (rsc) {
crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
}
send_rc = crm_ipcs_send(client->channel, reply, FALSE);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
free_xml(reply);
}
static int
process_lrmd_rsc_unregister(lrmd_client_t *client, xmlNode *request)
{
int rc = lrmd_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return lrmd_err_unknown_rsc;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return lrmd_err_unknown_rsc;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
rc = lrmd_pending;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(lrmd_client_t *client, xmlNode *request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return lrmd_err_missing;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
return lrmd_err_unknown_rsc;
}
- cmd = create_lrmd_cmd(request);
+ cmd = create_lrmd_cmd(request, client);
schedule_lrmd_cmd(rsc, cmd);
return cmd->call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, int interval)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action.
* 1. Check pending ops list, if it hasn't been handed off
* to the service library or stonith recurring list remove
* it there and that will stop it.
* 2. If it isn't in the pending ops list, then its either a
* recurring op in the stonith recurring list, or the service
* library's recurring list. Stop it there
* 3. If not found in any lists, then this operation has either
* been executed already and is not a recurring operation, or
* never existed.
*/
if (!rsc) {
return lrmd_err_unknown_rsc;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return lrmd_ok;
}
}
if (safe_str_eq(rsc->class, "stonith")) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith opereations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return lrmd_ok;
}
}
} else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return lrmd_ok;
}
return lrmd_err_unknown_operation;
}
static int
process_lrmd_rsc_cancel(lrmd_client_t *client, xmlNode *request)
{
xmlNode *rsc_xml = get_xpath_object("//"F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
int interval = 0;
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval);
if (!rsc_id || !action) {
return lrmd_err_missing;
}
return cancel_op(rsc_id, action, interval);
}
void
process_lrmd_message(lrmd_client_t *client, xmlNode *request)
{
int rc = lrmd_ok;
- int call_options = 0;
int call_id = 0;
const char *op = crm_element_value(request, F_LRMD_OPERATION);
int do_reply = 0;
int do_notify = 0;
int exit = 0;
- crm_element_value_int(request, F_LRMD_CALLOPTS, &call_options);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
rc = process_lrmd_signon(client, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) {
rc = process_lrmd_rsc_register(client, request);
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) {
process_lrmd_get_rsc_info(client, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) {
rc = process_lrmd_rsc_unregister(client, request);
/* don't notify anyone about failed un-registers */
if (rc == lrmd_ok || rc == lrmd_pending) {
do_notify = 1;
}
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) {
rc = process_lrmd_rsc_exec(client, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) {
rc = process_lrmd_rsc_cancel(client, request);
do_reply = 1;
} else if (crm_str_eq(op, CRM_OP_QUIT, TRUE)) {
do_reply = 1;
exit = 1;
} else {
rc = lrmd_err_unknown_operation;
do_reply = 1;
crm_err("Unknown %s from %s", op, client->name);
crm_log_xml_warn(request, "UnknownOp");
}
if (do_reply) {
send_reply(client, rc, call_id);
}
if (do_notify) {
send_generic_notify(rc, request);
}
if (exit) {
lrmd_shutdown(0);
}
}
diff --git a/lrmd/test.c b/lrmd/test.c
index 6d1d7646e3..f43217cec2 100644
--- a/lrmd/test.c
+++ b/lrmd/test.c
@@ -1,420 +1,561 @@
/*
* Copyright (c) 2012 David Vossel <dvossel@redhat.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
+#include <crm/pengine/status.h>
+#include <crm/cib.h>
#include <crm/lrmd.h>
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
{"help", 0, 0, '?'},
{"verbose", 0, 0, 'V', "\t\tPrint out logs and events to screen"},
{"quiet", 0, 0, 'Q', "\t\tSuppress all output to screen"},
/* Just in case we have to add data to events,
* we don't want to break a billion regression tests. Instead
* we'll create different versions. */
{"listen", 1, 0, 'l', "\tListen for a specific event string"},
{"event-ver", 1, 0, 'e', "\tVersion of event to listen to"},
{"api-call", 1, 0, 'c', "\tDirectly relates to lrmd api functions"},
{"no-wait", 0, 0, 'w', "\tMake api call and do not wait for result."},
+ {"is-running", 0, 0, 'R', "\tDetermine if a resource is registered and running."},
{"-spacer-", 1, 0, '-', "\nParameters for api-call option"},
{"action", 1, 0, 'a'},
{"rsc-id", 1, 0, 'r'},
{"cancel-call-id", 1, 0, 'x'},
{"provider", 1, 0, 'P'},
{"class", 1, 0, 'C'},
{"type", 1, 0, 'T'},
{"interval", 1, 0, 'i'},
{"timeout", 1, 0, 't'},
{"start-delay", 1, 0, 's'},
{"param-key", 1, 0, 'k'},
{"param-val", 1, 0, 'v'},
{"-spacer-", 1, 0, '-'},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
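/* Usage sketch based only on the option table above; the binary name
 * (shown here as lrmd_test) and the resource/agent names are illustrative
 * assumptions, not defined by this file:
 *
 *   # register a resource, then run a one-shot monitor and wait for the event
 *   lrmd_test -c register_rsc -r my_rsc -C ocf -P heartbeat -T Dummy
 *   lrmd_test -c exec -r my_rsc -a monitor -i 0 -t 3000
 *
 *   # ask whether a configured resource is registered and running
 *   lrmd_test -R -r my_rsc
 */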
+cib_t *cib_conn = NULL;
static int exec_call_id = 0;
+static int exec_call_opts = 0;
+extern void cleanup_alloc_calculations(pe_working_set_t * data_set);
static struct {
int verbose;
int quiet;
int print;
int interval;
int timeout;
int start_delay;
int cancel_call_id;
int event_version;
int no_wait;
+ int is_running;
int no_connect;
const char *api_call;
const char *rsc_id;
const char *provider;
const char *class;
const char *type;
const char *action;
const char *listen;
lrmd_key_value_t *params;
} options;
GMainLoop *mainloop = NULL;
lrmd_t *lrmd_conn = NULL;
static char event_buf_v0[1024];
#define print_result(result) \
if (!options.quiet) { \
result; \
} \
#define report_event(event) \
snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \
lrmd_event_type2str(event->type), \
event->rsc_id, \
event->op_type ? event->op_type : "none", \
lrmd_event_rc2str(event->rc), \
services_lrm_status_str(event->op_status)); \
crm_info("%s", event_buf_v0);
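/* For reference, the event string built by report_event() above (and hence
 * the exact string that --listen must match) has this shape; the field
 * values below are placeholders, not output from a real run:
 *
 *   NEW_EVENT event_type:<type> rsc_id:my_rsc action:monitor rc:<rc> op_status:<status>
 */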
static void
test_shutdown(int nsig)
{
lrmd_api_delete(lrmd_conn);
}
static void
read_events(lrmd_event_data_t *event)
{
report_event(event);
if (options.listen) {
if (safe_str_eq(options.listen, event_buf_v0)) {
print_result(printf("LISTEN EVENT SUCCESSFUL\n"));
exit(0);
}
}
if (exec_call_id && (event->call_id == exec_call_id)) {
- if (event->op_status == 0) {
+ if (event->op_status == 0 && event->rc == 0) {
print_result(printf("API-CALL SUCCESSFUL for 'exec'\n"));
} else {
print_result(printf("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s\n",
event->rc,
services_lrm_status_str(event->op_status)));
exit(-1);
}
if (!options.listen) {
exit(0);
}
}
}
static gboolean
timeout_err(gpointer data)
{
print_result(printf("LISTEN EVENT FAILURE - timeout occurred, never found.\n"));
exit(-1);
return FALSE;
}
static void
try_connect(void)
{
int tries = 10;
int i = 0;
int rc = 0;
for (i = 0; i < tries; i++) {
rc = lrmd_conn->cmds->connect(lrmd_conn, "lrmd", NULL);
if (!rc) {
crm_info("lrmd client connection established");
return;
} else {
crm_info("lrmd client connection failed");
}
sleep(1);
}
print_result(printf("API CONNECTION FAILURE\n"));
exit(-1);
}
static gboolean
start_test(gpointer user_data)
{
int rc = 0;
if (!options.no_connect) {
try_connect();
}
lrmd_conn->cmds->set_callback(lrmd_conn, read_events);
if (options.timeout) {
g_timeout_add(options.timeout, timeout_err, NULL);
}
if (!options.api_call) {
return 0;
}
if (safe_str_eq(options.api_call, "exec")) {
rc = lrmd_conn->cmds->exec(lrmd_conn,
options.rsc_id,
options.action,
NULL,
options.interval,
options.timeout,
options.start_delay,
- 0,
+ exec_call_opts,
options.params);
if (rc > 0) {
exec_call_id = rc;
print_result(printf("API-CALL 'exec' action pending, waiting on response\n"));
}
} else if (safe_str_eq(options.api_call, "register_rsc")) {
rc = lrmd_conn->cmds->register_rsc(lrmd_conn,
options.rsc_id,
options.class,
options.provider,
options.type,
0);
} else if (safe_str_eq(options.api_call, "get_rsc_info")) {
lrmd_rsc_info_t *rsc_info;
rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0);
if (rsc_info) {
print_result(printf("RSC_INFO: id:%s class:%s provider:%s type:%s\n",
rsc_info->id, rsc_info->class, rsc_info->provider ? rsc_info->provider : "<none>", rsc_info->type));
lrmd_free_rsc_info(rsc_info);
rc = lrmd_ok;
} else {
rc = -1;
}
} else if (safe_str_eq(options.api_call, "unregister_rsc")) {
rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn,
options.rsc_id,
0);
} else if (safe_str_eq(options.api_call, "cancel")) {
rc = lrmd_conn->cmds->cancel(lrmd_conn,
options.rsc_id,
options.action,
options.interval);
} else if (safe_str_eq(options.api_call, "metadata")) {
char *output = NULL;
rc = lrmd_conn->cmds->get_metadata(lrmd_conn,
options.class,
options.provider,
options.type, &output, 0);
if (rc == lrmd_ok) {
print_result(printf("%s", output));
free(output);
}
} else if (safe_str_eq(options.api_call, "list_agents")) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider);
if (rc > 0) {
print_result(printf("%d agents found\n", rc));
for (iter = list; iter != NULL; iter = iter->next) {
print_result(printf("%s\n", iter->val));
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result(printf("API_CALL FAILURE - no agents found\n"));
rc = -1;
}
} else if (safe_str_eq(options.api_call, "list_ocf_providers")) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list);
if (rc > 0) {
print_result(printf("%d providers found\n", rc));
for (iter = list; iter != NULL; iter = iter->next) {
print_result(printf("%s\n", iter->val));
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result(printf("API_CALL FAILURE - no providers found\n"));
rc = -1;
}
} else if (options.api_call) {
print_result(printf("API-CALL FAILURE unknown action '%s'\n", options.action));
exit(-1);
}
if (rc < 0) {
print_result(printf("API-CALL FAILURE for '%s' api_rc:%d\n", options.api_call, rc));
exit(-1);
}
if (options.api_call && rc == lrmd_ok) {
print_result(printf("API-CALL SUCCESSFUL for '%s'\n", options.api_call));
if (!options.listen) {
exit(0);
}
}
if (options.no_wait) {
/* just make the call and exit regardless of anything else. */
exit(0);
}
return 0;
}
+static resource_t *
+find_rsc_or_clone(const char *rsc, pe_working_set_t * data_set)
+{
+ resource_t *the_rsc = pe_find_resource(data_set->resources, rsc);
+
+ if (the_rsc == NULL) {
+ char *as_clone = crm_concat(rsc, "0", ':');
+
+ the_rsc = pe_find_resource(data_set->resources, as_clone);
+ free(as_clone);
+ }
+ return the_rsc;
+}
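+/* Note: crm_concat(rsc, "0", ':') yields "<rsc>:0", so a lookup for a name
+ * like "my_rsc" falls back to the first clone instance "my_rsc:0" when the
+ * plain name is not found ("my_rsc" being an illustrative name only). */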
+
+static int
+generate_params(void)
+{
+ int rc = 0;
+ pe_working_set_t data_set;
+ xmlNode *cib_xml_copy = NULL;
+ resource_t *rsc = NULL;
+ GHashTable *params = NULL;
+ GHashTable *meta = NULL;
+ GHashTableIter iter;
+
+ if (options.params) {
+ return 0;
+ }
+
+ set_working_set_defaults(&data_set);
+
+ cib_conn = cib_new();
+ rc = cib_conn->cmds->signon(cib_conn, "lrmd_test", cib_query);
+ if (rc != cib_ok) {
+ crm_err("Error signing on to the CIB service: %s\n", cib_error2string(rc));
+ rc = -1;
+ goto param_gen_bail;
+ }
+
+ cib_xml_copy = get_cib_copy(cib_conn);
+
+ if (!cib_xml_copy) {
+ crm_err("Error retrieving cib copy.");
+ rc = -1;
+ goto param_gen_bail;
+ }
+
+ if (cli_config_update(&cib_xml_copy, NULL, FALSE) == FALSE) {
+ crm_err("Error updating cib configuration");
+ rc = -1;
+ goto param_gen_bail;
+ }
+
+ data_set.input = cib_xml_copy;
+ data_set.now = new_ha_date(TRUE);
+
+ cluster_status(&data_set);
+ if (options.rsc_id) {
+ rsc = find_rsc_or_clone(options.rsc_id, &data_set);
+ }
+
+ if (!rsc) {
+ crm_err("Resource does not exist in config");
+ rc = -1;
+ goto param_gen_bail;
+ }
+
+ params = g_hash_table_new_full(crm_str_hash,
+ g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
+ meta = g_hash_table_new_full(crm_str_hash,
+ g_str_equal, g_hash_destroy_str, g_hash_destroy_str);
+
+ get_rsc_attributes(params, rsc, NULL, &data_set);
+ get_meta_attributes(meta, rsc, NULL, &data_set);
+
+ if (params) {
+ char *key = NULL;
+ char *value = NULL;
+
+ g_hash_table_iter_init(&iter, params);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
+ options.params = lrmd_key_value_add(options.params, key, value);
+ }
+ g_hash_table_destroy(params);
+ }
+
+ if (meta) {
+ char *key = NULL;
+ char *value = NULL;
+
+ g_hash_table_iter_init(&iter, meta);
+ while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
+ char *crm_name = crm_meta_name(key);
+ options.params = lrmd_key_value_add(options.params, crm_name, value);
+ free(crm_name);
+ }
+ g_hash_table_destroy(meta);
+ }
+
+param_gen_bail:
+
+ cleanup_alloc_calculations(&data_set);
+ return rc;
+}
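+/* Rough sketch of the result (values are assumed for illustration): on
+ * success, options.params ends up holding the resource's instance attributes
+ * plus its meta attributes as renamed by crm_meta_name(), e.g. something like:
+ *
+ *   ip=192.168.122.10          (instance attribute from the CIB)
+ *   CRM_meta_timeout=20000     (meta attribute via crm_meta_name())
+ */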
+
int main(int argc, char ** argv)
{
int option_index = 0;
int argerr = 0;
int flag;
char *key = NULL;
char *val = NULL;
crm_trigger_t *trig;
crm_set_options(NULL, "mode [options]", long_options,
"Inject commands into the lrmd and watch for events\n");
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1)
break;
switch(flag) {
case '?':
crm_help(flag, LSB_EXIT_OK);
break;
case 'V':
options.verbose = 1;
break;
case 'Q':
options.quiet = 1;
options.verbose = 0;
break;
case 'e':
options.event_version = atoi(optarg);
break;
case 'l':
options.listen = optarg;
break;
case 'w':
options.no_wait = 1;
break;
+ case 'R':
+ options.is_running = 1;
+ break;
case 'c':
options.api_call = optarg;
break;
case 'a':
options.action = optarg;
break;
case 'r':
options.rsc_id = optarg;
break;
case 'x':
options.cancel_call_id = atoi(optarg);
break;
case 'P':
options.provider = optarg;
break;
case 'C':
options.class = optarg;
break;
case 'T':
options.type = optarg;
break;
case 'i':
options.interval = atoi(optarg);
break;
case 't':
options.timeout = atoi(optarg);
break;
case 's':
options.start_delay = atoi(optarg);
break;
case 'k':
key = optarg;
if (key && val) {
options.params = lrmd_key_value_add(options.params, key, val);
key = val = NULL;
}
break;
case 'v':
val = optarg;
if (key && val) {
options.params = lrmd_key_value_add(options.params, key, val);
key = val = NULL;
}
break;
default:
++argerr;
break;
}
}
if (argerr) {
crm_help('?', LSB_EXIT_GENERIC);
}
if (optind > argc) {
++argerr;
}
if (!options.listen &&
(safe_str_eq(options.api_call, "metadata") ||
safe_str_eq(options.api_call, "list_agents") ||
safe_str_eq(options.api_call, "list_ocf_providers"))) {
options.no_connect = 1;
}
crm_log_init("lrmd_ctest", LOG_INFO, TRUE, options.verbose ? TRUE : FALSE, argc, argv, FALSE);
+ if (options.is_running) {
+ if (!options.timeout) {
+ options.timeout = 30000;
+ }
+ options.interval = 0;
+ if (!options.rsc_id) {
+ crm_err("rsc-id must be given when is-running is used");
+ exit(-1);
+ }
+
+ if (generate_params()) {
+ print_result(printf("Failed to retrieve rsc parameters from cib, can not determine if rsc is running.\n"));
+ exit(-1);
+ }
+ options.api_call = "exec";
+ options.action = "monitor";
+ exec_call_opts = lrmd_opt_notify_orig_only;
+ }
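+    /* In effect, --is-running is shorthand for a one-shot (interval 0)
+     * "monitor" exec against the named resource, using parameters pulled
+     * from the CIB and a default 30s timeout; read_events() then exits 0
+     * only when both op_status and rc are 0. */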
+
+
/* if we can't perform an api_call or listen for events,
* there is nothing to do */
if (!options.api_call && !options.listen) {
crm_err("Nothing to be done. Please specify 'api-call' and/or 'listen'");
return 0;
}
lrmd_conn = lrmd_api_new();
trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL);
mainloop_set_trigger(trig);
mainloop_add_signal(SIGTERM, test_shutdown);
crm_info("Starting");
mainloop = g_main_new(FALSE);
g_main_run(mainloop);
lrmd_api_delete(lrmd_conn);
+
+ if (cib_conn != NULL) {
+ cib_conn->cmds->signoff(cib_conn);
+ cib_delete(cib_conn);
+ }
+
return 0;
}