Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/crmd/join_client.c b/crmd/join_client.c
index 14d07d96ce..75411645c6 100644
--- a/crmd/join_client.c
+++ b/crmd/join_client.c
@@ -1,328 +1,338 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
int reannounce_count = 0;
void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
/*!
* \internal
* \brief Remember if DC is shutting down as we join
*
* If we're joining while the current DC is shutting down, update its expected
* state, so we don't fence it if we become the new DC. (We weren't a peer
* when it broadcast its shutdown request.)
*
* \param[in] msg A join message from the DC
*/
static void
update_dc_expected(xmlNode *msg)
{
if (fsa_our_dc && crm_is_true(crm_element_value(msg, F_CRM_DC_LEAVING))) {
crm_node_t *dc_node = crm_get_peer(0, fsa_our_dc);
crm_update_peer_expected(__FUNCTION__, dc_node, CRMD_JOINSTATE_DOWN);
}
}
/* A_CL_JOIN_QUERY */
/* is there a DC out there? */
void
do_cl_join_query(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
sleep(1); /* give the CCM time to propogate to the DC */
update_dc(NULL); /* Unset any existing value so that the result is not discarded */
crm_debug("Querying for a DC");
send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
free_xml(req);
}
/* A_CL_JOIN_ANNOUNCE */
/* this is kind of a workaround for the fact that we may not be around or
* are otherwise unable to reply when the DC sends out A_DC_JOIN_OFFER_ALL
*/
void
do_cl_join_announce(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* Once we hear from the DC, we can stop the timer
*
* This timer was started either on startup or when a node
* left the CCM list
*/
/* don't announce if we're in one of these states */
if (cur_state != S_PENDING) {
crm_warn("Not announcing cluster join because in state %s",
fsa_state2string(cur_state));
return;
}
if (AM_I_OPERATIONAL) {
/* send as a broadcast */
xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_debug("Announcing availability");
update_dc(NULL);
send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
free_xml(req);
} else {
/* Delay announce until we have finished local startup */
crm_warn("Delaying announce of cluster join until local startup is complete");
return;
}
}
static int query_call_id = 0;
/* A_CL_JOIN_REQUEST */
/* aka. accept the welcome offer */
void
do_cl_join_offer_respond(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
const char *join_id = crm_element_value(input->msg, F_CRM_JOIN_ID);
#if 0
if (we are sick) {
log error;
/* save the request for later? */
return;
}
#endif
crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s",
welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID));
/* we only ever want the last one */
if (query_call_id > 0) {
crm_trace("Cancelling previous join query: %d", query_call_id);
remove_cib_op_callback(query_call_id, FALSE);
query_call_id = 0;
}
if (update_dc(input->msg) == FALSE) {
crm_warn("Discarding cluster join offer from node %s (expected %s)",
welcome_from, fsa_our_dc);
return;
}
update_dc_expected(input->msg);
CRM_LOG_ASSERT(input != NULL);
query_call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children);
fsa_register_cib_callback(query_call_id, FALSE, strdup(join_id), join_query_callback);
crm_trace("Registered join query callback: %d", query_call_id);
register_fsa_action(A_DC_TIMER_STOP);
}
void
join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
char *join_id = user_data;
xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE);
CRM_LOG_ASSERT(join_id != NULL);
if (query_call_id != call_id) {
crm_trace("Query %d superseded", call_id);
goto done;
}
query_call_id = 0;
if(rc != pcmk_ok || output == NULL) {
crm_err("Could not retrieve version details for join-%s: %s (%d)",
join_id, pcmk_strerror(rc), rc);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
} else if (fsa_our_dc == NULL) {
crm_debug("Membership is in flux, not continuing join-%s", join_id);
} else {
xmlNode *reply = NULL;
crm_debug("Respond to join offer join-%s from %s", join_id, fsa_our_dc);
copy_in_properties(generation, output);
reply = create_request(CRM_OP_JOIN_REQUEST, generation, fsa_our_dc,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_xml_add(reply, F_CRM_JOIN_ID, join_id);
send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
free_xml(reply);
}
done:
free_xml(generation);
}
static void
set_join_state(const char * start_state)
{
if (safe_str_eq(start_state, "standby")) {
crm_notice("Forcing node %s to join in %s state per configured environment",
fsa_our_uname, start_state);
update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid,
NULL, NULL, NULL, "standby", "on", TRUE, NULL, NULL);
} else if (safe_str_eq(start_state, "online")) {
crm_notice("Forcing node %s to join in %s state per configured environment",
fsa_our_uname, start_state);
update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid,
NULL, NULL, NULL, "standby", "off", TRUE, NULL, NULL);
} else if (safe_str_eq(start_state, "default")) {
crm_debug("Not forcing a starting state on node %s", fsa_our_uname);
} else {
crm_warn("Unrecognized start state '%s', using 'default' (%s)",
start_state, fsa_our_uname);
}
}
/* A_CL_JOIN_RESULT */
/* aka. this is notification that we have (or have not) been accepted */
void
do_cl_join_finalize_respond(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *tmp1 = NULL;
gboolean was_nack = TRUE;
static gboolean first_join = TRUE;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *start_state = daemon_option("node_start_state");
int join_id = -1;
const char *op = crm_element_value(input->msg, F_CRM_TASK);
const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK);
const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) {
crm_trace("Ignoring op=%s message", op);
return;
}
/* calculate if it was an ack or a nack */
if (crm_is_true(ack_nack)) {
was_nack = FALSE;
}
crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);
if (was_nack) {
crm_err("Shutting down because cluster join with leader %s failed "
CRM_XS" join-%d NACK'd", welcome_from, join_id);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) {
crm_warn("Discarding our own welcome - we're no longer the DC");
return;
}
if (update_dc(input->msg) == FALSE) {
crm_warn("Discarding %s from node %s (expected from %s)",
op, welcome_from, fsa_our_dc);
return;
}
update_dc_expected(input->msg);
/* send our status section to the DC */
tmp1 = do_lrm_query(TRUE, fsa_our_uname);
if (tmp1 != NULL) {
xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);
crm_debug("Confirming join-%d: sending local operation history to %s",
join_id, fsa_our_dc);
/*
* If this is the node's first join since the crmd started on it,
* set its initial state (standby or member) according to the user's
* preference.
*
* We do not clear the LRM history here. Even if the DC failed to do it
* when we last left, removing them here creates a race condition if the
* crmd is being recovered. Instead of a list of active resources from
* the lrmd, we may end up with a blank status section. If we are _NOT_
* lucky, we will probe for the "wrong" instance of anonymous clones and
* end up with multiple active instances on the machine.
*/
if (first_join && is_not_set(fsa_input_register, R_SHUTDOWN)) {
first_join = FALSE;
+#if !HAVE_ATOMIC_ATTRD
+ /* c9d1c3cd made this unnecessary for atomic attrd.
+ * This means that the issue addressed by that commit is still
+ * present for legacy attrd, but given legacy attrd's imminent
+ * demise, this is preferable to making intrusive changes to it.
+ */
+ erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0);
+ update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE);
+ update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE);
+#endif
if (start_state) {
set_join_state(start_state);
}
}
send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
free_xml(reply);
if (AM_I_DC == FALSE) {
register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__);
#if !HAVE_ATOMIC_ATTRD
/* Ask attrd to write all attributes to disk. This is not needed for
* atomic attrd because atomic attrd does a peer sync and write-out
* when winning an election.
*/
update_attrd(NULL, NULL, NULL, NULL, FALSE);
#endif
}
free_xml(tmp1);
} else {
crm_err("Could not confirm join-%d with %s: Local operation history failed",
join_id, fsa_our_dc);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
diff --git a/cts/CIB.py b/cts/CIB.py
index 69570e1d99..131ffeb724 100644
--- a/cts/CIB.py
+++ b/cts/CIB.py
@@ -1,560 +1,559 @@
'''CTS: Cluster Testing System: CIB generator
'''
__copyright__ = '''
Author: Andrew Beekhof <abeekhof@suse.de>
Copyright (C) 2008 Andrew Beekhof
'''
import os, string, warnings
from cts.CTSvars import *
class CibBase:
def __init__(self, Factory, tag, _id, **kwargs):
self.tag = tag
self.name = _id
self.kwargs = kwargs
self.children = []
self.Factory = Factory
def __repr__(self):
return "%s-%s" % (self.tag, self.name)
def add_child(self, child):
self.children.append(child)
def __setitem__(self, key, value):
if value:
self.kwargs[key] = value
else:
self.kwargs.pop(key, None)
from cib_xml import *
class ConfigBase:
cts_cib = None
version = "unknown"
feature_set = "unknown"
Factory = None
def __init__(self, CM, factory, tmpfile=None):
self.CM = CM
self.Factory = factory
if not tmpfile:
warnings.filterwarnings("ignore")
tmpfile = os.tmpnam()
warnings.resetwarnings()
self.Factory.tmpfile = tmpfile
def version(self):
return self.version
def NextIP(self):
ip = self.CM.Env["IPBase"]
if ":" in ip:
(prefix, sep, suffix) = ip.rpartition(":")
suffix = str(hex(int(suffix, 16)+1)).lstrip("0x")
else:
(prefix, sep, suffix) = ip.rpartition(".")
suffix = str(int(suffix)+1)
ip = prefix + sep + suffix
self.CM.Env["IPBase"] = ip
return ip.strip()
class CIB11(ConfigBase):
feature_set = "3.0"
version = "pacemaker-1.1"
counter = 1
def _show(self, command=""):
output = ""
(rc, result) = self.Factory.rsh(self.Factory.target, "HOME=/root CIB_file="+self.Factory.tmpfile+" cibadmin -Ql "+command, None, )
for line in result:
output += line
self.Factory.debug("Generated Config: "+line)
return output
def NewIP(self, name=None, standard="ocf"):
if self.CM.Env["IPagent"] == "IPaddr2":
ip = self.NextIP()
if not name:
if ":" in ip:
(prefix, sep, suffix) = ip.rpartition(":")
name = "r"+suffix
else:
name = "r"+ip
r = Resource(self.Factory, name, self.CM.Env["IPagent"], standard)
r["ip"] = ip
if ":" in ip:
r["cidr_netmask"] = "64"
r["nic"] = "eth0"
else:
r["cidr_netmask"] = "32"
else:
if not name:
name = "r%s%d" % (self.CM.Env["IPagent"], self.counter)
self.counter = self.counter + 1
r = Resource(self.Factory, name, self.CM.Env["IPagent"], standard)
r.add_op("monitor", "5s")
return r
def get_node_id(self, node_name):
""" Check the cluster configuration for a node ID. """
# We can't account for every possible configuration,
# so we only return a node ID if:
# * The node is specified in /etc/corosync/corosync.conf
# with "ring0_addr:" equal to node_name and "nodeid:"
# explicitly specified.
# * Or, the node is specified in /etc/cluster/cluster.conf
# with name="node_name" nodeid="X"
# In all other cases, we return 0.
node_id = 0
# awkward command: use } as record separator
# so each corosync.conf "object" is one record;
# match the "node {" record that has "ring0_addr: node_name";
# then print the substring of that record after "nodeid:"
(rc, output) = self.Factory.rsh(self.Factory.target,
r"""awk -v RS="}" """
r"""'/^(\s*nodelist\s*{)?\s*node\s*{.*(ring0_addr|name):\s*%s(\s+|$)/"""
r"""{gsub(/.*nodeid:\s*/,"");gsub(/\s+.*$/,"");print}'"""
r""" /etc/corosync/corosync.conf""" % node_name, None)
if rc == 0 and len(output) == 1:
try:
node_id = int(output[0])
except ValueError:
node_id = 0
# another awkward command: use < or > as record separator
# so each cluster.conf XML tag is one record;
# match the clusternode record that has name="node_name";
# then print the substring of that record for nodeid="X"
if node_id == 0:
(rc, output) = self.Factory.rsh(self.Factory.target,
r"""awk -v RS="[<>]" """
r"""'/^clusternode\s+.*name="%s".*/"""
r"""{gsub(/.*nodeid="/,"");gsub(/".*/,"");print}'"""
r""" /etc/cluster/cluster.conf""" % node_name, None)
if rc == 0 and len(output) == 1:
try:
node_id = int(output[0])
except ValueError:
node_id = 0
return node_id
def install(self, target):
old = self.Factory.tmpfile
# Force a rebuild
self.cts_cib = None
self.Factory.tmpfile = CTSvars.CRM_CONFIG_DIR+"/cib.xml"
self.contents(target)
self.Factory.rsh(self.Factory.target, "chown "+CTSvars.CRM_DAEMON_USER+" "+self.Factory.tmpfile)
self.Factory.tmpfile = old
def contents(self, target=None):
# fencing resource
if self.cts_cib:
return self.cts_cib
if target:
self.Factory.target = target
self.Factory.rsh(self.Factory.target, "HOME=/root cibadmin --empty %s > %s" % (self.version, self.Factory.tmpfile))
#cib_base = self.cib_template % (self.feature_set, self.version, ''' remote-tls-port='9898' remote-clear-port='9999' ''')
self.num_nodes = len(self.CM.Env["nodes"])
no_quorum = "stop"
if self.num_nodes < 3:
no_quorum = "ignore"
self.Factory.log("Cluster only has %d nodes, configuring: no-quorum-policy=ignore" % self.num_nodes)
# We don't need a nodes section unless we add attributes
stn = None
# Fencing resource
# Define first so that the shell doesn't reject every update
if self.CM.Env["DoFencing"]:
# Define the "real" fencing device
st = Resource(self.Factory, "Fencing", ""+self.CM.Env["stonith-type"], "stonith")
# Set a threshold for unreliable stonith devices such as the vmware one
st.add_meta("migration-threshold", "5")
st.add_op("monitor", "120s", timeout="120s")
st.add_op("stop", "0", timeout="60s")
st.add_op("start", "0", timeout="60s")
# For remote node tests, a cluster node is stopped and brought back up
# as a remote node with the name "remote-OLDNAME". To allow fencing
# devices to fence these nodes, create a list of all possible node names.
all_node_names = [ prefix+n for n in self.CM.Env["nodes"] for prefix in ('', 'remote-') ]
# Add all parameters specified by user
entries = string.split(self.CM.Env["stonith-params"], ',')
for entry in entries:
try:
(name, value) = string.split(entry, '=', 1)
except ValueError:
print("Warning: skipping invalid fencing parameter: %s" % entry)
continue
# Allow user to specify "all" as the node list, and expand it here
if name in [ "hostlist", "pcmk_host_list" ] and value == "all":
value = string.join(all_node_names, " ")
st[name] = value
st.commit()
# Test advanced fencing logic
if True:
stf_nodes = []
stt_nodes = []
attr_nodes = {}
# Create the levels
stl = FencingTopology(self.Factory)
for node in self.CM.Env["nodes"]:
# Remote node tests will rename the node
remote_node = "remote-" + node
# Randomly assign node to a fencing method
ftype = self.CM.Env.RandomGen.choice(["levels-and", "levels-or ", "broadcast "])
# For levels-and, randomly choose targeting by node name or attribute
by = ""
if ftype == "levels-and":
node_id = self.get_node_id(node)
if node_id == 0 or self.CM.Env.RandomGen.choice([True, False]):
by = " (by name)"
else:
attr_nodes[node] = node_id
by = " (by attribute)"
self.CM.log(" - Using %s fencing for node: %s%s" % (ftype, node, by))
if ftype == "levels-and":
# If targeting by name, add a topology level for this node
if node not in attr_nodes:
stl.level(1, node, "FencingPass,Fencing")
# Always target remote nodes by name, otherwise we would need to add
# an attribute to the remote node only during remote tests (we don't
# want nonexistent remote nodes showing up in the non-remote tests).
# That complexity is not worth the effort.
stl.level(1, remote_node, "FencingPass,Fencing")
# Add the node (and its remote equivalent) to the list of levels-and nodes.
stt_nodes.extend([node, remote_node])
elif ftype == "levels-or ":
for n in [ node, remote_node ]:
stl.level(1, n, "FencingFail")
stl.level(2, n, "Fencing")
stf_nodes.extend([node, remote_node])
# If any levels-and nodes were targeted by attribute,
# create the attributes and a level for the attribute.
if attr_nodes:
stn = Nodes(self.Factory)
for (node_name, node_id) in attr_nodes.items():
stn.add_node(node_name, node_id, { "cts-fencing" : "levels-and" })
stl.level(1, None, "FencingPass,Fencing", "cts-fencing", "levels-and")
# Create a Dummy agent that always passes for levels-and
if len(stt_nodes):
self.CM.install_helper("fence_dummy", destdir="/usr/sbin", sourcedir=CTSvars.Fencing_home)
stt = Resource(self.Factory, "FencingPass", "fence_dummy", "stonith")
stt["pcmk_host_list"] = string.join(stt_nodes, " ")
# Wait this many seconds before doing anything, handy for letting disks get flushed too
stt["random_sleep_range"] = "30"
stt["mode"] = "pass"
stt.commit()
# Create a Dummy agent that always fails for levels-or
if len(stf_nodes):
self.CM.install_helper("fence_dummy", destdir="/usr/sbin", sourcedir=CTSvars.Fencing_home)
stf = Resource(self.Factory, "FencingFail", "fence_dummy", "stonith")
stf["pcmk_host_list"] = string.join(stf_nodes, " ")
# Wait this many seconds before doing anything, handy for letting disks get flushed too
stf["random_sleep_range"] = "30"
stf["mode"] = "fail"
stf.commit()
# Now commit the levels themselves
stl.commit()
o = Option(self.Factory)
o["stonith-enabled"] = self.CM.Env["DoFencing"]
o["start-failure-is-fatal"] = "false"
o["pe-input-series-max"] = "5000"
o["shutdown-escalation"] = "5min"
o["batch-limit"] = "10"
o["dc-deadtime"] = "5s"
o["no-quorum-policy"] = no_quorum
o["expected-quorum-votes"] = self.num_nodes
if self.CM.Env["DoBSC"] == 1:
o["ident-string"] = "Linux-HA TEST configuration file - REMOVEME!!"
o.commit()
o = OpDefaults(self.Factory)
o["timeout"] = "90s"
o.commit()
# Commit the nodes section if we defined one
if stn is not None:
stn.commit()
# Add an alerts section if possible
if self.Factory.rsh.exists_on_all(self.CM.Env["notification-agent"], self.CM.Env["nodes"]):
alerts = Alerts(self.Factory)
alerts.add_alert(self.CM.Env["notification-agent"],
self.CM.Env["notification-recipient"])
alerts.commit()
# Add resources?
if self.CM.Env["CIBResource"] == 1:
self.add_resources()
if self.CM.cluster_monitor == 1:
mon = Resource(self.Factory, "cluster_mon", "ocf", "ClusterMon", "pacemaker")
mon.add_op("start", "0", requires="nothing")
mon.add_op("monitor", "5s", requires="nothing")
mon["update"] = "10"
mon["extra_options"] = "-r -n"
mon["user"] = "abeekhof"
mon["htmlfile"] = "/suse/abeekhof/Export/cluster.html"
mon.commit()
#self._create('''location prefer-dc cluster_mon rule -INFINITY: \#is_dc eq false''')
# generate cib
self.cts_cib = self._show()
if self.Factory.tmpfile != CTSvars.CRM_CONFIG_DIR+"/cib.xml":
self.Factory.rsh(self.Factory.target, "rm -f "+self.Factory.tmpfile)
return self.cts_cib
def add_resources(self):
# Per-node resources
for node in self.CM.Env["nodes"]:
name = "rsc_"+node
r = self.NewIP(name)
r.prefer(node, "100")
r.commit()
# Migrator
# Make this slightly sticky (since we have no other location constraints) to avoid relocation during Reattach
m = Resource(self.Factory, "migrator","Dummy", "ocf", "pacemaker")
m["passwd"] = "whatever"
m.add_meta("resource-stickiness","1")
m.add_meta("allow-migrate", "1")
m.add_op("monitor", "P10S")
m.commit()
# Ping the test master
p = Resource(self.Factory, "ping-1","ping", "ocf", "pacemaker")
p.add_op("monitor", "60s")
p["host_list"] = self.CM.Env["cts-master"]
p["name"] = "connected"
p["debug"] = "true"
c = Clone(self.Factory, "Connectivity", p)
c["globally-unique"] = "false"
c.commit()
#master slave resource
s = Resource(self.Factory, "stateful-1", "Stateful", "ocf", "pacemaker")
s.add_op("monitor", "15s", timeout="60s")
s.add_op("monitor", "16s", timeout="60s", role="Master")
ms = Master(self.Factory, "master-1", s)
ms["clone-max"] = self.num_nodes
ms["master-max"] = 1
ms["clone-node-max"] = 1
ms["master-node-max"] = 1
# Require conectivity to run the master
r = Rule(self.Factory, "connected", "-INFINITY", op="or")
r.add_child(Expression(self.Factory, "m1-connected-1", "connected", "lt", "1"))
r.add_child(Expression(self.Factory, "m1-connected-2", "connected", "not_defined", None))
ms.prefer("connected", rule=r)
ms.commit()
# Group Resource
g = Group(self.Factory, "group-1")
g.add_child(self.NewIP())
if self.CM.Env["have_systemd"]:
# It would be better to put the python in a separate file, so we
# could loop "while True" rather than sleep for 24 hours. We can't
# put a loop in a single-line python command; only simple commands
# may be separated by semicolon in python.
dummy_service_file = """
[Unit]
Description=Dummy resource that takes a while to start
[Service]
Type=notify
ExecStart=/usr/bin/python -c 'import time, systemd.daemon; time.sleep(10); systemd.daemon.notify("READY=1"); time.sleep(86400)'
ExecStop=/bin/sh -c 'sleep 10; [ -n "\$MAINPID" ] && kill -s KILL \$MAINPID'
"""
os.system("cat <<-END >/tmp/DummySD.service\n%s\nEND" % (dummy_service_file))
self.CM.install_helper("DummySD.service", destdir="/usr/lib/systemd/system/", sourcedir="/tmp")
sysd = Resource(self.Factory, "petulant", "DummySD", "service")
sysd.add_op("monitor", "P10S")
g.add_child(sysd)
else:
g.add_child(self.NewIP())
g.add_child(self.NewIP())
# Group with the master
g.after("master-1", first="promote", then="start")
g.colocate("master-1", "INFINITY", withrole="Master")
g.commit()
# LSB resource
lsb_agent = self.CM.install_helper("LSBDummy")
lsb = Resource(self.Factory, "lsb-dummy",lsb_agent, "lsb")
lsb.add_op("monitor", "5s")
# LSB with group
lsb.after("group-1")
lsb.colocate("group-1")
lsb.commit()
class CIB12(CIB11):
feature_set = "3.0"
version = "pacemaker-1.2"
class CIB20(CIB11):
feature_set = "3.0"
version = "pacemaker-2.5"
#class HASI(CIB10):
# def add_resources(self):
# # DLM resource
# self._create('''primitive dlm ocf:pacemaker:controld op monitor interval=120s''')
# self._create('''clone dlm-clone dlm meta globally-unique=false interleave=true''')
# O2CB resource
# self._create('''primitive o2cb ocf:ocfs2:o2cb op monitor interval=120s''')
# self._create('''clone o2cb-clone o2cb meta globally-unique=false interleave=true''')
# self._create('''colocation o2cb-with-dlm INFINITY: o2cb-clone dlm-clone''')
# self._create('''order start-o2cb-after-dlm mandatory: dlm-clone o2cb-clone''')
class ConfigFactory:
def __init__(self, CM):
self.CM = CM
self.rsh = self.CM.rsh
self.register("pacemaker11", CIB11, CM, self)
self.register("pacemaker12", CIB12, CM, self)
self.register("pacemaker20", CIB20, CM, self)
# self.register("hae", HASI, CM, self)
self.target = self.CM.Env["nodes"][0]
self.tmpfile = None
def log(self, args):
self.CM.log("cib: %s" % args)
def debug(self, args):
self.CM.debug("cib: %s" % args)
def register(self, methodName, constructor, *args, **kargs):
"""register a constructor"""
_args = [constructor]
_args.extend(args)
setattr(self, methodName, ConfigFactoryItem(*_args, **kargs))
def unregister(self, methodName):
"""unregister a constructor"""
delattr(self, methodName)
def createConfig(self, name="pacemaker-1.0"):
if name == "pacemaker-1.0":
name = "pacemaker10";
elif name == "pacemaker-1.1":
name = "pacemaker11";
elif name == "pacemaker-1.2":
name = "pacemaker12";
elif name == "pacemaker-2.0":
name = "pacemaker20";
elif name == "hasi":
name = "hae";
if hasattr(self, name):
return getattr(self, name)()
else:
self.CM.log("Configuration variant '%s' is unknown. Defaulting to latest config" % name)
return self.pacemaker12()
class ConfigFactoryItem:
def __init__(self, function, *args, **kargs):
self._function = function
self._args = args
self._kargs = kargs
def __call__(self, *args, **kargs):
"""call function"""
_args = list(self._args)
_args.extend(args)
_kargs = self._kargs.copy()
_kargs.update(kargs)
return self._function(*_args,**_kargs)
if __name__ == '__main__':
""" Unit test (pass cluster node names as command line arguments) """
import CTS
import CM_ais
import sys
if len(sys.argv) < 2:
print("Usage: %s <node> ..." % sys.argv[0])
sys.exit(1)
args = [
"--nodes", " ".join(sys.argv[1:]),
"--clobber-cib",
"--populate-resources",
"--stack", "corosync",
"--test-ip-base", "fe80::1234:56:7890:1000",
"--stonith", "rhcs",
- "--stonith-args", "pcmk_host_argument=domain"
]
env = CTS.CtsLab(args)
cm = CM_ais.crm_mcp(env)
CibFactory = ConfigFactory(cm)
cib = CibFactory.createConfig("pacemaker-1.1")
print(cib.contents())
diff --git a/cts/environment.py b/cts/environment.py
index e22c68f861..75a18c8099 100644
--- a/cts/environment.py
+++ b/cts/environment.py
@@ -1,700 +1,697 @@
'''
Classes related to producing and searching logs
'''
__copyright__='''
Copyright (C) 2014 Andrew Beekhof <andrew@beekhof.net>
Licensed under the GNU GPL.
'''
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
import sys, time, os, socket, random
from cts.remote import *
class Environment:
def __init__(self, args):
self.data = {}
self.Nodes = []
self["DeadTime"] = 300
self["StartTime"] = 300
self["StableTime"] = 30
self["tests"] = []
self["IPagent"] = "IPaddr2"
self["DoStandby"] = 1
self["DoFencing"] = 1
self["XmitLoss"] = "0.0"
self["RecvLoss"] = "0.0"
self["ClobberCIB"] = 0
self["CIBfilename"] = None
self["CIBResource"] = 0
self["DoBSC"] = 0
self["use_logd"] = 0
self["oprofile"] = []
self["warn-inactive"] = 0
self["ListTests"] = 0
self["benchmark"] = 0
self["LogWatcher"] = "any"
self["SyslogFacility"] = "daemon"
self["LogFileName"] = "/var/log/messages"
self["Schema"] = "pacemaker-2.0"
self["Stack"] = "corosync"
self["stonith-type"] = "external/ssh"
self["stonith-params"] = "hostlist=all,livedangerously=yes"
self["notification-agent"] = "/var/lib/pacemaker/notify.sh"
self["notification-recipient"] = "/var/lib/pacemaker/notify.log"
self["loop-minutes"] = 60
self["valgrind-prefix"] = None
self["valgrind-procs"] = "attrd cib crmd lrmd pengine stonith-ng"
self["valgrind-opts"] = """--leak-check=full --show-reachable=yes --trace-children=no --num-callers=25 --gen-suppressions=all --suppressions="""+CTSvars.CTS_home+"""/cts.supp"""
self["experimental-tests"] = 0
self["container-tests"] = 0
self["valgrind-tests"] = 0
self["unsafe-tests"] = 1
self["loop-tests"] = 1
self["scenario"] = "random"
self["stats"] = 0
self["docker"] = 0
self["continue"] = 0
self.RandomGen = random.Random()
self.logger = LogFactory()
self.SeedRandom()
self.rsh = RemoteFactory().getInstance()
self.target = "localhost"
self.parse_args(args)
self.discover()
self.validate()
def SeedRandom(self, seed=None):
if not seed:
seed = int(time.time())
self["RandSeed"] = seed
self.RandomGen.seed(str(seed))
def dump(self):
keys = []
for key in list(self.data.keys()):
keys.append(key)
keys.sort()
for key in keys:
self.logger.debug("Environment["+key+"]:\t"+str(self[key]))
def keys(self):
return self.data.keys()
def has_key(self, key):
if key == "nodes":
return True
return key in self.data
def __getitem__(self, key):
if str(key) == "0":
raise ValueError("Bad call to 'foo in X', should reference 'foo in X.keys()' instead")
if key == "nodes":
return self.Nodes
elif key == "Name":
return self.get_stack_short()
elif key in self.data:
return self.data[key]
else:
return None
def __setitem__(self, key, value):
if key == "Stack":
self.set_stack(value)
elif key == "node-limit":
self.data[key] = value
self.filter_nodes()
elif key == "nodes":
self.Nodes = []
for node in value:
# I don't think I need the IP address, etc. but this validates
# the node name against /etc/hosts and/or DNS, so it's a
# GoodThing(tm).
try:
n = node.strip()
if self.data["docker"] == 0:
socket.gethostbyname_ex(n)
self.Nodes.append(n)
except:
self.logger.log(node+" not found in DNS... aborting")
raise
self.filter_nodes()
else:
self.data[key] = value
def RandomNode(self):
'''Choose a random node from the cluster'''
return self.RandomGen.choice(self["nodes"])
def set_stack(self, name):
# Normalize stack names
if name == "heartbeat" or name == "lha":
self.data["Stack"] = "heartbeat"
elif name == "openais" or name == "ais" or name == "whitetank":
self.data["Stack"] = "corosync (plugin v0)"
elif name == "corosync" or name == "cs" or name == "mcp":
self.data["Stack"] = "corosync 2.x"
elif name == "cman":
self.data["Stack"] = "corosync (cman)"
elif name == "v1":
self.data["Stack"] = "corosync (plugin v1)"
elif name == "v0":
self.data["Stack"] = "corosync (plugin v0)"
else:
raise ValueError("Unknown stack: "+name)
sys.exit(1)
def get_stack_short(self):
# Create the Cluster Manager object
if not "Stack" in self.data:
return "unknown"
elif self.data["Stack"] == "heartbeat":
return "crm-lha"
elif self.data["Stack"] == "corosync 2.x":
if self["docker"]:
return "crm-mcp-docker"
else:
return "crm-mcp"
elif self.data["Stack"] == "corosync (cman)":
return "crm-cman"
elif self.data["Stack"] == "corosync (plugin v1)":
return "crm-plugin-v1"
elif self.data["Stack"] == "corosync (plugin v0)":
return "crm-plugin-v0"
else:
LogFactory().log("Unknown stack: "+self["stack"])
raise ValueError("Unknown stack: "+self["stack"])
def detect_syslog(self):
# Detect syslog variant
if not "syslogd" in self.data:
if self["have_systemd"]:
# Systemd
self["syslogd"] = self.rsh(self.target, "systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'", stdout=1).strip()
else:
# SYS-V
self["syslogd"] = self.rsh(self.target, "chkconfig --list | grep syslog.*on | awk '{print $1}' | head -n 1", stdout=1).strip()
if not "syslogd" in self.data or not self["syslogd"]:
# default
self["syslogd"] = "rsyslog"
def detect_at_boot(self):
# Detect if the cluster starts at boot
if not "at-boot" in self.data:
atboot = 0
if self["have_systemd"]:
# Systemd
atboot = atboot or not self.rsh(self.target, "systemctl is-enabled heartbeat.service")
atboot = atboot or not self.rsh(self.target, "systemctl is-enabled corosync.service")
atboot = atboot or not self.rsh(self.target, "systemctl is-enabled pacemaker.service")
else:
# SYS-V
atboot = atboot or not self.rsh(self.target, "chkconfig --list | grep -e corosync.*on -e heartbeat.*on -e pacemaker.*on")
self["at-boot"] = atboot
def detect_ip_offset(self):
# Try to determin an offset for IPaddr resources
if self["CIBResource"] and not "IPBase" in self.data:
network=self.rsh(self.target, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", stdout=1).strip()
self["IPBase"] = self.rsh(self.target, "nmap -sn -n %s | grep 'scan report' | awk '{print $NF}' | sed 's:(::' | sed 's:)::' | sort -V | tail -n 1" % network, stdout=1).strip()
if not self["IPBase"]:
self["IPBase"] = " fe80::1234:56:7890:1000"
self.logger.log("Could not determine an offset for IPaddr resources. Perhaps nmap is not installed on the nodes.")
self.logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
elif int(self["IPBase"].split('.')[3]) >= 240:
self.logger.log("Could not determine an offset for IPaddr resources. Upper bound is too high: %s %s"
% (self["IPBase"], self["IPBase"].split('.')[3]))
self["IPBase"] = " fe80::1234:56:7890:1000"
self.logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"])
def filter_nodes(self):
if self["node-limit"] > 0:
if len(self["nodes"]) > self["node-limit"]:
self.logger.log("Limiting the number of nodes configured=%d (max=%d)"
%(len(self["nodes"]), self["node-limit"]))
while len(self["nodes"]) > self["node-limit"]:
self["nodes"].pop(len(self["nodes"])-1)
def validate(self):
if len(self["nodes"]) < 1:
print("No nodes specified!")
sys.exit(1)
def discover(self):
self.target = random.Random().choice(self["nodes"])
master = socket.gethostname()
# Use the IP where possible to avoid name lookup failures
for ip in socket.gethostbyname_ex(master)[2]:
if ip != "127.0.0.1":
master = ip
break;
self["cts-master"] = master
if not "have_systemd" in self.data:
self["have_systemd"] = not self.rsh(self.target, "systemctl list-units")
self.detect_syslog()
self.detect_at_boot()
self.detect_ip_offset()
self.validate()
def parse_args(self, args):
skipthis=None
if not args:
args=sys.argv[1:]
for i in range(0, len(args)):
if skipthis:
skipthis=None
continue
elif args[i] == "-l" or args[i] == "--limit-nodes":
skipthis=1
self["node-limit"] = int(args[i+1])
elif args[i] == "-r" or args[i] == "--populate-resources":
self["CIBResource"] = 1
self["ClobberCIB"] = 1
elif args[i] == "--outputfile":
skipthis=1
self["OutputFile"] = args[i+1]
LogFactory().add_file(self["OutputFile"])
elif args[i] == "-L" or args[i] == "--logfile":
skipthis=1
self["LogWatcher"] = "remote"
self["LogAuditDisabled"] = 1
self["LogFileName"] = args[i+1]
elif args[i] == "--ip" or args[i] == "--test-ip-base":
skipthis=1
self["IPBase"] = args[i+1]
self["CIBResource"] = 1
self["ClobberCIB"] = 1
elif args[i] == "--oprofile":
skipthis=1
self["oprofile"] = args[i+1].split(' ')
elif args[i] == "--trunc":
self["TruncateLog"]=1
elif args[i] == "--list-tests" or args[i] == "--list" :
self["ListTests"]=1
elif args[i] == "--benchmark":
self["benchmark"]=1
elif args[i] == "--bsc":
self["DoBSC"] = 1
self["scenario"] = "basic-sanity"
elif args[i] == "--qarsh":
RemoteFactory().enable_qarsh()
elif args[i] == "--docker":
self["docker"] = 1
RemoteFactory().enable_docker()
elif args[i] == "--yes" or args[i] == "-y":
self["continue"] = 1
elif args[i] == "--stonith" or args[i] == "--fencing":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
self["DoFencing"]=1
elif args[i+1] == "0" or args[i+1] == "no":
self["DoFencing"]=0
elif args[i+1] == "phd":
self["DoStonith"]=1
self["stonith-type"] = "fence_phd_kvm"
- self["stonith-params"] = "pcmk_host_argument=domain,delay=0"
elif args[i+1] == "rhcs" or args[i+1] == "xvm" or args[i+1] == "virt":
self["DoStonith"]=1
self["stonith-type"] = "fence_xvm"
- self["stonith-params"] = "pcmk_host_argument=domain,delay=0"
elif args[i+1] == "docker":
self["DoStonith"]=1
self["stonith-type"] = "fence_docker_cts"
elif args[i+1] == "scsi":
self["DoStonith"]=1
self["stonith-type"] = "fence_scsi"
- self["stonith-params"] = "delay=0"
elif args[i+1] == "ssh" or args[i+1] == "lha":
self["DoStonith"]=1
self["stonith-type"] = "external/ssh"
self["stonith-params"] = "hostlist=all,livedangerously=yes"
elif args[i+1] == "north":
self["DoStonith"]=1
self["stonith-type"] = "fence_apc"
self["stonith-params"] = "ipaddr=north-apc,login=apc,passwd=apc,pcmk_host_map=north-01:2;north-02:3;north-03:4;north-04:5;north-05:6;north-06:7;north-07:9;north-08:10;north-09:11;north-10:12;north-11:13;north-12:14;north-13:15;north-14:18;north-15:17;north-16:19;"
elif args[i+1] == "south":
self["DoStonith"]=1
self["stonith-type"] = "fence_apc"
self["stonith-params"] = "ipaddr=south-apc,login=apc,passwd=apc,pcmk_host_map=south-01:2;south-02:3;south-03:4;south-04:5;south-05:6;south-06:7;south-07:9;south-08:10;south-09:11;south-10:12;south-11:13;south-12:14;south-13:15;south-14:18;south-15:17;south-16:19;"
elif args[i+1] == "east":
self["DoStonith"]=1
self["stonith-type"] = "fence_apc"
self["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;"
elif args[i+1] == "west":
self["DoStonith"]=1
self["stonith-type"] = "fence_apc"
self["stonith-params"] = "ipaddr=west-apc,login=apc,passwd=apc,pcmk_host_map=west-01:2;west-02:3;west-03:4;west-04:5;west-05:6;west-06:7;west-07:9;west-08:10;west-09:11;west-10:12;west-11:13;west-12:14;west-13:15;west-14:18;west-15:17;west-16:19;"
elif args[i+1] == "openstack":
self["DoStonith"]=1
self["stonith-type"] = "fence_openstack"
print("Obtaining OpenStack credentials from the current environment")
self["stonith-params"] = "region=%s,tenant=%s,auth=%s,user=%s,password=%s" % (
os.environ['OS_REGION_NAME'],
os.environ['OS_TENANT_NAME'],
os.environ['OS_AUTH_URL'],
os.environ['OS_USERNAME'],
os.environ['OS_PASSWORD']
)
elif args[i+1] == "rhevm":
self["DoStonith"]=1
self["stonith-type"] = "fence_rhevm"
print("Obtaining RHEV-M credentials from the current environment")
self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % (
os.environ['RHEVM_USERNAME'],
os.environ['RHEVM_PASSWORD'],
os.environ['RHEVM_SERVER'],
os.environ['RHEVM_PORT'],
)
else:
self.usage(args[i+1])
elif args[i] == "--stonith-type":
self["stonith-type"] = args[i+1]
skipthis=1
elif args[i] == "--stonith-args":
self["stonith-params"] = args[i+1]
skipthis=1
elif args[i] == "--standby":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
self["DoStandby"] = 1
elif args[i+1] == "0" or args[i+1] == "no":
self["DoStandby"] = 0
else:
self.usage(args[i+1])
elif args[i] == "--clobber-cib" or args[i] == "-c":
self["ClobberCIB"] = 1
elif args[i] == "--cib-filename":
skipthis=1
self["CIBfilename"] = args[i+1]
elif args[i] == "--xmit-loss":
try:
float(args[i+1])
except ValueError:
print("--xmit-loss parameter should be float")
self.usage(args[i+1])
skipthis=1
self["XmitLoss"] = args[i+1]
elif args[i] == "--recv-loss":
try:
float(args[i+1])
except ValueError:
print("--recv-loss parameter should be float")
self.usage(args[i+1])
skipthis=1
self["RecvLoss"] = args[i+1]
elif args[i] == "--choose":
skipthis=1
self["tests"].append(args[i+1])
self["scenario"] = "sequence"
elif args[i] == "--nodes":
skipthis=1
self["nodes"] = args[i+1].split(' ')
elif args[i] == "-g" or args[i] == "--group" or args[i] == "--dsh-group":
skipthis=1
self["OutputFile"] = "%s/cluster-%s.log" % (os.environ['HOME'], args[i+1])
LogFactory().add_file(self["OutputFile"], "CTS")
dsh_file = "%s/.dsh/group/%s" % (os.environ['HOME'], args[i+1])
# Hacks to make my life easier
if args[i+1] == "r6":
self["Stack"] = "cman"
self["DoStonith"]=1
self["stonith-type"] = "fence_xvm"
self["stonith-params"] = "delay=0"
self["IPBase"] = " fe80::1234:56:7890:4000"
elif args[i+1] == "virt1":
self["Stack"] = "corosync"
self["DoStonith"]=1
self["stonith-type"] = "fence_xvm"
self["stonith-params"] = "delay=0"
self["IPBase"] = " fe80::1234:56:7890:1000"
elif args[i+1] == "east16" or args[i+1] == "nsew":
self["Stack"] = "corosync"
self["DoStonith"]=1
self["stonith-type"] = "fence_apc"
self["stonith-params"] = "ipaddr=east-apc,login=apc,passwd=apc,pcmk_host_map=east-01:2;east-02:3;east-03:4;east-04:5;east-05:6;east-06:7;east-07:9;east-08:10;east-09:11;east-10:12;east-11:13;east-12:14;east-13:15;east-14:18;east-15:17;east-16:19;"
self["IPBase"] = " fe80::1234:56:7890:2000"
if args[i+1] == "east16":
# Requires newer python than available via nsew
self["IPagent"] = "Dummy"
elif args[i+1] == "corosync8":
self["Stack"] = "corosync"
self["DoStonith"]=1
self["stonith-type"] = "fence_rhevm"
print("Obtaining RHEV-M credentials from the current environment")
self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % (
os.environ['RHEVM_USERNAME'],
os.environ['RHEVM_PASSWORD'],
os.environ['RHEVM_SERVER'],
os.environ['RHEVM_PORT'],
)
self["IPBase"] = " fe80::1234:56:7890:3000"
if os.path.isfile(dsh_file):
self["nodes"] = []
f = open(dsh_file, 'r')
for line in f:
l = line.strip().rstrip()
if not l.startswith('#'):
self["nodes"].append(l)
f.close()
else:
print("Unknown DSH group: %s" % args[i+1])
elif args[i] == "--syslog-facility" or args[i] == "--facility":
skipthis=1
self["SyslogFacility"] = args[i+1]
elif args[i] == "--seed":
skipthis=1
self.SeedRandom(args[i+1])
elif args[i] == "--warn-inactive":
self["warn-inactive"] = 1
elif args[i] == "--schema":
skipthis=1
self["Schema"] = args[i+1]
elif args[i] == "--ais":
self["Stack"] = "openais"
elif args[i] == "--at-boot" or args[i] == "--cluster-starts-at-boot":
skipthis=1
if args[i+1] == "1" or args[i+1] == "yes":
self["at-boot"] = 1
elif args[i+1] == "0" or args[i+1] == "no":
self["at-boot"] = 0
else:
self.usage(args[i+1])
elif args[i] == "--heartbeat" or args[i] == "--lha":
self["Stack"] = "heartbeat"
elif args[i] == "--hae":
self["Stack"] = "openais"
self["Schema"] = "hae"
elif args[i] == "--stack":
if args[i+1] == "fedora" or args[i+1] == "fedora-17" or args[i+1] == "fedora-18":
self["Stack"] = "corosync"
elif args[i+1] == "rhel-6":
self["Stack"] = "cman"
elif args[i+1] == "rhel-7":
self["Stack"] = "corosync"
else:
self["Stack"] = args[i+1]
skipthis=1
elif args[i] == "--once":
self["scenario"] = "all-once"
elif args[i] == "--boot":
self["scenario"] = "boot"
elif args[i] == "--notification-agent":
self["notification-agent"] = args[i+1]
skipthis = 1
elif args[i] == "--notification-recipient":
self["notification-recipient"] = args[i+1]
skipthis = 1
elif args[i] == "--valgrind-tests":
self["valgrind-tests"] = 1
elif args[i] == "--valgrind-procs":
self["valgrind-procs"] = args[i+1]
skipthis = 1
elif args[i] == "--no-loop-tests":
self["loop-tests"] = 0
elif args[i] == "--loop-minutes":
skipthis=1
try:
self["loop-minutes"]=int(args[i+1])
except ValueError:
self.usage(args[i])
elif args[i] == "--no-unsafe-tests":
self["unsafe-tests"] = 0
elif args[i] == "--experimental-tests":
self["experimental-tests"] = 1
elif args[i] == "--container-tests":
self["container-tests"] = 1
elif args[i] == "--set":
skipthis=1
(name, value) = args[i+1].split('=')
self[name] = value
print("Setting %s = %s" % (name, value))
elif args[i] == "--help":
self.usage(args[i], 0)
elif args[i] == "--":
break
else:
try:
NumIter=int(args[i])
self["iterations"] = NumIter
except ValueError:
self.usage(args[i])
def usage(self, arg, status=1):
if status:
print("Illegal argument %s" % arg)
print("usage: " + sys.argv[0] +" [options] number-of-iterations")
print("\nCommon options: ")
print("\t [--nodes 'node list'] list of cluster nodes separated by whitespace")
print("\t [--group | -g 'name'] use the nodes listed in the named DSH group (~/.dsh/groups/$name)")
print("\t [--limit-nodes max] only use the first 'max' cluster nodes supplied with --nodes")
print("\t [--stack (v0|v1|cman|corosync|heartbeat|openais)] which cluster stack is installed")
print("\t [--list-tests] list the valid tests")
print("\t [--benchmark] add the timing information")
print("\t ")
print("Options that CTS will usually auto-detect correctly: ")
print("\t [--logfile path] where should the test software look for logs from cluster nodes")
print("\t [--syslog-facility name] which syslog facility should the test software log to")
print("\t [--at-boot (1|0)] does the cluster software start at boot time")
print("\t [--test-ip-base ip] offset for generated IP address resources")
print("\t ")
print("Options for release testing: ")
print("\t [--populate-resources | -r] generate a sample configuration")
print("\t [--choose name] run only the named test")
print("\t [--stonith (1 | 0 | yes | no | rhcs | ssh)]")
print("\t [--once] run all valid tests once")
print("\t ")
print("Additional (less common) options: ")
print("\t [--clobber-cib | -c ] erase any existing configuration")
print("\t [--outputfile path] optional location for the test software to write logs to")
print("\t [--trunc] truncate logfile before starting")
print("\t [--xmit-loss lost-rate(0.0-1.0)]")
print("\t [--recv-loss lost-rate(0.0-1.0)]")
print("\t [--standby (1 | 0 | yes | no)]")
print("\t [--fencing (1 | 0 | yes | no | rhcs | lha | openstack )]")
print("\t [--stonith-type type]")
print("\t [--stonith-args name=value]")
print("\t [--bsc]")
print("\t [--notification-agent path] script to configure for Pacemaker alerts")
print("\t [--notification-recipient r] recipient to pass to alert script")
print("\t [--no-loop-tests] don't run looping/time-based tests")
print("\t [--no-unsafe-tests] don't run tests that are unsafe for use with ocfs2/drbd")
print("\t [--valgrind-tests] include tests using valgrind")
print("\t [--experimental-tests] include experimental tests")
print("\t [--container-tests] include pacemaker_remote tests that run in lxc container resources")
print("\t [--oprofile 'node list'] list of cluster nodes to run oprofile on]")
print("\t [--qarsh] use the QARSH backdoor to access nodes instead of SSH")
print("\t [--docker] Indicates nodes are docker nodes.")
print("\t [--seed random_seed]")
print("\t [--set option=value]")
print("\t [--yes | -y] continue to run cts when there is an interaction whether to continue running pacemaker-cts")
print("\t ")
print("\t Example: ")
print("\t python sys.argv[0] -g virt1 --stack cs -r --stonith ssh --schema pacemaker-1.0 500")
sys.exit(status)
class EnvFactory:
instance = None
def __init__(self):
pass
def getInstance(self, args=None):
if not EnvFactory.instance:
EnvFactory.instance = Environment(args)
return EnvFactory.instance
diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
index e5f34ec903..685aab2696 100644
--- a/include/crm/common/internal.h
+++ b/include/crm/common/internal.h
@@ -1,126 +1,127 @@
/*
* Copyright (C) 2015
* Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CRM_COMMON_INTERNAL__H
#define CRM_COMMON_INTERNAL__H
#include <glib.h> /* for gboolean */
#include <dirent.h> /* for struct dirent */
#include <unistd.h> /* for getpid() */
#include <sys/types.h> /* for uid_t and gid_t */
#include <crm/common/logging.h>
/* internal I/O utilities (from io.c) */
char *generate_series_filename(const char *directory, const char *series, int sequence,
gboolean bzip);
int get_last_sequence(const char *directory, const char *series);
void write_last_sequence(const char *directory, const char *series, int sequence, int max);
int crm_chown_last_sequence(const char *directory, const char *series, uid_t uid, gid_t gid);
gboolean crm_is_writable(const char *dir, const char *file, const char *user, const char *group,
gboolean need_both);
void crm_sync_directory(const char *name);
char *crm_read_contents(const char *filename);
int crm_write_sync(int fd, const char *contents);
/* internal procfs utilities (from procfs.c) */
int crm_procfs_process_info(struct dirent *entry, char *name, int *pid);
int crm_procfs_pid_of(const char *name);
unsigned int crm_procfs_num_cores(void);
/* internal XML schema functions (from xml.c) */
void crm_schema_init(void);
void crm_schema_cleanup(void);
/* internal generic string functions (from strings.c) */
char *crm_concat(const char *prefix, const char *suffix, char join);
void g_hash_destroy_str(gpointer data);
long long crm_int_helper(const char *text, char **end_text);
+bool crm_starts_with(const char *str, const char *prefix);
gboolean crm_ends_with(const char *s, const char *match);
gboolean crm_ends_with_ext(const char *s, const char *match);
char *add_list_element(char *list, const char *value);
bool crm_compress_string(const char *data, int length, int max, char **result,
unsigned int *result_len);
gint crm_alpha_sort(gconstpointer a, gconstpointer b);
static inline int
crm_strlen_zero(const char *s)
{
return !s || *s == '\0';
}
static inline char *
crm_getpid_s()
{
return crm_strdup_printf("%lu", (unsigned long) getpid());
}
/* convenience functions for failure-related node attributes */
#define CRM_FAIL_COUNT_PREFIX "fail-count"
#define CRM_LAST_FAILURE_PREFIX "last-failure"
/*!
* \internal
* \brief Generate a failure-related node attribute name for a resource
*
* \param[in] prefix Start of attribute name
* \param[in] rsc_id Resource name
* \param[in] op Operation name
* \param[in] interval Operation interval
*
* \return Newly allocated string with attribute name
*
* \note Failure attributes are named like PREFIX-RSC#OP_INTERVAL (for example,
* "fail-count-myrsc#monitor_30000"). The '#' is used because it is not
* a valid character in a resource ID, to reliably distinguish where the
* operation name begins. The '_' is used simply to be more comparable to
* action labels like "myrsc_monitor_30000".
*/
static inline char *
crm_fail_attr_name(const char *prefix, const char *rsc_id, const char *op,
int interval)
{
CRM_CHECK(prefix && rsc_id && op, return NULL);
return crm_strdup_printf("%s-%s#%s_%d", prefix, rsc_id, op, interval);
}
static inline char *
crm_failcount_name(const char *rsc_id, const char *op, int interval)
{
return crm_fail_attr_name(CRM_FAIL_COUNT_PREFIX, rsc_id, op, interval);
}
static inline char *
crm_lastfailure_name(const char *rsc_id, const char *op, int interval)
{
return crm_fail_attr_name(CRM_LAST_FAILURE_PREFIX, rsc_id, op, interval);
}
#endif /* CRM_COMMON_INTERNAL__H */
diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h
index 13274fadc2..3398c6e484 100644
--- a/include/crm/lrmd.h
+++ b/include/crm/lrmd.h
@@ -1,514 +1,525 @@
/*
* Copyright (c) 2012 David Vossel <davidvossel@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef LRMD__H
# define LRMD__H
/**
* \file
* \brief Local Resource Manager
* \ingroup lrmd
*/
#include <stdbool.h>
#include <crm/services.h>
typedef struct lrmd_s lrmd_t;
typedef struct lrmd_key_value_s {
char *key;
char *value;
struct lrmd_key_value_s *next;
} lrmd_key_value_t;
+/* This should be bumped every time there is an incompatible change that
+ * prevents older clients from connecting to this version of the server.
+ */
#define LRMD_PROTOCOL_VERSION "1.1"
+/* This is the version that the client version will actually be compared
+ * against. This should be identical to LRMD_PROTOCOL_VERSION. However, we
+ * accidentally bumped LRMD_PROTOCOL_VERSION in 6424a647 (1.1.15) when we didn't
+ * need to, so for now it's different. If we ever have a truly incompatible
+ * bump, we can drop this and compare against LRMD_PROTOCOL_VERSION.
+ */
+#define LRMD_MIN_PROTOCOL_VERSION "1.0"
+
/* *INDENT-OFF* */
#define DEFAULT_REMOTE_KEY_LOCATION "/etc/pacemaker/authkey"
#define ALT_REMOTE_KEY_LOCATION "/etc/corosync/authkey"
#define DEFAULT_REMOTE_PORT 3121
#define DEFAULT_REMOTE_USERNAME "lrmd"
#define F_LRMD_OPERATION "lrmd_op"
#define F_LRMD_CLIENTNAME "lrmd_clientname"
#define F_LRMD_IS_IPC_PROVIDER "lrmd_is_ipc_provider"
#define F_LRMD_CLIENTID "lrmd_clientid"
#define F_LRMD_PROTOCOL_VERSION "lrmd_protocol_version"
#define F_LRMD_REMOTE_MSG_TYPE "lrmd_remote_msg_type"
#define F_LRMD_REMOTE_MSG_ID "lrmd_remote_msg_id"
#define F_LRMD_CALLBACK_TOKEN "lrmd_async_id"
#define F_LRMD_CALLID "lrmd_callid"
#define F_LRMD_CANCEL_CALLID "lrmd_cancel_callid"
#define F_LRMD_CALLOPTS "lrmd_callopt"
#define F_LRMD_CALLDATA "lrmd_calldata"
#define F_LRMD_RC "lrmd_rc"
#define F_LRMD_EXEC_RC "lrmd_exec_rc"
#define F_LRMD_OP_STATUS "lrmd_exec_op_status"
#define F_LRMD_TIMEOUT "lrmd_timeout"
#define F_LRMD_WATCHDOG "lrmd_watchdog"
#define F_LRMD_CLASS "lrmd_class"
#define F_LRMD_PROVIDER "lrmd_provider"
#define F_LRMD_TYPE "lrmd_type"
#define F_LRMD_ORIGIN "lrmd_origin"
#define F_LRMD_RSC_RUN_TIME "lrmd_run_time"
#define F_LRMD_RSC_RCCHANGE_TIME "lrmd_rcchange_time"
#define F_LRMD_RSC_EXEC_TIME "lrmd_exec_time"
#define F_LRMD_RSC_QUEUE_TIME "lrmd_queue_time"
#define F_LRMD_RSC_ID "lrmd_rsc_id"
#define F_LRMD_RSC_ACTION "lrmd_rsc_action"
#define F_LRMD_RSC_USERDATA_STR "lrmd_rsc_userdata_str"
#define F_LRMD_RSC_OUTPUT "lrmd_rsc_output"
#define F_LRMD_RSC_EXIT_REASON "lrmd_rsc_exit_reason"
#define F_LRMD_RSC_START_DELAY "lrmd_rsc_start_delay"
#define F_LRMD_RSC_INTERVAL "lrmd_rsc_interval"
#define F_LRMD_RSC_METADATA "lrmd_rsc_metadata_res"
#define F_LRMD_RSC_DELETED "lrmd_rsc_deleted"
#define F_LRMD_RSC "lrmd_rsc"
#define F_LRMD_ALERT_ID "lrmd_alert_id"
#define F_LRMD_ALERT_PATH "lrmd_alert_path"
#define F_LRMD_ALERT "lrmd_alert"
#define LRMD_OP_RSC_CHK_REG "lrmd_rsc_check_register"
#define LRMD_OP_RSC_REG "lrmd_rsc_register"
#define LRMD_OP_RSC_EXEC "lrmd_rsc_exec"
#define LRMD_OP_RSC_CANCEL "lrmd_rsc_cancel"
#define LRMD_OP_RSC_UNREG "lrmd_rsc_unregister"
#define LRMD_OP_RSC_INFO "lrmd_rsc_info"
#define LRMD_OP_RSC_METADATA "lrmd_rsc_metadata"
#define LRMD_OP_POKE "lrmd_rsc_poke"
#define LRMD_OP_NEW_CLIENT "lrmd_rsc_new_client"
#define LRMD_OP_CHECK "lrmd_check"
#define LRMD_OP_ALERT_EXEC "lrmd_alert_exec"
#define LRMD_IPC_OP_NEW "new"
#define LRMD_IPC_OP_DESTROY "destroy"
#define LRMD_IPC_OP_EVENT "event"
#define LRMD_IPC_OP_REQUEST "request"
#define LRMD_IPC_OP_RESPONSE "response"
#define LRMD_IPC_OP_SHUTDOWN_REQ "shutdown_req"
#define LRMD_IPC_OP_SHUTDOWN_ACK "shutdown_ack"
#define LRMD_IPC_OP_SHUTDOWN_NACK "shutdown_nack"
#define F_LRMD_IPC_OP "lrmd_ipc_op"
#define F_LRMD_IPC_IPC_SERVER "lrmd_ipc_server"
#define F_LRMD_IPC_SESSION "lrmd_ipc_session"
#define F_LRMD_IPC_CLIENT "lrmd_ipc_client"
#define F_LRMD_IPC_PROXY_NODE "lrmd_ipc_proxy_node"
#define F_LRMD_IPC_USER "lrmd_ipc_user"
#define F_LRMD_IPC_MSG "lrmd_ipc_msg"
#define F_LRMD_IPC_MSG_ID "lrmd_ipc_msg_id"
#define F_LRMD_IPC_MSG_FLAGS "lrmd_ipc_msg_flags"
#define T_LRMD "lrmd"
#define T_LRMD_REPLY "lrmd_reply"
#define T_LRMD_NOTIFY "lrmd_notify"
#define T_LRMD_IPC_PROXY "lrmd_ipc_proxy"
/* *INDENT-ON* */
/*!
* \brief Create a new local lrmd connection
*/
lrmd_t *lrmd_api_new(void);
/*!
* \brief Create a new remote lrmd connection using tls backend
*
* \param nodename name of remote node identified with this connection
* \param server name of server to connect to
* \param port port number to connect to
*
* \note nodename and server may be the same value.
*/
lrmd_t *lrmd_remote_api_new(const char *nodename, const char *server, int port);
/*!
* \brief Use after lrmd_poll returns 1 to read and dispatch a message
*
* \param[in,out] lrmd lrmd connection object
*
* \return TRUE if connection is still up, FALSE if disconnected
*/
bool lrmd_dispatch(lrmd_t * lrmd);
/*!
* \brief Poll for a specified timeout period to determine if a message
* is ready for dispatch.
* \retval 1 msg is ready
* \retval 0 timeout occurred
* \retval negative error code
*/
int lrmd_poll(lrmd_t * lrmd, int timeout);
/*!
* \brief Destroy lrmd object
*/
void lrmd_api_delete(lrmd_t * lrmd);
lrmd_key_value_t *lrmd_key_value_add(lrmd_key_value_t * kvp, const char *key, const char *value);
/* *INDENT-OFF* */
/* Reserved for future use */
enum lrmd_call_options {
lrmd_opt_none = 0x00000000,
/* lrmd_opt_sync_call = 0x00000001, //Not implemented, patches welcome. */
/*! Only notify the client originating a exec() the results */
lrmd_opt_notify_orig_only = 0x00000002,
/*! Drop recurring operations initiated by a client when client disconnects.
* This call_option is only valid when registering a resource. When used
* remotely with the pacemaker_remote daemon, this option means that recurring
* operations will be dropped once all the remote connections disconnect. */
lrmd_opt_drop_recurring = 0x00000003,
/*! Send notifications for recurring operations only when the result changes */
lrmd_opt_notify_changes_only = 0x00000004,
};
enum lrmd_callback_event {
lrmd_event_register,
lrmd_event_unregister,
lrmd_event_exec_complete,
lrmd_event_disconnect,
lrmd_event_connect,
lrmd_event_poke,
lrmd_event_new_client,
};
/* *INDENT-ON* */
typedef struct lrmd_event_data_s {
/*! Type of event, register, unregister, call_completed... */
enum lrmd_callback_event type;
/*! The resource this event occurred on. */
const char *rsc_id;
/*! The action performed, start, stop, monitor... */
const char *op_type;
/*! The userdata string given do exec() api function */
const char *user_data;
/*! The client api call id associated with this event */
int call_id;
/*! The operation's timeout period in ms. */
int timeout;
/*! The operation's recurring interval in ms. */
int interval;
/*! The operation's start delay value in ms. */
int start_delay;
/*! This operation that just completed is on a deleted rsc. */
int rsc_deleted;
/*! The executed ra return code mapped to OCF */
enum ocf_exitcode rc;
/*! The lrmd status returned for exec_complete events */
int op_status;
/*! stdout from resource agent operation */
const char *output;
/*! Timestamp of when op ran */
unsigned int t_run;
/*! Timestamp of last rc change */
unsigned int t_rcchange;
/*! Time in length op took to execute */
unsigned int exec_time;
/*! Time in length spent in queue */
unsigned int queue_time;
/*! int connection result. Used for connection and poke events */
int connection_rc;
/* This is a GHashTable containing the
* parameters given to the operation */
void *params;
/*! client node name associated with this connection
* (used to match actions to the proper client when there are multiple)
*/
const char *remote_nodename;
/*! exit failure reason string from resource agent operation */
const char *exit_reason;
} lrmd_event_data_t;
lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t * event);
void lrmd_free_event(lrmd_event_data_t * event);
typedef struct lrmd_rsc_info_s {
char *id;
char *type;
char *class;
char *provider;
} lrmd_rsc_info_t;
lrmd_rsc_info_t *lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info);
void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info);
typedef void (*lrmd_event_callback) (lrmd_event_data_t * event);
typedef struct lrmd_list_s {
const char *val;
struct lrmd_list_s *next;
} lrmd_list_t;
void lrmd_list_freeall(lrmd_list_t * head);
void lrmd_key_value_freeall(lrmd_key_value_t * head);
typedef struct lrmd_api_operations_s {
/*!
* \brief Connect from the lrmd.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*connect) (lrmd_t * lrmd, const char *client_name, int *fd);
/*!
* \brief Establish an connection to lrmd, don't block while connecting.
* \note this function requires the use of mainloop.
*
* \note The is returned using the event callback.
* \note When this function returns 0, the callback will be invoked
* to report the final result of the connect.
* \retval 0, connect in progress, wait for event callback
* \retval -1, failure.
*/
int (*connect_async) (lrmd_t * lrmd, const char *client_name, int timeout /*ms */ );
/*!
* \brief Is connected to lrmd daemon?
*
* \retval 0, false
* \retval 1, true
*/
int (*is_connected) (lrmd_t * lrmd);
/*!
* \brief Poke lrmd connection to verify it is still capable of serving requests
* \note The response comes in the form of a poke event to the callback.
*
* \retval 0, wait for response in callback
* \retval -1, connection failure, callback may not be invoked
*/
int (*poke_connection) (lrmd_t * lrmd);
/*!
* \brief Disconnect from the lrmd.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*disconnect) (lrmd_t * lrmd);
/*!
* \brief Register a resource with the lrmd.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval negative error code on failure
*/
int (*register_rsc) (lrmd_t * lrmd,
const char *rsc_id,
const char *class,
const char *provider, const char *agent, enum lrmd_call_options options);
/*!
* \brief Retrieve registration info for a rsc
*
* \retval info on success
* \retval NULL on failure
*/
lrmd_rsc_info_t *(*get_rsc_info) (lrmd_t * lrmd,
const char *rsc_id, enum lrmd_call_options options);
/*!
* \brief Unregister a resource from the lrmd.
*
* \note All pending and recurring operations will be cancelled
* automatically.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \retval 0, success
* \retval -1, success, but operations are currently executing on the rsc which will
* return once they are completed.
* \retval negative error code on failure
*
*/
int (*unregister_rsc) (lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options);
/*!
* \brief Sets the callback to receive lrmd events on.
*/
void (*set_callback) (lrmd_t * lrmd, lrmd_event_callback callback);
/*!
* \brief Issue a command on a resource
*
* \note Asynchronous, command is queued in daemon on function return, but
* execution of command is not synced.
*
* \note Operations on individual resources are guaranteed to occur
* in the order the client api calls them in.
*
* \note Operations between different resources are not guaranteed
* to occur in any specific order in relation to one another
* regardless of what order the client api is called in.
* \retval call_id to track async event result on success
* \retval negative error code on failure
*/
int (*exec) (lrmd_t * lrmd, const char *rsc_id, const char *action, const char *userdata, /* userdata string given back in event notification */
int interval, /* ms */
int timeout, /* ms */
int start_delay, /* ms */
enum lrmd_call_options options, lrmd_key_value_t * params); /* ownership of params is given up to api here */
/*!
* \brief Cancel a recurring command.
*
* \note Synchronous, guaranteed to occur in daemon before function returns.
*
* \note The cancel is completed async from this call.
* We can be guaranteed the cancel has completed once
* the callback receives an exec_complete event with
* the lrmd_op_status signifying that the operation is
* cancelled.
* \note For each resource, cancel operations and exec operations
* are processed in the order they are received.
* It is safe to assume that for a single resource, a cancel
* will occur in the lrmd before an exec if the client's cancel
* api call occurs before the exec api call.
*
* It is not however safe to assume any operation on one resource will
* occur before an operation on another resource regardless of
* the order the client api is called in.
*
* \retval 0, cancel command sent.
* \retval negative error code on failure
*/
int (*cancel) (lrmd_t * lrmd, const char *rsc_id, const char *action, int interval);
/*!
* \brief Get resource metadata for a specified resource agent
*
* \param[in] lrmd LRMD connection (unused)
* \param[in] class Resource agent class
* \param[in] provider Resource agent provider
* \param[in] agent Resource agent type
* \param[out] output Metadata will be stored here (must not be NULL)
* \param[in] options Options to use with any LRMD API calls (unused)
*
* \note Caller is responsible for freeing output. This call is currently
* always synchronous (blocking), and always done directly by the
* library (not via the LRMD connection). This means that it is based
* on the local host environment, even if the lrmd connection is to a
* remote node, so (for most resource agent classes) this will fail if
* the agent is not installed locally. This also means that, if an
* external agent must be executed, it will be executed by the
* caller's user, not the lrmd's.
* \todo Add a metadata call to the LRMD API and let the server handle this.
*
* \retval lrmd_ok success
* \retval negative error code on failure
*/
int (*get_metadata) (lrmd_t * lrmd,
const char *class,
const char *provider,
const char *agent, char **output, enum lrmd_call_options options);
/*!
* \brief Retrieve a list of installed resource agents.
*
* \note if class is not provided, all known agents will be returned
* \note list must be freed using lrmd_list_freeall()
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_agents) (lrmd_t * lrmd, lrmd_list_t ** agents, const char *class,
const char *provider);
/*!
* \brief Retrieve a list of resource agent providers
*
* \note When the agent is provided, only the agent's provider will be returned
* \note When no agent is supplied, all providers will be returned.
* \note List must be freed using lrmd_list_freeall()
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_ocf_providers) (lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers);
/*!
* \brief Retrieve a list of standards supported by this machine/installation
*
* \note List must be freed using lrmd_list_freeall()
*
* \retval num items in list on success
* \retval negative error code on failure
*/
int (*list_standards) (lrmd_t * lrmd, lrmd_list_t ** standards);
/*!
* \brief Execute an alert agent
*
* \note Asynchronous, command is queued in daemon on function return, but
* execution of command is not synced.
*
* \note Operations on individual alerts are guaranteed to occur
* in the order the client api calls them in.
*
* \note Operations between different alerts are not guaranteed
* to occur in any specific order in relation to one another
* regardless of what order the client api is called in.
* \retval call_id to track async event result on success
* \retval negative error code on failure
*/
int (*exec_alert) (lrmd_t *lrmd, const char *alert_id,
const char *alert_path, int timeout, /* ms */
lrmd_key_value_t *params); /* ownership of params is given up to api here */
} lrmd_api_operations_t;
struct lrmd_s {
lrmd_api_operations_t *cmds;
void *private;
};
static inline const char *
lrmd_event_type2str(enum lrmd_callback_event type)
{
switch (type) {
case lrmd_event_register:
return "register";
case lrmd_event_unregister:
return "unregister";
case lrmd_event_exec_complete:
return "exec_complete";
case lrmd_event_disconnect:
return "disconnect";
case lrmd_event_connect:
return "connect";
case lrmd_event_poke:
return "poke";
case lrmd_event_new_client:
return "new_client";
}
return "unknown";
}
#endif
diff --git a/lib/common/procfs.c b/lib/common/procfs.c
index fbbf9ebdd1..dc3f66b0f7 100644
--- a/lib/common/procfs.c
+++ b/lib/common/procfs.c
@@ -1,172 +1,172 @@
/*
* Copyright (C) 2015 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#include <ctype.h>
/*!
* \internal
* \brief Get process ID and name associated with a /proc directory entry
*
* \param[in] entry Directory entry (must be result of readdir() on /proc)
* \param[out] name If not NULL, a char[64] to hold the process name
* \param[out] pid If not NULL, will be set to process ID of entry
*
* \return 0 on success, -1 if entry is not for a process or info not found
*
* \note This should be called only on Linux systems, as not all systems that
* support /proc store process names and IDs in the same way.
*/
int
crm_procfs_process_info(struct dirent *entry, char *name, int *pid)
{
int fd, local_pid;
FILE *file;
struct stat statbuf;
char key[16] = { 0 }, procpath[128] = { 0 };
/* We're only interested in entries whose name is a PID,
* so skip anything non-numeric or that is too long.
*
* 114 = 128 - strlen("/proc/") - strlen("/status") - 1
*/
local_pid = atoi(entry->d_name);
if ((local_pid <= 0) || (strlen(entry->d_name) > 114)) {
return -1;
}
if (pid) {
*pid = local_pid;
}
/* Get this entry's file information */
strcpy(procpath, "/proc/");
strcat(procpath, entry->d_name);
fd = open(procpath, O_RDONLY);
if (fd < 0 ) {
return -1;
}
if (fstat(fd, &statbuf) < 0) {
close(fd);
return -1;
}
close(fd);
/* We're only interested in subdirectories */
if (!S_ISDIR(statbuf.st_mode)) {
return -1;
}
/* Read the first entry ("Name:") from the process's status file.
* We could handle the valgrind case if we parsed the cmdline file
* instead, but that's more of a pain than it's worth.
*/
if (name != NULL) {
strcat(procpath, "/status");
file = fopen(procpath, "r");
if (!file) {
return -1;
}
if ((fscanf(file, "%15s%63s", key, name) != 2)
|| safe_str_neq(key, "Name:")) {
fclose(file);
return -1;
}
fclose(file);
}
return 0;
}
/*!
* \internal
* \brief Return process ID of a named process
*
* \param[in] name Process name (as used in /proc/.../status)
*
* \return Process ID of named process if running, 0 otherwise
*
* \note This will return 0 if the process is being run via valgrind.
* This should be called only on Linux systems.
*/
int
crm_procfs_pid_of(const char *name)
{
DIR *dp;
struct dirent *entry;
int pid = 0;
char entry_name[64] = { 0 };
dp = opendir("/proc");
if (dp == NULL) {
crm_notice("Can not read /proc directory to track existing components");
return 0;
}
while ((entry = readdir(dp)) != NULL) {
if ((crm_procfs_process_info(entry, entry_name, &pid) == 0)
&& safe_str_eq(entry_name, name)
&& (crm_pid_active(pid, NULL) == 1)) {
crm_info("Found %s active as process %d", name, pid);
break;
}
pid = 0;
}
closedir(dp);
return pid;
}
/*!
* \internal
* \brief Calculate number of logical CPU cores from procfs
*
* \return Number of cores (or 1 if unable to determine)
*/
unsigned int
crm_procfs_num_cores(void)
{
int cores = 0;
FILE *stream = NULL;
/* Parse /proc/stat instead of /proc/cpuinfo because it's smaller */
stream = fopen("/proc/stat", "r");
if (stream == NULL) {
crm_perror(LOG_INFO, "Could not open /proc/stat");
} else {
char buffer[2048];
while (fgets(buffer, sizeof(buffer), stream)) {
- if (!strncmp(buffer, "cpu", 3) && isdigit(buffer[3])) {
+ if (crm_starts_with(buffer, "cpu") && isdigit(buffer[3])) {
++cores;
}
}
fclose(stream);
}
return cores? cores : 1;
}
diff --git a/lib/common/strings.c b/lib/common/strings.c
index 6b3a7cf022..41ee0d433f 100644
--- a/lib/common/strings.c
+++ b/lib/common/strings.c
@@ -1,453 +1,482 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <bzlib.h>
#include <sys/types.h>
char *
crm_concat(const char *prefix, const char *suffix, char join)
{
int len = 0;
char *new_str = NULL;
CRM_ASSERT(prefix != NULL);
CRM_ASSERT(suffix != NULL);
len = strlen(prefix) + strlen(suffix) + 2;
new_str = malloc(len);
if(new_str) {
sprintf(new_str, "%s%c%s", prefix, join, suffix);
new_str[len - 1] = 0;
}
return new_str;
}
char *
crm_itoa_stack(int an_int, char *buffer, size_t len)
{
if (buffer != NULL) {
snprintf(buffer, len, "%d", an_int);
}
return buffer;
}
char *
crm_itoa(int an_int)
{
int len = 32;
char *buffer = NULL;
buffer = malloc(len + 1);
if (buffer != NULL) {
snprintf(buffer, len, "%d", an_int);
}
return buffer;
}
void
g_hash_destroy_str(gpointer data)
{
free(data);
}
long long
crm_int_helper(const char *text, char **end_text)
{
long long result = -1;
char *local_end_text = NULL;
int saved_errno = 0;
errno = 0;
if (text != NULL) {
#ifdef ANSI_ONLY
if (end_text != NULL) {
result = strtol(text, end_text, 10);
} else {
result = strtol(text, &local_end_text, 10);
}
#else
if (end_text != NULL) {
result = strtoll(text, end_text, 10);
} else {
result = strtoll(text, &local_end_text, 10);
}
#endif
saved_errno = errno;
if (errno == EINVAL) {
crm_err("Conversion of %s failed", text);
result = -1;
} else if (errno == ERANGE) {
crm_err("Conversion of %s was clipped: %lld", text, result);
} else if (errno != 0) {
crm_perror(LOG_ERR, "Conversion of %s failed", text);
}
if (local_end_text != NULL && local_end_text[0] != '\0') {
crm_err("Characters left over after parsing '%s': '%s'", text, local_end_text);
}
errno = saved_errno;
}
return result;
}
int
crm_parse_int(const char *text, const char *default_text)
{
int atoi_result = -1;
if (text != NULL) {
atoi_result = crm_int_helper(text, NULL);
if (errno == 0) {
return atoi_result;
}
}
if (default_text != NULL) {
atoi_result = crm_int_helper(default_text, NULL);
if (errno == 0) {
return atoi_result;
}
} else {
crm_err("No default conversion value supplied");
}
return -1;
}
gboolean
safe_str_neq(const char *a, const char *b)
{
if (a == b) {
return FALSE;
} else if (a == NULL || b == NULL) {
return TRUE;
} else if (strcasecmp(a, b) == 0) {
return FALSE;
}
return TRUE;
}
gboolean
crm_is_true(const char *s)
{
gboolean ret = FALSE;
if (s != NULL) {
crm_str_to_boolean(s, &ret);
}
return ret;
}
int
crm_str_to_boolean(const char *s, int *ret)
{
if (s == NULL) {
return -1;
} else if (strcasecmp(s, "true") == 0
|| strcasecmp(s, "on") == 0
|| strcasecmp(s, "yes") == 0 || strcasecmp(s, "y") == 0 || strcasecmp(s, "1") == 0) {
*ret = TRUE;
return 1;
} else if (strcasecmp(s, "false") == 0
|| strcasecmp(s, "off") == 0
|| strcasecmp(s, "no") == 0 || strcasecmp(s, "n") == 0 || strcasecmp(s, "0") == 0) {
*ret = FALSE;
return 1;
}
return -1;
}
char *
crm_strip_trailing_newline(char *str)
{
int len;
if (str == NULL) {
return str;
}
for (len = strlen(str) - 1; len >= 0 && str[len] == '\n'; len--) {
str[len] = '\0';
}
return str;
}
gboolean
crm_str_eq(const char *a, const char *b, gboolean use_case)
{
if (use_case) {
return g_strcmp0(a, b) == 0;
/* TODO - Figure out which calls, if any, really need to be case independent */
} else if (a == b) {
return TRUE;
} else if (a == NULL || b == NULL) {
/* shouldn't be comparing NULLs */
return FALSE;
} else if (strcasecmp(a, b) == 0) {
return TRUE;
}
return FALSE;
}
static inline const char * null2emptystr(const char *);
static inline const char *
null2emptystr(const char *input)
{
return (input == NULL) ? "" : input;
}
+/*!
+ * \brief Check whether a string starts with a certain sequence
+ *
+ * \param[in] str String to check
+ * \param[in] match Sequence to match against beginning of \p str
+ *
+ * \return \c TRUE if \p str begins with match, \c FALSE otherwise
+ * \note This is equivalent to !strncmp(s, prefix, strlen(prefix))
+ * but is likely less efficient when prefix is a string literal
+ * if the compiler optimizes away the strlen() at compile time,
+ * and more efficient otherwise.
+ */
+bool
+crm_starts_with(const char *str, const char *prefix)
+{
+ const char *s = str;
+ const char *p = prefix;
+
+ if (!s || !p) {
+ return FALSE;
+ }
+ while (*s && *p) {
+ if (*s++ != *p++) {
+ return FALSE;
+ }
+ }
+ return (*p == 0);
+}
+
static inline int crm_ends_with_internal(const char *, const char *, gboolean);
static inline int
crm_ends_with_internal(const char *s, const char *match, gboolean as_extension)
{
if ((s == NULL) || (match == NULL)) {
return 0;
} else {
size_t slen, mlen;
if (match[0] != '\0'
&& (as_extension /* following commented out for inefficiency:
|| strchr(&match[1], match[0]) == NULL */))
return !strcmp(null2emptystr(strrchr(s, match[0])), match);
if ((mlen = strlen(match)) == 0)
return 1;
slen = strlen(s);
return ((slen >= mlen) && !strcmp(s + slen - mlen, match));
}
}
/*!
* \internal
* \brief Check whether a string ends with a certain sequence
*
* \param[in] s String to check
* \param[in] match Sequence to match against end of \p s
*
* \return \c TRUE if \p s ends (verbatim, i.e., case sensitively)
* with match (including empty string), \c FALSE otherwise
*
* \see crm_ends_with_ext()
*/
gboolean
crm_ends_with(const char *s, const char *match)
{
return crm_ends_with_internal(s, match, FALSE);
}
/*!
* \internal
* \brief Check whether a string ends with a certain "extension"
*
* \param[in] s String to check
* \param[in] match Extension to match against end of \p s, that is,
* its first character must not occur anywhere
* in the rest of that very sequence (example: file
* extension where the last dot is its delimiter,
* e.g., ".html"); incorrect results may be
* returned otherwise.
*
* \return \c TRUE if \p s ends (verbatim, i.e., case sensitively)
* with "extension" designated as \p match (including empty
* string), \c FALSE otherwise
*
* \note Main incentive to prefer this function over \c crm_ends_with
* where possible is the efficiency (at the cost of added
* restriction on \p match as stated; the complexity class
* remains the same, though: BigO(M+N) vs. BigO(M+2N)).
*
* \see crm_ends_with()
*/
gboolean
crm_ends_with_ext(const char *s, const char *match)
{
return crm_ends_with_internal(s, match, TRUE);
}
/*
* This re-implements g_str_hash as it was prior to glib2-2.28:
*
* http://git.gnome.org/browse/glib/commit/?id=354d655ba8a54b754cb5a3efb42767327775696c
*
* Note that the new g_str_hash is presumably a *better* hash (it's actually
* a correct implementation of DJB's hash), but we need to preserve existing
* behaviour, because the hash key ultimately determines the "sort" order
* when iterating through GHashTables, which affects allocation of scores to
* clone instances when iterating through rsc->allowed_nodes. It (somehow)
* also appears to have some minor impact on the ordering of a few
* pseudo_event IDs in the transition graph.
*/
guint
g_str_hash_traditional(gconstpointer v)
{
const signed char *p;
guint32 h = 0;
for (p = v; *p != '\0'; p++)
h = (h << 5) - h + *p;
return h;
}
guint
crm_strcase_hash(gconstpointer v)
{
const signed char *p;
guint32 h = 0;
for (p = v; *p != '\0'; p++)
h = (h << 5) - h + g_ascii_tolower(*p);
return h;
}
static void
copy_str_table_entry(gpointer key, gpointer value, gpointer user_data)
{
if (key && value && user_data) {
g_hash_table_insert((GHashTable*)user_data, strdup(key), strdup(value));
}
}
GHashTable *
crm_str_table_dup(GHashTable *old_table)
{
GHashTable *new_table = NULL;
if (old_table) {
new_table = crm_str_table_new();
g_hash_table_foreach(old_table, copy_str_table_entry, new_table);
}
return new_table;
}
char *
add_list_element(char *list, const char *value)
{
int len = 0;
int last = 0;
if (value == NULL) {
return list;
}
if (list) {
last = strlen(list);
}
len = last + 2; /* +1 space, +1 EOS */
len += strlen(value);
list = realloc_safe(list, len);
sprintf(list + last, " %s", value);
return list;
}
bool
crm_compress_string(const char *data, int length, int max, char **result, unsigned int *result_len)
{
int rc;
char *compressed = NULL;
char *uncompressed = strdup(data);
struct timespec after_t;
struct timespec before_t;
if(max == 0) {
max = (length * 1.1) + 600; /* recommended size */
}
#ifdef CLOCK_MONOTONIC
clock_gettime(CLOCK_MONOTONIC, &before_t);
#endif
/* coverity[returned_null] Ignore */
compressed = malloc(max);
*result_len = max;
rc = BZ2_bzBuffToBuffCompress(compressed, result_len, uncompressed, length, CRM_BZ2_BLOCKS, 0,
CRM_BZ2_WORK);
free(uncompressed);
if (rc != BZ_OK) {
crm_err("Compression of %d bytes failed: %s (%d)", length, bz2_strerror(rc), rc);
free(compressed);
return FALSE;
}
#ifdef CLOCK_MONOTONIC
clock_gettime(CLOCK_MONOTONIC, &after_t);
crm_trace("Compressed %d bytes into %d (ratio %d:1) in %ldms",
length, *result_len, length / (*result_len),
(after_t.tv_sec - before_t.tv_sec) * 1000 + (after_t.tv_nsec -
before_t.tv_nsec) / 1000000);
#else
crm_trace("Compressed %d bytes into %d (ratio %d:1)",
length, *result_len, length / (*result_len));
#endif
*result = compressed;
return TRUE;
}
/*!
* \brief Compare two strings alphabetically (case-insensitive)
*
* \param[in] a First string to compare
* \param[in] b Second string to compare
*
* \return 0 if strings are equal, -1 if a < b, 1 if a > b
*
* \note Usable as a GCompareFunc with g_list_sort().
* NULL is considered less than non-NULL.
*/
gint
crm_alpha_sort(gconstpointer a, gconstpointer b)
{
if (!a && !b) {
return 0;
} else if (!a) {
return -1;
} else if (!b) {
return 1;
}
return strcasecmp(a, b);
}
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index b7357fec6d..651909ec47 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -1,2677 +1,2677 @@
/*
* Copyright (c) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <glib.h>
#include <dirent.h>
#include <libgen.h> /* Add it for compiling on OSX */
#include <crm/crm.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#ifdef HAVE_STONITH_STONITH_H
# include <stonith/stonith.h>
# define LHA_STONITH_LIBRARY "libstonith.so.1"
static void *lha_agents_lib = NULL;
#endif
#include <crm/common/mainloop.h>
CRM_TRACE_INIT_DATA(stonith);
struct stonith_action_s {
/*! user defined data */
char *agent;
char *action;
char *victim;
char *args;
int timeout;
int async;
void *userdata;
void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data);
/*! internal async track data */
int fd_stdout;
int fd_stderr;
int last_timeout_signo;
/*! internal timing information */
time_t initial_start_time;
int tries;
int remaining_timeout;
guint timer_sigterm;
guint timer_sigkill;
int max_retries;
/* device output data */
GPid pid;
int rc;
char *output;
char *error;
};
typedef struct stonith_private_s {
char *token;
crm_ipc_t *ipc;
mainloop_io_t *source;
GHashTable *stonith_op_callback_table;
GList *notify_list;
void (*op_callback) (stonith_t * st, stonith_callback_data_t * data);
} stonith_private_t;
typedef struct stonith_notify_client_s {
const char *event;
const char *obj_id; /* implement one day */
const char *obj_type; /* implement one day */
void (*notify) (stonith_t * st, stonith_event_t * e);
} stonith_notify_client_t;
typedef struct stonith_callback_client_s {
void (*callback) (stonith_t * st, stonith_callback_data_t * data);
const char *id;
void *user_data;
gboolean only_success;
gboolean allow_timeout_updates;
struct timer_rec_s *timer;
} stonith_callback_client_t;
struct notify_blob_s {
stonith_t *stonith;
xmlNode *xml;
};
struct timer_rec_s {
int call_id;
int timeout;
guint ref;
stonith_t *stonith;
};
typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *,
xmlNode *, xmlNode *, xmlNode **, xmlNode **);
#if HAVE_STONITH_STONITH_H
static const char META_TEMPLATE[] =
"<?xml version=\"1.0\"?>\n"
"<!DOCTYPE resource-agent SYSTEM \"ra-api-1.dtd\">\n"
"<resource-agent name=\"%s\">\n"
" <version>1.0</version>\n"
" <longdesc lang=\"en\">\n"
"%s\n"
" </longdesc>\n"
" <shortdesc lang=\"en\">%s</shortdesc>\n"
"%s\n"
" <actions>\n"
" <action name=\"start\" timeout=\"20\" />\n"
" <action name=\"stop\" timeout=\"15\" />\n"
" <action name=\"status\" timeout=\"20\" />\n"
" <action name=\"monitor\" timeout=\"20\" interval=\"3600\"/>\n"
" <action name=\"meta-data\" timeout=\"15\" />\n"
" </actions>\n"
" <special tag=\"heartbeat\">\n"
" <version>2.0</version>\n" " </special>\n" "</resource-agent>\n";
#endif
bool stonith_dispatch(stonith_t * st);
int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata);
void stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc);
xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data,
xmlNode ** output_data, int call_options, int timeout);
static void stonith_connection_destroy(gpointer user_data);
static void stonith_send_notification(gpointer data, gpointer user_data);
static int internal_stonith_action_execute(stonith_action_t * action);
static void log_action(stonith_action_t *action, pid_t pid);
static void
log_action(stonith_action_t *action, pid_t pid)
{
if (action->output) {
/* Logging the whole string confuses syslog when the string is xml */
char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid);
crm_log_output(LOG_TRACE, prefix, action->output);
free(prefix);
}
if (action->error) {
/* Logging the whole string confuses syslog when the string is xml */
char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
crm_log_output(LOG_WARNING, prefix, action->error);
free(prefix);
}
}
static void
stonith_connection_destroy(gpointer user_data)
{
stonith_t *stonith = user_data;
stonith_private_t *native = NULL;
struct notify_blob_s blob;
crm_trace("Sending destroyed notification");
blob.stonith = stonith;
blob.xml = create_xml_node(NULL, "notify");
native = stonith->private;
native->ipc = NULL;
native->source = NULL;
stonith->state = stonith_disconnected;
crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT);
g_list_foreach(native->notify_list, stonith_send_notification, &blob);
free_xml(blob.xml);
}
xmlNode *
create_device_registration_xml(const char *id, const char *namespace, const char *agent,
stonith_key_value_t * params, const char *rsc_provides)
{
xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE);
xmlNode *args = create_xml_node(data, XML_TAG_ATTRS);
#if HAVE_STONITH_STONITH_H
namespace = get_stonith_provider(agent, namespace);
if (safe_str_eq(namespace, "heartbeat")) {
hash2field((gpointer) "plugin", (gpointer) agent, args);
agent = "fence_legacy";
}
#endif
crm_xml_add(data, XML_ATTR_ID, id);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add(data, "agent", agent);
crm_xml_add(data, "namespace", namespace);
if (rsc_provides) {
crm_xml_add(data, "rsc_provides", rsc_provides);
}
for (; params; params = params->next) {
hash2field((gpointer) params->key, (gpointer) params->value, args);
}
return data;
}
static int
stonith_api_register_device(stonith_t * st, int call_options,
const char *id, const char *namespace, const char *agent,
stonith_key_value_t * params)
{
int rc = 0;
xmlNode *data = NULL;
data = create_device_registration_xml(id, namespace, agent, params, NULL);
rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_device(stonith_t * st, int call_options, const char *name)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add(data, XML_ATTR_ID, name);
rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_level_full(stonith_t *st, int options,
const char *node, const char *pattern,
const char *attr, const char *value, int level)
{
int rc = 0;
xmlNode *data = NULL;
CRM_CHECK(node || pattern || (attr && value), return -EINVAL);
data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
if (node) {
crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
} else if (pattern) {
crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern);
} else {
crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr);
crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value);
}
crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_remove_level(stonith_t * st, int options, const char *node, int level)
{
return stonith_api_remove_level_full(st, options, node,
NULL, NULL, NULL, level);
}
/*!
* \internal
* \brief Create XML for stonithd topology level registration request
*
* \param[in] node If not NULL, target level by this node name
* \param[in] pattern If not NULL, target by node name using this regex
* \param[in] attr If not NULL, target by this node attribute
* \param[in] value If not NULL, target by this node attribute value
* \param[in] level Index number of level to register
* \param[in] device_list List of devices in level
*
* \return Newly allocated XML tree on success, NULL otherwise
*
* \note The caller should set only one of node, pattern or attr/value.
*/
xmlNode *
create_level_registration_xml(const char *node, const char *pattern,
const char *attr, const char *value,
int level, stonith_key_value_t *device_list)
{
int len = 0;
char *list = NULL;
xmlNode *data;
CRM_CHECK(node || pattern || (attr && value), return NULL);
data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
CRM_CHECK(data, return NULL);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add_int(data, XML_ATTR_ID, level);
crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
if (node) {
crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
} else if (pattern) {
crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern);
} else {
crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr);
crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value);
}
for (; device_list; device_list = device_list->next) {
int adding = strlen(device_list->value);
if(list) {
adding++; /* +1 space */
}
crm_trace("Adding %s (%dc) at offset %d", device_list->value, adding, len);
list = realloc_safe(list, len + adding + 1); /* +1 EOS */
if (list == NULL) {
crm_perror(LOG_CRIT, "Could not create device list");
free_xml(data);
return NULL;
}
sprintf(list + len, "%s%s", len?",":"", device_list->value);
len += adding;
}
crm_xml_add(data, XML_ATTR_STONITH_DEVICES, list);
free(list);
return data;
}
static int
stonith_api_register_level_full(stonith_t * st, int options, const char *node,
const char *pattern,
const char *attr, const char *value,
int level, stonith_key_value_t *device_list)
{
int rc = 0;
xmlNode *data = create_level_registration_xml(node, pattern, attr, value,
level, device_list);
CRM_CHECK(data != NULL, return -EINVAL);
rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0);
free_xml(data);
return rc;
}
static int
stonith_api_register_level(stonith_t * st, int options, const char *node, int level,
stonith_key_value_t * device_list)
{
return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL,
level, device_list);
}
static void
append_arg(const char *key, const char *value, char **args)
{
int len = 3; /* =, \n, \0 */
int last = 0;
CRM_CHECK(key != NULL, return);
CRM_CHECK(value != NULL, return);
if (strstr(key, "pcmk_")) {
return;
} else if (strstr(key, CRM_META)) {
return;
} else if (safe_str_eq(key, "crm_feature_set")) {
return;
}
len += strlen(key);
len += strlen(value);
if (*args != NULL) {
last = strlen(*args);
}
*args = realloc_safe(*args, last + len);
crm_trace("Appending: %s=%s", key, value);
sprintf((*args) + last, "%s=%s\n", key, value);
}
static void
append_config_arg(gpointer key, gpointer value, gpointer user_data)
{
/* stonithd will filter action out when it registers the device,
* but ignore it here just in case any other library callers
* fail to do so.
*/
if (safe_str_neq(key, STONITH_ATTR_ACTION_OP)) {
append_arg(key, value, user_data);
return;
}
}
static void
append_host_specific_args(const char *victim, const char *map, GHashTable * params, char **arg_list)
{
char *name = NULL;
int last = 0, lpc = 0, max = 0;
if (map == NULL) {
/* The best default there is for now... */
crm_debug("Using default arg map: port=uname");
append_arg("port", victim, arg_list);
return;
}
max = strlen(map);
crm_debug("Processing arg map: %s", map);
for (; lpc < max + 1; lpc++) {
if (isalpha(map[lpc])) {
/* keep going */
} else if (map[lpc] == '=' || map[lpc] == ':') {
free(name);
name = calloc(1, 1 + lpc - last);
memcpy(name, map + last, lpc - last);
crm_debug("Got name: %s", name);
last = lpc + 1;
} else if (map[lpc] == 0 || map[lpc] == ',' || isspace(map[lpc])) {
char *param = NULL;
const char *value = NULL;
param = calloc(1, 1 + lpc - last);
memcpy(param, map + last, lpc - last);
last = lpc + 1;
crm_debug("Got key: %s", param);
if (name == NULL) {
crm_err("Misparsed '%s', found '%s' without a name", map, param);
free(param);
continue;
}
if (safe_str_eq(param, "uname")) {
value = victim;
} else {
char *key = crm_meta_name(param);
value = g_hash_table_lookup(params, key);
free(key);
}
if (value) {
crm_debug("Setting '%s'='%s' (%s) for %s", name, value, param, victim);
append_arg(name, value, arg_list);
} else {
crm_err("No node attribute '%s' for '%s'", name, victim);
}
free(name);
name = NULL;
free(param);
if (map[lpc] == 0) {
break;
}
} else if (isspace(map[lpc])) {
last = lpc;
}
}
free(name);
}
static char *
make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args,
GHashTable * port_map)
{
char buffer[512];
char *arg_list = NULL;
const char *value = NULL;
CRM_CHECK(action != NULL, return NULL);
snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action);
if (device_args) {
value = g_hash_table_lookup(device_args, buffer);
}
if (value == NULL && device_args) {
/* @COMPAT deprecated since 1.1.6 */
snprintf(buffer, sizeof(buffer), "pcmk_%s_cmd", action);
value = g_hash_table_lookup(device_args, buffer);
}
if (value == NULL && device_args && safe_str_eq(action, "off")) {
/* @COMPAT deprecated since 1.1.8 */
value = g_hash_table_lookup(device_args, "pcmk_poweroff_action");
}
if (value) {
crm_info("Substituting action '%s' for requested operation '%s'", value, action);
action = value;
}
append_arg(STONITH_ATTR_ACTION_OP, action, &arg_list);
if (victim && device_args) {
const char *alias = victim;
const char *param = g_hash_table_lookup(device_args, STONITH_ATTR_HOSTARG);
if (port_map && g_hash_table_lookup(port_map, victim)) {
alias = g_hash_table_lookup(port_map, victim);
}
/* Always supply the node's name too:
* https://fedorahosted.org/cluster/wiki/FenceAgentAPI
*/
append_arg("nodename", victim, &arg_list);
if (victim_nodeid) {
char nodeid_str[33] = { 0, };
if (snprintf(nodeid_str, 33, "%u", (unsigned int)victim_nodeid)) {
crm_info("For stonith action (%s) for victim %s, adding nodeid (%s) to parameters",
action, victim, nodeid_str);
append_arg("nodeid", nodeid_str, &arg_list);
}
}
/* Check if we need to supply the victim in any other form */
if(safe_str_eq(agent, "fence_legacy")) {
value = agent;
} else if (param == NULL) {
// @COMPAT config < 1.1.6
// pcmk_arg_map is deprecated in favor of pcmk_host_argument
const char *map = g_hash_table_lookup(device_args, STONITH_ATTR_ARGMAP);
if (map == NULL) {
param = "port";
value = g_hash_table_lookup(device_args, param);
} else {
append_host_specific_args(alias, map, device_args, &arg_list);
value = map; /* Nothing more to do */
}
} else if (safe_str_eq(param, "none")) {
value = param; /* Nothing more to do */
} else {
value = g_hash_table_lookup(device_args, param);
}
/* Don't overwrite explictly set values for $param */
if (value == NULL || safe_str_eq(value, "dynamic")) {
crm_debug("Performing %s action for node '%s' as '%s=%s'", action, victim, param,
alias);
append_arg(param, alias, &arg_list);
}
}
if (device_args) {
g_hash_table_foreach(device_args, append_config_arg, &arg_list);
}
return arg_list;
}
static gboolean
st_child_term(gpointer data)
{
int rc = 0;
stonith_action_t *track = data;
crm_info("Child %d timed out, sending SIGTERM", track->pid);
track->timer_sigterm = 0;
track->last_timeout_signo = SIGTERM;
rc = kill(-track->pid, SIGTERM);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't send SIGTERM to %d", track->pid);
}
return FALSE;
}
static gboolean
st_child_kill(gpointer data)
{
int rc = 0;
stonith_action_t *track = data;
crm_info("Child %d timed out, sending SIGKILL", track->pid);
track->timer_sigkill = 0;
track->last_timeout_signo = SIGKILL;
rc = kill(-track->pid, SIGKILL);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't send SIGKILL to %d", track->pid);
}
return FALSE;
}
static void
stonith_action_clear_tracking_data(stonith_action_t * action)
{
if (action->timer_sigterm > 0) {
g_source_remove(action->timer_sigterm);
action->timer_sigterm = 0;
}
if (action->timer_sigkill > 0) {
g_source_remove(action->timer_sigkill);
action->timer_sigkill = 0;
}
if (action->fd_stdout) {
close(action->fd_stdout);
action->fd_stdout = 0;
}
if (action->fd_stderr) {
close(action->fd_stderr);
action->fd_stderr = 0;
}
free(action->output);
action->output = NULL;
free(action->error);
action->error = NULL;
action->rc = 0;
action->pid = 0;
action->last_timeout_signo = 0;
}
static void
stonith_action_destroy(stonith_action_t * action)
{
stonith_action_clear_tracking_data(action);
free(action->agent);
free(action->args);
free(action->action);
free(action->victim);
free(action);
}
#define FAILURE_MAX_RETRIES 2
stonith_action_t *
stonith_action_create(const char *agent,
const char *_action,
const char *victim,
uint32_t victim_nodeid,
int timeout, GHashTable * device_args, GHashTable * port_map)
{
stonith_action_t *action;
action = calloc(1, sizeof(stonith_action_t));
crm_debug("Initiating action %s for agent %s (target=%s)", _action, agent, victim);
action->args = make_args(agent, _action, victim, victim_nodeid, device_args, port_map);
action->agent = strdup(agent);
action->action = strdup(_action);
if (victim) {
action->victim = strdup(victim);
}
action->timeout = action->remaining_timeout = timeout;
action->max_retries = FAILURE_MAX_RETRIES;
if (device_args) {
char buffer[512];
const char *value = NULL;
snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action);
value = g_hash_table_lookup(device_args, buffer);
if (value) {
action->max_retries = atoi(value);
}
}
return action;
}
#define READ_MAX 500
static char *
read_output(int fd)
{
char buffer[READ_MAX];
char *output = NULL;
int len = 0;
int more = 0;
if (!fd) {
return NULL;
}
do {
errno = 0;
memset(&buffer, 0, READ_MAX);
more = read(fd, buffer, READ_MAX - 1);
if (more > 0) {
buffer[more] = 0; /* Make sure it's nul-terminated for logging
* 'more' is always less than our buffer size
*/
output = realloc_safe(output, len + more + 1);
snprintf(output + len, more + 1, "%s", buffer);
len += more;
}
} while (more == (READ_MAX - 1) || (more < 0 && errno == EINTR));
return output;
}
static gboolean
update_remaining_timeout(stonith_action_t * action)
{
int diff = time(NULL) - action->initial_start_time;
if (action->tries >= action->max_retries) {
crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
action->agent, action->action, action->max_retries);
action->remaining_timeout = 0;
} else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) {
/* only set remaining timeout period if there is 30%
* or greater of the original timeout period left */
action->remaining_timeout = action->timeout - diff;
} else {
action->remaining_timeout = 0;
}
return action->remaining_timeout ? TRUE : FALSE;
}
static void
stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)
{
stonith_action_t *action = mainloop_child_userdata(p);
if (action->timer_sigterm > 0) {
g_source_remove(action->timer_sigterm);
action->timer_sigterm = 0;
}
if (action->timer_sigkill > 0) {
g_source_remove(action->timer_sigkill);
action->timer_sigkill = 0;
}
action->output = read_output(action->fd_stdout);
action->error = read_output(action->fd_stderr);
if (action->last_timeout_signo) {
action->rc = -ETIME;
crm_notice("Child process %d performing action '%s' timed out with signal %d",
pid, action->action, action->last_timeout_signo);
} else if (signo) {
action->rc = -ECONNABORTED;
crm_notice("Child process %d performing action '%s' timed out with signal %d",
pid, action->action, signo);
} else {
crm_debug("Child process %d performing action '%s' exited with rc %d",
pid, action->action, exitcode);
if (exitcode > 0) {
/* Try to provide a useful error code based on the fence agent's
* error output.
*/
if (action->error == NULL) {
exitcode = -ENODATA;
} else if (strstr(action->error, "imed out")) {
/* Some agents have their own internal timeouts */
exitcode = -ETIMEDOUT;
} else if (strstr(action->error, "Unrecognised action")) {
exitcode = -EOPNOTSUPP;
} else {
exitcode = -pcmk_err_generic;
}
}
action->rc = exitcode;
}
log_action(action, pid);
if (action->rc != pcmk_ok && update_remaining_timeout(action)) {
int rc = internal_stonith_action_execute(action);
if (rc == pcmk_ok) {
return;
}
}
if (action->done_cb) {
action->done_cb(pid, action->rc, action->output, action->userdata);
}
stonith_action_destroy(action);
}
static int
internal_stonith_action_execute(stonith_action_t * action)
{
int pid, status = 0, len, rc = -EPROTO;
int ret;
int total = 0;
int p_read_fd, p_write_fd; /* parent read/write file descriptors */
int c_read_fd, c_write_fd; /* child read/write file descriptors */
int c_stderr_fd, p_stderr_fd; /* parent/child side file descriptors for stderr */
int fd1[2];
int fd2[2];
int fd3[2];
int is_retry = 0;
/* clear any previous tracking data */
stonith_action_clear_tracking_data(action);
if (!action->tries) {
action->initial_start_time = time(NULL);
}
action->tries++;
if (action->tries > 1) {
crm_info("Attempt %d to execute %s (%s). remaining timeout is %d",
action->tries, action->agent, action->action, action->remaining_timeout);
is_retry = 1;
}
c_read_fd = c_write_fd = p_read_fd = p_write_fd = c_stderr_fd = p_stderr_fd = -1;
if (action->args == NULL || action->agent == NULL)
goto fail;
len = strlen(action->args);
if (pipe(fd1))
goto fail;
p_read_fd = fd1[0];
c_write_fd = fd1[1];
if (pipe(fd2))
goto fail;
c_read_fd = fd2[0];
p_write_fd = fd2[1];
if (pipe(fd3))
goto fail;
p_stderr_fd = fd3[0];
c_stderr_fd = fd3[1];
crm_debug("forking");
pid = fork();
if (pid < 0) {
rc = -ECHILD;
goto fail;
}
if (!pid) {
/* child */
setpgid(0, 0);
close(1);
/* coverity[leaked_handle] False positive */
if (dup(c_write_fd) < 0)
goto fail;
close(2);
/* coverity[leaked_handle] False positive */
if (dup(c_stderr_fd) < 0)
goto fail;
close(0);
/* coverity[leaked_handle] False positive */
if (dup(c_read_fd) < 0)
goto fail;
/* keep c_stderr_fd open so parent can report all errors. */
/* keep c_write_fd open so hostlist can be sent to parent. */
close(c_read_fd);
close(p_read_fd);
close(p_write_fd);
close(p_stderr_fd);
/* keep retries from executing out of control */
if (is_retry) {
sleep(1);
}
execlp(action->agent, action->agent, NULL);
exit(EXIT_FAILURE);
}
/* parent */
action->pid = pid;
ret = fcntl(p_read_fd, F_SETFL, fcntl(p_read_fd, F_GETFL, 0) | O_NONBLOCK);
if (ret < 0) {
crm_perror(LOG_NOTICE, "Could not change the output of %s to be non-blocking",
action->agent);
}
ret = fcntl(p_stderr_fd, F_SETFL, fcntl(p_stderr_fd, F_GETFL, 0) | O_NONBLOCK);
if (ret < 0) {
crm_perror(LOG_NOTICE, "Could not change the stderr of %s to be non-blocking",
action->agent);
}
do {
crm_debug("sending args");
ret = write(p_write_fd, action->args + total, len - total);
if (ret > 0) {
total += ret;
}
} while (errno == EINTR && total < len);
if (total != len) {
crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len);
if (ret >= 0) {
rc = -ECOMM;
}
goto fail;
}
close(p_write_fd); p_write_fd = -1;
/* async */
if (action->async) {
action->fd_stdout = p_read_fd;
action->fd_stderr = p_stderr_fd;
mainloop_child_add(pid, 0/* Move the timeout here? */, action->action, action, stonith_action_async_done);
crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid,
action->remaining_timeout);
action->last_timeout_signo = 0;
if (action->remaining_timeout) {
action->timer_sigterm =
g_timeout_add(1000 * action->remaining_timeout, st_child_term, action);
action->timer_sigkill =
g_timeout_add(1000 * (action->remaining_timeout + 5), st_child_kill, action);
} else {
crm_err("No timeout set for stonith operation %s with device %s",
action->action, action->agent);
}
close(c_write_fd);
close(c_read_fd);
close(c_stderr_fd);
return 0;
} else {
/* sync */
int timeout = action->remaining_timeout + 1;
pid_t p = 0;
while (action->remaining_timeout < 0 || timeout > 0) {
p = waitpid(pid, &status, WNOHANG);
if (p > 0) {
break;
}
sleep(1);
timeout--;
}
if (timeout == 0) {
int killrc = kill(-pid, SIGKILL);
if (killrc && errno != ESRCH) {
crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno);
}
/*
* From sigprocmask(2):
* It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored.
*
* This makes it safe to skip WNOHANG here
*/
p = waitpid(pid, &status, 0);
}
if (p <= 0) {
crm_perror(LOG_ERR, "waitpid(%d)", pid);
} else if (p != pid) {
crm_err("Waited for %d, got %d", pid, p);
}
action->output = read_output(p_read_fd);
action->error = read_output(p_stderr_fd);
action->rc = -ECONNABORTED;
log_action(action, pid);
rc = action->rc;
if (timeout == 0) {
action->rc = -ETIME;
} else if (WIFEXITED(status)) {
crm_debug("result = %d", WEXITSTATUS(status));
action->rc = -WEXITSTATUS(status);
rc = 0;
} else if (WIFSIGNALED(status)) {
crm_err("call %s for %s exited due to signal %d", action->action, action->agent,
WTERMSIG(status));
} else {
crm_err("call %s for %s returned unexpected status %#x",
action->action, action->agent, status);
}
}
fail:
if (p_read_fd >= 0) {
close(p_read_fd);
}
if (p_write_fd >= 0) {
close(p_write_fd);
}
if (p_stderr_fd >= 0) {
close(p_stderr_fd);
}
if (c_read_fd >= 0) {
close(c_read_fd);
}
if (c_write_fd >= 0) {
close(c_write_fd);
}
if (c_stderr_fd >= 0) {
close(c_stderr_fd);
}
return rc;
}
GPid
stonith_action_execute_async(stonith_action_t * action,
void *userdata,
void (*done) (GPid pid, int rc, const char *output,
gpointer user_data))
{
int rc = 0;
if (!action) {
return -1;
}
action->userdata = userdata;
action->done_cb = done;
action->async = 1;
rc = internal_stonith_action_execute(action);
return rc < 0 ? rc : action->pid;
}
int
stonith_action_execute(stonith_action_t * action, int *agent_result, char **output)
{
int rc = 0;
if (!action) {
return -1;
}
do {
rc = internal_stonith_action_execute(action);
if (rc == pcmk_ok) {
/* success! */
break;
}
/* keep retrying while we have time left */
} while (update_remaining_timeout(action));
if (rc) {
/* error */
return rc;
}
if (agent_result) {
*agent_result = action->rc;
}
if (output) {
*output = action->output;
action->output = NULL; /* handed it off, do not free */
}
stonith_action_destroy(action);
return rc;
}
static int
stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace,
stonith_key_value_t ** devices, int timeout)
{
int count = 0;
if (devices == NULL) {
crm_err("Parameter error: stonith_api_device_list");
return -EFAULT;
}
/* Include Heartbeat agents */
if (namespace == NULL || safe_str_eq("heartbeat", namespace)) {
#if HAVE_STONITH_STONITH_H
static gboolean need_init = TRUE;
char **entry = NULL;
char **type_list = NULL;
static char **(*type_list_fn) (void) = NULL;
static void (*type_free_fn) (char **) = NULL;
if (need_init) {
need_init = FALSE;
type_list_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_types", FALSE);
type_free_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_free_hostlist",
FALSE);
}
if (type_list_fn) {
type_list = (*type_list_fn) ();
}
for (entry = type_list; entry != NULL && *entry; ++entry) {
crm_trace("Added: %s", *entry);
*devices = stonith_key_value_add(*devices, NULL, *entry);
count++;
}
if (type_list && type_free_fn) {
(*type_free_fn) (type_list);
}
#else
if (namespace != NULL) {
return -EINVAL; /* Heartbeat agents not supported */
}
#endif
}
/* Include Red Hat agents, basically: ls -1 @sbin_dir@/fence_* */
if (namespace == NULL || safe_str_eq("redhat", namespace)) {
struct dirent **namelist;
int file_num = scandir(RH_STONITH_DIR, &namelist, 0, alphasort);
if (file_num > 0) {
struct stat prop;
char buffer[FILENAME_MAX + 1];
while (file_num--) {
if ('.' == namelist[file_num]->d_name[0]) {
free(namelist[file_num]);
continue;
- } else if (0 != strncmp(RH_STONITH_PREFIX,
- namelist[file_num]->d_name, strlen(RH_STONITH_PREFIX))) {
+ } else if (!crm_starts_with(namelist[file_num]->d_name,
+ RH_STONITH_PREFIX)) {
free(namelist[file_num]);
continue;
}
snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, namelist[file_num]->d_name);
if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) {
*devices = stonith_key_value_add(*devices, NULL, namelist[file_num]->d_name);
count++;
}
free(namelist[file_num]);
}
free(namelist);
}
}
return count;
}
#if HAVE_STONITH_STONITH_H
static inline char *
strdup_null(const char *val)
{
if (val) {
return strdup(val);
}
return NULL;
}
static void
stonith_plugin(int priority, const char *fmt, ...) __attribute__((__format__ (__printf__, 2, 3)));
static void
stonith_plugin(int priority, const char *format, ...)
{
int err = errno;
va_list ap;
int len = 0;
char *string = NULL;
va_start(ap, format);
len = vasprintf (&string, format, ap);
CRM_ASSERT(len > 0);
do_crm_log_alias(priority, __FILE__, __func__, __LINE__, "%s", string);
free(string);
errno = err;
}
#endif
static int
stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
const char *namespace, char **output, int timeout)
{
int rc = 0;
char *buffer = NULL;
const char *provider = get_stonith_provider(agent, namespace);
crm_trace("looking up %s/%s metadata", agent, provider);
/* By having this in a library, we can access it from stonith_admin
* when neither lrmd or stonith-ng are running
* Important for the crm shell's validations...
*/
if (safe_str_eq(provider, "redhat")) {
stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, 5, NULL, NULL);
int exec_rc = stonith_action_execute(action, &rc, &buffer);
xmlNode *xml = NULL;
xmlNode *actions = NULL;
xmlXPathObject *xpathObj = NULL;
if (exec_rc < 0 || rc != 0 || buffer == NULL) {
crm_warn("Could not obtain metadata for %s", agent);
crm_debug("Query failed: %d %d: %s", exec_rc, rc, crm_str(buffer));
free(buffer); /* Just in case */
return -EINVAL;
}
xml = string2xml(buffer);
if(xml == NULL) {
crm_warn("Metadata for %s is invalid", agent);
free(buffer);
return -EINVAL;
}
xpathObj = xpath_search(xml, "//actions");
if (numXpathResults(xpathObj) > 0) {
actions = getXpathResult(xpathObj, 0);
}
freeXpathObject(xpathObj);
/* Now fudge the metadata so that the start/stop actions appear */
xpathObj = xpath_search(xml, "//action[@name='stop']");
if (numXpathResults(xpathObj) <= 0) {
xmlNode *tmp = NULL;
tmp = create_xml_node(actions, "action");
crm_xml_add(tmp, "name", "stop");
crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S);
tmp = create_xml_node(actions, "action");
crm_xml_add(tmp, "name", "start");
crm_xml_add(tmp, "timeout", CRM_DEFAULT_OP_TIMEOUT_S);
}
freeXpathObject(xpathObj);
/* Now fudge the metadata so that the port isn't required in the configuration */
xpathObj = xpath_search(xml, "//parameter[@name='port']");
if (numXpathResults(xpathObj) > 0) {
/* We'll fill this in */
xmlNode *tmp = getXpathResult(xpathObj, 0);
crm_xml_add(tmp, "required", "0");
}
freeXpathObject(xpathObj);
free(buffer);
buffer = dump_xml_formatted_with_text(xml);
free_xml(xml);
if (!buffer) {
return -EINVAL;
}
} else {
#if !HAVE_STONITH_STONITH_H
return -EINVAL; /* Heartbeat agents not supported */
#else
int bufferlen = 0;
static const char *no_parameter_info = "<!-- no value -->";
Stonith *stonith_obj = NULL;
static gboolean need_init = TRUE;
static Stonith *(*st_new_fn) (const char *) = NULL;
static const char *(*st_info_fn) (Stonith *, int) = NULL;
static void (*st_del_fn) (Stonith *) = NULL;
static void (*st_log_fn) (Stonith *, PILLogFun) = NULL;
if (need_init) {
need_init = FALSE;
st_new_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE);
st_del_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete",
FALSE);
st_log_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_set_log",
FALSE);
st_info_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_get_info",
FALSE);
}
if (lha_agents_lib && st_new_fn && st_del_fn && st_info_fn && st_log_fn) {
char *xml_meta_longdesc = NULL;
char *xml_meta_shortdesc = NULL;
char *meta_param = NULL;
char *meta_longdesc = NULL;
char *meta_shortdesc = NULL;
stonith_obj = (*st_new_fn) (agent);
if (stonith_obj) {
(*st_log_fn) (stonith_obj, (PILLogFun) & stonith_plugin);
meta_longdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEDESCR));
if (meta_longdesc == NULL) {
crm_warn("no long description in %s's metadata.", agent);
meta_longdesc = strdup(no_parameter_info);
}
meta_shortdesc = strdup_null((*st_info_fn) (stonith_obj, ST_DEVICEID));
if (meta_shortdesc == NULL) {
crm_warn("no short description in %s's metadata.", agent);
meta_shortdesc = strdup(no_parameter_info);
}
meta_param = strdup_null((*st_info_fn) (stonith_obj, ST_CONF_XML));
if (meta_param == NULL) {
crm_warn("no list of parameters in %s's metadata.", agent);
meta_param = strdup(no_parameter_info);
}
(*st_del_fn) (stonith_obj);
} else {
return -EINVAL; /* Heartbeat agents not supported */
}
xml_meta_longdesc =
(char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_longdesc);
xml_meta_shortdesc =
(char *)xmlEncodeEntitiesReentrant(NULL, (const unsigned char *)meta_shortdesc);
bufferlen = strlen(META_TEMPLATE) + strlen(agent)
+ strlen(xml_meta_longdesc) + strlen(xml_meta_shortdesc)
+ strlen(meta_param) + 1;
buffer = calloc(1, bufferlen);
snprintf(buffer, bufferlen - 1, META_TEMPLATE,
agent, xml_meta_longdesc, xml_meta_shortdesc, meta_param);
xmlFree(xml_meta_longdesc);
xmlFree(xml_meta_shortdesc);
free(meta_shortdesc);
free(meta_longdesc);
free(meta_param);
}
#endif
}
if (output) {
*output = buffer;
} else {
free(buffer);
}
return rc;
}
static int
stonith_api_query(stonith_t * stonith, int call_options, const char *target,
stonith_key_value_t ** devices, int timeout)
{
int rc = 0, lpc = 0, max = 0;
xmlNode *data = NULL;
xmlNode *output = NULL;
xmlXPathObjectPtr xpathObj = NULL;
CRM_CHECK(devices != NULL, return -EINVAL);
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, target);
crm_xml_add(data, F_STONITH_ACTION, "off");
rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout);
if (rc < 0) {
return rc;
}
xpathObj = xpath_search(output, "//@agent");
if (xpathObj) {
max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match != NULL) {
xmlChar *match_path = xmlGetNodePath(match);
crm_info("%s[%d] = %s", "//@agent", lpc, match_path);
free(match_path);
*devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID));
}
}
freeXpathObject(xpathObj);
}
free_xml(output);
free_xml(data);
return max;
}
static int
stonith_api_call(stonith_t * stonith,
int call_options,
const char *id,
const char *action, const char *victim, int timeout, xmlNode ** output)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, F_STONITH_DEVICE);
crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__);
crm_xml_add(data, F_STONITH_DEVICE, id);
crm_xml_add(data, F_STONITH_ACTION, action);
crm_xml_add(data, F_STONITH_TARGET, victim);
rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout);
free_xml(data);
return rc;
}
static int
stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info,
int timeout)
{
int rc;
xmlNode *output = NULL;
rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output);
if (output && list_info) {
const char *list_str;
list_str = crm_element_value(output, "st_output");
if (list_str) {
*list_info = strdup(list_str);
}
}
if (output) {
free_xml(output);
}
return rc;
}
static int
stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout)
{
return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL);
}
static int
stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port,
int timeout)
{
return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL);
}
static int
stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action,
int timeout, int tolerance)
{
int rc = 0;
xmlNode *data = NULL;
data = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
crm_xml_add(data, F_STONITH_ACTION, action);
crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout);
crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance);
rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout);
free_xml(data);
return rc;
}
static int
stonith_api_confirm(stonith_t * stonith, int call_options, const char *target)
{
return stonith_api_fence(stonith, call_options | st_opt_manual_ack, target, "off", 0, 0);
}
static int
stonith_api_history(stonith_t * stonith, int call_options, const char *node,
stonith_history_t ** history, int timeout)
{
int rc = 0;
xmlNode *data = NULL;
xmlNode *output = NULL;
stonith_history_t *last = NULL;
*history = NULL;
if (node) {
data = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(data, F_STONITH_TARGET, node);
}
rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output,
call_options | st_opt_sync_call, timeout);
free_xml(data);
if (rc == 0) {
xmlNode *op = NULL;
xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_ERR);
for (op = __xml_first_child(reply); op != NULL; op = __xml_next(op)) {
stonith_history_t *kvp;
kvp = calloc(1, sizeof(stonith_history_t));
kvp->target = crm_element_value_copy(op, F_STONITH_TARGET);
kvp->action = crm_element_value_copy(op, F_STONITH_ACTION);
kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN);
kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE);
kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME);
crm_element_value_int(op, F_STONITH_DATE, &kvp->completed);
crm_element_value_int(op, F_STONITH_STATE, &kvp->state);
if (last) {
last->next = kvp;
} else {
*history = kvp;
}
last = kvp;
}
}
return rc;
}
gboolean
is_redhat_agent(const char *agent)
{
int rc = 0;
struct stat prop;
char buffer[FILENAME_MAX + 1];
snprintf(buffer, FILENAME_MAX, "%s/%s", RH_STONITH_DIR, agent);
rc = stat(buffer, &prop);
if (rc >= 0 && S_ISREG(prop.st_mode)) {
return TRUE;
}
return FALSE;
}
const char *
get_stonith_provider(const char *agent, const char *provider)
{
/* This function sucks */
if (is_redhat_agent(agent)) {
return "redhat";
#if HAVE_STONITH_STONITH_H
} else {
Stonith *stonith_obj = NULL;
static gboolean need_init = TRUE;
static Stonith *(*st_new_fn) (const char *) = NULL;
static void (*st_del_fn) (Stonith *) = NULL;
if (need_init) {
need_init = FALSE;
st_new_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_new", FALSE);
st_del_fn =
find_library_function(&lha_agents_lib, LHA_STONITH_LIBRARY, "stonith_delete",
FALSE);
}
if (lha_agents_lib && st_new_fn && st_del_fn) {
stonith_obj = (*st_new_fn) (agent);
if (stonith_obj) {
(*st_del_fn) (stonith_obj);
return "heartbeat";
}
}
#endif
}
if (safe_str_eq(provider, "internal")) {
return provider;
} else {
crm_err("No such device: %s", agent);
return NULL;
}
}
static gint
stonithlib_GCompareFunc(gconstpointer a, gconstpointer b)
{
int rc = 0;
const stonith_notify_client_t *a_client = a;
const stonith_notify_client_t *b_client = b;
CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0);
rc = strcmp(a_client->event, b_client->event);
if (rc == 0) {
if (a_client->notify == NULL || b_client->notify == NULL) {
return 0;
} else if (a_client->notify == b_client->notify) {
return 0;
} else if (((long)a_client->notify) < ((long)b_client->notify)) {
crm_err("callbacks for %s are not equal: %p vs. %p",
a_client->event, a_client->notify, b_client->notify);
return -1;
}
crm_err("callbacks for %s are not equal: %p vs. %p",
a_client->event, a_client->notify, b_client->notify);
return 1;
}
return rc;
}
xmlNode *
stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options)
{
xmlNode *op_msg = create_xml_node(NULL, "stonith_command");
CRM_CHECK(op_msg != NULL, return NULL);
CRM_CHECK(token != NULL, return NULL);
crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command");
crm_xml_add(op_msg, F_TYPE, T_STONITH_NG);
crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token);
crm_xml_add(op_msg, F_STONITH_OPERATION, op);
crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id);
crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options);
crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options);
if (data != NULL) {
add_message_xml(op_msg, F_STONITH_CALLDATA, data);
}
return op_msg;
}
static void
stonith_destroy_op_callback(gpointer data)
{
stonith_callback_client_t *blob = data;
if (blob->timer && blob->timer->ref > 0) {
g_source_remove(blob->timer->ref);
}
free(blob->timer);
free(blob);
}
static int
stonith_api_signoff(stonith_t * stonith)
{
stonith_private_t *native = stonith->private;
crm_debug("Signing out of the STONITH Service");
if (native->source != NULL) {
/* Attached to mainloop */
mainloop_del_ipc_client(native->source);
native->source = NULL;
native->ipc = NULL;
} else if (native->ipc) {
/* Not attached to mainloop */
crm_ipc_t *ipc = native->ipc;
native->ipc = NULL;
crm_ipc_close(ipc);
crm_ipc_destroy(ipc);
}
free(native->token); native->token = NULL;
stonith->state = stonith_disconnected;
return pcmk_ok;
}
static int
stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
{
int rc = pcmk_ok;
stonith_private_t *native = stonith->private;
static struct ipc_client_callbacks st_callbacks = {
.dispatch = stonith_dispatch_internal,
.destroy = stonith_connection_destroy
};
crm_trace("Connecting command channel");
stonith->state = stonith_connected_command;
if (stonith_fd) {
/* No mainloop */
native->ipc = crm_ipc_new("stonith-ng", 0);
if (native->ipc && crm_ipc_connect(native->ipc)) {
*stonith_fd = crm_ipc_get_fd(native->ipc);
} else if (native->ipc) {
crm_perror(LOG_ERR, "Connection to STONITH manager failed");
rc = -ENOTCONN;
}
} else {
/* With mainloop */
native->source =
mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks);
native->ipc = mainloop_get_ipc_client(native->source);
}
if (native->ipc == NULL) {
crm_debug("Could not connect to the Stonith API");
rc = -ENOTCONN;
}
if (rc == pcmk_ok) {
xmlNode *reply = NULL;
xmlNode *hello = create_xml_node(NULL, "stonith_command");
crm_xml_add(hello, F_TYPE, T_STONITH_NG);
crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER);
crm_xml_add(hello, F_STONITH_CLIENTNAME, name);
rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc);
rc = -ECOMM;
} else if (reply == NULL) {
crm_err("Did not receive registration reply");
rc = -EPROTO;
} else {
const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION);
const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID);
if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
crm_err("Invalid registration message: %s", msg_type);
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else if (tmp_ticket == NULL) {
crm_err("No registration token provided");
crm_log_xml_err(reply, "Bad reply");
rc = -EPROTO;
} else {
crm_trace("Obtained registration token: %s", tmp_ticket);
native->token = strdup(tmp_ticket);
rc = pcmk_ok;
}
}
free_xml(reply);
free_xml(hello);
}
if (rc == pcmk_ok) {
#if HAVE_MSGFROMIPC_TIMEOUT
stonith->call_timeout = MAX_IPC_DELAY;
#endif
crm_debug("Connection to STONITH successful");
return pcmk_ok;
}
crm_debug("Connection to STONITH failed: %s", pcmk_strerror(rc));
stonith->cmds->disconnect(stonith);
return rc;
}
static int
stonith_set_notification(stonith_t * stonith, const char *callback, int enabled)
{
int rc = pcmk_ok;
xmlNode *notify_msg = create_xml_node(NULL, __FUNCTION__);
stonith_private_t *native = stonith->private;
if (stonith->state != stonith_disconnected) {
crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY);
if (enabled) {
crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback);
} else {
crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback);
}
rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL);
if (rc < 0) {
crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc);
rc = -ECOMM;
} else {
rc = pcmk_ok;
}
}
free_xml(notify_msg);
return rc;
}
static int
stonith_api_add_notification(stonith_t * stonith, const char *event,
void (*callback) (stonith_t * stonith, stonith_event_t * e))
{
GList *list_item = NULL;
stonith_notify_client_t *new_client = NULL;
stonith_private_t *private = NULL;
private = stonith->private;
crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list));
new_client = calloc(1, sizeof(stonith_notify_client_t));
new_client->event = event;
new_client->notify = callback;
list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc);
if (list_item != NULL) {
crm_warn("Callback already present");
free(new_client);
return -ENOTUNIQ;
} else {
private->notify_list = g_list_append(private->notify_list, new_client);
stonith_set_notification(stonith, event, 1);
crm_trace("Callback added (%d)", g_list_length(private->notify_list));
}
return pcmk_ok;
}
static int
stonith_api_del_notification(stonith_t * stonith, const char *event)
{
GList *list_item = NULL;
stonith_notify_client_t *new_client = NULL;
stonith_private_t *private = NULL;
crm_debug("Removing callback for %s events", event);
private = stonith->private;
new_client = calloc(1, sizeof(stonith_notify_client_t));
new_client->event = event;
new_client->notify = NULL;
list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc);
stonith_set_notification(stonith, event, 0);
if (list_item != NULL) {
stonith_notify_client_t *list_client = list_item->data;
private->notify_list = g_list_remove(private->notify_list, list_client);
free(list_client);
crm_trace("Removed callback");
} else {
crm_trace("Callback not present");
}
free(new_client);
return pcmk_ok;
}
static gboolean
stonith_async_timeout_handler(gpointer data)
{
struct timer_rec_s *timer = data;
crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout);
stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME);
/* Always return TRUE, never remove the handler
* We do that in stonith_del_callback()
*/
return TRUE;
}
static void
set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id,
int timeout)
{
struct timer_rec_s *async_timer = callback->timer;
if (timeout <= 0) {
return;
}
if (!async_timer) {
async_timer = calloc(1, sizeof(struct timer_rec_s));
callback->timer = async_timer;
}
async_timer->stonith = stonith;
async_timer->call_id = call_id;
/* Allow a fair bit of grace to allow the server to tell us of a timeout
* This is only a fallback
*/
async_timer->timeout = (timeout + 60) * 1000;
if (async_timer->ref) {
g_source_remove(async_timer->ref);
}
async_timer->ref =
g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer);
}
static void
update_callback_timeout(int call_id, int timeout, stonith_t * st)
{
stonith_callback_client_t *callback = NULL;
stonith_private_t *private = st->private;
callback = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
if (!callback || !callback->allow_timeout_updates) {
return;
}
set_callback_timeout(callback, st, call_id, timeout);
}
static void
invoke_callback(stonith_t * st, int call_id, int rc, void *userdata,
void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
stonith_callback_data_t data = { 0, };
data.call_id = call_id;
data.rc = rc;
data.userdata = userdata;
callback(st, &data);
}
static int
stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options,
void *user_data, const char *callback_name,
void (*callback) (stonith_t * st, stonith_callback_data_t * data))
{
stonith_callback_client_t *blob = NULL;
stonith_private_t *private = NULL;
CRM_CHECK(stonith != NULL, return -EINVAL);
CRM_CHECK(stonith->private != NULL, return -EINVAL);
private = stonith->private;
if (call_id == 0) {
private->op_callback = callback;
} else if (call_id < 0) {
if (!(options & st_opt_report_only_success)) {
crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id));
invoke_callback(stonith, call_id, call_id, user_data, callback);
} else {
crm_warn("STONITH call failed: %s", pcmk_strerror(call_id));
}
return FALSE;
}
blob = calloc(1, sizeof(stonith_callback_client_t));
blob->id = callback_name;
blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE;
blob->user_data = user_data;
blob->callback = callback;
blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE;
if (timeout > 0) {
set_callback_timeout(blob, stonith, call_id, timeout);
}
g_hash_table_insert(private->stonith_op_callback_table, GINT_TO_POINTER(call_id), blob);
crm_trace("Added callback to %s for call %d", callback_name, call_id);
return TRUE;
}
static int
stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks)
{
stonith_private_t *private = stonith->private;
if (all_callbacks) {
private->op_callback = NULL;
g_hash_table_destroy(private->stonith_op_callback_table);
private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
NULL,
stonith_destroy_op_callback);
} else if (call_id == 0) {
private->op_callback = NULL;
} else {
g_hash_table_remove(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
}
return pcmk_ok;
}
static void
stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data)
{
int call = GPOINTER_TO_INT(key);
stonith_callback_client_t *blob = value;
crm_debug("Call %d (%s): pending", call, crm_str(blob->id));
}
void
stonith_dump_pending_callbacks(stonith_t * stonith)
{
stonith_private_t *private = stonith->private;
if (private->stonith_op_callback_table == NULL) {
return;
}
return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL);
}
void
stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc)
{
stonith_private_t *private = NULL;
stonith_callback_client_t *blob = NULL;
stonith_callback_client_t local_blob;
CRM_CHECK(stonith != NULL, return);
CRM_CHECK(stonith->private != NULL, return);
private = stonith->private;
local_blob.id = NULL;
local_blob.callback = NULL;
local_blob.user_data = NULL;
local_blob.only_success = FALSE;
if (msg != NULL) {
crm_element_value_int(msg, F_STONITH_RC, &rc);
crm_element_value_int(msg, F_STONITH_CALLID, &call_id);
}
CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result"));
blob = g_hash_table_lookup(private->stonith_op_callback_table, GINT_TO_POINTER(call_id));
if (blob != NULL) {
local_blob = *blob;
blob = NULL;
stonith_api_del_callback(stonith, call_id, FALSE);
} else {
crm_trace("No callback found for call %d", call_id);
local_blob.callback = NULL;
}
if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) {
crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id);
invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback);
} else if (private->op_callback == NULL && rc != pcmk_ok) {
crm_warn("STONITH command failed: %s", pcmk_strerror(rc));
crm_log_xml_debug(msg, "Failed STONITH Update");
}
if (private->op_callback != NULL) {
crm_trace("Invoking global callback for call %d", call_id);
invoke_callback(stonith, call_id, rc, NULL, private->op_callback);
}
crm_trace("OP callback activated.");
}
/*
<notify t="st_notify" subt="st_device_register" st_op="st_device_register" st_rc="0" >
<st_calldata >
<stonith_command t="stonith-ng" st_async_id="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_op="st_device_register" st_callid="2" st_callopt="4096" st_timeout="0" st_clientid="088fb640-431a-48b9-b2fc-c4ff78d0a2d9" st_clientname="stonith-test" >
<st_calldata >
<st_device_id id="test-id" origin="create_device_registration_xml" agent="fence_virsh" namespace="stonith-ng" >
<attributes ipaddr="localhost" pcmk-portmal="some-host=pcmk-1 pcmk-3=3,4" login="root" identity_file="/root/.ssh/id_dsa" />
</st_device_id>
</st_calldata>
</stonith_command>
</st_calldata>
</notify>
<notify t="st_notify" subt="st_notify_fence" st_op="st_notify_fence" st_rc="0" >
<st_calldata >
<st_notify_fence st_rc="0" st_target="some-host" st_op="st_fence" st_delegate="test-id" st_origin="61dd7759-e229-4be7-b1f8-ef49dd14d9f0" />
</st_calldata>
</notify>
*/
static stonith_event_t *
xml_to_event(xmlNode * msg)
{
stonith_event_t *event = calloc(1, sizeof(stonith_event_t));
const char *ntype = crm_element_value(msg, F_SUBTYPE);
char *data_addr = crm_strdup_printf("//%s", ntype);
xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG);
crm_log_xml_trace(msg, "stonith_notify");
crm_element_value_int(msg, F_STONITH_RC, &(event->result));
if (safe_str_eq(ntype, T_STONITH_NOTIFY_FENCE)) {
event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION);
if (data) {
event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN);
event->action = crm_element_value_copy(data, F_STONITH_ACTION);
event->target = crm_element_value_copy(data, F_STONITH_TARGET);
event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE);
event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID);
event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME);
event->device = crm_element_value_copy(data, F_STONITH_DEVICE);
} else {
crm_err("No data for %s event", ntype);
crm_log_xml_notice(msg, "BadEvent");
}
}
free(data_addr);
return event;
}
static void
event_free(stonith_event_t * event)
{
free(event->id);
free(event->type);
free(event->message);
free(event->operation);
free(event->origin);
free(event->action);
free(event->target);
free(event->executioner);
free(event->device);
free(event->client_origin);
free(event);
}
static void
stonith_send_notification(gpointer data, gpointer user_data)
{
struct notify_blob_s *blob = user_data;
stonith_notify_client_t *entry = data;
stonith_event_t *st_event = NULL;
const char *event = NULL;
if (blob->xml == NULL) {
crm_warn("Skipping callback - NULL message");
return;
}
event = crm_element_value(blob->xml, F_SUBTYPE);
if (entry == NULL) {
crm_warn("Skipping callback - NULL callback client");
return;
} else if (entry->notify == NULL) {
crm_warn("Skipping callback - NULL callback");
return;
} else if (safe_str_neq(entry->event, event)) {
crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event);
return;
}
st_event = xml_to_event(blob->xml);
crm_trace("Invoking callback for %p/%s event...", entry, event);
entry->notify(blob->stonith, st_event);
crm_trace("Callback invoked...");
event_free(st_event);
}
int
stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data,
int call_options, int timeout)
{
int rc = 0;
int reply_id = -1;
enum crm_ipc_flags ipc_flags = crm_ipc_flags_none;
xmlNode *op_msg = NULL;
xmlNode *op_reply = NULL;
stonith_private_t *native = stonith->private;
if (stonith->state == stonith_disconnected) {
return -ENOTCONN;
}
if (output_data != NULL) {
*output_data = NULL;
}
if (op == NULL) {
crm_err("No operation specified");
return -EINVAL;
}
if (call_options & st_opt_sync_call) {
ipc_flags |= crm_ipc_client_response;
}
stonith->call_id++;
/* prevent call_id from being negative (or zero) and conflicting
* with the stonith_errors enum
* use 2 because we use it as (stonith->call_id - 1) below
*/
if (stonith->call_id < 1) {
stonith->call_id = 1;
}
CRM_CHECK(native->token != NULL,;
);
op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options);
if (op_msg == NULL) {
return -EINVAL;
}
crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout);
crm_trace("Sending %s message to STONITH service, Timeout: %ds", op, timeout);
rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply);
free_xml(op_msg);
if (rc < 0) {
crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc);
rc = -ECOMM;
goto done;
}
crm_log_xml_trace(op_reply, "Reply");
if (!(call_options & st_opt_sync_call)) {
crm_trace("Async call %d, returning", stonith->call_id);
CRM_CHECK(stonith->call_id != 0, return -EPROTO);
free_xml(op_reply);
return stonith->call_id;
}
rc = pcmk_ok;
crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id);
if (reply_id == stonith->call_id) {
crm_trace("Synchronous reply %d received", reply_id);
if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) {
rc = -ENOMSG;
}
if ((call_options & st_opt_discard_reply) || output_data == NULL) {
crm_trace("Discarding reply");
} else {
*output_data = op_reply;
op_reply = NULL; /* Prevent subsequent free */
}
} else if (reply_id <= 0) {
crm_err("Received bad reply: No id set");
crm_log_xml_err(op_reply, "Bad reply");
free_xml(op_reply);
rc = -ENOMSG;
} else {
crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id);
crm_log_xml_err(op_reply, "Old reply");
free_xml(op_reply);
rc = -ENOMSG;
}
done:
if (crm_ipc_connected(native->ipc) == FALSE) {
crm_err("STONITH disconnected");
stonith->state = stonith_disconnected;
}
free_xml(op_reply);
return rc;
}
/* Not used with mainloop */
bool
stonith_dispatch(stonith_t * st)
{
gboolean stay_connected = TRUE;
stonith_private_t *private = NULL;
CRM_ASSERT(st != NULL);
private = st->private;
while (crm_ipc_ready(private->ipc)) {
if (crm_ipc_read(private->ipc) > 0) {
const char *msg = crm_ipc_buffer(private->ipc);
stonith_dispatch_internal(msg, strlen(msg), st);
}
if (crm_ipc_connected(private->ipc) == FALSE) {
crm_err("Connection closed");
stay_connected = FALSE;
}
}
return stay_connected;
}
int
stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
{
const char *type = NULL;
struct notify_blob_s blob;
stonith_t *st = userdata;
stonith_private_t *private = NULL;
CRM_ASSERT(st != NULL);
private = st->private;
blob.stonith = st;
blob.xml = string2xml(buffer);
if (blob.xml == NULL) {
crm_warn("Received a NULL msg from STONITH service: %s.", buffer);
return 0;
}
/* do callbacks */
type = crm_element_value(blob.xml, F_TYPE);
crm_trace("Activating %s callbacks...", type);
if (safe_str_eq(type, T_STONITH_NG)) {
stonith_perform_callback(st, blob.xml, 0, 0);
} else if (safe_str_eq(type, T_STONITH_NOTIFY)) {
g_list_foreach(private->notify_list, stonith_send_notification, &blob);
} else if (safe_str_eq(type, T_STONITH_TIMEOUT_VALUE)) {
int call_id = 0;
int timeout = 0;
crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout);
crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id);
update_callback_timeout(call_id, timeout, st);
} else {
crm_err("Unknown message type: %s", type);
crm_log_xml_warn(blob.xml, "BadReply");
}
free_xml(blob.xml);
return 1;
}
static int
stonith_api_free(stonith_t * stonith)
{
int rc = pcmk_ok;
crm_trace("Destroying %p", stonith);
if (stonith->state != stonith_disconnected) {
crm_trace("Disconnecting %p first", stonith);
rc = stonith->cmds->disconnect(stonith);
}
if (stonith->state == stonith_disconnected) {
stonith_private_t *private = stonith->private;
crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table));
g_hash_table_destroy(private->stonith_op_callback_table);
crm_trace("Destroying %d notification clients", g_list_length(private->notify_list));
g_list_free_full(private->notify_list, free);
free(stonith->private);
free(stonith->cmds);
free(stonith);
} else {
crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc);
}
return rc;
}
void
stonith_api_delete(stonith_t * stonith)
{
crm_trace("Destroying %p", stonith);
if(stonith) {
stonith->cmds->free(stonith);
}
}
stonith_t *
stonith_api_new(void)
{
stonith_t *new_stonith = NULL;
stonith_private_t *private = NULL;
new_stonith = calloc(1, sizeof(stonith_t));
private = calloc(1, sizeof(stonith_private_t));
new_stonith->private = private;
private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
NULL, stonith_destroy_op_callback);
private->notify_list = NULL;
new_stonith->call_id = 1;
new_stonith->state = stonith_disconnected;
new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t));
/* *INDENT-OFF* */
new_stonith->cmds->free = stonith_api_free;
new_stonith->cmds->connect = stonith_api_signon;
new_stonith->cmds->disconnect = stonith_api_signoff;
new_stonith->cmds->list = stonith_api_list;
new_stonith->cmds->monitor = stonith_api_monitor;
new_stonith->cmds->status = stonith_api_status;
new_stonith->cmds->fence = stonith_api_fence;
new_stonith->cmds->confirm = stonith_api_confirm;
new_stonith->cmds->history = stonith_api_history;
new_stonith->cmds->list_agents = stonith_api_device_list;
new_stonith->cmds->metadata = stonith_api_device_metadata;
new_stonith->cmds->query = stonith_api_query;
new_stonith->cmds->remove_device = stonith_api_remove_device;
new_stonith->cmds->register_device = stonith_api_register_device;
new_stonith->cmds->remove_level = stonith_api_remove_level;
new_stonith->cmds->remove_level_full = stonith_api_remove_level_full;
new_stonith->cmds->register_level = stonith_api_register_level;
new_stonith->cmds->register_level_full = stonith_api_register_level_full;
new_stonith->cmds->remove_callback = stonith_api_del_callback;
new_stonith->cmds->register_callback = stonith_api_add_callback;
new_stonith->cmds->remove_notification = stonith_api_del_notification;
new_stonith->cmds->register_notification = stonith_api_add_notification;
/* *INDENT-ON* */
return new_stonith;
}
stonith_key_value_t *
stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value)
{
stonith_key_value_t *p, *end;
p = calloc(1, sizeof(stonith_key_value_t));
if (key) {
p->key = strdup(key);
}
if (value) {
p->value = strdup(value);
}
end = head;
while (end && end->next) {
end = end->next;
}
if (end) {
end->next = p;
} else {
head = p;
}
return head;
}
void
stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values)
{
stonith_key_value_t *p;
while (head) {
p = head->next;
if (keys) {
free(head->key);
}
if (values) {
free(head->value);
}
free(head);
head = p;
}
}
#define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON)
#define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __FUNCTION__, args)
int
stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
{
char *name = NULL;
const char *action = "reboot";
int rc = -EPROTO;
stonith_t *st = NULL;
enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
api_log_open();
st = stonith_api_new();
if (st) {
rc = st->cmds->connect(st, "stonith-api", NULL);
if(rc != pcmk_ok) {
api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
}
}
if (uname != NULL) {
name = strdup(uname);
} else if (nodeid > 0) {
opts |= st_opt_cs_nodeid;
name = crm_itoa(nodeid);
}
if (off) {
action = "off";
}
if (rc == pcmk_ok) {
rc = st->cmds->fence(st, opts, name, action, timeout, 0);
if(rc != pcmk_ok) {
api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
} else {
api_log(LOG_NOTICE, "Node %u/%s kicked: %s ", nodeid, uname, action);
}
}
if (st) {
st->cmds->disconnect(st);
stonith_api_delete(st);
}
free(name);
return rc;
}
time_t
stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
{
int rc = 0;
char *name = NULL;
time_t when = 0;
stonith_t *st = NULL;
stonith_history_t *history, *hp = NULL;
enum stonith_call_options opts = st_opt_sync_call;
st = stonith_api_new();
if (st) {
rc = st->cmds->connect(st, "stonith-api", NULL);
if(rc != pcmk_ok) {
api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
}
}
if (uname != NULL) {
name = strdup(uname);
} else if (nodeid > 0) {
opts |= st_opt_cs_nodeid;
name = crm_itoa(nodeid);
}
if (st && rc == pcmk_ok) {
int entries = 0;
int progress = 0;
int completed = 0;
rc = st->cmds->history(st, opts, name, &history, 120);
for (hp = history; hp; hp = hp->next) {
entries++;
if (in_progress) {
progress++;
if (hp->state != st_done && hp->state != st_failed) {
when = time(NULL);
}
} else if (hp->state == st_done) {
completed++;
if (hp->completed > when) {
when = hp->completed;
}
}
}
if(rc == pcmk_ok) {
api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed);
} else {
api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc);
}
}
if (st) {
st->cmds->disconnect(st);
stonith_api_delete(st);
}
if(when) {
api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when);
}
free(name);
return when;
}
#if HAVE_STONITH_STONITH_H
# include <pils/plugin.h>
const char *i_hate_pils(int rc);
const char *
i_hate_pils(int rc)
{
return PIL_strerror(rc);
}
#endif
diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c
index b2f7ba774e..bbccb9123c 100644
--- a/lib/pengine/complex.c
+++ b/lib/pengine/complex.c
@@ -1,974 +1,974 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include <crm/msg_xml.h>
#include <unpack.h>
void populate_hash(xmlNode * nvpair_list, GHashTable * hash, const char **attrs, int attrs_length);
resource_object_functions_t resource_class_functions[] = {
{
native_unpack,
native_find_rsc,
native_parameter,
native_print,
native_active,
native_resource_state,
native_location,
native_free
},
{
group_unpack,
native_find_rsc,
native_parameter,
group_print,
group_active,
group_resource_state,
native_location,
group_free
},
{
clone_unpack,
native_find_rsc,
native_parameter,
clone_print,
clone_active,
clone_resource_state,
native_location,
clone_free
},
{
master_unpack,
native_find_rsc,
native_parameter,
clone_print,
clone_active,
clone_resource_state,
native_location,
clone_free
},
{
container_unpack,
native_find_rsc,
native_parameter,
container_print,
container_active,
container_resource_state,
native_location,
container_free
}
};
enum pe_obj_types
get_resource_type(const char *name)
{
if (safe_str_eq(name, XML_CIB_TAG_RESOURCE)) {
return pe_native;
} else if (safe_str_eq(name, XML_CIB_TAG_GROUP)) {
return pe_group;
} else if (safe_str_eq(name, XML_CIB_TAG_INCARNATION)) {
return pe_clone;
} else if (safe_str_eq(name, XML_CIB_TAG_MASTER)) {
return pe_master;
} else if (safe_str_eq(name, XML_CIB_TAG_CONTAINER)) {
return pe_container;
}
return pe_unknown;
}
const char *
get_resource_typename(enum pe_obj_types type)
{
switch (type) {
case pe_native:
return XML_CIB_TAG_RESOURCE;
case pe_group:
return XML_CIB_TAG_GROUP;
case pe_clone:
return XML_CIB_TAG_INCARNATION;
case pe_master:
return XML_CIB_TAG_MASTER;
case pe_container:
return XML_CIB_TAG_CONTAINER;
case pe_unknown:
return "unknown";
}
return "<unknown>";
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
add_hash_param(user_data, key, value);
}
void
get_meta_attributes(GHashTable * meta_hash, resource_t * rsc,
node_t * node, pe_working_set_t * data_set)
{
GHashTable *node_hash = NULL;
const char *version = crm_element_value(data_set->input, XML_ATTR_CRM_VERSION);
if (node) {
node_hash = node->details->attrs;
}
if (rsc->xml) {
xmlAttrPtr xIter = NULL;
for (xIter = rsc->xml->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = crm_element_value(rsc->xml, prop_name);
add_hash_param(meta_hash, prop_name, prop_value);
}
}
unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_META_SETS, node_hash,
meta_hash, NULL, FALSE, data_set->now);
if(version == NULL || compare_version(version, "3.0.9") < 0) {
/* populate from the regular attributes until the GUI can create
* meta attributes
*/
unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash,
meta_hash, NULL, FALSE, data_set->now);
}
/* set anything else based on the parent */
if (rsc->parent != NULL) {
g_hash_table_foreach(rsc->parent->meta, dup_attr, meta_hash);
}
/* and finally check the defaults */
unpack_instance_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_META_SETS,
node_hash, meta_hash, NULL, FALSE, data_set->now);
}
void
get_rsc_attributes(GHashTable * meta_hash, resource_t * rsc,
node_t * node, pe_working_set_t * data_set)
{
GHashTable *node_hash = NULL;
if (node) {
node_hash = node->details->attrs;
}
unpack_instance_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash,
meta_hash, NULL, FALSE, data_set->now);
/* set anything else based on the parent */
if (rsc->parent != NULL) {
get_rsc_attributes(meta_hash, rsc->parent, node, data_set);
} else {
/* and finally check the defaults */
unpack_instance_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_ATTR_SETS,
node_hash, meta_hash, NULL, FALSE, data_set->now);
}
}
#if ENABLE_VERSIONED_ATTRS
void
pe_get_versioned_attributes(xmlNode * meta_hash, resource_t * rsc,
node_t * node, pe_working_set_t * data_set)
{
GHashTable *node_hash = NULL;
if (node) {
node_hash = node->details->attrs;
}
pe_unpack_versioned_attributes(data_set->input, rsc->xml, XML_TAG_ATTR_SETS, node_hash,
meta_hash, data_set->now);
/* set anything else based on the parent */
if (rsc->parent != NULL) {
pe_get_versioned_attributes(meta_hash, rsc->parent, node, data_set);
} else {
/* and finally check the defaults */
pe_unpack_versioned_attributes(data_set->input, data_set->rsc_defaults, XML_TAG_ATTR_SETS,
node_hash, meta_hash, data_set->now);
}
}
#endif
static char *
template_op_key(xmlNode * op)
{
const char *name = crm_element_value(op, "name");
const char *role = crm_element_value(op, "role");
char *key = NULL;
if (role == NULL || crm_str_eq(role, RSC_ROLE_STARTED_S, TRUE)
|| crm_str_eq(role, RSC_ROLE_SLAVE_S, TRUE)) {
role = RSC_ROLE_UNKNOWN_S;
}
key = crm_concat(name, role, '-');
return key;
}
static gboolean
unpack_template(xmlNode * xml_obj, xmlNode ** expanded_xml, pe_working_set_t * data_set)
{
xmlNode *cib_resources = NULL;
xmlNode *template = NULL;
xmlNode *new_xml = NULL;
xmlNode *child_xml = NULL;
xmlNode *rsc_ops = NULL;
xmlNode *template_ops = NULL;
const char *template_ref = NULL;
const char *clone = NULL;
const char *id = NULL;
if (xml_obj == NULL) {
pe_err("No resource object for template unpacking");
return FALSE;
}
template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE);
if (template_ref == NULL) {
return TRUE;
}
id = ID(xml_obj);
if (id == NULL) {
pe_err("'%s' object must have a id", crm_element_name(xml_obj));
return FALSE;
}
if (crm_str_eq(template_ref, id, TRUE)) {
pe_err("The resource object '%s' should not reference itself", id);
return FALSE;
}
cib_resources = get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE);
if (cib_resources == NULL) {
pe_err("No resources configured");
return FALSE;
}
template = find_entity(cib_resources, XML_CIB_TAG_RSC_TEMPLATE, template_ref);
if (template == NULL) {
pe_err("No template named '%s'", template_ref);
return FALSE;
}
new_xml = copy_xml(template);
xmlNodeSetName(new_xml, xml_obj->name);
crm_xml_replace(new_xml, XML_ATTR_ID, id);
clone = crm_element_value(xml_obj, XML_RSC_ATTR_INCARNATION);
if(clone) {
crm_xml_add(new_xml, XML_RSC_ATTR_INCARNATION, clone);
}
template_ops = find_xml_node(new_xml, "operations", FALSE);
for (child_xml = __xml_first_child(xml_obj); child_xml != NULL;
child_xml = __xml_next_element(child_xml)) {
xmlNode *new_child = NULL;
new_child = add_node_copy(new_xml, child_xml);
if (crm_str_eq((const char *)new_child->name, "operations", TRUE)) {
rsc_ops = new_child;
}
}
if (template_ops && rsc_ops) {
xmlNode *op = NULL;
GHashTable *rsc_ops_hash =
g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, NULL);
for (op = __xml_first_child(rsc_ops); op != NULL; op = __xml_next_element(op)) {
char *key = template_op_key(op);
g_hash_table_insert(rsc_ops_hash, key, op);
}
for (op = __xml_first_child(template_ops); op != NULL; op = __xml_next_element(op)) {
char *key = template_op_key(op);
if (g_hash_table_lookup(rsc_ops_hash, key) == NULL) {
add_node_copy(rsc_ops, op);
}
free(key);
}
if (rsc_ops_hash) {
g_hash_table_destroy(rsc_ops_hash);
}
free_xml(template_ops);
}
/*free_xml(*expanded_xml); */
*expanded_xml = new_xml;
/* Disable multi-level templates for now */
/*if(unpack_template(new_xml, expanded_xml, data_set) == FALSE) {
free_xml(*expanded_xml);
*expanded_xml = NULL;
return FALSE;
} */
return TRUE;
}
static gboolean
add_template_rsc(xmlNode * xml_obj, pe_working_set_t * data_set)
{
const char *template_ref = NULL;
const char *id = NULL;
if (xml_obj == NULL) {
pe_err("No resource object for processing resource list of template");
return FALSE;
}
template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE);
if (template_ref == NULL) {
return TRUE;
}
id = ID(xml_obj);
if (id == NULL) {
pe_err("'%s' object must have a id", crm_element_name(xml_obj));
return FALSE;
}
if (crm_str_eq(template_ref, id, TRUE)) {
pe_err("The resource object '%s' should not reference itself", id);
return FALSE;
}
if (add_tag_ref(data_set->template_rsc_sets, template_ref, id) == FALSE) {
return FALSE;
}
return TRUE;
}
static void
handle_rsc_isolation(resource_t *rsc)
{
resource_t *top = uber_parent(rsc);
resource_t *iso = rsc;
const char *wrapper = NULL;
const char *value;
/* check for isolation wrapper mapping if the parent doesn't have one set
* isolation mapping is enabled by default. For safety, we are allowing isolation
* to be disabled by setting the meta attr, isolation=false. */
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_ISOLATION);
if (top->isolation_wrapper == NULL && (value == NULL || crm_is_true(value))) {
if (g_hash_table_lookup(rsc->parameters, "pcmk_docker_image")) {
wrapper = "docker-wrapper";
}
/* add more isolation technologies here as we expand */
} else if (top->isolation_wrapper) {
goto set_rsc_opts;
}
if (wrapper == NULL) {
return;
}
/* if this is a cloned primitive/group, go head and set the isolation wrapper at
* at the clone level. this is really the only sane thing to do in this situation.
* This allows someone to clone an isolated resource without having to shuffle
* around the isolation attributes to the clone parent */
if (top == rsc->parent && pe_rsc_is_clone(top)) {
iso = top;
}
iso->isolation_wrapper = wrapper;
set_bit(top->flags, pe_rsc_unique);
set_rsc_opts:
pe_warn_once(pe_wo_isolation, "Support for 'isolation' resource meta-attribute"
" is deprecated and will be removed in a future release"
" (use bundle syntax instead)");
clear_bit(rsc->flags, pe_rsc_allow_migrate);
set_bit(rsc->flags, pe_rsc_unique);
if (pe_rsc_is_clone(top)) {
add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, XML_BOOLEAN_TRUE);
}
}
static void
check_deprecated_stonith(resource_t *rsc)
{
GHashTableIter iter;
char *key;
g_hash_table_iter_init(&iter, rsc->parameters);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, NULL)) {
- if (!strncmp(key, "pcmk_", 5)) {
+ if (crm_starts_with(key, "pcmk_")) {
char *cmp = key + 5; // the part after "pcmk_"
if (!strcmp(cmp, "poweroff_action")) {
pe_warn_once(pe_wo_poweroff,
"Support for the 'pcmk_poweroff_action' stonith resource parameter"
" is deprecated and will be removed in a future version"
" (use 'pcmk_off_action' instead)");
} else if (!strcmp(cmp, "arg_map")) {
pe_warn_once(pe_wo_arg_map,
"Support for the 'pcmk_arg_map' stonith resource parameter"
" is deprecated and will be removed in a future version"
" (use 'pcmk_host_argument' instead)");
} else if (crm_ends_with(cmp, "_cmd")) {
pe_warn_once(pe_wo_stonith_cmd,
"Support for the 'pcmk_*_cmd' stonith resource parameters"
" is deprecated and will be removed in a future version"
" (use 'pcmk_*_action' instead)");
}
}
}
}
gboolean
common_unpack(xmlNode * xml_obj, resource_t ** rsc,
resource_t * parent, pe_working_set_t * data_set)
{
bool isdefault = FALSE;
xmlNode *expanded_xml = NULL;
xmlNode *ops = NULL;
resource_t *top = NULL;
const char *value = NULL;
const char *rclass = NULL; /* Look for this after any templates have been expanded */
const char *id = crm_element_value(xml_obj, XML_ATTR_ID);
int container_remote_node = 0;
int baremetal_remote_node = 0;
bool has_versioned_params = FALSE;
crm_log_xml_trace(xml_obj, "Processing resource input...");
if (id == NULL) {
pe_err("Must specify id tag in <resource>");
return FALSE;
} else if (rsc == NULL) {
pe_err("Nowhere to unpack resource into");
return FALSE;
}
if (unpack_template(xml_obj, &expanded_xml, data_set) == FALSE) {
return FALSE;
}
*rsc = calloc(1, sizeof(resource_t));
(*rsc)->cluster = data_set;
if (expanded_xml) {
crm_log_xml_trace(expanded_xml, "Expanded resource...");
(*rsc)->xml = expanded_xml;
(*rsc)->orig_xml = xml_obj;
} else {
(*rsc)->xml = xml_obj;
(*rsc)->orig_xml = NULL;
}
/* Do not use xml_obj from here on, use (*rsc)->xml in case templates are involved */
rclass = crm_element_value((*rsc)->xml, XML_AGENT_ATTR_CLASS);
(*rsc)->parent = parent;
ops = find_xml_node((*rsc)->xml, "operations", FALSE);
(*rsc)->ops_xml = expand_idref(ops, data_set->input);
(*rsc)->variant = get_resource_type(crm_element_name((*rsc)->xml));
if ((*rsc)->variant == pe_unknown) {
pe_err("Unknown resource type: %s", crm_element_name((*rsc)->xml));
free(*rsc);
return FALSE;
}
(*rsc)->parameters = crm_str_table_new();
#if ENABLE_VERSIONED_ATTRS
(*rsc)->versioned_parameters = create_xml_node(NULL, XML_TAG_RSC_VER_ATTRS);
#endif
(*rsc)->meta = crm_str_table_new();
(*rsc)->allowed_nodes =
g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str);
(*rsc)->known_on = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str);
value = crm_element_value((*rsc)->xml, XML_RSC_ATTR_INCARNATION);
if (value) {
(*rsc)->id = crm_concat(id, value, ':');
add_hash_param((*rsc)->meta, XML_RSC_ATTR_INCARNATION, value);
} else {
(*rsc)->id = strdup(id);
}
(*rsc)->fns = &resource_class_functions[(*rsc)->variant];
pe_rsc_trace((*rsc), "Unpacking resource...");
get_meta_attributes((*rsc)->meta, *rsc, NULL, data_set);
get_rsc_attributes((*rsc)->parameters, *rsc, NULL, data_set);
#if ENABLE_VERSIONED_ATTRS
pe_get_versioned_attributes((*rsc)->versioned_parameters, *rsc, NULL, data_set);
#endif
(*rsc)->flags = 0;
set_bit((*rsc)->flags, pe_rsc_runnable);
set_bit((*rsc)->flags, pe_rsc_provisional);
if (is_set(data_set->flags, pe_flag_is_managed_default)) {
set_bit((*rsc)->flags, pe_rsc_managed);
}
(*rsc)->rsc_cons = NULL;
(*rsc)->rsc_tickets = NULL;
(*rsc)->actions = NULL;
(*rsc)->role = RSC_ROLE_STOPPED;
(*rsc)->next_role = RSC_ROLE_UNKNOWN;
(*rsc)->recovery_type = recovery_stop_start;
(*rsc)->stickiness = data_set->default_resource_stickiness;
(*rsc)->migration_threshold = INFINITY;
(*rsc)->failure_timeout = 0;
value = g_hash_table_lookup((*rsc)->meta, XML_CIB_ATTR_PRIORITY);
(*rsc)->priority = crm_parse_int(value, "0");
(*rsc)->effective_priority = (*rsc)->priority;
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_NOTIFY);
if (crm_is_true(value)) {
set_bit((*rsc)->flags, pe_rsc_notify);
}
if (xml_contains_remote_node((*rsc)->xml)) {
(*rsc)->is_remote_node = TRUE;
if (g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_CONTAINER)) {
container_remote_node = 1;
} else {
baremetal_remote_node = 1;
}
}
value = g_hash_table_lookup((*rsc)->meta, XML_OP_ATTR_ALLOW_MIGRATE);
#if ENABLE_VERSIONED_ATTRS
has_versioned_params = xml_has_children((*rsc)->versioned_parameters);
#endif
if (crm_is_true(value) && has_versioned_params) {
pe_rsc_trace((*rsc), "Migration is disabled for resources with versioned parameters");
} else if (crm_is_true(value)) {
set_bit((*rsc)->flags, pe_rsc_allow_migrate);
} else if ((value == NULL) && baremetal_remote_node && !has_versioned_params) {
/* by default, we want baremetal remote-nodes to be able
* to float around the cluster without having to stop all the
* resources within the remote-node before moving. Allowing
* migration support enables this feature. If this ever causes
* problems, migration support can be explicitly turned off with
* allow-migrate=false.
* We don't support migration for versioned resources, though. */
set_bit((*rsc)->flags, pe_rsc_allow_migrate);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MANAGED);
if (value != NULL && safe_str_neq("default", value)) {
gboolean bool_value = TRUE;
crm_str_to_boolean(value, &bool_value);
if (bool_value == FALSE) {
clear_bit((*rsc)->flags, pe_rsc_managed);
} else {
set_bit((*rsc)->flags, pe_rsc_managed);
}
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MAINTENANCE);
if (value != NULL && safe_str_neq("default", value)) {
gboolean bool_value = FALSE;
crm_str_to_boolean(value, &bool_value);
if (bool_value == TRUE) {
clear_bit((*rsc)->flags, pe_rsc_managed);
set_bit((*rsc)->flags, pe_rsc_maintenance);
}
} else if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
clear_bit((*rsc)->flags, pe_rsc_managed);
set_bit((*rsc)->flags, pe_rsc_maintenance);
}
pe_rsc_trace((*rsc), "Options for %s", (*rsc)->id);
handle_rsc_isolation(*rsc);
top = uber_parent(*rsc);
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE);
if (crm_is_true(value) || pe_rsc_is_clone(top) == FALSE) {
set_bit((*rsc)->flags, pe_rsc_unique);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_RESTART);
if (safe_str_eq(value, "restart")) {
(*rsc)->restart_type = pe_restart_restart;
pe_rsc_trace((*rsc), "\tDependency restart handling: restart");
} else {
(*rsc)->restart_type = pe_restart_ignore;
pe_rsc_trace((*rsc), "\tDependency restart handling: ignore");
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MULTIPLE);
if (safe_str_eq(value, "stop_only")) {
(*rsc)->recovery_type = recovery_stop_only;
pe_rsc_trace((*rsc), "\tMultiple running resource recovery: stop only");
} else if (safe_str_eq(value, "block")) {
(*rsc)->recovery_type = recovery_block;
pe_rsc_trace((*rsc), "\tMultiple running resource recovery: block");
} else {
(*rsc)->recovery_type = recovery_stop_start;
pe_rsc_trace((*rsc), "\tMultiple running resource recovery: stop/start");
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_STICKINESS);
if (value != NULL && safe_str_neq("default", value)) {
(*rsc)->stickiness = char2score(value);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_STICKINESS);
if (value != NULL && safe_str_neq("default", value)) {
(*rsc)->migration_threshold = char2score(value);
} else if (value == NULL) {
/* Make a best-effort guess at a migration threshold for people with 0.6 configs
* try with underscores and hyphens, from both the resource and global defaults section
*/
const char *legacy = NULL;
legacy = g_hash_table_lookup((*rsc)->meta,
"resource-failure-stickiness");
if (legacy == NULL) {
legacy = g_hash_table_lookup((*rsc)->meta,
"resource_failure_stickiness");
}
if (legacy) {
value = legacy;
pe_warn_once(pe_wo_rsc_failstick,
"Support for 'resource-failure-stickiness' resource meta-attribute"
" is deprecated and will be removed in a future release"
" (use 'migration-threshold' resource meta-attribute instead)");
}
legacy = g_hash_table_lookup(data_set->config_hash,
"default-resource-failure-stickiness");
if (legacy == NULL) {
legacy = g_hash_table_lookup(data_set->config_hash,
"default_resource_failure_stickiness");
}
if (legacy) {
if (value == NULL) {
value = legacy;
}
pe_warn_once(pe_wo_default_rscfs,
"Support for 'default-resource-failure-stickiness' cluster option"
" is deprecated and will be removed in a future release"
" (use 'migration-threshold' in rsc_defaults instead)");
}
if (value) {
int fail_sticky = char2score(value);
if (fail_sticky == -INFINITY) {
(*rsc)->migration_threshold = 1;
pe_rsc_info((*rsc),
"Set a migration threshold of %d for %s based on a failure-stickiness of %s",
(*rsc)->migration_threshold, (*rsc)->id, value);
} else if ((*rsc)->stickiness != 0 && fail_sticky != 0) {
(*rsc)->migration_threshold = (*rsc)->stickiness / fail_sticky;
if ((*rsc)->migration_threshold < 0) {
/* Make sure it's positive */
(*rsc)->migration_threshold = 0 - (*rsc)->migration_threshold;
}
(*rsc)->migration_threshold += 1;
pe_rsc_info((*rsc),
"Calculated a migration threshold for %s of %d based on a stickiness of %d/%s",
(*rsc)->id, (*rsc)->migration_threshold, (*rsc)->stickiness, value);
}
}
}
if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) {
set_bit(data_set->flags, pe_flag_have_stonith_resource);
set_bit((*rsc)->flags, pe_rsc_fence_device);
check_deprecated_stonith(*rsc);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_REQUIRES);
handle_requires_pref:
if (safe_str_eq(value, "nothing")) {
} else if (safe_str_eq(value, "quorum")) {
set_bit((*rsc)->flags, pe_rsc_needs_quorum);
} else if (safe_str_eq(value, "unfencing")) {
if (is_set((*rsc)->flags, pe_rsc_fence_device)) {
crm_config_warn("%s is a fencing device but requires (un)fencing", (*rsc)->id);
value = "quorum";
isdefault = TRUE;
goto handle_requires_pref;
} else if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
crm_config_warn("%s requires (un)fencing but fencing is disabled", (*rsc)->id);
value = "quorum";
isdefault = TRUE;
goto handle_requires_pref;
} else {
set_bit((*rsc)->flags, pe_rsc_needs_fencing);
set_bit((*rsc)->flags, pe_rsc_needs_unfencing);
}
} else if (safe_str_eq(value, "fencing")) {
set_bit((*rsc)->flags, pe_rsc_needs_fencing);
if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
crm_config_warn("%s requires fencing but fencing is disabled", (*rsc)->id);
}
} else {
if (value) {
crm_config_err("Invalid value for %s->requires: %s%s",
(*rsc)->id, value,
is_set(data_set->flags, pe_flag_stonith_enabled) ? "" : " (stonith-enabled=false)");
}
isdefault = TRUE;
if(is_set((*rsc)->flags, pe_rsc_fence_device)) {
value = "quorum";
} else if (is_set(data_set->flags, pe_flag_enable_unfencing)) {
value = "unfencing";
} else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
value = "fencing";
} else if (data_set->no_quorum_policy == no_quorum_ignore) {
value = "nothing";
} else {
value = "quorum";
}
goto handle_requires_pref;
}
pe_rsc_trace((*rsc), "\tRequired to start: %s%s", value, isdefault?" (default)":"");
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_FAIL_TIMEOUT);
if (value != NULL) {
/* call crm_get_msec() and convert back to seconds */
(*rsc)->failure_timeout = (crm_get_msec(value) / 1000);
}
if (baremetal_remote_node) {
value = g_hash_table_lookup((*rsc)->parameters, XML_REMOTE_ATTR_RECONNECT_INTERVAL);
if (value) {
/* reconnect delay works by setting failure_timeout and preventing the
* connection from starting until the failure is cleared. */
(*rsc)->remote_reconnect_interval = (crm_get_msec(value) / 1000);
/* we want to override any default failure_timeout in use when remote
* reconnect_interval is in use. */
(*rsc)->failure_timeout = (*rsc)->remote_reconnect_interval;
}
}
get_target_role(*rsc, &((*rsc)->next_role));
pe_rsc_trace((*rsc), "\tDesired next state: %s",
(*rsc)->next_role != RSC_ROLE_UNKNOWN ? role2text((*rsc)->next_role) : "default");
if ((*rsc)->fns->unpack(*rsc, data_set) == FALSE) {
return FALSE;
}
if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
resource_location(*rsc, NULL, 0, "symmetric_default", data_set);
} else if (container_remote_node) {
/* remote resources tied to a container resource must always be allowed
* to opt-in to the cluster. Whether the connection resource is actually
* allowed to be placed on a node is dependent on the container resource */
resource_location(*rsc, NULL, 0, "remote_connection_default", data_set);
}
pe_rsc_trace((*rsc), "\tAction notification: %s",
is_set((*rsc)->flags, pe_rsc_notify) ? "required" : "not required");
(*rsc)->utilization = crm_str_table_new();
unpack_instance_attributes(data_set->input, (*rsc)->xml, XML_TAG_UTILIZATION, NULL,
(*rsc)->utilization, NULL, FALSE, data_set->now);
/* data_set->resources = g_list_append(data_set->resources, (*rsc)); */
if (expanded_xml) {
if (add_template_rsc(xml_obj, data_set) == FALSE) {
return FALSE;
}
}
return TRUE;
}
void
common_update_score(resource_t * rsc, const char *id, int score)
{
node_t *node = NULL;
node = pe_hash_table_lookup(rsc->allowed_nodes, id);
if (node != NULL) {
pe_rsc_trace(rsc, "Updating score for %s on %s: %d + %d", rsc->id, id, node->weight, score);
node->weight = merge_weights(node->weight, score);
}
if (rsc->children) {
GListPtr gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
common_update_score(child_rsc, id, score);
}
}
}
gboolean
is_parent(resource_t *child, resource_t *rsc)
{
resource_t *parent = child;
if (parent == NULL || rsc == NULL) {
return FALSE;
}
while (parent->parent != NULL) {
if (parent->parent == rsc) {
return TRUE;
}
parent = parent->parent;
}
return FALSE;
}
resource_t *
uber_parent(resource_t * rsc)
{
resource_t *parent = rsc;
if (parent == NULL) {
return NULL;
}
while (parent->parent != NULL && parent->parent->variant != pe_container) {
parent = parent->parent;
}
return parent;
}
void
common_free(resource_t * rsc)
{
if (rsc == NULL) {
return;
}
pe_rsc_trace(rsc, "Freeing %s %d", rsc->id, rsc->variant);
g_list_free(rsc->rsc_cons);
g_list_free(rsc->rsc_cons_lhs);
g_list_free(rsc->rsc_tickets);
g_list_free(rsc->dangling_migrations);
if (rsc->parameters != NULL) {
g_hash_table_destroy(rsc->parameters);
}
#if ENABLE_VERSIONED_ATTRS
if (rsc->versioned_parameters != NULL) {
free_xml(rsc->versioned_parameters);
}
#endif
if (rsc->meta != NULL) {
g_hash_table_destroy(rsc->meta);
}
if (rsc->utilization != NULL) {
g_hash_table_destroy(rsc->utilization);
}
if (rsc->parent == NULL && is_set(rsc->flags, pe_rsc_orphan)) {
free_xml(rsc->xml);
rsc->xml = NULL;
free_xml(rsc->orig_xml);
rsc->orig_xml = NULL;
/* if rsc->orig_xml, then rsc->xml is an expanded xml from a template */
} else if (rsc->orig_xml) {
free_xml(rsc->xml);
rsc->xml = NULL;
}
if (rsc->running_on) {
g_list_free(rsc->running_on);
rsc->running_on = NULL;
}
if (rsc->known_on) {
g_hash_table_destroy(rsc->known_on);
rsc->known_on = NULL;
}
if (rsc->actions) {
g_list_free(rsc->actions);
rsc->actions = NULL;
}
if (rsc->allowed_nodes) {
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = NULL;
}
g_list_free(rsc->fillers);
g_list_free(rsc->rsc_location);
pe_rsc_trace(rsc, "Resource freed");
free(rsc->id);
free(rsc->clone_name);
free(rsc->allocated_to);
free(rsc->variant_opaque);
free(rsc->pending_task);
free(rsc);
}
diff --git a/lib/pengine/container.c b/lib/pengine/container.c
index 0a524896d5..4cc4d7a47a 100644
--- a/lib/pengine/container.c
+++ b/lib/pengine/container.c
@@ -1,1346 +1,1344 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <ctype.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <unpack.h>
#include <crm/msg_xml.h>
#define VARIANT_CONTAINER 1
#include "./variant.h"
void tuple_free(container_grouping_t *tuple);
static char *
next_ip(const char *last_ip)
{
unsigned int oct1 = 0;
unsigned int oct2 = 0;
unsigned int oct3 = 0;
unsigned int oct4 = 0;
int rc = sscanf(last_ip, "%u.%u.%u.%u", &oct1, &oct2, &oct3, &oct4);
if (rc != 4) {
/*@ TODO check for IPv6 */
return NULL;
} else if (oct3 > 253) {
return NULL;
} else if (oct4 > 253) {
++oct3;
oct4 = 1;
} else {
++oct4;
}
return crm_strdup_printf("%u.%u.%u.%u", oct1, oct2, oct3, oct4);
}
static int
allocate_ip(container_variant_data_t *data, container_grouping_t *tuple, char *buffer, int max)
{
if(data->ip_range_start == NULL) {
return 0;
} else if(data->ip_last) {
tuple->ipaddr = next_ip(data->ip_last);
} else {
tuple->ipaddr = strdup(data->ip_range_start);
}
data->ip_last = tuple->ipaddr;
#if 0
return snprintf(buffer, max, " --add-host=%s-%d:%s --link %s-docker-%d:%s-link-%d",
data->prefix, tuple->offset, tuple->ipaddr,
data->prefix, tuple->offset, data->prefix, tuple->offset);
#else
if (data->type == PE_CONTAINER_TYPE_DOCKER) {
return snprintf(buffer, max, " --add-host=%s-%d:%s",
data->prefix, tuple->offset, tuple->ipaddr);
} else if (data->type == PE_CONTAINER_TYPE_RKT) {
return snprintf(buffer, max, " --hosts-entry=%s=%s-%d",
tuple->ipaddr, data->prefix, tuple->offset);
} else {
return 0;
}
#endif
}
static xmlNode *
create_resource(const char *name, const char *provider, const char *kind)
{
xmlNode *rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
crm_xml_add(rsc, XML_ATTR_ID, name);
crm_xml_add(rsc, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF);
crm_xml_add(rsc, XML_AGENT_ATTR_PROVIDER, provider);
crm_xml_add(rsc, XML_ATTR_TYPE, kind);
return rsc;
}
/*!
* \internal
* \brief Check whether cluster can manage resource inside container
*
* \param[in] data Container variant data
*
* \return TRUE if networking configuration is acceptable, FALSE otherwise
*
* \note The resource is manageable if an IP range or control port has been
* specified. If a control port is used without an IP range, replicas per
* host must be 1.
*/
static bool
valid_network(container_variant_data_t *data)
{
if(data->ip_range_start) {
return TRUE;
}
if(data->control_port) {
if(data->replicas_per_host > 1) {
pe_err("Specifying the 'control-port' for %s requires 'replicas-per-host=1'", data->prefix);
data->replicas_per_host = 1;
/* @TODO to be sure: clear_bit(rsc->flags, pe_rsc_unique); */
}
return TRUE;
}
return FALSE;
}
static bool
create_ip_resource(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
if(data->ip_range_start) {
char *id = NULL;
xmlNode *xml_ip = NULL;
xmlNode *xml_obj = NULL;
id = crm_strdup_printf("%s-ip-%s", data->prefix, tuple->ipaddr);
crm_xml_sanitize_id(id);
xml_ip = create_resource(id, "heartbeat", "IPaddr2");
free(id);
xml_obj = create_xml_node(xml_ip, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset);
crm_create_nvpair_xml(xml_obj, NULL, "ip", tuple->ipaddr);
if(data->host_network) {
crm_create_nvpair_xml(xml_obj, NULL, "nic", data->host_network);
}
if(data->host_netmask) {
crm_create_nvpair_xml(xml_obj, NULL,
"cidr_netmask", data->host_netmask);
} else {
crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", "32");
}
xml_obj = create_xml_node(xml_ip, "operations");
crm_create_op_xml(xml_obj, ID(xml_ip), "monitor", "60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
if (common_unpack(xml_ip, &tuple->ip, parent, data_set) == false) {
return FALSE;
}
parent->children = g_list_append(parent->children, tuple->ip);
}
return TRUE;
}
static bool
create_docker_resource(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
int offset = 0, max = 4096;
char *buffer = calloc(1, max+1);
int doffset = 0, dmax = 1024;
char *dbuffer = calloc(1, dmax+1);
char *id = NULL;
xmlNode *xml_docker = NULL;
xmlNode *xml_obj = NULL;
id = crm_strdup_printf("%s-docker-%d", data->prefix, tuple->offset);
crm_xml_sanitize_id(id);
xml_docker = create_resource(id, "heartbeat", "docker");
free(id);
xml_obj = create_xml_node(xml_docker, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset);
crm_create_nvpair_xml(xml_obj, NULL, "image", data->image);
crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", XML_BOOLEAN_TRUE);
crm_create_nvpair_xml(xml_obj, NULL, "force_kill", XML_BOOLEAN_FALSE);
crm_create_nvpair_xml(xml_obj, NULL, "reuse", XML_BOOLEAN_FALSE);
offset += snprintf(buffer+offset, max-offset, " --restart=no");
/* Set a container hostname only if we have an IP to map it to.
* The user can set -h or --uts=host themselves if they want a nicer
* name for logs, but this makes applications happy who need their
* hostname to match the IP they bind to.
*/
if (data->ip_range_start != NULL) {
offset += snprintf(buffer+offset, max-offset, " -h %s-%d",
data->prefix, tuple->offset);
}
if(data->docker_network) {
// offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", tuple->ipaddr);
offset += snprintf(buffer+offset, max-offset, " --net=%s", data->docker_network);
}
if(data->control_port) {
offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%s", data->control_port);
} else {
offset += snprintf(buffer+offset, max-offset, " -e PCMK_remote_port=%d", DEFAULT_REMOTE_PORT);
}
for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) {
container_mount_t *mount = pIter->data;
if(mount->flags) {
char *source = crm_strdup_printf(
"%s/%s-%d", mount->source, data->prefix, tuple->offset);
if(doffset > 0) {
doffset += snprintf(dbuffer+doffset, dmax-doffset, ",");
}
doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source);
offset += snprintf(buffer+offset, max-offset, " -v %s:%s", source, mount->target);
free(source);
} else {
offset += snprintf(buffer+offset, max-offset, " -v %s:%s", mount->source, mount->target);
}
if(mount->options) {
offset += snprintf(buffer+offset, max-offset, ":%s", mount->options);
}
}
for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) {
container_port_t *port = pIter->data;
if(tuple->ipaddr) {
offset += snprintf(buffer+offset, max-offset, " -p %s:%s:%s",
tuple->ipaddr, port->source, port->target);
} else {
offset += snprintf(buffer+offset, max-offset, " -p %s:%s", port->source, port->target);
}
}
if(data->docker_run_options) {
offset += snprintf(buffer+offset, max-offset, " %s", data->docker_run_options);
}
if(data->docker_host_options) {
offset += snprintf(buffer+offset, max-offset, " %s", data->docker_host_options);
}
crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer);
free(buffer);
crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer);
free(dbuffer);
if(tuple->child) {
if(data->docker_run_command) {
crm_create_nvpair_xml(xml_obj, NULL,
"run_cmd", data->docker_run_command);
} else {
crm_create_nvpair_xml(xml_obj, NULL,
"run_cmd", SBIN_DIR "/pacemaker_remoted");
}
/* TODO: Allow users to specify their own?
*
* We just want to know if the container is alive, we'll
* monitor the child independently
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
/* } else if(child && data->untrusted) {
* Support this use-case?
*
* The ability to have resources started/stopped by us, but
* unable to set attributes, etc.
*
* Arguably better to control API access this with ACLs like
* "normal" remote nodes
*
* crm_create_nvpair_xml(xml_obj, NULL,
* "run_cmd", "/usr/libexec/pacemaker/lrmd");
* crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd",
* "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke");
*/
} else {
if(data->docker_run_command) {
crm_create_nvpair_xml(xml_obj, NULL,
"run_cmd", data->docker_run_command);
}
/* TODO: Allow users to specify their own?
*
* We don't know what's in the container, so we just want
* to know if it is alive
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
}
xml_obj = create_xml_node(xml_docker, "operations");
crm_create_op_xml(xml_obj, ID(xml_docker), "monitor", "60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
if (common_unpack(xml_docker, &tuple->docker, parent, data_set) == FALSE) {
return FALSE;
}
parent->children = g_list_append(parent->children, tuple->docker);
return TRUE;
}
static bool
create_rkt_resource(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
int offset = 0, max = 4096;
char *buffer = calloc(1, max+1);
int doffset = 0, dmax = 1024;
char *dbuffer = calloc(1, dmax+1);
char *id = NULL;
xmlNode *xml_docker = NULL;
xmlNode *xml_obj = NULL;
int volid = 0;
id = crm_strdup_printf("%s-rkt-%d", data->prefix, tuple->offset);
crm_xml_sanitize_id(id);
xml_docker = create_resource(id, "heartbeat", "rkt");
free(id);
xml_obj = create_xml_node(xml_docker, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset);
crm_create_nvpair_xml(xml_obj, NULL, "image", data->image);
crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", "true");
crm_create_nvpair_xml(xml_obj, NULL, "force_kill", "false");
crm_create_nvpair_xml(xml_obj, NULL, "reuse", "false");
/* Set a container hostname only if we have an IP to map it to.
* The user can set -h or --uts=host themselves if they want a nicer
* name for logs, but this makes applications happy who need their
* hostname to match the IP they bind to.
*/
if (data->ip_range_start != NULL) {
offset += snprintf(buffer+offset, max-offset, " --hostname=%s-%d",
data->prefix, tuple->offset);
}
if(data->docker_network) {
// offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", tuple->ipaddr);
offset += snprintf(buffer+offset, max-offset, " --net=%s", data->docker_network);
}
if(data->control_port) {
offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_remote_port=%s", data->control_port);
} else {
offset += snprintf(buffer+offset, max-offset, " --environment=PCMK_remote_port=%d", DEFAULT_REMOTE_PORT);
}
for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) {
container_mount_t *mount = pIter->data;
if(mount->flags) {
char *source = crm_strdup_printf(
"%s/%s-%d", mount->source, data->prefix, tuple->offset);
if(doffset > 0) {
doffset += snprintf(dbuffer+doffset, dmax-doffset, ",");
}
doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source);
offset += snprintf(buffer+offset, max-offset, " --volume vol%d,kind=host,source=%s", volid, source);
if(mount->options) {
offset += snprintf(buffer+offset, max-offset, ",%s", mount->options);
}
offset += snprintf(buffer+offset, max-offset, " --mount volume=vol%d,target=%s", volid, mount->target);
free(source);
} else {
offset += snprintf(buffer+offset, max-offset, " --volume vol%d,kind=host,source=%s", volid, mount->source);
if(mount->options) {
offset += snprintf(buffer+offset, max-offset, ",%s", mount->options);
}
offset += snprintf(buffer+offset, max-offset, " --mount volume=vol%d,target=%s", volid, mount->target);
}
volid++;
}
for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) {
container_port_t *port = pIter->data;
if(tuple->ipaddr) {
offset += snprintf(buffer+offset, max-offset, " --port=%s:%s:%s",
port->target, tuple->ipaddr, port->source);
} else {
offset += snprintf(buffer+offset, max-offset, " --port=%s:%s", port->target, port->source);
}
}
if(data->docker_run_options) {
offset += snprintf(buffer+offset, max-offset, " %s", data->docker_run_options);
}
if(data->docker_host_options) {
offset += snprintf(buffer+offset, max-offset, " %s", data->docker_host_options);
}
crm_create_nvpair_xml(xml_obj, NULL, "run_opts", buffer);
free(buffer);
crm_create_nvpair_xml(xml_obj, NULL, "mount_points", dbuffer);
free(dbuffer);
if(tuple->child) {
if(data->docker_run_command) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->docker_run_command);
} else {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", SBIN_DIR"/pacemaker_remoted");
}
/* TODO: Allow users to specify their own?
*
* We just want to know if the container is alive, we'll
* monitor the child independently
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
/* } else if(child && data->untrusted) {
* Support this use-case?
*
* The ability to have resources started/stopped by us, but
* unable to set attributes, etc.
*
* Arguably better to control API access this with ACLs like
* "normal" remote nodes
*
* crm_create_nvpair_xml(xml_obj, NULL,
* "run_cmd", "/usr/libexec/pacemaker/lrmd");
* crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd",
* "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke");
*/
} else {
if(data->docker_run_command) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
data->docker_run_command);
}
/* TODO: Allow users to specify their own?
*
* We don't know what's in the container, so we just want
* to know if it is alive
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
}
xml_obj = create_xml_node(xml_docker, "operations");
crm_create_op_xml(xml_obj, ID(xml_docker), "monitor", "60s", NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
if (common_unpack(xml_docker, &tuple->docker, parent, data_set) == FALSE) {
return FALSE;
}
parent->children = g_list_append(parent->children, tuple->docker);
return TRUE;
}
/*!
* \brief Ban a node from a resource's (and its children's) allowed nodes list
*
* \param[in,out] rsc Resource to modify
* \param[in] uname Name of node to ban
*/
static void
disallow_node(resource_t *rsc, const char *uname)
{
gpointer match = g_hash_table_lookup(rsc->allowed_nodes, uname);
if (match) {
((pe_node_t *) match)->weight = -INFINITY;
((pe_node_t *) match)->rsc_discover_mode = pe_discover_never;
}
if (rsc->children) {
GListPtr child;
for (child = rsc->children; child != NULL; child = child->next) {
disallow_node((resource_t *) (child->data), uname);
}
}
}
static bool
create_remote_resource(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
if (tuple->child && valid_network(data)) {
GHashTableIter gIter;
GListPtr rsc_iter = NULL;
node_t *node = NULL;
xmlNode *xml_remote = NULL;
char *id = crm_strdup_printf("%s-%d", data->prefix, tuple->offset);
char *port_s = NULL;
const char *uname = NULL;
const char *connect_name = NULL;
if (remote_id_conflict(id, data_set)) {
free(id);
// The biggest hammer we have
id = crm_strdup_printf("pcmk-internal-%s-remote-%d", tuple->child->id, tuple->offset);
CRM_ASSERT(remote_id_conflict(id, data_set) == FALSE);
}
/* REMOTE_CONTAINER_HACK: Using "#uname" as the server name when the
* connection does not have its own IP is a magic string that we use to
* support nested remotes (i.e. a bundle running on a remote node).
*/
connect_name = (tuple->ipaddr? tuple->ipaddr : "#uname");
if (data->control_port == NULL) {
port_s = crm_itoa(DEFAULT_REMOTE_PORT);
}
/* This sets tuple->docker as tuple->remote's container, which is
* similar to what happens with guest nodes. This is how the PE knows
* that the bundle node is fenced by recovering docker, and that
* remote should be ordered relative to docker.
*/
xml_remote = pe_create_remote_xml(NULL, id, tuple->docker->id,
XML_BOOLEAN_FALSE, NULL, "60s", NULL,
NULL, connect_name,
(data->control_port?
data->control_port : port_s));
free(port_s);
/* Abandon our created ID, and pull the copy from the XML, because we
* need something that will get freed during data set cleanup to use as
* the node ID and uname.
*/
free(id);
id = NULL;
uname = ID(xml_remote);
/* Ensure a node has been created for the guest (it may have already
* been, if it has a permanent node attribute), and ensure its weight is
* -INFINITY so no other resources can run on it.
*/
node = pe_find_node(data_set->nodes, uname);
if (node == NULL) {
node = pe_create_node(uname, uname, "remote", "-INFINITY",
data_set);
} else {
node->weight = -INFINITY;
}
node->rsc_discover_mode = pe_discover_never;
/* unpack_remote_nodes() ensures that each remote node and guest node
* has a pe_node_t entry. Ideally, it would do the same for bundle nodes.
* Unfortunately, a bundle has to be mostly unpacked before it's obvious
* what nodes will be needed, so we do it just above.
*
* Worse, that means that the node may have been utilized while
* unpacking other resources, without our weight correction. The most
* likely place for this to happen is when common_unpack() calls
* resource_location() to set a default score in symmetric clusters.
* This adds a node *copy* to each resource's allowed nodes, and these
* copies will have the wrong weight.
*
* As a hacky workaround, fix those copies here.
*
* @TODO Possible alternative: ensure bundles are unpacked before other
* resources, so the weight is correct before any copies are made.
*/
for (rsc_iter = data_set->resources; rsc_iter; rsc_iter = rsc_iter->next) {
disallow_node((resource_t *) (rsc_iter->data), uname);
}
tuple->node = node_copy(node);
tuple->node->weight = 500;
tuple->node->rsc_discover_mode = pe_discover_exclusive;
/* Ensure the node shows up as allowed and with the correct discovery set */
g_hash_table_insert(tuple->child->allowed_nodes, (gpointer) tuple->node->details->id, node_copy(tuple->node));
if (common_unpack(xml_remote, &tuple->remote, parent, data_set) == FALSE) {
return FALSE;
}
g_hash_table_iter_init(&gIter, tuple->remote->allowed_nodes);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&node)) {
if(is_remote_node(node)) {
/* Remote resources can only run on 'normal' cluster node */
node->weight = -INFINITY;
}
}
tuple->node->details->remote_rsc = tuple->remote;
/* A bundle's #kind is closer to "container" (guest node) than the
* "remote" set by pe_create_node().
*/
g_hash_table_insert(tuple->node->details->attrs,
strdup(CRM_ATTR_KIND), strdup("container"));
/* One effect of this is that setup_container() will add
* tuple->remote to tuple->docker's fillers, which will make
* rsc_contains_remote_node() true for tuple->docker.
*
* tuple->child does NOT get added to tuple->docker's fillers.
* The only noticeable effect if it did would be for its fail count to
* be taken into account when checking tuple->docker's migration
* threshold.
*/
parent->children = g_list_append(parent->children, tuple->remote);
}
return TRUE;
}
static bool
create_container(
resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple,
pe_working_set_t * data_set)
{
if (data->type == PE_CONTAINER_TYPE_DOCKER &&
create_docker_resource(parent, data, tuple, data_set) == FALSE) {
return FALSE;
}
if (data->type == PE_CONTAINER_TYPE_RKT &&
create_rkt_resource(parent, data, tuple, data_set) == FALSE) {
return FALSE;
}
if(create_ip_resource(parent, data, tuple, data_set) == FALSE) {
return FALSE;
}
if(create_remote_resource(parent, data, tuple, data_set) == FALSE) {
return FALSE;
}
if(tuple->child && tuple->ipaddr) {
add_hash_param(tuple->child->meta, "external-ip", tuple->ipaddr);
}
if(tuple->remote) {
/*
* Allow the remote connection resource to be allocated to a
* different node than the one on which the docker container
* is active.
*
* Makes it possible to have remote nodes, running docker
* containers with pacemaker_remoted inside in order to start
* services inside those containers.
*/
set_bit(tuple->remote->flags, pe_rsc_allow_remote_remotes);
}
return TRUE;
}
static void
mount_add(container_variant_data_t *container_data, const char *source,
const char *target, const char *options, int flags)
{
container_mount_t *mount = calloc(1, sizeof(container_mount_t));
mount->source = strdup(source);
mount->target = strdup(target);
if (options) {
mount->options = strdup(options);
}
mount->flags = flags;
container_data->mounts = g_list_append(container_data->mounts, mount);
}
static void mount_free(container_mount_t *mount)
{
free(mount->source);
free(mount->target);
free(mount->options);
free(mount);
}
static void port_free(container_port_t *port)
{
free(port->source);
free(port->target);
free(port);
}
gboolean
container_unpack(resource_t * rsc, pe_working_set_t * data_set)
{
const char *value = NULL;
xmlNode *xml_obj = NULL;
xmlNode *xml_resource = NULL;
container_variant_data_t *container_data = NULL;
CRM_ASSERT(rsc != NULL);
pe_rsc_trace(rsc, "Processing resource %s...", rsc->id);
container_data = calloc(1, sizeof(container_variant_data_t));
rsc->variant_opaque = container_data;
container_data->prefix = strdup(rsc->id);
xml_obj = first_named_child(rsc->xml, "docker");
if (xml_obj != NULL) {
container_data->type = PE_CONTAINER_TYPE_DOCKER;
} else {
xml_obj = first_named_child(rsc->xml, "rkt");
if (xml_obj != NULL) {
container_data->type = PE_CONTAINER_TYPE_RKT;
} else {
return FALSE;
}
}
value = crm_element_value(xml_obj, "masters");
container_data->masters = crm_parse_int(value, "0");
if (container_data->masters < 0) {
pe_err("'masters' for %s must be nonnegative integer, using 0",
rsc->id);
container_data->masters = 0;
}
value = crm_element_value(xml_obj, "replicas");
if ((value == NULL) && (container_data->masters > 0)) {
container_data->replicas = container_data->masters;
} else {
container_data->replicas = crm_parse_int(value, "1");
}
if (container_data->replicas < 1) {
pe_err("'replicas' for %s must be positive integer, using 1", rsc->id);
container_data->replicas = 1;
}
/*
* Communication between containers on the same host via the
* floating IPs only works if docker is started with:
* --userland-proxy=false --ip-masq=false
*/
value = crm_element_value(xml_obj, "replicas-per-host");
container_data->replicas_per_host = crm_parse_int(value, "1");
if (container_data->replicas_per_host < 1) {
pe_err("'replicas-per-host' for %s must be positive integer, using 1",
rsc->id);
container_data->replicas_per_host = 1;
}
if (container_data->replicas_per_host == 1) {
clear_bit(rsc->flags, pe_rsc_unique);
}
container_data->docker_run_command = crm_element_value_copy(xml_obj, "run-command");
container_data->docker_run_options = crm_element_value_copy(xml_obj, "options");
container_data->image = crm_element_value_copy(xml_obj, "image");
container_data->docker_network = crm_element_value_copy(xml_obj, "network");
xml_obj = first_named_child(rsc->xml, "network");
if(xml_obj) {
container_data->ip_range_start = crm_element_value_copy(xml_obj, "ip-range-start");
container_data->host_netmask = crm_element_value_copy(xml_obj, "host-netmask");
container_data->host_network = crm_element_value_copy(xml_obj, "host-interface");
container_data->control_port = crm_element_value_copy(xml_obj, "control-port");
for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL;
xml_child = __xml_next_element(xml_child)) {
container_port_t *port = calloc(1, sizeof(container_port_t));
port->source = crm_element_value_copy(xml_child, "port");
if(port->source == NULL) {
port->source = crm_element_value_copy(xml_child, "range");
} else {
port->target = crm_element_value_copy(xml_child, "internal-port");
}
if(port->source != NULL && strlen(port->source) > 0) {
if(port->target == NULL) {
port->target = strdup(port->source);
}
container_data->ports = g_list_append(container_data->ports, port);
} else {
pe_err("Invalid port directive %s", ID(xml_child));
port_free(port);
}
}
}
xml_obj = first_named_child(rsc->xml, "storage");
for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL;
xml_child = __xml_next_element(xml_child)) {
const char *source = crm_element_value(xml_child, "source-dir");
const char *target = crm_element_value(xml_child, "target-dir");
const char *options = crm_element_value(xml_child, "options");
int flags = 0;
if (source == NULL) {
source = crm_element_value(xml_child, "source-dir-root");
flags = 1;
}
if (source && target) {
mount_add(container_data, source, target, options, flags);
} else {
pe_err("Invalid mount directive %s", ID(xml_child));
}
}
xml_obj = first_named_child(rsc->xml, "primitive");
if (xml_obj && valid_network(container_data)) {
char *value = NULL;
xmlNode *xml_set = NULL;
if(container_data->masters > 0) {
xml_resource = create_xml_node(NULL, XML_CIB_TAG_MASTER);
} else {
xml_resource = create_xml_node(NULL, XML_CIB_TAG_INCARNATION);
}
crm_xml_set_id(xml_resource, "%s-%s", container_data->prefix, xml_resource->name);
xml_set = create_xml_node(xml_resource, XML_TAG_META_SETS);
crm_xml_set_id(xml_set, "%s-%s-meta", container_data->prefix, xml_resource->name);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_ORDERED, XML_BOOLEAN_TRUE);
value = crm_itoa(container_data->replicas);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_INCARNATION_MAX, value);
free(value);
value = crm_itoa(container_data->replicas_per_host);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_INCARNATION_NODEMAX, value);
free(value);
crm_create_nvpair_xml(xml_set, NULL, XML_RSC_ATTR_UNIQUE,
(container_data->replicas_per_host > 1)?
XML_BOOLEAN_TRUE : XML_BOOLEAN_FALSE);
if(container_data->masters) {
value = crm_itoa(container_data->masters);
crm_create_nvpair_xml(xml_set, NULL,
XML_RSC_ATTR_MASTER_MAX, value);
free(value);
}
//crm_xml_add(xml_obj, XML_ATTR_ID, container_data->prefix);
add_node_copy(xml_resource, xml_obj);
} else if(xml_obj) {
pe_err("Cannot control %s inside %s without either ip-range-start or control-port",
rsc->id, ID(xml_obj));
return FALSE;
}
if(xml_resource) {
int lpc = 0;
GListPtr childIter = NULL;
resource_t *new_rsc = NULL;
container_port_t *port = NULL;
const char *key_loc = NULL;
int offset = 0, max = 1024;
char *buffer = NULL;
if (common_unpack(xml_resource, &new_rsc, rsc, data_set) == FALSE) {
pe_err("Failed unpacking resource %s", ID(rsc->xml));
if (new_rsc != NULL && new_rsc->fns != NULL) {
new_rsc->fns->free(new_rsc);
}
return FALSE;
}
container_data->child = new_rsc;
/* We map the remote authentication key (likely) used on the DC to the
* default key location inside the container. This is only the likely
* location because an actual connection will do some validity checking
* on the file before using it.
*
* Mapping to the default location inside the container avoids having to
* pass another environment variable to the container.
*
* This makes several assumptions:
* - if PCMK_authkey_location is set, it has the same value on all nodes
* - the container technology does not propagate host environment
* variables to the container
* - the user does not set this environment variable via their container
* image
*
* @TODO A convoluted but possible way around the first limitation would
* be to allow a resource parameter to include environment
* variable references in its value, and resolve them on the
* executing node's crmd before sending the command to the lrmd.
*/
key_loc = getenv("PCMK_authkey_location");
if (key_loc == NULL) {
key_loc = DEFAULT_REMOTE_KEY_LOCATION;
}
mount_add(container_data, key_loc, DEFAULT_REMOTE_KEY_LOCATION, NULL,
0);
mount_add(container_data, CRM_LOG_DIR "/bundles", "/var/log", NULL, 1);
port = calloc(1, sizeof(container_port_t));
if(container_data->control_port) {
port->source = strdup(container_data->control_port);
} else {
/* If we wanted to respect PCMK_remote_port, we could use
* crm_default_remote_port() here and elsewhere in this file instead
* of DEFAULT_REMOTE_PORT.
*
* However, it gains nothing, since we control both the container
* environment and the connection resource parameters, and the user
* can use a different port if desired by setting control-port.
*/
port->source = crm_itoa(DEFAULT_REMOTE_PORT);
}
port->target = strdup(port->source);
container_data->ports = g_list_append(container_data->ports, port);
buffer = calloc(1, max+1);
for(childIter = container_data->child->children; childIter != NULL; childIter = childIter->next) {
container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t));
tuple->child = childIter->data;
tuple->offset = lpc++;
// Ensure the child's notify gets set based on the underlying primitive's value
if(is_set(tuple->child->flags, pe_rsc_notify)) {
set_bit(container_data->child->flags, pe_rsc_notify);
}
offset += allocate_ip(container_data, tuple, buffer+offset, max-offset);
container_data->tuples = g_list_append(container_data->tuples, tuple);
container_data->attribute_target = g_hash_table_lookup(tuple->child->meta, XML_RSC_ATTR_TARGET);
}
container_data->docker_host_options = buffer;
if(container_data->attribute_target) {
g_hash_table_replace(rsc->meta, strdup(XML_RSC_ATTR_TARGET), strdup(container_data->attribute_target));
g_hash_table_replace(container_data->child->meta, strdup(XML_RSC_ATTR_TARGET), strdup(container_data->attribute_target));
}
} else {
// Just a naked container, no pacemaker-remote
int offset = 0, max = 1024;
char *buffer = calloc(1, max+1);
for(int lpc = 0; lpc < container_data->replicas; lpc++) {
container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t));
tuple->offset = lpc;
offset += allocate_ip(container_data, tuple, buffer+offset, max-offset);
container_data->tuples = g_list_append(container_data->tuples, tuple);
}
container_data->docker_host_options = buffer;
}
for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
container_grouping_t *tuple = (container_grouping_t *)gIter->data;
if (create_container(rsc, container_data, tuple, data_set) == FALSE) {
pe_err("Failed unpacking resource %s", rsc->id);
rsc->fns->free(rsc);
return FALSE;
}
}
if(container_data->child) {
rsc->children = g_list_append(rsc->children, container_data->child);
}
return TRUE;
}
static int
tuple_rsc_active(resource_t *rsc, gboolean all)
{
if (rsc) {
gboolean child_active = rsc->fns->active(rsc, all);
if (child_active && !all) {
return TRUE;
} else if (!child_active && all) {
return FALSE;
}
}
return -1;
}
gboolean
container_active(resource_t * rsc, gboolean all)
{
container_variant_data_t *container_data = NULL;
GListPtr iter = NULL;
get_container_variant_data(container_data, rsc);
for (iter = container_data->tuples; iter != NULL; iter = iter->next) {
container_grouping_t *tuple = (container_grouping_t *)(iter->data);
int rsc_active;
rsc_active = tuple_rsc_active(tuple->ip, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = tuple_rsc_active(tuple->child, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = tuple_rsc_active(tuple->docker, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
rsc_active = tuple_rsc_active(tuple->remote, all);
if (rsc_active >= 0) {
return (gboolean) rsc_active;
}
}
/* If "all" is TRUE, we've already checked that no resources were inactive,
* so return TRUE; if "all" is FALSE, we didn't find any active resources,
* so return FALSE.
*/
return all;
}
resource_t *
find_container_child(const char *stem, resource_t * rsc, node_t *node)
{
container_variant_data_t *container_data = NULL;
resource_t *parent = uber_parent(rsc);
CRM_ASSERT(parent->parent);
parent = parent->parent;
get_container_variant_data(container_data, parent);
if (is_not_set(rsc->flags, pe_rsc_unique)) {
for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
container_grouping_t *tuple = (container_grouping_t *)gIter->data;
CRM_ASSERT(tuple);
if(tuple->node->details == node->details) {
rsc = tuple->child;
break;
}
}
}
if (rsc && safe_str_neq(stem, rsc->id)) {
free(rsc->clone_name);
rsc->clone_name = strdup(stem);
}
return rsc;
}
static void
print_rsc_in_list(resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
if (rsc != NULL) {
if (options & pe_print_html) {
status_print("<li>");
}
rsc->fns->print(rsc, pre_text, options, print_data);
if (options & pe_print_html) {
status_print("</li>\n");
}
}
}
static const char*
container_type_as_string(enum container_type t)
{
if (t == PE_CONTAINER_TYPE_DOCKER) {
return PE_CONTAINER_TYPE_DOCKER_S;
} else if (t == PE_CONTAINER_TYPE_RKT) {
return PE_CONTAINER_TYPE_RKT_S;
} else {
return PE_CONTAINER_TYPE_UNKNOWN_S;
}
}
static void
container_print_xml(resource_t * rsc, const char *pre_text, long options, void *print_data)
{
container_variant_data_t *container_data = NULL;
char *child_text = NULL;
CRM_CHECK(rsc != NULL, return);
if (pre_text == NULL) {
pre_text = "";
}
child_text = crm_concat(pre_text, " ", ' ');
get_container_variant_data(container_data, rsc);
status_print("%s<bundle ", pre_text);
status_print("id=\"%s\" ", rsc->id);
// Always lowercase the container technology type for use as XML value
status_print("type=\"");
for (const char *c = container_type_as_string(container_data->type);
*c; ++c) {
status_print("%c", tolower(*c));
}
status_print("\" ");
status_print("image=\"%s\" ", container_data->image);
status_print("unique=\"%s\" ", is_set(rsc->flags, pe_rsc_unique)? "true" : "false");
status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false");
status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false");
status_print(">\n");
for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
container_grouping_t *tuple = (container_grouping_t *)gIter->data;
CRM_ASSERT(tuple);
status_print("%s <replica id=\"%d\">\n", pre_text, tuple->offset);
print_rsc_in_list(tuple->ip, child_text, options, print_data);
print_rsc_in_list(tuple->child, child_text, options, print_data);
print_rsc_in_list(tuple->docker, child_text, options, print_data);
print_rsc_in_list(tuple->remote, child_text, options, print_data);
status_print("%s </replica>\n", pre_text);
}
status_print("%s</bundle>\n", pre_text);
free(child_text);
}
static void
tuple_print(container_grouping_t * tuple, const char *pre_text, long options, void *print_data)
{
node_t *node = NULL;
resource_t *rsc = tuple->child;
int offset = 0;
char buffer[LINE_MAX];
if(rsc == NULL) {
rsc = tuple->docker;
}
if(tuple->remote) {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(tuple->remote));
} else {
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(tuple->docker));
}
if(tuple->ipaddr) {
offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", tuple->ipaddr);
}
if (tuple->docker->running_on) {
node = tuple->docker->running_on->data;
}
common_print(rsc, pre_text, buffer, node, options, print_data);
}
void
container_print(resource_t * rsc, const char *pre_text, long options, void *print_data)
{
container_variant_data_t *container_data = NULL;
char *child_text = NULL;
CRM_CHECK(rsc != NULL, return);
if (options & pe_print_xml) {
container_print_xml(rsc, pre_text, options, print_data);
return;
}
get_container_variant_data(container_data, rsc);
if (pre_text == NULL) {
pre_text = " ";
}
status_print("%s%s container%s: %s [%s]%s%s\n",
pre_text, container_type_as_string(container_data->type),
container_data->replicas>1?" set":"", rsc->id, container_data->image,
is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "",
is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)");
if (options & pe_print_html) {
status_print("<br />\n<ul>\n");
}
for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
container_grouping_t *tuple = (container_grouping_t *)gIter->data;
CRM_ASSERT(tuple);
if (options & pe_print_html) {
status_print("<li>");
}
if(is_set(options, pe_print_clone_details)) {
child_text = crm_strdup_printf(" %s", pre_text);
if(g_list_length(container_data->tuples) > 1) {
status_print(" %sReplica[%d]\n", pre_text, tuple->offset);
}
if (options & pe_print_html) {
status_print("<br />\n<ul>\n");
}
print_rsc_in_list(tuple->ip, child_text, options, print_data);
print_rsc_in_list(tuple->docker, child_text, options, print_data);
print_rsc_in_list(tuple->remote, child_text, options, print_data);
print_rsc_in_list(tuple->child, child_text, options, print_data);
if (options & pe_print_html) {
status_print("</ul>\n");
}
} else {
child_text = crm_strdup_printf("%s ", pre_text);
tuple_print(tuple, child_text, options, print_data);
}
free(child_text);
if (options & pe_print_html) {
status_print("</li>\n");
}
}
if (options & pe_print_html) {
status_print("</ul>\n");
}
}
void
tuple_free(container_grouping_t *tuple)
{
if(tuple == NULL) {
return;
}
if(tuple->node) {
free(tuple->node);
tuple->node = NULL;
}
if(tuple->ip) {
free_xml(tuple->ip->xml);
tuple->ip->xml = NULL;
tuple->ip->fns->free(tuple->ip);
- tuple->ip->xml = NULL;
- free_xml(tuple->ip->xml);
tuple->ip = NULL;
}
if(tuple->docker) {
free_xml(tuple->docker->xml);
tuple->docker->xml = NULL;
tuple->docker->fns->free(tuple->docker);
tuple->docker = NULL;
}
if(tuple->remote) {
free_xml(tuple->remote->xml);
tuple->remote->xml = NULL;
tuple->remote->fns->free(tuple->remote);
tuple->remote = NULL;
}
free(tuple->ipaddr);
free(tuple);
}
void
container_free(resource_t * rsc)
{
container_variant_data_t *container_data = NULL;
CRM_CHECK(rsc != NULL, return);
get_container_variant_data(container_data, rsc);
pe_rsc_trace(rsc, "Freeing %s", rsc->id);
free(container_data->prefix);
free(container_data->image);
free(container_data->control_port);
free(container_data->host_network);
free(container_data->host_netmask);
free(container_data->ip_range_start);
free(container_data->docker_network);
free(container_data->docker_run_options);
free(container_data->docker_run_command);
free(container_data->docker_host_options);
g_list_free_full(container_data->tuples, (GDestroyNotify)tuple_free);
g_list_free_full(container_data->mounts, (GDestroyNotify)mount_free);
g_list_free_full(container_data->ports, (GDestroyNotify)port_free);
g_list_free(rsc->children);
if(container_data->child) {
free_xml(container_data->child->xml);
container_data->child->xml = NULL;
container_data->child->fns->free(container_data->child);
}
common_free(rsc);
}
enum rsc_role_e
container_resource_state(const resource_t * rsc, gboolean current)
{
enum rsc_role_e container_role = RSC_ROLE_UNKNOWN;
return container_role;
}
/*!
* \brief Get the number of configured replicas in a bundle
*
* \param[in] rsc Bundle resource
*
* \return Number of configured replicas, or 0 on error
*/
int
pe_bundle_replicas(const resource_t *rsc)
{
if ((rsc == NULL) || (rsc->variant != pe_container)) {
return 0;
} else {
container_variant_data_t *container_data = NULL;
get_container_variant_data(container_data, rsc);
return container_data->replicas;
}
}
diff --git a/lib/services/services.c b/lib/services/services.c
index 8812d15c91..291f2e6e1b 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -1,1478 +1,1477 @@
/*
* Copyright (C) 2010-2016 Andrew Beekhof <andrew@beekhof.net>
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/common/mainloop.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include "services_private.h"
#if SUPPORT_UPSTART
# include <upstart.h>
#endif
#if SUPPORT_SYSTEMD
# include <systemd.h>
#endif
/* TODO: Develop a rollover strategy */
static int operations = 0;
static GHashTable *recurring_actions = NULL;
/* ops waiting to run async because of conflicting active
* pending ops */
static GList *blocked_ops = NULL;
/* ops currently active (in-flight) */
static GList *inflight_ops = NULL;
static void handle_blocked_ops(void);
svc_action_t *
services_action_create(const char *name, const char *action, int interval, int timeout)
{
return resources_action_create(name, PCMK_RESOURCE_CLASS_LSB, NULL, name,
action, interval, timeout, NULL, 0);
}
const char *
resources_find_service_class(const char *agent)
{
/* Priority is:
* - lsb
* - systemd
* - upstart
*/
int rc = 0;
struct stat st;
char *path = NULL;
#ifdef LSB_ROOT_DIR
rc = asprintf(&path, "%s/%s", LSB_ROOT_DIR, agent);
if (rc > 0 && stat(path, &st) == 0) {
free(path);
return PCMK_RESOURCE_CLASS_LSB;
}
free(path);
#endif
#if SUPPORT_SYSTEMD
if (systemd_unit_exists(agent)) {
return PCMK_RESOURCE_CLASS_SYSTEMD;
}
#endif
#if SUPPORT_UPSTART
if (upstart_job_exists(agent)) {
return PCMK_RESOURCE_CLASS_UPSTART;
}
#endif
return NULL;
}
static inline void
init_recurring_actions(void)
{
if (recurring_actions == NULL) {
recurring_actions = g_hash_table_new_full(g_str_hash, g_str_equal, NULL,
NULL);
}
}
/*!
* \internal
* \brief Check whether op is in-flight systemd or upstart op
*
* \param[in] op Operation to check
*
* \return TRUE if op is in-flight systemd or upstart op
*/
static inline gboolean
inflight_systemd_or_upstart(svc_action_t *op)
{
return (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD)
|| safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_UPSTART))
&& (g_list_find(inflight_ops, op) != NULL);
}
/*!
* \internal
* \brief Expand "service" alias to an actual resource class
*
* \param[in] rsc Resource name (for logging only)
* \param[in] standard Resource class as configured
* \param[in] agent Agent name to look for
*
* \return Newly allocated string with actual resource class
*
* \note The caller is responsible for calling free() on the result.
*/
static char *
expand_resource_class(const char *rsc, const char *standard, const char *agent)
{
char *expanded_class = NULL;
if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) {
const char *found_class = resources_find_service_class(agent);
if (found_class) {
crm_debug("Found %s agent %s for %s", found_class, agent, rsc);
expanded_class = strdup(found_class);
} else {
crm_info("Assuming resource class lsb for agent %s for %s",
agent, rsc);
expanded_class = strdup(PCMK_RESOURCE_CLASS_LSB);
}
} else {
expanded_class = strdup(standard);
}
CRM_ASSERT(expanded_class);
return expanded_class;
}
svc_action_t *
resources_action_create(const char *name, const char *standard, const char *provider,
const char *agent, const char *action, int interval, int timeout,
GHashTable * params, enum svc_action_flags flags)
{
svc_action_t *op = NULL;
/*
* Do some up front sanity checks before we go off and
* build the svc_action_t instance.
*/
if (crm_strlen_zero(name)) {
crm_err("Cannot create operation without resource name");
goto return_error;
}
if (crm_strlen_zero(standard)) {
crm_err("Cannot create operation for %s without resource class", name);
goto return_error;
}
if (crm_provider_required(standard) && crm_strlen_zero(provider)) {
crm_err("Cannot create OCF operation for %s without provider", name);
goto return_error;
}
if (crm_strlen_zero(agent)) {
crm_err("Cannot create operation for %s without agent name", name);
goto return_error;
}
if (crm_strlen_zero(action)) {
crm_err("Cannot create operation for %s without operation name", name);
goto return_error;
}
/*
* Sanity checks passed, proceed!
*/
op = calloc(1, sizeof(svc_action_t));
op->opaque = calloc(1, sizeof(svc_action_private_t));
op->rsc = strdup(name);
op->interval = interval;
op->timeout = timeout;
op->standard = expand_resource_class(name, standard, agent);
op->agent = strdup(agent);
op->sequence = ++operations;
op->flags = flags;
op->id = generate_op_key(name, action, interval);
if (safe_str_eq(action, "monitor") && (
#if SUPPORT_HEARTBEAT
safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_HB) ||
#endif
safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_LSB))) {
action = "status";
}
op->action = strdup(action);
if (crm_provider_required(op->standard)) {
op->provider = strdup(provider);
op->params = params;
params = NULL;
if (asprintf(&op->opaque->exec, "%s/resource.d/%s/%s", OCF_ROOT_DIR, provider, agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
op->opaque->args[1] = strdup(action);
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) {
if (op->agent[0] == '/') {
/* if given an absolute path, use that instead
* of tacking on the LSB_ROOT_DIR path to the front */
op->opaque->exec = strdup(op->agent);
} else if (asprintf(&op->opaque->exec, "%s/%s", LSB_ROOT_DIR, op->agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
op->opaque->args[1] = strdup(op->action);
op->opaque->args[2] = NULL;
#if SUPPORT_HEARTBEAT
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_HB) == 0) {
int index;
int param_num;
char buf_tmp[20];
void *value_tmp;
if (op->agent[0] == '/') {
/* if given an absolute path, use that instead
* of tacking on the HB_RA_DIR path to the front */
op->opaque->exec = strdup(op->agent);
} else if (asprintf(&op->opaque->exec, "%s/%s", HB_RA_DIR, op->agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
/* The "heartbeat" agent class only has positional arguments,
* which we keyed by their decimal position number. */
param_num = 1;
if (params) {
for (index = 1; index <= MAX_ARGC - 3; index++ ) {
snprintf(buf_tmp, sizeof(buf_tmp), "%d", index);
value_tmp = g_hash_table_lookup(params, buf_tmp);
if (value_tmp == NULL) {
/* maybe: strdup("") ??
* But the old lrmd did simply continue as well. */
continue;
}
op->opaque->args[param_num++] = strdup(value_tmp);
}
}
/* Add operation code as the last argument, */
/* and the terminating NULL pointer */
op->opaque->args[param_num++] = strdup(op->action);
op->opaque->args[param_num] = NULL;
#endif
#if SUPPORT_SYSTEMD
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
op->opaque->exec = strdup("systemd-dbus");
#endif
#if SUPPORT_UPSTART
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) {
op->opaque->exec = strdup("upstart-dbus");
#endif
#if SUPPORT_NAGIOS
} else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) {
int index = 0;
if (op->agent[0] == '/') {
/* if given an absolute path, use that instead
* of tacking on the NAGIOS_PLUGIN_DIR path to the front */
op->opaque->exec = strdup(op->agent);
} else if (asprintf(&op->opaque->exec, "%s/%s", NAGIOS_PLUGIN_DIR, op->agent) == -1) {
crm_err("Internal error: cannot create agent path");
goto return_error;
}
op->opaque->args[0] = strdup(op->opaque->exec);
index = 1;
if (safe_str_eq(op->action, "monitor") && op->interval == 0) {
/* Invoke --version for a nagios probe */
op->opaque->args[index] = strdup("--version");
index++;
} else if (params) {
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
static int args_size = sizeof(op->opaque->args) / sizeof(char *);
g_hash_table_iter_init(&iter, params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value) &&
index <= args_size - 3) {
int len = 3;
char *long_opt = NULL;
if (safe_str_eq(key, XML_ATTR_CRM_VERSION) || strstr(key, CRM_META "_")) {
continue;
}
len += strlen(key);
long_opt = calloc(1, len);
sprintf(long_opt, "--%s", key);
long_opt[len - 1] = 0;
op->opaque->args[index] = long_opt;
op->opaque->args[index + 1] = strdup(value);
index += 2;
}
}
op->opaque->args[index] = NULL;
#endif
} else {
crm_err("Unknown resource standard: %s", op->standard);
services_action_free(op);
op = NULL;
}
if(params) {
g_hash_table_destroy(params);
}
return op;
return_error:
if(params) {
g_hash_table_destroy(params);
}
services_action_free(op);
return NULL;
}
svc_action_t *
services_action_create_generic(const char *exec, const char *args[])
{
svc_action_t *op;
unsigned int cur_arg;
op = calloc(1, sizeof(*op));
op->opaque = calloc(1, sizeof(svc_action_private_t));
op->opaque->exec = strdup(exec);
op->opaque->args[0] = strdup(exec);
for (cur_arg = 1; args && args[cur_arg - 1]; cur_arg++) {
op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]);
if (cur_arg == DIMOF(op->opaque->args) - 1) {
crm_err("svc_action_t args list not long enough for '%s' execution request.", exec);
break;
}
}
return op;
}
/*!
* \brief Create an alert agent action
*
* \param[in] id Alert ID
* \param[in] exec Path to alert agent executable
* \param[in] timeout Action timeout
* \param[in] params Parameters to use with action
* \param[in] sequence Action sequence number
* \param[in] cb_data Data to pass to callback function
*
* \return New action on success, NULL on error
* \note It is the caller's responsibility to free cb_data.
* The caller should not free params explicitly.
*/
svc_action_t *
services_alert_create(const char *id, const char *exec, int timeout,
GHashTable *params, int sequence, void *cb_data)
{
svc_action_t *action = services_action_create_generic(exec, NULL);
CRM_ASSERT(action);
action->timeout = timeout;
action->id = strdup(id);
action->params = params;
action->sequence = sequence;
action->cb_data = cb_data;
return action;
}
/*!
* \brief Set the user and group that an action will execute as
*
* \param[in,out] action Action to modify
* \param[in] user Name of user to execute action as
* \param[in] group Name of group to execute action as
*
* \return pcmk_ok on success, -errno otherwise
*
* \note This will have no effect unless the process executing the action runs
* as root, and the action is not a systemd or upstart action.
* We could implement this for systemd by adding User= and Group= to
* [Service] in the override file, but that seems more likely to cause
* problems than be useful.
*/
int
services_action_user(svc_action_t *op, const char *user)
{
CRM_CHECK((op != NULL) && (user != NULL), return -EINVAL);
return crm_user_lookup(user, &(op->opaque->uid), &(op->opaque->gid));
}
static void
set_alert_env(gpointer key, gpointer value, gpointer user_data)
{
int rc;
if (value) {
rc = setenv(key, value, 1);
} else {
rc = unsetenv(key);
}
if (rc < 0) {
crm_perror(LOG_ERR, "setenv %s=%s",
(char*)key, (value? (char*)value : ""));
} else {
crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : ""));
}
}
static void
unset_alert_env(gpointer key, gpointer value, gpointer user_data)
{
if (unsetenv(key) < 0) {
crm_perror(LOG_ERR, "unset %s", (char*)key);
} else {
crm_trace("unset %s", (char*)key);
}
}
/*!
* \brief Execute an alert agent action
*
* \param[in] action Action to execute
* \param[in] cb Function to call when action completes
*
* \return TRUE if the library will free action, FALSE otherwise
*
* \note If this function returns FALSE, it is the caller's responsibility to
* free the action with services_action_free().
*/
gboolean
services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op))
{
gboolean responsible;
action->synchronous = false;
action->opaque->callback = cb;
if (action->params) {
g_hash_table_foreach(action->params, set_alert_env, NULL);
}
responsible = services_os_action_execute(action);
if (action->params) {
g_hash_table_foreach(action->params, unset_alert_env, NULL);
}
return responsible;
}
#if SUPPORT_DBUS
/*!
* \internal
* \brief Update operation's pending DBus call, unreferencing old one if needed
*
* \param[in,out] op Operation to modify
* \param[in] pending Pending call to set
*/
void
services_set_op_pending(svc_action_t *op, DBusPendingCall *pending)
{
if (op->opaque->pending && (op->opaque->pending != pending)) {
if (pending) {
crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending);
} else {
crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending);
}
dbus_pending_call_unref(op->opaque->pending);
}
op->opaque->pending = pending;
if (pending) {
crm_trace("Updated pending %s DBus call (%p)", op->id, pending);
} else {
crm_trace("Cleared pending %s DBus call", op->id);
}
}
#endif
void
services_action_cleanup(svc_action_t * op)
{
if(op->opaque == NULL) {
return;
}
#if SUPPORT_DBUS
if(op->opaque->timerid != 0) {
crm_trace("Removing timer for call %s to %s", op->action, op->rsc);
g_source_remove(op->opaque->timerid);
op->opaque->timerid = 0;
}
if(op->opaque->pending) {
crm_trace("Cleaning up pending dbus call %p %s for %s", op->opaque->pending, op->action, op->rsc);
if(dbus_pending_call_get_completed(op->opaque->pending)) {
crm_warn("Pending dbus call %s for %s did not complete", op->action, op->rsc);
}
dbus_pending_call_cancel(op->opaque->pending);
dbus_pending_call_unref(op->opaque->pending);
op->opaque->pending = NULL;
}
#endif
if (op->opaque->stderr_gsource) {
mainloop_del_fd(op->opaque->stderr_gsource);
op->opaque->stderr_gsource = NULL;
}
if (op->opaque->stdout_gsource) {
mainloop_del_fd(op->opaque->stdout_gsource);
op->opaque->stdout_gsource = NULL;
}
}
void
services_action_free(svc_action_t * op)
{
unsigned int i;
if (op == NULL) {
return;
}
/* The operation should be removed from all tracking lists by this point.
* If it's not, we have a bug somewhere, so bail. That may lead to a
* memory leak, but it's better than a use-after-free segmentation fault.
*/
CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return);
CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return);
CRM_CHECK((recurring_actions == NULL)
|| (g_hash_table_lookup(recurring_actions, op->id) == NULL),
return);
services_action_cleanup(op);
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
free(op->id);
free(op->opaque->exec);
for (i = 0; i < DIMOF(op->opaque->args); i++) {
free(op->opaque->args[i]);
}
free(op->opaque);
free(op->rsc);
free(op->action);
free(op->standard);
free(op->agent);
free(op->provider);
free(op->stdout_data);
free(op->stderr_data);
if (op->params) {
g_hash_table_destroy(op->params);
op->params = NULL;
}
free(op);
}
gboolean
cancel_recurring_action(svc_action_t * op)
{
crm_info("Cancelling %s operation %s", op->standard, op->id);
if (recurring_actions) {
g_hash_table_remove(recurring_actions, op->id);
}
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
return TRUE;
}
/*!
* \brief Cancel a recurring action
*
* \param[in] name Name of resource that operation is for
* \param[in] action Name of operation to cancel
* \param[in] interval Interval of operation to cancel
*
* \return TRUE if action was successfully cancelled, FALSE otherwise
*/
gboolean
services_action_cancel(const char *name, const char *action, int interval)
{
gboolean cancelled = FALSE;
char *id = generate_op_key(name, action, interval);
svc_action_t *op = NULL;
/* We can only cancel a recurring action */
init_recurring_actions();
op = g_hash_table_lookup(recurring_actions, id);
if (op == NULL) {
goto done;
}
/* Tell operation_finalize() not to reschedule the operation */
op->cancel = TRUE;
/* Stop tracking it as a recurring operation, and stop its timer */
cancel_recurring_action(op);
/* If the op has a PID, it's an in-flight child process, so kill it.
*
* Whether the kill succeeds or fails, the main loop will send the op to
* operation_finished() (and thus operation_finalize()) when the process
* goes away.
*/
if (op->pid != 0) {
crm_info("Terminating in-flight op %s (pid %d) early because it was cancelled",
id, op->pid);
cancelled = mainloop_child_kill(op->pid);
if (cancelled == FALSE) {
crm_err("Termination of %s (pid %d) failed", id, op->pid);
}
goto done;
}
/* In-flight systemd and upstart ops don't have a pid. The relevant handlers
* will call operation_finalize() when the operation completes.
* @TODO: Can we request early termination, maybe using
* dbus_pending_call_cancel()?
*/
if (inflight_systemd_or_upstart(op)) {
crm_info("Will cancel %s op %s when in-flight instance completes",
op->standard, op->id);
cancelled = FALSE;
goto done;
}
/* Otherwise, operation is not in-flight, just report as cancelled */
op->status = PCMK_LRM_OP_CANCELLED;
if (op->opaque->callback) {
op->opaque->callback(op);
}
blocked_ops = g_list_remove(blocked_ops, op);
services_action_free(op);
cancelled = TRUE;
done:
free(id);
return cancelled;
}
gboolean
services_action_kick(const char *name, const char *action, int interval /* ms */)
{
svc_action_t * op = NULL;
char *id = generate_op_key(name, action, interval);
init_recurring_actions();
op = g_hash_table_lookup(recurring_actions, id);
free(id);
if (op == NULL) {
return FALSE;
}
if (op->pid || inflight_systemd_or_upstart(op)) {
return TRUE;
} else {
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
recurring_action_timer(op);
return TRUE;
}
}
/*!
* \internal
* \brief Add a new recurring operation, checking for duplicates
*
* \param[in] op Operation to add
*
* \return TRUE if duplicate found (and reschedule), FALSE otherwise
*/
static gboolean
handle_duplicate_recurring(svc_action_t * op)
{
svc_action_t * dup = NULL;
/* check for duplicates */
dup = g_hash_table_lookup(recurring_actions, op->id);
if (dup && (dup != op)) {
/* update user data */
if (op->opaque->callback) {
dup->opaque->callback = op->opaque->callback;
dup->cb_data = op->cb_data;
op->cb_data = NULL;
}
/* immediately execute the next interval */
if (dup->pid != 0) {
if (op->opaque->repeat_timer) {
g_source_remove(op->opaque->repeat_timer);
op->opaque->repeat_timer = 0;
}
recurring_action_timer(dup);
}
/* free the duplicate */
services_action_free(op);
return TRUE;
}
return FALSE;
}
inline static gboolean
action_exec_helper(svc_action_t * op)
{
/* Whether a/synchronous must be decided (op->synchronous) beforehand. */
if (op->standard
&& (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_UPSTART) == 0)) {
#if SUPPORT_UPSTART
return upstart_job_exec(op);
#endif
} else if (op->standard && strcasecmp(op->standard,
PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
#if SUPPORT_SYSTEMD
return systemd_unit_exec(op);
#endif
} else {
return services_os_action_execute(op);
}
/* The 'op' has probably been freed if the execution functions return TRUE
for the asynchronous 'op'. */
/* Avoid using the 'op' in here. */
return FALSE;
}
void
services_add_inflight_op(svc_action_t * op)
{
if (op == NULL) {
return;
}
CRM_ASSERT(op->synchronous == FALSE);
/* keep track of ops that are in-flight to avoid collisions in the same namespace */
if (op->rsc) {
inflight_ops = g_list_append(inflight_ops, op);
}
}
/*!
* \internal
* \brief Stop tracking an operation that completed
*
* \param[in] op Operation to stop tracking
*/
void
services_untrack_op(svc_action_t *op)
{
/* Op is no longer in-flight or blocked */
inflight_ops = g_list_remove(inflight_ops, op);
blocked_ops = g_list_remove(blocked_ops, op);
/* Op is no longer blocking other ops, so check if any need to run */
handle_blocked_ops();
}
gboolean
services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *))
{
op->synchronous = false;
if (action_callback) {
op->opaque->callback = action_callback;
}
if (op->interval > 0) {
init_recurring_actions();
if (handle_duplicate_recurring(op) == TRUE) {
/* entry rescheduled, dup freed */
/* exit early */
return TRUE;
}
g_hash_table_replace(recurring_actions, op->id, op);
}
if (op->rsc && is_op_blocked(op->rsc)) {
blocked_ops = g_list_append(blocked_ops, op);
return TRUE;
}
return action_exec_helper(op);
}
static gboolean processing_blocked_ops = FALSE;
gboolean
is_op_blocked(const char *rsc)
{
GList *gIter = NULL;
svc_action_t *op = NULL;
for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
if (safe_str_eq(op->rsc, rsc)) {
return TRUE;
}
}
return FALSE;
}
static void
handle_blocked_ops(void)
{
GList *executed_ops = NULL;
GList *gIter = NULL;
svc_action_t *op = NULL;
gboolean res = FALSE;
if (processing_blocked_ops) {
/* avoid nested calling of this function */
return;
}
processing_blocked_ops = TRUE;
/* n^2 operation here, but blocked ops are incredibly rare. this list
* will be empty 99% of the time. */
for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
if (is_op_blocked(op->rsc)) {
continue;
}
executed_ops = g_list_append(executed_ops, op);
res = action_exec_helper(op);
if (res == FALSE) {
op->status = PCMK_LRM_OP_ERROR;
/* this can cause this function to be called recursively
* which is why we have processing_blocked_ops static variable */
operation_finalize(op);
}
}
for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) {
op = gIter->data;
blocked_ops = g_list_remove(blocked_ops, op);
}
g_list_free(executed_ops);
processing_blocked_ops = FALSE;
}
#define lsb_metadata_template \
"<?xml version='1.0'?>\n" \
"<!DOCTYPE resource-agent SYSTEM 'ra-api-1.dtd'>\n" \
"<resource-agent name='%s' version='" PCMK_DEFAULT_AGENT_VERSION "'>\n" \
" <version>1.0</version>\n" \
" <longdesc lang='en'>\n" \
"%s" \
" </longdesc>\n" \
" <shortdesc lang='en'>%s</shortdesc>\n" \
" <parameters>\n" \
" </parameters>\n" \
" <actions>\n" \
" <action name='meta-data' timeout='5' />\n" \
" <action name='start' timeout='15' />\n" \
" <action name='stop' timeout='15' />\n" \
" <action name='status' timeout='15' />\n" \
" <action name='restart' timeout='15' />\n" \
" <action name='force-reload' timeout='15' />\n" \
" <action name='monitor' timeout='15' interval='15' />\n" \
" </actions>\n" \
" <special tag='LSB'>\n" \
" <Provides>%s</Provides>\n" \
" <Required-Start>%s</Required-Start>\n" \
" <Required-Stop>%s</Required-Stop>\n" \
" <Should-Start>%s</Should-Start>\n" \
" <Should-Stop>%s</Should-Stop>\n" \
" <Default-Start>%s</Default-Start>\n" \
" <Default-Stop>%s</Default-Stop>\n" \
" </special>\n" \
"</resource-agent>\n"
/* See "Comment Conventions for Init Scripts" in the LSB core specification at:
* http://refspecs.linuxfoundation.org/lsb.shtml
*/
#define LSB_INITSCRIPT_INFOBEGIN_TAG "### BEGIN INIT INFO"
#define LSB_INITSCRIPT_INFOEND_TAG "### END INIT INFO"
#define PROVIDES "# Provides:"
#define REQ_START "# Required-Start:"
#define REQ_STOP "# Required-Stop:"
#define SHLD_START "# Should-Start:"
#define SHLD_STOP "# Should-Stop:"
#define DFLT_START "# Default-Start:"
#define DFLT_STOP "# Default-Stop:"
#define SHORT_DSCR "# Short-Description:"
#define DESCRIPTION "# Description:"
#define lsb_meta_helper_free_value(m) \
do { \
if ((m) != NULL) { \
xmlFree(m); \
(m) = NULL; \
} \
} while(0)
/*!
* \internal
* \brief Grab an LSB header value
*
* \param[in] line Line read from LSB init script
* \param[in,out] value If not set, will be set to XML-safe copy of value
* \param[in] prefix Set value if line starts with this pattern
*
* \return TRUE if value was set, FALSE otherwise
*/
static inline gboolean
lsb_meta_helper_get_value(const char *line, char **value, const char *prefix)
{
- if (!*value && !strncmp(line, prefix, strlen(prefix))) {
+ if (!*value && crm_starts_with(line, prefix)) {
*value = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST line+strlen(prefix));
return TRUE;
}
return FALSE;
}
#define DESC_MAX 2048
static int
lsb_get_metadata(const char *type, char **output)
{
char ra_pathname[PATH_MAX] = { 0, };
FILE *fp = NULL;
char buffer[1024] = { 0, };
char *provides = NULL;
char *req_start = NULL;
char *req_stop = NULL;
char *shld_start = NULL;
char *shld_stop = NULL;
char *dflt_start = NULL;
char *dflt_stop = NULL;
char *s_dscrpt = NULL;
char *xml_l_dscrpt = NULL;
int offset = 0;
bool in_header = FALSE;
char description[DESC_MAX] = { 0, };
if (type[0] == '/') {
snprintf(ra_pathname, sizeof(ra_pathname), "%s", type);
} else {
snprintf(ra_pathname, sizeof(ra_pathname), "%s/%s",
LSB_ROOT_DIR, type);
}
crm_trace("Looking into %s", ra_pathname);
fp = fopen(ra_pathname, "r");
if (fp == NULL) {
return -errno;
}
/* Enter into the LSB-compliant comment block */
while (fgets(buffer, sizeof(buffer), fp)) {
// Ignore lines up to and including the block delimiter
- if (!strncmp(buffer, LSB_INITSCRIPT_INFOBEGIN_TAG,
- strlen(LSB_INITSCRIPT_INFOBEGIN_TAG))) {
+ if (crm_starts_with(buffer, LSB_INITSCRIPT_INFOBEGIN_TAG)) {
in_header = TRUE;
continue;
}
if (!in_header) {
continue;
}
/* Assume each of the following eight arguments contain one line */
if (lsb_meta_helper_get_value(buffer, &provides, PROVIDES)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &req_start, REQ_START)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &req_stop, REQ_STOP)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &shld_start, SHLD_START)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &shld_stop, SHLD_STOP)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &dflt_start, DFLT_START)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &dflt_stop, DFLT_STOP)) {
continue;
}
if (lsb_meta_helper_get_value(buffer, &s_dscrpt, SHORT_DSCR)) {
continue;
}
/* Long description may cross multiple lines */
if ((offset == 0) // haven't already found long description
- && !strncmp(buffer, DESCRIPTION, strlen(DESCRIPTION))) {
+ && crm_starts_with(buffer, DESCRIPTION)) {
bool processed_line = TRUE;
// Get remainder of description line itself
offset += snprintf(description, DESC_MAX, "%s",
buffer + strlen(DESCRIPTION));
// Read any continuation lines of the description
buffer[0] = '\0';
while (fgets(buffer, sizeof(buffer), fp)) {
- if (!strncmp(buffer, "# ", 3) || !strncmp(buffer, "#\t", 2)) {
+ if (crm_starts_with(buffer, "# ")
+ || crm_starts_with(buffer, "#\t")) {
/* '#' followed by a tab or more than one space indicates a
* continuation of the long description.
*/
offset += snprintf(description + offset, DESC_MAX - offset,
"%s", buffer + 1);
} else {
/* This line is not part of the long description,
* so continue with normal processing.
*/
processed_line = FALSE;
break;
}
}
// Make long description safe to use in XML
xml_l_dscrpt = (char *)xmlEncodeEntitiesReentrant(NULL, BAD_CAST(description));
if (processed_line) {
// We grabbed the line into the long description
continue;
}
}
// Stop if we leave the header block
- if (!strncmp(buffer, LSB_INITSCRIPT_INFOEND_TAG,
- strlen(LSB_INITSCRIPT_INFOEND_TAG))) {
+ if (crm_starts_with(buffer, LSB_INITSCRIPT_INFOEND_TAG)) {
break;
}
if (buffer[0] != '#') {
break;
}
}
fclose(fp);
*output = crm_strdup_printf(lsb_metadata_template, type,
(xml_l_dscrpt? xml_l_dscrpt : type),
(s_dscrpt? s_dscrpt : type),
(provides? provides : ""),
(req_start? req_start : ""),
(req_stop? req_stop : ""),
(shld_start? shld_start : ""),
(shld_stop? shld_stop : ""),
(dflt_start? dflt_start : ""),
(dflt_stop? dflt_stop : ""));
lsb_meta_helper_free_value(xml_l_dscrpt);
lsb_meta_helper_free_value(s_dscrpt);
lsb_meta_helper_free_value(provides);
lsb_meta_helper_free_value(req_start);
lsb_meta_helper_free_value(req_stop);
lsb_meta_helper_free_value(shld_start);
lsb_meta_helper_free_value(shld_stop);
lsb_meta_helper_free_value(dflt_start);
lsb_meta_helper_free_value(dflt_stop);
crm_trace("Created fake metadata: %llu",
(unsigned long long) strlen(*output));
return pcmk_ok;
}
#if SUPPORT_NAGIOS
static int
nagios_get_metadata(const char *type, char **output)
{
int rc = pcmk_ok;
FILE *file_strm = NULL;
int start = 0, length = 0, read_len = 0;
char *metadata_file = NULL;
int len = 36;
len += strlen(NAGIOS_METADATA_DIR);
len += strlen(type);
metadata_file = calloc(1, len);
CRM_CHECK(metadata_file != NULL, return -ENOMEM);
sprintf(metadata_file, "%s/%s.xml", NAGIOS_METADATA_DIR, type);
file_strm = fopen(metadata_file, "r");
if (file_strm == NULL) {
crm_err("Metadata file %s does not exist", metadata_file);
free(metadata_file);
return -EIO;
}
/* see how big the file is */
start = ftell(file_strm);
fseek(file_strm, 0L, SEEK_END);
length = ftell(file_strm);
fseek(file_strm, 0L, start);
CRM_ASSERT(length >= 0);
CRM_ASSERT(start == ftell(file_strm));
if (length <= 0) {
crm_info("%s was not valid", metadata_file);
free(*output);
*output = NULL;
rc = -EIO;
} else {
crm_trace("Reading %d bytes from file", length);
*output = calloc(1, (length + 1));
read_len = fread(*output, 1, length, file_strm);
if (read_len != length) {
crm_err("Calculated and read bytes differ: %d vs. %d",
length, read_len);
free(*output);
*output = NULL;
rc = -EIO;
}
}
fclose(file_strm);
free(metadata_file);
return rc;
}
#endif
#if SUPPORT_HEARTBEAT
/* strictly speaking, support for class=heartbeat style scripts
* does not require "heartbeat support" to be enabled.
* But since those scripts are part of the "heartbeat" package usually,
* and are very unlikely to be present in any other deployment,
* I leave it inside this ifdef.
*
* Yes, I know, these are legacy and should die,
* or at least be rewritten to be a proper OCF style agent.
* But they exist, and custom scripts following these rules do, too.
*
* Taken from the old "glue" lrmd, see
* http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l49
* http://hg.linux-ha.org/glue/file/0a7add1d9996/lib/plugins/lrm/raexechb.c#l393
*/
static const char hb_metadata_template[] =
"<?xml version='1.0'?>\n"
"<!DOCTYPE resource-agent SYSTEM 'ra-api-1.dtd'>\n"
"<resource-agent name='%s' version='" PCMK_DEFAULT_AGENT_VERSION "'>\n"
"<version>1.0</version>\n"
"<longdesc lang='en'>\n"
"%s"
"</longdesc>\n"
"<shortdesc lang='en'>%s</shortdesc>\n"
"<parameters>\n"
"<parameter name='1' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the first argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[1]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"<parameter name='2' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the second argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[2]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"<parameter name='3' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the third argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[3]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"<parameter name='4' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the fourth argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[4]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"<parameter name='5' unique='1' required='0'>\n"
"<longdesc lang='en'>\n"
"This argument will be passed as the fifth argument to the "
"heartbeat resource agent (assuming it supports one)\n"
"</longdesc>\n"
"<shortdesc lang='en'>argv[5]</shortdesc>\n"
"<content type='string' default=' ' />\n"
"</parameter>\n"
"</parameters>\n"
"<actions>\n"
"<action name='start' timeout='15' />\n"
"<action name='stop' timeout='15' />\n"
"<action name='status' timeout='15' />\n"
"<action name='monitor' timeout='15' interval='15' start-delay='15' />\n"
"<action name='meta-data' timeout='5' />\n"
"</actions>\n"
"<special tag='heartbeat'>\n"
"</special>\n"
"</resource-agent>\n";
static int
heartbeat_get_metadata(const char *type, char **output)
{
*output = crm_strdup_printf(hb_metadata_template, type, type, type);
crm_trace("Created fake metadata: %llu",
(unsigned long long) strlen(*output));
return pcmk_ok;
}
#endif
static gboolean
action_get_metadata(svc_action_t *op)
{
const char *class = op->standard;
if (op->agent == NULL) {
crm_err("meta-data requested without specifying agent");
return FALSE;
}
if (class == NULL) {
crm_err("meta-data requested for agent %s without specifying class",
op->agent);
return FALSE;
}
if (!strcmp(class, PCMK_RESOURCE_CLASS_SERVICE)) {
class = resources_find_service_class(op->agent);
}
if (class == NULL) {
crm_err("meta-data requested for %s, but could not determine class",
op->agent);
return FALSE;
}
if (safe_str_eq(class, PCMK_RESOURCE_CLASS_LSB)) {
return (lsb_get_metadata(op->agent, &op->stdout_data) >= 0);
}
#if SUPPORT_NAGIOS
if (safe_str_eq(class, PCMK_RESOURCE_CLASS_NAGIOS)) {
return (nagios_get_metadata(op->agent, &op->stdout_data) >= 0);
}
#endif
#if SUPPORT_HEARTBEAT
if (safe_str_eq(class, PCMK_RESOURCE_CLASS_HB)) {
return (heartbeat_get_metadata(op->agent, &op->stdout_data) >= 0);
}
#endif
return action_exec_helper(op);
}
gboolean
services_action_sync(svc_action_t * op)
{
gboolean rc = TRUE;
if (op == NULL) {
crm_trace("No operation to execute");
return FALSE;
}
op->synchronous = true;
if (safe_str_eq(op->action, "meta-data")) {
/* Synchronous meta-data operations are handled specially. Since most
* resource classes don't provide any meta-data, it has to be
* synthesized from available information about the agent.
*
* services_action_async() doesn't treat meta-data actions specially, so
* it will result in an error for classes that don't support the action.
*/
rc = action_get_metadata(op);
} else {
rc = action_exec_helper(op);
}
crm_trace(" > %s_%s_%d: %s = %d",
op->rsc, op->action, op->interval, op->opaque->exec, op->rc);
if (op->stdout_data) {
crm_trace(" > stdout: %s", op->stdout_data);
}
if (op->stderr_data) {
crm_trace(" > stderr: %s", op->stderr_data);
}
return rc;
}
GList *
get_directory_list(const char *root, gboolean files, gboolean executable)
{
return services_os_get_directory_list(root, files, executable);
}
GList *
services_list(void)
{
return resources_list_agents(PCMK_RESOURCE_CLASS_LSB, NULL);
}
#if SUPPORT_HEARTBEAT
static GList *
resources_os_list_hb_agents(void)
{
return services_os_get_directory_list(HB_RA_DIR, TRUE, TRUE);
}
#endif
GList *
resources_list_standards(void)
{
GList *standards = NULL;
GList *agents = NULL;
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_OCF));
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_LSB));
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SERVICE));
#if SUPPORT_SYSTEMD
agents = systemd_unit_listall();
if (agents) {
standards = g_list_append(standards,
strdup(PCMK_RESOURCE_CLASS_SYSTEMD));
g_list_free_full(agents, free);
}
#endif
#if SUPPORT_UPSTART
agents = upstart_job_listall();
if (agents) {
standards = g_list_append(standards,
strdup(PCMK_RESOURCE_CLASS_UPSTART));
g_list_free_full(agents, free);
}
#endif
#if SUPPORT_NAGIOS
agents = resources_os_list_nagios_agents();
if (agents) {
standards = g_list_append(standards,
strdup(PCMK_RESOURCE_CLASS_NAGIOS));
g_list_free_full(agents, free);
}
#endif
#if SUPPORT_HEARTBEAT
standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_HB));
#endif
return standards;
}
GList *
resources_list_providers(const char *standard)
{
if (crm_provider_required(standard)) {
return resources_os_list_ocf_providers();
}
return NULL;
}
GList *
resources_list_agents(const char *standard, const char *provider)
{
if ((standard == NULL)
|| (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0)) {
GList *tmp1;
GList *tmp2;
GList *result = resources_os_list_lsb_agents();
if (standard == NULL) {
tmp1 = result;
tmp2 = resources_os_list_ocf_agents(NULL);
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
}
#if SUPPORT_SYSTEMD
tmp1 = result;
tmp2 = systemd_unit_listall();
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
#endif
#if SUPPORT_UPSTART
tmp1 = result;
tmp2 = upstart_job_listall();
if (tmp2) {
result = g_list_concat(tmp1, tmp2);
}
#endif
return result;
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) {
return resources_os_list_ocf_agents(provider);
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) == 0) {
return resources_os_list_lsb_agents();
#if SUPPORT_HEARTBEAT
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_HB) == 0) {
return resources_os_list_hb_agents();
#endif
#if SUPPORT_SYSTEMD
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) {
return systemd_unit_listall();
#endif
#if SUPPORT_UPSTART
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_UPSTART) == 0) {
return upstart_job_listall();
#endif
#if SUPPORT_NAGIOS
} else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_NAGIOS) == 0) {
return resources_os_list_nagios_agents();
#endif
}
return NULL;
}
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
index 6a6cc0dc3d..49bc0c283d 100644
--- a/lrmd/lrmd.c
+++ b/lrmd/lrmd.c
@@ -1,1707 +1,1706 @@
/*
* Copyright (c) 2012 David Vossel <davidvossel@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipcs.h>
#include <crm/msg_xml.h>
#include <lrmd_private.h>
#ifdef HAVE_SYS_TIMEB_H
# include <sys/timeb.h>
#endif
#define EXIT_REASON_MAX_LEN 128
GHashTable *rsc_list = NULL;
typedef struct lrmd_cmd_s {
int timeout;
int interval;
int start_delay;
int timeout_orig;
int call_id;
int exec_rc;
int lrmd_op_status;
int call_opts;
/* Timer ids, must be removed on cmd destruction. */
int delay_id;
int stonith_recurring_id;
int rsc_deleted;
int service_flags;
char *client_id;
char *origin;
char *rsc_id;
char *action;
char *real_action;
char *exit_reason;
char *output;
char *userdata_str;
/* when set, this cmd should go through a container wrapper */
const char *isolation_wrapper;
#ifdef HAVE_SYS_TIMEB_H
/* recurring and systemd operations may involve more than one lrmd command
* per operation, so they need info about original and most recent
*/
struct timeb t_first_run; /* Timestamp of when op first ran */
struct timeb t_run; /* Timestamp of when op most recently ran */
struct timeb t_first_queue; /* Timestamp of when op first was queued */
struct timeb t_queue; /* Timestamp of when op most recently was queued */
struct timeb t_rcchange; /* Timestamp of last rc change */
#endif
int first_notify_sent;
int last_notify_rc;
int last_notify_op_status;
int last_pid;
GHashTable *params;
} lrmd_cmd_t;
static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
static gboolean lrmd_rsc_dispatch(gpointer user_data);
static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
static void
log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
{
char pid_str[32] = { 0, };
int log_level = LOG_INFO;
if (cmd->last_pid) {
snprintf(pid_str, 32, "%d", cmd->last_pid);
}
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
#ifdef HAVE_SYS_TIMEB_H
do_crm_log(log_level,
"finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms",
cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str,
cmd->exec_rc, exec_time, queue_time);
#else
do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d",
cmd->rsc_id,
cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc);
#endif
}
static void
log_execute(lrmd_cmd_t * cmd)
{
int log_level = LOG_INFO;
if (safe_str_eq(cmd->action, "monitor")) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
cmd->rsc_id, cmd->action, cmd->call_id);
}
static const char *
normalize_action_name(lrmd_rsc_t * rsc, const char *action)
{
if (safe_str_eq(action, "monitor") &&
(safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_LSB) ||
safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)
|| safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SYSTEMD))) {
return "status";
}
return action;
}
static lrmd_rsc_t *
build_rsc_from_xml(xmlNode * msg)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_rsc_t *rsc = NULL;
rsc = calloc(1, sizeof(lrmd_rsc_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
return rsc;
}
static lrmd_cmd_t *
create_lrmd_cmd(xmlNode * msg, crm_client_t * client, lrmd_rsc_t *rsc)
{
int call_options = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
lrmd_cmd_t *cmd = NULL;
cmd = calloc(1, sizeof(lrmd_cmd_t));
crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
cmd->call_opts = call_options;
cmd->client_id = strdup(client->id);
crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval);
crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
cmd->timeout_orig = cmd->timeout;
cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
cmd->params = xml2list(rsc_xml);
cmd->isolation_wrapper = g_hash_table_lookup(cmd->params, "CRM_meta_isolation_wrapper");
if (cmd->isolation_wrapper) {
if (g_hash_table_lookup(cmd->params, "CRM_meta_isolation_instance") == NULL) {
g_hash_table_insert(cmd->params, strdup("CRM_meta_isolation_instance"), strdup(rsc->rsc_id));
}
if (rsc->provider) {
g_hash_table_insert(cmd->params, strdup("CRM_meta_provider"), strdup(rsc->provider));
}
g_hash_table_insert(cmd->params, strdup("CRM_meta_class"), strdup(rsc->class));
g_hash_table_insert(cmd->params, strdup("CRM_meta_type"), strdup(rsc->type));
}
if (safe_str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block")) {
crm_debug("Setting flag to leave pid group on timeout and only kill action pid for %s_%s_%d", cmd->rsc_id, cmd->action, cmd->interval);
cmd->service_flags |= SVC_ACTION_LEAVE_GROUP;
}
return cmd;
}
static void
free_lrmd_cmd(lrmd_cmd_t * cmd)
{
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->params) {
g_hash_table_destroy(cmd->params);
}
free(cmd->origin);
free(cmd->action);
free(cmd->real_action);
free(cmd->userdata_str);
free(cmd->rsc_id);
free(cmd->output);
free(cmd->exit_reason);
free(cmd->client_id);
free(cmd);
}
static gboolean
stonith_recurring_op_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc;
cmd->stonith_recurring_id = 0;
if (!cmd->rsc_id) {
return FALSE;
}
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
CRM_ASSERT(rsc != NULL);
/* take it out of recurring_ops list, and put it in the pending ops
* to be executed */
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
if (cmd->t_first_queue.time == 0) {
cmd->t_first_queue = cmd->t_queue;
}
#endif
mainloop_set_trigger(rsc->work);
return FALSE;
}
static gboolean
start_delay_helper(gpointer data)
{
lrmd_cmd_t *cmd = data;
lrmd_rsc_t *rsc = NULL;
cmd->delay_id = 0;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc) {
mainloop_set_trigger(rsc->work);
}
return FALSE;
}
static gboolean
merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
GListPtr gIter = NULL;
lrmd_cmd_t * dup = NULL;
gboolean dup_pending = FALSE;
if (cmd->interval == 0) {
return 0;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
dup = gIter->data;
if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) {
dup_pending = TRUE;
goto merge_dup;
}
}
/* if dup is in recurring_ops list, that means it has already executed
* and is in the interval loop. we can't just remove it in this case. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
dup = gIter->data;
if (safe_str_eq(cmd->action, dup->action) && cmd->interval == dup->interval) {
goto merge_dup;
}
}
return FALSE;
merge_dup:
/* This should not occur, if it does we need to investigate in the crmd
* how something like this is possible */
crm_warn("Duplicate recurring op entry detected (%s_%s_%d), merging with previous op entry",
rsc->rsc_id,
normalize_action_name(rsc, dup->action),
dup->interval);
/* merge */
dup->first_notify_sent = 0;
free(dup->userdata_str);
dup->userdata_str = cmd->userdata_str;
cmd->userdata_str = NULL;
dup->call_id = cmd->call_id;
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* if we are waiting for the next interval, kick it off now */
if (dup_pending == TRUE) {
g_source_remove(cmd->stonith_recurring_id);
cmd->stonith_recurring_id = 0;
stonith_recurring_op_helper(cmd);
}
} else if (dup_pending == FALSE) {
/* if we've already handed this to the service lib, kick off an early execution */
services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval);
}
free_lrmd_cmd(cmd);
return TRUE;
}
static void
schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
gboolean dup_processed = FALSE;
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(rsc != NULL, return);
crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
dup_processed = merge_recurring_duplicate(rsc, cmd);
if (dup_processed) {
/* duplicate recurring cmd found, cmds merged */
return;
}
/* crmd expects lrmd to automatically cancel recurring ops before rsc stops. */
if (rsc && safe_str_eq(cmd->action, "stop")) {
cancel_all_recurring(rsc, NULL);
}
rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
#ifdef HAVE_SYS_TIMEB_H
ftime(&cmd->t_queue);
if (cmd->t_first_queue.time == 0) {
cmd->t_first_queue = cmd->t_queue;
}
#endif
mainloop_set_trigger(rsc->work);
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
}
static void
send_reply(crm_client_t * client, int rc, uint32_t id, int call_id)
{
int send_rc = 0;
xmlNode *reply = NULL;
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
send_rc = lrmd_server_send_reply(client, id, reply);
free_xml(reply);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
}
static void
send_client_notify(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
crm_client_t *client = value;
if (client == NULL) {
crm_err("Asked to send event to NULL client");
return;
} else if (client->name == NULL) {
crm_trace("Asked to send event to client with no name");
return;
}
if (lrmd_server_send_notify(client, update_msg) <= 0) {
crm_warn("Notification of client %s/%s failed", client->name, client->id);
}
}
#ifdef HAVE_SYS_TIMEB_H
/*!
* \internal
* \brief Return difference between two times in milliseconds
*
* \param[in] now More recent time (or NULL to use current time)
* \param[in] old Earlier time
*
* \return milliseconds difference (or 0 if old is NULL or has time zero)
*/
static int
time_diff_ms(struct timeb *now, struct timeb *old)
{
struct timeb local_now = { 0, };
if (now == NULL) {
ftime(&local_now);
now = &local_now;
}
if ((old == NULL) || (old->time == 0)) {
return 0;
}
return difftime(now->time, old->time) * 1000 + now->millitm - old->millitm;
}
/*!
* \internal
* \brief Reset a command's operation times to their original values.
*
* Reset a command's run and queued timestamps to the timestamps of the original
* command, so we report the entire time since then and not just the time since
* the most recent command (for recurring and systemd operations).
*
* /param[in] cmd LRMD command object to reset
*
* /note It's not obvious what the queued time should be for a systemd
* start/stop operation, which might go like this:
* initial command queued 5ms, runs 3s
* monitor command queued 10ms, runs 10s
* monitor command queued 10ms, runs 10s
* Is the queued time for that operation 5ms, 10ms or 25ms? The current
* implementation will report 5ms. If it's 25ms, then we need to
* subtract 20ms from the total exec time so as not to count it twice.
* We can implement that later if it matters to anyone ...
*/
static void
cmd_original_times(lrmd_cmd_t * cmd)
{
cmd->t_run = cmd->t_first_run;
cmd->t_queue = cmd->t_first_queue;
}
#endif
static void
send_cmd_complete_notify(lrmd_cmd_t * cmd)
{
int exec_time = 0;
int queue_time = 0;
xmlNode *notify = NULL;
#ifdef HAVE_SYS_TIMEB_H
exec_time = time_diff_ms(NULL, &cmd->t_run);
queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue);
#endif
log_finished(cmd, exec_time, queue_time);
/* if the first notify result for a cmd has already been sent earlier, and the
* the option to only send notifies on result changes is set. Check to see
* if the last result is the same as the new one. If so, suppress this update */
if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
if (cmd->last_notify_rc == cmd->exec_rc &&
cmd->last_notify_op_status == cmd->lrmd_op_status) {
/* only send changes */
return;
}
}
cmd->first_notify_sent = 1;
cmd->last_notify_rc = cmd->exec_rc;
cmd->last_notify_op_status = cmd->lrmd_op_status;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
crm_xml_add_int(notify, F_LRMD_RSC_INTERVAL, cmd->interval);
crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
#ifdef HAVE_SYS_TIMEB_H
crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time);
crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time);
crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
#endif
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
if(cmd->real_action) {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
} else {
crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
}
crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason);
if (cmd->params) {
char *key = NULL;
char *value = NULL;
GHashTableIter iter;
xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
hash2smartfield((gpointer) key, (gpointer) value, args);
}
}
if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
crm_client_t *client = crm_client_get_by_id(cmd->client_id);
if (client) {
send_client_notify(client->id, client, notify);
}
} else if (client_connections != NULL) {
g_hash_table_foreach(client_connections, send_client_notify, notify);
}
free_xml(notify);
}
static void
send_generic_notify(int rc, xmlNode * request)
{
if (client_connections != NULL) {
int call_id = 0;
xmlNode *notify = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *op = crm_element_value(request, F_LRMD_OPERATION);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(notify, F_LRMD_RC, rc);
crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
crm_xml_add(notify, F_LRMD_OPERATION, op);
crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
g_hash_table_foreach(client_connections, send_client_notify, notify);
free_xml(notify);
}
}
static void
cmd_reset(lrmd_cmd_t * cmd)
{
cmd->lrmd_op_status = 0;
cmd->last_pid = 0;
memset(&cmd->t_run, 0, sizeof(cmd->t_run));
memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
free(cmd->exit_reason);
cmd->exit_reason = NULL;
free(cmd->output);
cmd->output = NULL;
}
static void
cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
{
crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
rsc ? rsc->active : NULL, cmd);
if (rsc && (rsc->active == cmd)) {
rsc->active = NULL;
mainloop_set_trigger(rsc->work);
}
if (!rsc) {
cmd->rsc_deleted = 1;
}
/* reset original timeout so client notification has correct information */
cmd->timeout = cmd->timeout_orig;
send_cmd_complete_notify(cmd);
if (cmd->interval && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
if (rsc) {
rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else if (cmd->interval == 0) {
if (rsc) {
rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
}
free_lrmd_cmd(cmd);
} else {
/* Clear all the values pertaining just to the last iteration of a recurring op. */
cmd_reset(cmd);
}
}
#if SUPPORT_HEARTBEAT
static int pattern_matched(const char *pat, const char *str)
{
if (g_pattern_match_simple(pat, str)) {
crm_debug("RA output matched stopped pattern [%s]", pat);
return TRUE;
}
return FALSE;
}
static int
hb2uniform_rc(const char *action, int rc, const char *stdout_data)
{
const char *stop_pattern[] = { "*stopped*", "*not*running*" };
const char *running_pattern[] = { "*running*", "*OK*" };
char *lower_std_output = NULL;
int result;
if (rc < 0) {
return PCMK_OCF_UNKNOWN_ERROR;
}
/* Treat class heartbeat the same as class lsb. */
if (!safe_str_eq(action, "status") && !safe_str_eq(action, "monitor")) {
return services_get_ocf_exitcode(action, rc);
}
/* for status though, exit code is ignored,
* and the stdout is scanned for specific strings */
if (stdout_data == NULL) {
crm_warn("No status output from the (hb) resource agent, assuming stopped");
return PCMK_OCF_NOT_RUNNING;
}
lower_std_output = g_ascii_strdown(stdout_data, -1);
if (pattern_matched(stop_pattern[0], lower_std_output) ||
pattern_matched(stop_pattern[1], lower_std_output)) {
result = PCMK_OCF_NOT_RUNNING;
} else if (pattern_matched(running_pattern[0], lower_std_output) ||
pattern_matched(running_pattern[1], stdout_data)) {
/* "OK" is matched case sensitive */
result = PCMK_OCF_OK;
} else {
/* It didn't say it was running - must be stopped */
crm_debug("RA output did not match any pattern, assuming stopped");
result = PCMK_OCF_NOT_RUNNING;
}
free(lower_std_output);
return result;
}
#endif
static int
ocf2uniform_rc(int rc)
{
if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) {
return PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
static int
stonith2uniform_rc(const char *action, int rc)
{
if (rc == -ENODEV) {
if (safe_str_eq(action, "stop")) {
rc = PCMK_OCF_OK;
} else if (safe_str_eq(action, "start")) {
rc = PCMK_OCF_NOT_INSTALLED;
} else {
rc = PCMK_OCF_NOT_RUNNING;
}
} else if (rc != 0) {
rc = PCMK_OCF_UNKNOWN_ERROR;
}
return rc;
}
#if SUPPORT_NAGIOS
static int
nagios2uniform_rc(const char *action, int rc)
{
if (rc < 0) {
return PCMK_OCF_UNKNOWN_ERROR;
}
switch (rc) {
case NAGIOS_STATE_OK:
return PCMK_OCF_OK;
case NAGIOS_INSUFFICIENT_PRIV:
return PCMK_OCF_INSUFFICIENT_PRIV;
case NAGIOS_NOT_INSTALLED:
return PCMK_OCF_NOT_INSTALLED;
case NAGIOS_STATE_WARNING:
case NAGIOS_STATE_CRITICAL:
case NAGIOS_STATE_UNKNOWN:
case NAGIOS_STATE_DEPENDENT:
default:
return PCMK_OCF_UNKNOWN_ERROR;
}
return PCMK_OCF_UNKNOWN_ERROR;
}
#endif
static int
get_uniform_rc(const char *standard, const char *action, int rc)
{
if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) {
return ocf2uniform_rc(rc);
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) {
return stonith2uniform_rc(action, rc);
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)) {
return rc;
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) {
return rc;
#if SUPPORT_NAGIOS
} else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) {
return nagios2uniform_rc(action, rc);
#endif
} else {
return services_get_ocf_exitcode(action, rc);
}
}
static int
action_get_uniform_rc(svc_action_t * action)
{
lrmd_cmd_t *cmd = action->cb_data;
#if SUPPORT_HEARTBEAT
if (safe_str_eq(action->standard, PCMK_RESOURCE_CLASS_HB)) {
return hb2uniform_rc(cmd->action, action->rc, action->stdout_data);
}
#endif
return get_uniform_rc(action->standard, cmd->action, action->rc);
}
void
notify_of_new_client(crm_client_t *new_client)
{
crm_client_t *client = NULL;
GHashTableIter iter;
xmlNode *notify = NULL;
char *key = NULL;
notify = create_xml_node(NULL, T_LRMD_NOTIFY);
crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
g_hash_table_iter_init(&iter, client_connections);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) {
if (safe_str_eq(client->id, new_client->id)) {
continue;
}
send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify);
}
free_xml(notify);
}
static char *
parse_exit_reason(const char *output)
{
const char *cur = NULL;
const char *last = NULL;
char *reason = NULL;
static int cookie_len = 0;
char *eol = NULL;
if (output == NULL) {
return NULL;
}
if (!cookie_len) {
cookie_len = strlen(PCMK_OCF_REASON_PREFIX);
}
cur = strstr(output, PCMK_OCF_REASON_PREFIX);
for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) {
/* skip over the cookie delimiter string */
cur += cookie_len;
last = cur;
}
if (last == NULL) {
return NULL;
}
/* make our own copy */
reason = calloc(1, (EXIT_REASON_MAX_LEN+1));
CRM_ASSERT(reason);
/* limit reason string size */
strncpy(reason, last, EXIT_REASON_MAX_LEN);
/* truncate everything after a new line */
eol = strchr(reason, '\n');
if (eol != NULL) {
*eol = '\0';
}
return reason;
}
void
client_disconnect_cleanup(const char *client_id)
{
GHashTableIter iter;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (rsc->call_opts & lrmd_opt_drop_recurring) {
/* This client is disconnecting, drop any recurring operations
* it may have initiated on the resource */
cancel_all_recurring(rsc, client_id);
}
}
}
static void
action_complete(svc_action_t * action)
{
lrmd_rsc_t *rsc;
lrmd_cmd_t *cmd = action->cb_data;
const char *rclass = NULL;
bool goagain = false;
if (!cmd) {
crm_err("LRMD action (%s) completed does not match any known operations.", action->id);
return;
}
#ifdef HAVE_SYS_TIMEB_H
if (cmd->exec_rc != action->rc) {
ftime(&cmd->t_rcchange);
}
#endif
cmd->last_pid = action->pid;
cmd->exec_rc = action_get_uniform_rc(action);
cmd->lrmd_op_status = action->status;
rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) {
rclass = resources_find_service_class(rsc->class);
} else if(rsc) {
rclass = rsc->class;
}
if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) {
if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "start")) {
/* systemd I curse thee!
*
* systemd returns from start actions after the start _begins_
* not after it completes.
*
* So we have to jump through a few hoops so that we don't
* report 'complete' to the rest of pacemaker until, you know,
* it's actually done.
*/
goagain = true;
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "stop")) {
goagain = true;
cmd->real_action = cmd->action;
cmd->action = strdup("monitor");
} else if(cmd->real_action) {
/* Ok, so this is the follow up monitor action to check if start actually completed */
if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) {
goagain = true;
} else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->real_action, "stop")) {
goagain = true;
} else {
#ifdef HAVE_SYS_TIMEB_H
int time_sum = time_diff_ms(NULL, &cmd->t_first_run);
int timeout_left = cmd->timeout_orig - time_sum;
crm_debug("%s %s is now complete (elapsed=%dms, remaining=%dms): %s (%d)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc);
cmd_original_times(cmd);
#endif
if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_NOT_RUNNING && safe_str_eq(cmd->real_action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
}
}
}
}
#if SUPPORT_NAGIOS
if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) {
if (safe_str_eq(cmd->action, "monitor") &&
cmd->interval == 0 && cmd->exec_rc == PCMK_OCF_OK) {
/* Successfully executed --version for the nagios plugin */
cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
} else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) {
goagain = true;
}
}
#endif
/* Wrapping this section in ifdef implies that systemd resources are not
* fully supported on platforms without sys/timeb.h. Since timeb is
* obsolete, we should eventually prefer a clock_gettime() implementation
* (wrapped in its own ifdef) with timeb as a fallback.
*/
#ifdef HAVE_SYS_TIMEB_H
if(goagain) {
int time_sum = time_diff_ms(NULL, &cmd->t_first_run);
int timeout_left = cmd->timeout_orig - time_sum;
int delay = cmd->timeout_orig / 10;
if(delay >= timeout_left && timeout_left > 20) {
delay = timeout_left/2;
}
delay = QB_MIN(2000, delay);
if (delay < timeout_left) {
cmd->start_delay = delay;
cmd->timeout = timeout_left;
if(cmd->exec_rc == PCMK_OCF_OK) {
crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
} else if(cmd->exec_rc == PCMK_OCF_PENDING) {
crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
} else {
crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay);
}
cmd_reset(cmd);
if(rsc) {
rsc->active = NULL;
}
schedule_lrmd_cmd(rsc, cmd);
/* Don't finalize cmd, we're not done with it yet */
return;
} else {
crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left);
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
cmd->exec_rc = PCMK_OCF_TIMEOUT;
cmd_original_times(cmd);
}
}
#endif
if (action->stderr_data) {
cmd->output = strdup(action->stderr_data);
cmd->exit_reason = parse_exit_reason(action->stderr_data);
} else if (action->stdout_data) {
cmd->output = strdup(action->stdout_data);
}
cmd_finalize(cmd, rsc);
}
static void
stonith_action_complete(lrmd_cmd_t * cmd, int rc)
{
int recurring = cmd->interval;
lrmd_rsc_t *rsc = NULL;
cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action, rc);
rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
if (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED) {
recurring = 0;
/* do nothing */
} else if (rc == -ENODEV && safe_str_eq(cmd->action, "monitor")) {
/* Not registered == inactive */
cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
} else if (rc) {
/* Attempt to map return codes to op status if possible */
switch (rc) {
case -EPROTONOSUPPORT:
cmd->lrmd_op_status = PCMK_LRM_OP_NOTSUPPORTED;
break;
case -ETIME:
cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
break;
default:
/* TODO: This looks wrong. Status should be _DONE and exec_rc set to an error */
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
} else {
/* command successful */
cmd->lrmd_op_status = PCMK_LRM_OP_DONE;
if (safe_str_eq(cmd->action, "start") && rsc) {
rsc->stonith_started = 1;
}
}
if (recurring && rsc) {
if (cmd->stonith_recurring_id) {
g_source_remove(cmd->stonith_recurring_id);
}
cmd->stonith_recurring_id = g_timeout_add(cmd->interval, stonith_recurring_op_helper, cmd);
}
cmd_finalize(cmd, rsc);
}
static void
lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
stonith_action_complete(data->userdata, data->rc);
}
void
stonith_connection_failed(void)
{
GHashTableIter iter;
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
lrmd_rsc_t *rsc = NULL;
char *key = NULL;
g_hash_table_iter_init(&iter, rsc_list);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
if (rsc->active) {
cmd_list = g_list_append(cmd_list, rsc->active);
}
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
}
rsc->pending_ops = rsc->recurring_ops = NULL;
}
}
if (!cmd_list) {
return;
}
crm_err("STONITH connection failed, finalizing %d pending operations.",
g_list_length(cmd_list));
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
stonith_action_complete(cmd_iter->data, -ENOTCONN);
}
g_list_free(cmd_list);
}
static int
lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
int rc = 0;
int do_monitor = 0;
stonith_t *stonith_api = get_stonith_connection();
if (!stonith_api) {
cmd->exec_rc = get_uniform_rc(PCMK_RESOURCE_CLASS_STONITH, cmd->action,
-ENOTCONN);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
cmd_finalize(cmd, rsc);
return -EUNATCH;
}
if (safe_str_eq(cmd->action, "start")) {
char *key = NULL;
char *value = NULL;
stonith_key_value_t *device_params = NULL;
if (cmd->params) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, cmd->params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
device_params = stonith_key_value_add(device_params, key, value);
}
}
/* Stonith automatically registers devices from the IPC when changes occur,
* but to avoid a possible race condition between stonith receiving the IPC update
* and the lrmd requesting that resource, the lrmd still registers the device as well.
* Stonith knows how to handle duplicate device registrations correctly. */
rc = stonith_api->cmds->register_device(stonith_api,
st_opt_sync_call,
cmd->rsc_id,
rsc->provider, rsc->type, device_params);
stonith_key_value_freeall(device_params, 1, 1);
if (rc == 0) {
do_monitor = 1;
}
} else if (safe_str_eq(cmd->action, "stop")) {
rc = stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, cmd->rsc_id);
rsc->stonith_started = 0;
} else if (safe_str_eq(cmd->action, "monitor")) {
if (cmd->interval) {
do_monitor = 1;
} else {
rc = rsc->stonith_started ? 0 : -ENODEV;
}
}
if (!do_monitor) {
goto cleanup_stonith_exec;
}
rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000);
rc = stonith_api->cmds->register_callback(stonith_api,
rc,
0,
0,
cmd, "lrmd_stonith_callback", lrmd_stonith_callback);
/* don't cleanup yet, we will find out the result of the monitor later */
if (rc > 0) {
rsc->active = cmd;
return rc;
} else if (rc == 0) {
rc = -1;
}
cleanup_stonith_exec:
stonith_action_complete(cmd, rc);
return rc;
}
static int
lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
{
svc_action_t *action = NULL;
GHashTable *params_copy = NULL;
CRM_ASSERT(rsc);
CRM_ASSERT(cmd);
crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
#if SUPPORT_NAGIOS
/* Recurring operations are cancelled anyway for a stop operation */
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)
&& safe_str_eq(cmd->action, "stop")) {
cmd->exec_rc = PCMK_OCF_OK;
goto exec_done;
}
#endif
params_copy = crm_str_table_dup(cmd->params);
if (cmd->isolation_wrapper) {
g_hash_table_remove(params_copy, "CRM_meta_isolation_wrapper");
action = resources_action_create(rsc->rsc_id,
PCMK_RESOURCE_CLASS_OCF,
LRMD_ISOLATION_PROVIDER,
cmd->isolation_wrapper,
cmd->action, /*action will be normalized in wrapper*/
cmd->interval,
cmd->timeout,
params_copy,
cmd->service_flags);
} else {
action = resources_action_create(rsc->rsc_id,
rsc->class,
rsc->provider,
rsc->type,
normalize_action_name(rsc, cmd->action),
cmd->interval,
cmd->timeout,
params_copy,
cmd->service_flags);
}
if (!action) {
crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
goto exec_done;
}
action->cb_data = cmd;
/* 'cmd' may not be valid after this point if
* services_action_async() returned TRUE
*
* Upstart and systemd both synchronously determine monitor/status
* results and call action_complete (which may free 'cmd') if necessary.
*/
if (services_action_async(action, action_complete)) {
return TRUE;
}
cmd->exec_rc = action->rc;
if(action->status != PCMK_LRM_OP_DONE) {
cmd->lrmd_op_status = action->status;
} else {
cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
}
services_action_free(action);
action = NULL;
exec_done:
cmd_finalize(cmd, rsc);
return TRUE;
}
static gboolean
lrmd_rsc_execute(lrmd_rsc_t * rsc)
{
lrmd_cmd_t *cmd = NULL;
CRM_CHECK(rsc != NULL, return FALSE);
if (rsc->active) {
crm_trace("%s is still active", rsc->rsc_id);
return TRUE;
}
if (rsc->pending_ops) {
GList *first = rsc->pending_ops;
cmd = first->data;
if (cmd->delay_id) {
crm_trace
("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
cmd->rsc_id, cmd->action, cmd->start_delay);
return TRUE;
}
rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
g_list_free_1(first);
#ifdef HAVE_SYS_TIMEB_H
if (cmd->t_first_run.time == 0) {
ftime(&cmd->t_first_run);
}
ftime(&cmd->t_run);
#endif
}
if (!cmd) {
crm_trace("Nothing further to do for %s", rsc->rsc_id);
return TRUE;
}
rsc->active = cmd; /* only one op at a time for a rsc */
if (cmd->interval) {
rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
}
log_execute(cmd);
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
lrmd_rsc_execute_stonith(rsc, cmd);
} else {
lrmd_rsc_execute_service_lib(rsc, cmd);
}
return TRUE;
}
static gboolean
lrmd_rsc_dispatch(gpointer user_data)
{
return lrmd_rsc_execute(user_data);
}
void
free_rsc(gpointer data)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = data;
int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH);
gIter = rsc->pending_ops;
while (gIter != NULL) {
GListPtr next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
/* command was never executed */
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, NULL);
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->pending_ops);
gIter = rsc->recurring_ops;
while (gIter != NULL) {
GListPtr next = gIter->next;
lrmd_cmd_t *cmd = gIter->data;
if (is_stonith) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
/* If a stonith command is in-flight, just mark it as cancelled;
* it is not safe to finalize/free the cmd until the stonith api
* says it has either completed or timed out.
*/
if (rsc->active != cmd) {
cmd_finalize(cmd, NULL);
}
} else {
/* This command is already handed off to service library,
* let service library cancel it and tell us via the callback
* when it is cancelled. The rsc can be safely destroyed
* even if we are waiting for the cancel result */
services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval);
}
gIter = next;
}
/* frees list, but not list elements. */
g_list_free(rsc->recurring_ops);
free(rsc->rsc_id);
free(rsc->class);
free(rsc->provider);
free(rsc->type);
mainloop_destroy_trigger(rsc->work);
free(rsc);
}
static int
process_lrmd_signon(crm_client_t * client, uint32_t id, xmlNode * request)
{
xmlNode *reply = create_xml_node(NULL, "reply");
const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
- if (compare_version(protocol_version, LRMD_PROTOCOL_VERSION) < 0) {
+ if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
crm_err("Cluster API version must be greater than or equal to %s, not %s",
- LRMD_PROTOCOL_VERSION, protocol_version);
+ LRMD_MIN_PROTOCOL_VERSION, protocol_version);
crm_xml_add_int(reply, F_LRMD_RC, -EPROTO);
- crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
}
crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_LRMD_CLIENTID, client->id);
crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
lrmd_server_send_reply(client, id, reply);
if (crm_is_true(is_ipc_provider)) {
/* this is a remote connection from a cluster nodes crmd */
#ifdef SUPPORT_REMOTE
ipc_proxy_add_provider(client);
#endif
}
free_xml(reply);
return pcmk_ok;
}
static int
process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = build_rsc_from_xml(request);
lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
if (dup &&
safe_str_eq(rsc->class, dup->class) &&
safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) {
crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
free_rsc(rsc);
return rc;
}
g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
crm_info("Added '%s' to the rsc list (%d active resources)",
rsc->rsc_id, g_hash_table_size(rsc_list));
return rc;
}
static void
process_lrmd_get_rsc_info(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
int send_rc = 0;
int call_id = 0;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
xmlNode *reply = NULL;
lrmd_rsc_t *rsc = NULL;
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (!rsc_id) {
rc = -ENODEV;
goto get_rsc_done;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
rc = -ENODEV;
goto get_rsc_done;
}
get_rsc_done:
reply = create_xml_node(NULL, T_LRMD_REPLY);
crm_xml_add(reply, F_LRMD_ORIGIN, __FUNCTION__);
crm_xml_add_int(reply, F_LRMD_RC, rc);
crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
if (rsc) {
crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
}
send_rc = lrmd_server_send_reply(client, id, reply);
if (send_rc < 0) {
crm_warn("LRMD reply to %s failed: %d", client->name, send_rc);
}
free_xml(reply);
}
static int
process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
lrmd_rsc_t *rsc = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
if (!rsc_id) {
return -ENODEV;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return pcmk_ok;
}
if (rsc->active) {
/* let the caller know there are still active ops on this rsc to watch for */
crm_trace("Operation still in progress: %p", rsc->active);
rc = -EINPROGRESS;
}
g_hash_table_remove(rsc_list, rsc_id);
return rc;
}
static int
process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request)
{
lrmd_rsc_t *rsc = NULL;
lrmd_cmd_t *cmd = NULL;
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
int call_id;
if (!rsc_id) {
return -EINVAL;
}
if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
crm_info("Resource '%s' not found (%d active resources)",
rsc_id, g_hash_table_size(rsc_list));
return -ENODEV;
}
cmd = create_lrmd_cmd(request, client, rsc);
call_id = cmd->call_id;
/* Don't reference cmd after handing it off to be scheduled.
* The cmd could get merged and freed. */
schedule_lrmd_cmd(rsc, cmd);
return call_id;
}
static int
cancel_op(const char *rsc_id, const char *action, int interval)
{
GListPtr gIter = NULL;
lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
/* How to cancel an action.
* 1. Check pending ops list, if it hasn't been handed off
* to the service library or stonith recurring list remove
* it there and that will stop it.
* 2. If it isn't in the pending ops list, then it's either a
* recurring op in the stonith recurring list, or the service
* library's recurring list. Stop it there
* 3. If not found in any lists, then this operation has either
* been executed already and is not a recurring operation, or
* never existed.
*/
if (!rsc) {
return -ENODEV;
}
for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
cmd_finalize(cmd, rsc);
return pcmk_ok;
}
}
if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) {
/* The service library does not handle stonith operations.
* We have to handle recurring stonith operations ourselves. */
for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
lrmd_cmd_t *cmd = gIter->data;
if (safe_str_eq(cmd->action, action) && cmd->interval == interval) {
cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
if (rsc->active != cmd) {
cmd_finalize(cmd, rsc);
}
return pcmk_ok;
}
}
} else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval) == TRUE) {
/* The service library will tell the action_complete callback function
* this action was cancelled, which will destroy the cmd and remove
* it from the recurring_op list. Do not do that in this function
* if the service library says it cancelled it. */
return pcmk_ok;
}
return -EOPNOTSUPP;
}
static void
cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
{
GList *cmd_list = NULL;
GList *cmd_iter = NULL;
/* Notice a copy of each list is created when concat is called.
* This prevents odd behavior from occurring when the cmd_list
* is iterated through later on. It is possible the cancel_op
* function may end up modifying the recurring_ops and pending_ops
* lists. If we did not copy those lists, our cmd_list iteration
* could get messed up.*/
if (rsc->recurring_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
}
if (rsc->pending_ops) {
cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
}
if (!cmd_list) {
return;
}
for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
lrmd_cmd_t *cmd = cmd_iter->data;
if (cmd->interval == 0) {
continue;
}
if (client_id && safe_str_neq(cmd->client_id, client_id)) {
continue;
}
cancel_op(rsc->rsc_id, cmd->action, cmd->interval);
}
/* frees only the copied list data, not the cmds */
g_list_free(cmd_list);
}
static int
process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request)
{
xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
int interval = 0;
crm_element_value_int(rsc_xml, F_LRMD_RSC_INTERVAL, &interval);
if (!rsc_id || !action) {
return -EINVAL;
}
return cancel_op(rsc_id, action, interval);
}
void
process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request)
{
int rc = pcmk_ok;
int call_id = 0;
const char *op = crm_element_value(request, F_LRMD_OPERATION);
int do_reply = 0;
int do_notify = 0;
crm_trace("Processing %s operation from %s", op, client->id);
crm_element_value_int(request, F_LRMD_CALLID, &call_id);
if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) {
#ifdef SUPPORT_REMOTE
ipc_proxy_forward_client(client, request);
#endif
do_reply = 1;
} else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) {
rc = process_lrmd_signon(client, id, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) {
rc = process_lrmd_rsc_register(client, id, request);
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) {
process_lrmd_get_rsc_info(client, id, request);
} else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) {
rc = process_lrmd_rsc_unregister(client, id, request);
/* don't notify anyone about failed un-registers */
if (rc == pcmk_ok || rc == -EINPROGRESS) {
do_notify = 1;
}
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) {
rc = process_lrmd_rsc_exec(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) {
rc = process_lrmd_rsc_cancel(client, id, request);
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) {
do_notify = 1;
do_reply = 1;
} else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) {
xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG);
CRM_LOG_ASSERT(data != NULL);
check_sbd_timeout(timeout);
} else if (crm_str_eq(op, LRMD_OP_ALERT_EXEC, TRUE)) {
rc = process_lrmd_alert_exec(client, id, request);
do_reply = 1;
} else {
rc = -EOPNOTSUPP;
do_reply = 1;
crm_err("Unknown %s from %s", op, client->name);
crm_log_xml_warn(request, "UnknownOp");
}
crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
op, client->id, rc, do_reply, do_notify);
if (do_reply) {
send_reply(client, rc, id, call_id);
}
if (do_notify) {
send_generic_notify(rc, request);
}
}
diff --git a/pengine/allocate.c b/pengine/allocate.c
index 62b141a0a9..e0f1ffd610 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -1,2555 +1,2552 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <pengine.h>
#include <allocate.h>
#include <utils.h>
CRM_TRACE_INIT_DATA(pe_allocate);
void set_alloc_actions(pe_working_set_t * data_set);
-void migrate_reload_madness(pe_working_set_t * data_set);
extern void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set);
extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set);
static void apply_remote_node_ordering(pe_working_set_t *data_set);
static enum remote_connection_state get_remote_node_state(pe_node_t *node);
-enum remote_connection_state
-{
+
+enum remote_connection_state {
remote_state_unknown = 0,
remote_state_alive = 1,
remote_state_resting = 2,
remote_state_failed = 3,
remote_state_stopped = 4
};
-
resource_alloc_functions_t resource_class_alloc_functions[] = {
{
native_merge_weights,
native_color,
native_create_actions,
native_create_probe,
native_internal_constraints,
native_rsc_colocation_lh,
native_rsc_colocation_rh,
native_rsc_location,
native_action_flags,
native_update_actions,
native_expand,
native_append_meta,
},
{
group_merge_weights,
group_color,
group_create_actions,
native_create_probe,
group_internal_constraints,
group_rsc_colocation_lh,
group_rsc_colocation_rh,
group_rsc_location,
group_action_flags,
group_update_actions,
group_expand,
group_append_meta,
},
{
clone_merge_weights,
clone_color,
clone_create_actions,
clone_create_probe,
clone_internal_constraints,
clone_rsc_colocation_lh,
clone_rsc_colocation_rh,
clone_rsc_location,
clone_action_flags,
container_update_actions,
clone_expand,
clone_append_meta,
},
{
master_merge_weights,
master_color,
master_create_actions,
clone_create_probe,
master_internal_constraints,
clone_rsc_colocation_lh,
master_rsc_colocation_rh,
clone_rsc_location,
clone_action_flags,
container_update_actions,
clone_expand,
master_append_meta,
},
{
container_merge_weights,
container_color,
container_create_actions,
container_create_probe,
container_internal_constraints,
container_rsc_colocation_lh,
container_rsc_colocation_rh,
container_rsc_location,
container_action_flags,
container_update_actions,
container_expand,
container_append_meta,
}
};
gboolean
update_action_flags(action_t * action, enum pe_action_flags flags, const char *source, int line)
{
static unsigned long calls = 0;
gboolean changed = FALSE;
gboolean clear = is_set(flags, pe_action_clear);
enum pe_action_flags last = action->flags;
if (clear) {
action->flags = crm_clear_bit(source, line, action->uuid, action->flags, flags);
} else {
action->flags = crm_set_bit(source, line, action->uuid, action->flags, flags);
}
if (last != action->flags) {
calls++;
changed = TRUE;
/* Useful for tracking down _who_ changed a specific flag */
/* CRM_ASSERT(calls != 534); */
clear_bit(flags, pe_action_clear);
crm_trace("%s on %s: %sset flags 0x%.6x (was 0x%.6x, now 0x%.6x, %lu, %s)",
action->uuid, action->node ? action->node->details->uname : "[none]",
clear ? "un-" : "", flags, last, action->flags, calls, source);
}
return changed;
}
static gboolean
check_rsc_parameters(resource_t * rsc, node_t * node, xmlNode * rsc_entry,
gboolean active_here, pe_working_set_t * data_set)
{
int attr_lpc = 0;
gboolean force_restart = FALSE;
gboolean delete_resource = FALSE;
gboolean changed = FALSE;
const char *value = NULL;
const char *old_value = NULL;
const char *attr_list[] = {
XML_ATTR_TYPE,
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER
};
for (; attr_lpc < DIMOF(attr_list); attr_lpc++) {
value = crm_element_value(rsc->xml, attr_list[attr_lpc]);
old_value = crm_element_value(rsc_entry, attr_list[attr_lpc]);
if (value == old_value /* i.e. NULL */
|| crm_str_eq(value, old_value, TRUE)) {
continue;
}
changed = TRUE;
trigger_unfencing(rsc, node, "Device definition changed", NULL, data_set);
if (active_here) {
force_restart = TRUE;
crm_notice("Forcing restart of %s on %s, %s changed: %s -> %s",
rsc->id, node->details->uname, attr_list[attr_lpc],
crm_str(old_value), crm_str(value));
}
}
if (force_restart) {
/* make sure the restart happens */
stop_action(rsc, node, FALSE);
set_bit(rsc->flags, pe_rsc_start_pending);
delete_resource = TRUE;
} else if (changed) {
delete_resource = TRUE;
}
return delete_resource;
}
static void
CancelXmlOp(resource_t * rsc, xmlNode * xml_op, node_t * active_node,
const char *reason, pe_working_set_t * data_set)
{
int interval = 0;
action_t *cancel = NULL;
char *key = NULL;
const char *task = NULL;
const char *call_id = NULL;
const char *interval_s = NULL;
CRM_CHECK(xml_op != NULL, return);
CRM_CHECK(active_node != NULL, return);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID);
interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
interval = crm_parse_int(interval_s, "0");
/* we need to reconstruct the key because of the way we used to construct resource IDs */
key = generate_op_key(rsc->id, task, interval);
crm_info("Action %s on %s will be stopped: %s",
key, active_node->details->uname, reason ? reason : "unknown");
/* TODO: This looks highly dangerous if we ever try to schedule 'key' too */
cancel = custom_action(rsc, strdup(key), RSC_CANCEL, active_node, FALSE, TRUE, data_set);
free(cancel->task);
free(cancel->cancel_task);
cancel->task = strdup(RSC_CANCEL);
cancel->cancel_task = strdup(task);
add_hash_param(cancel->meta, XML_LRM_ATTR_TASK, task);
add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
add_hash_param(cancel->meta, XML_LRM_ATTR_INTERVAL, interval_s);
custom_action_order(rsc, stop_key(rsc), NULL, rsc, NULL, cancel, pe_order_optional, data_set);
free(key);
key = NULL;
}
static gboolean
check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op,
pe_working_set_t * data_set)
{
char *key = NULL;
int interval = 0;
const char *interval_s = NULL;
const op_digest_cache_t *digest_data = NULL;
gboolean did_change = FALSE;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *op_version;
const char *digest_secure = NULL;
CRM_CHECK(active_node != NULL, return FALSE);
if (safe_str_eq(task, RSC_STOP)) {
return FALSE;
}
interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
interval = crm_parse_int(interval_s, "0");
if (interval > 0) {
xmlNode *op_match = NULL;
/* we need to reconstruct the key because of the way we used to construct resource IDs */
key = generate_op_key(rsc->id, task, interval);
pe_rsc_trace(rsc, "Checking parameters for %s", key);
op_match = find_rsc_op_entry(rsc, key);
if (op_match == NULL && is_set(data_set->flags, pe_flag_stop_action_orphans)) {
CancelXmlOp(rsc, xml_op, active_node, "orphan", data_set);
free(key);
return TRUE;
} else if (op_match == NULL) {
pe_rsc_debug(rsc, "Orphan action detected: %s on %s", key, active_node->details->uname);
free(key);
return TRUE;
}
free(key);
key = NULL;
}
crm_trace("Testing %s_%s_%d on %s",
rsc->id, task, interval, active_node->details->uname);
if (interval == 0 && safe_str_eq(task, RSC_STATUS)) {
/* Reload based on the start action not a probe */
task = RSC_START;
} else if (interval == 0 && safe_str_eq(task, RSC_MIGRATED)) {
/* Reload based on the start action not a migrate */
task = RSC_START;
} else if (interval == 0 && safe_str_eq(task, RSC_PROMOTE)) {
/* Reload based on the start action not a promote */
task = RSC_START;
}
op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
digest_data = rsc_action_digest_cmp(rsc, xml_op, active_node, data_set);
if(is_set(data_set->flags, pe_flag_sanitized)) {
digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
}
if(digest_data->rc != RSC_DIGEST_MATCH
&& digest_secure
&& digest_data->digest_secure_calc
&& strcmp(digest_data->digest_secure_calc, digest_secure) == 0) {
if (is_set(data_set->flags, pe_flag_sanitized)) {
printf("Only 'private' parameters to %s_%s_%d on %s changed: %s\n",
rsc->id, task, interval, active_node->details->uname,
crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
}
} else if (digest_data->rc == RSC_DIGEST_RESTART) {
/* Changes that force a restart */
const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
did_change = TRUE;
key = generate_op_key(rsc->id, task, interval);
crm_log_xml_info(digest_data->params_restart, "params:restart");
pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (restart:%s) %s",
key, active_node->details->uname,
crm_str(digest_restart), digest_data->digest_restart_calc,
op_version, crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set);
trigger_unfencing(rsc, active_node, "Device parameters changed", NULL, data_set);
} else if ((digest_data->rc == RSC_DIGEST_ALL) || (digest_data->rc == RSC_DIGEST_UNKNOWN)) {
/* Changes that can potentially be handled by a reload */
const char *digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
const char *digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);
did_change = TRUE;
trigger_unfencing(rsc, active_node, "Device parameters changed (reload)", NULL, data_set);
crm_log_xml_info(digest_data->params_all, "params:reload");
key = generate_op_key(rsc->id, task, interval);
pe_rsc_info(rsc, "Parameters to %s on %s changed: was %s vs. now %s (reload:%s) %s",
key, active_node->details->uname,
crm_str(digest_all), digest_data->digest_all_calc, op_version,
crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
if (interval > 0) {
action_t *op = NULL;
#if 0
/* Always reload/restart the entire resource */
ReloadRsc(rsc, active_node, data_set);
#else
/* Re-sending the recurring op is sufficient - the old one will be cancelled automatically */
op = custom_action(rsc, key, task, active_node, TRUE, TRUE, data_set);
set_bit(op->flags, pe_action_reschedule);
#endif
} else if (digest_restart && rsc->isolation_wrapper == NULL && (uber_parent(rsc))->isolation_wrapper == NULL) {
pe_rsc_trace(rsc, "Reloading '%s' action for resource %s", task, rsc->id);
/* Reload this resource */
ReloadRsc(rsc, active_node, data_set);
free(key);
} else {
pe_rsc_trace(rsc, "Resource %s doesn't know how to reload", rsc->id);
/* Re-send the start/demote/promote op
* Recurring ops will be detected independently
*/
custom_action(rsc, key, task, NULL, FALSE, TRUE, data_set);
}
}
return did_change;
}
static void
check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
int offset = -1;
int interval = 0;
int stop_index = 0;
int start_index = 0;
const char *task = NULL;
const char *interval_s = NULL;
xmlNode *rsc_op = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
gboolean is_probe = FALSE;
gboolean did_change = FALSE;
CRM_CHECK(node != NULL, return);
if (is_set(rsc->flags, pe_rsc_orphan)) {
resource_t *parent = uber_parent(rsc);
if(parent == NULL
|| pe_rsc_is_clone(parent) == FALSE
|| is_set(parent->flags, pe_rsc_unique)) {
pe_rsc_trace(rsc, "Skipping param check for %s and deleting: orphan", rsc->id);
DeleteRsc(rsc, node, FALSE, data_set);
} else {
pe_rsc_trace(rsc, "Skipping param check for %s (orphan clone)", rsc->id);
}
return;
} else if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
if (check_rsc_parameters(rsc, node, rsc_entry, FALSE, data_set)) {
DeleteRsc(rsc, node, FALSE, data_set);
}
pe_rsc_trace(rsc, "Skipping param check for %s: no longer active on %s",
rsc->id, node->details->uname);
return;
}
pe_rsc_trace(rsc, "Processing %s on %s", rsc->id, node->details->uname);
if (check_rsc_parameters(rsc, node, rsc_entry, TRUE, data_set)) {
DeleteRsc(rsc, node, FALSE, data_set);
}
for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_prepend(op_list, rsc_op);
}
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
offset++;
if (start_index < stop_index) {
/* stopped */
continue;
} else if (offset < start_index) {
/* action occurred prior to a start */
continue;
}
is_probe = FALSE;
did_change = FALSE;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
interval = crm_parse_int(interval_s, "0");
if (interval == 0 && safe_str_eq(task, RSC_STATUS)) {
is_probe = TRUE;
}
if (interval > 0 &&
(is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) {
CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set);
} else if (is_probe || safe_str_eq(task, RSC_START) || safe_str_eq(task, RSC_PROMOTE) || interval > 0
|| safe_str_eq(task, RSC_MIGRATED)) {
did_change = check_action_definition(rsc, node, rsc_op, data_set);
}
if (did_change && get_failcount(node, rsc, NULL, data_set)) {
char *key = NULL;
action_t *action_clear = NULL;
key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
action_clear =
custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set);
set_bit(action_clear->flags, pe_action_runnable);
crm_notice("Clearing failure of %s on %s "
"because action definition changed " CRM_XS " %s",
rsc->id, node->details->uname, action_clear->uuid);
}
}
g_list_free(sorted_op_list);
}
static GListPtr
find_rsc_list(GListPtr result, resource_t * rsc, const char *id, gboolean renamed_clones,
gboolean partial, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
gboolean match = FALSE;
if (id == NULL) {
return NULL;
} else if (rsc == NULL && data_set) {
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t *) gIter->data;
result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
}
return result;
} else if (rsc == NULL) {
return NULL;
}
if (partial) {
if (strstr(rsc->id, id)) {
match = TRUE;
} else if (renamed_clones && rsc->clone_name && strstr(rsc->clone_name, id)) {
match = TRUE;
}
} else {
if (strcmp(rsc->id, id) == 0) {
match = TRUE;
} else if (renamed_clones && rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
match = TRUE;
}
}
if (match) {
result = g_list_prepend(result, rsc);
}
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t *) gIter->data;
result = find_rsc_list(result, child, id, renamed_clones, partial, NULL);
}
}
return result;
}
static void
check_actions(pe_working_set_t * data_set)
{
const char *id = NULL;
node_t *node = NULL;
xmlNode *lrm_rscs = NULL;
xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, data_set->input);
xmlNode *node_state = NULL;
for (node_state = __xml_first_child(status); node_state != NULL;
node_state = __xml_next_element(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
id = crm_element_value(node_state, XML_ATTR_ID);
lrm_rscs = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
lrm_rscs = find_xml_node(lrm_rscs, XML_LRM_TAG_RESOURCES, FALSE);
node = pe_find_node_id(data_set->nodes, id);
if (node == NULL) {
continue;
/* Still need to check actions for a maintenance node to cancel existing monitor operations */
} else if (can_run_resources(node) == FALSE && node->details->maintenance == FALSE) {
crm_trace("Skipping param check for %s: can't run resources",
node->details->uname);
continue;
}
crm_trace("Processing node %s", node->details->uname);
if (node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
xmlNode *rsc_entry = NULL;
for (rsc_entry = __xml_first_child(lrm_rscs); rsc_entry != NULL;
rsc_entry = __xml_next_element(rsc_entry)) {
if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
if (xml_has_children(rsc_entry)) {
GListPtr gIter = NULL;
GListPtr result = NULL;
const char *rsc_id = ID(rsc_entry);
CRM_CHECK(rsc_id != NULL, return);
result = find_rsc_list(NULL, NULL, rsc_id, TRUE, FALSE, data_set);
for (gIter = result; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (rsc->variant != pe_native) {
continue;
}
check_actions_for(rsc_entry, rsc, node, data_set);
}
g_list_free(result);
}
}
}
}
}
}
}
static gboolean
apply_placement_constraints(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
crm_trace("Applying constraints...");
for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
rsc_to_node_t *cons = (rsc_to_node_t *) gIter->data;
cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
}
return TRUE;
}
static gboolean
failcount_clear_action_exists(node_t * node, resource_t * rsc)
{
gboolean rc = FALSE;
char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
GListPtr list = find_actions_exact(rsc->actions, key, node);
if (list) {
rc = TRUE;
}
g_list_free(list);
free(key);
return rc;
}
/*!
* \internal
* \brief Force resource away if failures hit migration threshold
*
* \param[in,out] rsc Resource to check for failures
* \param[in,out] node Node to check for failures
* \param[in,out] data_set Cluster working set to update
*/
static void
check_migration_threshold(resource_t *rsc, node_t *node,
pe_working_set_t *data_set)
{
int fail_count, countdown;
resource_t *failed;
/* Migration threshold of 0 means never force away */
if (rsc->migration_threshold == 0) {
return;
}
/* If there are no failures, there's no need to force away */
fail_count = get_failcount_all(node, rsc, NULL, data_set);
if (fail_count <= 0) {
return;
}
/* How many more times recovery will be tried on this node */
countdown = QB_MAX(rsc->migration_threshold - fail_count, 0);
/* If failed resource has a parent, we'll force the parent away */
failed = rsc;
if (is_not_set(rsc->flags, pe_rsc_unique)) {
failed = uber_parent(rsc);
}
if (countdown == 0) {
resource_location(failed, node, -INFINITY, "__fail_limit__", data_set);
crm_warn("Forcing %s away from %s after %d failures (max=%d)",
failed->id, node->details->uname, fail_count,
rsc->migration_threshold);
} else {
crm_info("%s can fail %d more times on %s before being forced off",
failed->id, countdown, node->details->uname);
}
}
static void
common_apply_stickiness(resource_t * rsc, node_t * node, pe_working_set_t * data_set)
{
if (rsc->children) {
GListPtr gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
common_apply_stickiness(child_rsc, node, data_set);
}
return;
}
if (is_set(rsc->flags, pe_rsc_managed)
&& rsc->stickiness != 0 && g_list_length(rsc->running_on) == 1) {
node_t *current = pe_find_node_id(rsc->running_on, node->details->id);
node_t *match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (current == NULL) {
} else if (match != NULL || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
resource_t *sticky_rsc = rsc;
resource_location(sticky_rsc, node, rsc->stickiness, "stickiness", data_set);
pe_rsc_debug(sticky_rsc, "Resource %s: preferring current location"
" (node=%s, weight=%d)", sticky_rsc->id,
node->details->uname, rsc->stickiness);
} else {
GHashTableIter iter;
node_t *nIter = NULL;
pe_rsc_debug(rsc, "Ignoring stickiness for %s: the cluster is asymmetric"
" and node %s is not explicitly allowed", rsc->id, node->details->uname);
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&nIter)) {
crm_err("%s[%s] = %d", rsc->id, nIter->details->uname, nIter->weight);
}
}
}
/* Check the migration threshold only if a failcount clear action
* has not already been placed for this resource on the node.
* There is no sense in potentially forcing the resource from this
* node if the failcount is being reset anyway. */
if (failcount_clear_action_exists(node, rsc) == FALSE) {
check_migration_threshold(rsc, node, data_set);
}
}
void
complex_set_cmds(resource_t * rsc)
{
GListPtr gIter = rsc->children;
rsc->cmds = &resource_class_alloc_functions[rsc->variant];
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
complex_set_cmds(child_rsc);
}
}
void
set_alloc_actions(pe_working_set_t * data_set)
{
GListPtr gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
complex_set_cmds(rsc);
}
}
static void
calculate_system_health(gpointer gKey, gpointer gValue, gpointer user_data)
{
const char *key = (const char *)gKey;
const char *value = (const char *)gValue;
int *system_health = (int *)user_data;
if (!gKey || !gValue || !user_data) {
return;
}
- /* Does it start with #health? */
- if (0 == strncmp(key, "#health", 7)) {
+ if (crm_starts_with(key, "#health")) {
int score;
/* Convert the value into an integer */
score = char2score(value);
/* Add it to the running total */
*system_health = merge_weights(score, *system_health);
}
}
static gboolean
apply_system_health(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
const char *health_strategy = pe_pref(data_set->config_hash, "node-health-strategy");
int base_health = 0;
if (health_strategy == NULL || safe_str_eq(health_strategy, "none")) {
/* Prevent any accidental health -> score translation */
node_score_red = 0;
node_score_yellow = 0;
node_score_green = 0;
return TRUE;
} else if (safe_str_eq(health_strategy, "migrate-on-red")) {
/* Resources on nodes which have health values of red are
* weighted away from that node.
*/
node_score_red = -INFINITY;
node_score_yellow = 0;
node_score_green = 0;
} else if (safe_str_eq(health_strategy, "only-green")) {
/* Resources on nodes which have health values of red or yellow
* are forced away from that node.
*/
node_score_red = -INFINITY;
node_score_yellow = -INFINITY;
node_score_green = 0;
} else if (safe_str_eq(health_strategy, "progressive")) {
/* Same as the above, but use the r/y/g scores provided by the user
* Defaults are provided by the pe_prefs table
* Also, custom health "base score" can be used
*/
base_health = crm_parse_int(pe_pref(data_set->config_hash, "node-health-base"), "0");
} else if (safe_str_eq(health_strategy, "custom")) {
/* Requires the admin to configure the rsc_location constaints for
* processing the stored health scores
*/
/* TODO: Check for the existence of appropriate node health constraints */
return TRUE;
} else {
crm_err("Unknown node health strategy: %s", health_strategy);
return FALSE;
}
crm_info("Applying automated node health strategy: %s", health_strategy);
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
int system_health = base_health;
node_t *node = (node_t *) gIter->data;
/* Search through the node hash table for system health entries. */
g_hash_table_foreach(node->details->attrs, calculate_system_health, &system_health);
crm_info(" Node %s has an combined system health of %d",
node->details->uname, system_health);
/* If the health is non-zero, then create a new rsc2node so that the
* weight will be added later on.
*/
if (system_health != 0) {
GListPtr gIter2 = data_set->resources;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc2node_new(health_strategy, rsc, system_health, NULL, node, data_set);
}
}
}
return TRUE;
}
gboolean
stage0(pe_working_set_t * data_set)
{
xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input);
if (data_set->input == NULL) {
return FALSE;
}
if (is_set(data_set->flags, pe_flag_have_status) == FALSE) {
crm_trace("Calculating status");
cluster_status(data_set);
}
set_alloc_actions(data_set);
apply_system_health(data_set);
unpack_constraints(cib_constraints, data_set);
return TRUE;
}
/*
* Check nodes for resources started outside of the LRM
*/
gboolean
probe_resources(pe_working_set_t * data_set)
{
action_t *probe_node_complete = NULL;
for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
const char *probed = pe_node_attribute_raw(node, CRM_OP_PROBED);
if (is_container_remote_node(node)) {
/* Guest node probes and their ordering requirements are now functional */
} else if (node->details->online == FALSE && node->details->remote_rsc) {
enum remote_connection_state state = get_remote_node_state(node);
if(state == remote_state_failed) {
pe_fence_node(data_set, node, "the connection is unrecoverable");
}
continue;
} else if(node->details->online == FALSE) {
continue;
} else if (node->details->unclean) {
continue;
} else if (node->details->rsc_discovery_enabled == FALSE) {
/* resource discovery is disabled for this node */
continue;
}
if (probed != NULL && crm_is_true(probed) == FALSE) {
action_t *probe_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_REPROBE, node->details->uname),
CRM_OP_REPROBE, node, FALSE, TRUE, data_set);
add_hash_param(probe_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
continue;
}
for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set);
}
}
return TRUE;
}
static void
rsc_discover_filter(resource_t *rsc, node_t *node)
{
GListPtr gIter = rsc->children;
resource_t *top = uber_parent(rsc);
node_t *match;
if (rsc->exclusive_discover == FALSE && top->exclusive_discover == FALSE) {
return;
}
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
rsc_discover_filter(child_rsc, node);
}
match = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (match && match->rsc_discover_mode != pe_discover_exclusive) {
match->weight = -INFINITY;
}
}
/*
* Count how many valid nodes we have (so we know the maximum number of
* colors we can resolve).
*
* Apply node constraints (i.e. filter the "allowed_nodes" part of resources)
*/
gboolean
stage2(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
crm_trace("Applying placement constraints");
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
if (node == NULL) {
/* error */
} else if (node->weight >= 0.0 /* global weight */
&& node->details->online && node->details->type != node_ping) {
data_set->max_valid_nodes++;
}
}
apply_placement_constraints(data_set);
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
GListPtr gIter2 = NULL;
node_t *node = (node_t *) gIter->data;
gIter2 = data_set->resources;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
resource_t *rsc = (resource_t *) gIter2->data;
common_apply_stickiness(rsc, node, data_set);
rsc_discover_filter(rsc, node);
}
}
return TRUE;
}
/*
* Create internal resource constraints before allocation
*/
gboolean
stage3(pe_working_set_t * data_set)
{
GListPtr gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
rsc->cmds->internal_constraints(rsc, data_set);
}
return TRUE;
}
/*
* Check for orphaned or redefined actions
*/
gboolean
stage4(pe_working_set_t * data_set)
{
check_actions(data_set);
return TRUE;
}
static gint
sort_rsc_process_order(gconstpointer a, gconstpointer b, gpointer data)
{
int rc = 0;
int r1_weight = -INFINITY;
int r2_weight = -INFINITY;
const char *reason = "existence";
const GListPtr nodes = (GListPtr) data;
resource_t *resource1 = (resource_t *) convert_const_pointer(a);
resource_t *resource2 = (resource_t *) convert_const_pointer(b);
node_t *r1_node = NULL;
node_t *r2_node = NULL;
GListPtr gIter = NULL;
GHashTable *r1_nodes = NULL;
GHashTable *r2_nodes = NULL;
if (a == NULL && b == NULL) {
goto done;
}
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
reason = "priority";
r1_weight = resource1->priority;
r2_weight = resource2->priority;
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
reason = "no node list";
if (nodes == NULL) {
goto done;
}
r1_nodes =
rsc_merge_weights(resource1, resource1->id, NULL, NULL, 1,
pe_weights_forward | pe_weights_init);
dump_node_scores(LOG_TRACE, NULL, resource1->id, r1_nodes);
r2_nodes =
rsc_merge_weights(resource2, resource2->id, NULL, NULL, 1,
pe_weights_forward | pe_weights_init);
dump_node_scores(LOG_TRACE, NULL, resource2->id, r2_nodes);
/* Current location score */
reason = "current location";
r1_weight = -INFINITY;
r2_weight = -INFINITY;
if (resource1->running_on) {
r1_node = g_list_nth_data(resource1->running_on, 0);
r1_node = g_hash_table_lookup(r1_nodes, r1_node->details->id);
if (r1_node != NULL) {
r1_weight = r1_node->weight;
}
}
if (resource2->running_on) {
r2_node = g_list_nth_data(resource2->running_on, 0);
r2_node = g_hash_table_lookup(r2_nodes, r2_node->details->id);
if (r2_node != NULL) {
r2_weight = r2_node->weight;
}
}
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
reason = "score";
for (gIter = nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
r1_node = NULL;
r2_node = NULL;
r1_weight = -INFINITY;
if (r1_nodes) {
r1_node = g_hash_table_lookup(r1_nodes, node->details->id);
}
if (r1_node) {
r1_weight = r1_node->weight;
}
r2_weight = -INFINITY;
if (r2_nodes) {
r2_node = g_hash_table_lookup(r2_nodes, node->details->id);
}
if (r2_node) {
r2_weight = r2_node->weight;
}
if (r1_weight > r2_weight) {
rc = -1;
goto done;
}
if (r1_weight < r2_weight) {
rc = 1;
goto done;
}
}
done:
crm_trace("%s (%d) on %s %c %s (%d) on %s: %s",
resource1->id, r1_weight, r1_node ? r1_node->details->id : "n/a",
rc < 0 ? '>' : rc > 0 ? '<' : '=',
resource2->id, r2_weight, r2_node ? r2_node->details->id : "n/a", reason);
if (r1_nodes) {
g_hash_table_destroy(r1_nodes);
}
if (r2_nodes) {
g_hash_table_destroy(r2_nodes);
}
return rc;
}
static void
allocate_resources(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
if (is_set(data_set->flags, pe_flag_have_remote_nodes)) {
/* Force remote connection resources to be allocated first. This
* also forces any colocation dependencies to be allocated as well */
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (rsc->is_remote_node == FALSE) {
continue;
}
pe_rsc_trace(rsc, "Allocating: %s", rsc->id);
/* For remote node connection resources, always prefer the partial
* migration target during resource allocation, if the rsc is in the
* middle of a migration.
*/
rsc->cmds->allocate(rsc, rsc->partial_migration_target, data_set);
}
}
/* now do the rest of the resources */
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (rsc->is_remote_node == TRUE) {
continue;
}
pe_rsc_trace(rsc, "Allocating: %s", rsc->id);
rsc->cmds->allocate(rsc, NULL, data_set);
}
}
/* We always use pe_order_preserve with these convenience functions to exempt
* internally generated constraints from the prohibition of user constraints
* involving remote connection resources.
*
* The start ordering additionally uses pe_order_runnable_left so that the
* specified action is not runnable if the start is not runnable.
*/
static inline void
order_start_then_action(resource_t *lh_rsc, action_t *rh_action,
enum pe_ordering extra, pe_working_set_t *data_set)
{
if (lh_rsc && rh_action && data_set) {
custom_action_order(lh_rsc, start_key(lh_rsc), NULL,
rh_action->rsc, NULL, rh_action,
pe_order_preserve | pe_order_runnable_left | extra,
data_set);
}
}
static inline void
order_action_then_stop(action_t *lh_action, resource_t *rh_rsc,
enum pe_ordering extra, pe_working_set_t *data_set)
{
if (lh_action && rh_rsc && data_set) {
custom_action_order(lh_action->rsc, NULL, lh_action,
rh_rsc, stop_key(rh_rsc), NULL,
pe_order_preserve | extra, data_set);
}
}
static void
cleanup_orphans(resource_t * rsc, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
return;
}
/* Don't recurse into ->children, those are just unallocated clone instances */
if(is_not_set(rsc->flags, pe_rsc_orphan)) {
return;
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
if (node->details->online && get_failcount(node, rsc, NULL, data_set)) {
char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT,
node, FALSE, TRUE, data_set);
add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
pe_rsc_info(rsc,
"Clearing failure of %s on %s because it is orphaned "
CRM_XS " %s",
rsc->id, node->details->uname, clear_op->uuid);
/* We can't use order_action_then_stop() here because its
* pe_order_preserve breaks things
*/
custom_action_order(clear_op->rsc, NULL, clear_op,
rsc, stop_key(rsc), NULL,
pe_order_optional, data_set);
}
}
}
gboolean
stage5(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
if (safe_str_neq(data_set->placement_strategy, "default")) {
GListPtr nodes = g_list_copy(data_set->nodes);
nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL);
data_set->resources =
g_list_sort_with_data(data_set->resources, sort_rsc_process_order, nodes);
g_list_free(nodes);
}
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Original", node);
}
crm_trace("Allocating services");
/* Take (next) highest resource, assign it and create its actions */
allocate_resources(data_set);
gIter = data_set->nodes;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
dump_node_capacity(show_utilization ? 0 : utilization_log_level, "Remaining", node);
}
if (is_set(data_set->flags, pe_flag_startup_probes)) {
crm_trace("Calculating needed probes");
/* This code probably needs optimization
* ptest -x with 100 nodes, 100 clones and clone-max=100:
With probes:
ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
ptest[14781]: 2010/09/27_17:56:46 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
ptest[14781]: 2010/09/27_17:56:47 notice: TRACE: do_calculations: pengine.c:292 Check actions
ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
ptest[14781]: 2010/09/27_17:56:48 notice: TRACE: stage5: allocate.c:881 Allocating services
ptest[14781]: 2010/09/27_17:56:49 notice: TRACE: stage5: allocate.c:894 Calculating needed probes
ptest[14781]: 2010/09/27_17:56:51 notice: TRACE: stage5: allocate.c:899 Creating actions
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: stage5: allocate.c:905 Creating done
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
ptest[14781]: 2010/09/27_17:56:52 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
36s
ptest[14781]: 2010/09/27_17:57:28 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
Without probes:
ptest[14637]: 2010/09/27_17:56:21 notice: TRACE: do_calculations: pengine.c:258 Calculate cluster status
ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:278 Applying placement constraints
ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:285 Create internal constraints
ptest[14637]: 2010/09/27_17:56:22 notice: TRACE: do_calculations: pengine.c:292 Check actions
ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: do_calculations: pengine.c:299 Allocate resources
ptest[14637]: 2010/09/27_17:56:23 notice: TRACE: stage5: allocate.c:881 Allocating services
ptest[14637]: 2010/09/27_17:56:24 notice: TRACE: stage5: allocate.c:899 Creating actions
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: stage5: allocate.c:905 Creating done
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:306 Processing fencing and shutdown cases
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:313 Applying ordering constraints
ptest[14637]: 2010/09/27_17:56:25 notice: TRACE: do_calculations: pengine.c:320 Create transition graph
*/
probe_resources(data_set);
}
crm_trace("Handle orphans");
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
cleanup_orphans(rsc, data_set);
}
crm_trace("Creating actions");
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
rsc->cmds->create_actions(rsc, data_set);
}
crm_trace("Creating done");
return TRUE;
}
static gboolean
is_managed(const resource_t * rsc)
{
GListPtr gIter = rsc->children;
if (is_set(rsc->flags, pe_rsc_managed)) {
return TRUE;
}
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
if (is_managed(child_rsc)) {
return TRUE;
}
}
return FALSE;
}
static gboolean
any_managed_resources(pe_working_set_t * data_set)
{
GListPtr gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
if (is_managed(rsc)) {
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Create pseudo-op for guest node fence, and order relative to it
*
* \param[in] node Guest node to fence
* \param[in] done STONITH_DONE operation
* \param[in] data_set Working set of CIB state
*/
static void
fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set)
{
resource_t *container = node->details->remote_rsc->container;
pe_action_t *stop = NULL;
pe_action_t *stonith_op = NULL;
/* The fence action is just a label; we don't do anything differently for
* off vs. reboot. We specify it explicitly, rather than let it default to
* cluster's default action, because we are not _initiating_ fencing -- we
* are creating a pseudo-event to describe fencing that is already occurring
* by other means (container recovery).
*/
const char *fence_action = "off";
/* Check whether guest's container resource is has any explicit stop or
* start (the stop may be implied by fencing of the guest's host).
*/
if (container) {
stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP, NULL);
if (find_first_action(container->actions, NULL, CRMD_ACTION_START, NULL)) {
fence_action = "reboot";
}
}
/* Create a fence pseudo-event, so we have an event to order actions
* against, and crmd can always detect it.
*/
stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", data_set);
update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable,
__FUNCTION__, __LINE__);
/* We want to imply stops/demotes after the guest is stopped, not wait until
* it is restarted, so we always order pseudo-fencing after stop, not start
* (even though start might be closer to what is done for a real reboot).
*/
if(stop && is_set(stop->flags, pe_action_pseudo)) {
pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, data_set);
crm_info("Implying guest node %s is down (action %d) after %s fencing",
node->details->uname, stonith_op->id, stop->node->details->uname);
order_actions(parent_stonith_op, stonith_op,
pe_order_runnable_left|pe_order_implies_then);
} else if (stop) {
order_actions(stop, stonith_op,
pe_order_runnable_left|pe_order_implies_then);
crm_info("Implying guest node %s is down (action %d) "
"after container %s is stopped (action %d)",
node->details->uname, stonith_op->id,
container->id, stop->id);
} else {
crm_info("Implying guest node %s is down (action %d) ",
node->details->uname, stonith_op->id);
}
/* @TODO: Order pseudo-fence after any (optional) fence of guest's host */
/* Order/imply other actions relative to pseudo-fence as with real fence */
stonith_constraints(node, stonith_op, data_set);
order_actions(stonith_op, done, pe_order_implies_then);
}
/*
* Create dependencies for stonith and shutdown operations
*/
gboolean
stage6(pe_working_set_t * data_set)
{
action_t *dc_down = NULL;
action_t *dc_fence = NULL;
action_t *stonith_op = NULL;
action_t *last_stonith = NULL;
gboolean integrity_lost = FALSE;
action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set);
action_t *done = get_pseudo_op(STONITH_DONE, data_set);
gboolean need_stonith = TRUE;
GListPtr gIter;
GListPtr stonith_ops = NULL;
/* Remote ordering constraints need to happen prior to calculate
* fencing because it is one more place we will mark the node as
* dirty.
*
* A nice side-effect of doing it first is that we can remove a
* bunch of special logic from apply_*_ordering() because its
* already part of pe_fence_node()
*/
crm_trace("Creating remote ordering constraints");
apply_remote_node_ordering(data_set);
crm_trace("Processing fencing and shutdown cases");
if (any_managed_resources(data_set) == FALSE) {
crm_notice("Delaying fencing operations until there are resources to manage");
need_stonith = FALSE;
}
/* Check each node for stonith/shutdown */
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
/* Guest nodes are "fenced" by recovering their container resource,
* so handle them separately.
*/
if (is_container_remote_node(node)) {
if (node->details->remote_requires_reset && need_stonith) {
fence_guest(node, done, data_set);
}
continue;
}
stonith_op = NULL;
if (node->details->unclean
&& need_stonith && pe_can_fence(data_set, node)) {
stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", data_set);
pe_warn("Scheduling Node %s for STONITH", node->details->uname);
stonith_constraints(node, stonith_op, data_set);
if (node->details->is_dc) {
dc_down = stonith_op;
dc_fence = stonith_op;
} else if (is_set(data_set->flags, pe_flag_concurrent_fencing) == FALSE) {
if (last_stonith) {
order_actions(last_stonith, stonith_op, pe_order_optional);
}
last_stonith = stonith_op;
} else {
order_actions(stonith_op, done, pe_order_implies_then);
stonith_ops = g_list_append(stonith_ops, stonith_op);
}
} else if (node->details->online && node->details->shutdown &&
/* TODO define what a shutdown op means for a remote node.
* For now we do not send shutdown operations for remote nodes, but
* if we can come up with a good use for this in the future, we will. */
is_remote_node(node) == FALSE) {
action_t *down_op = NULL;
crm_notice("Scheduling Node %s for shutdown", node->details->uname);
down_op = custom_action(NULL, crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN, node->details->uname),
CRM_OP_SHUTDOWN, node, FALSE, TRUE, data_set);
shutdown_constraints(node, down_op, data_set);
add_hash_param(down_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
if (node->details->is_dc) {
dc_down = down_op;
}
}
if (node->details->unclean && stonith_op == NULL) {
integrity_lost = TRUE;
pe_warn("Node %s is unclean!", node->details->uname);
}
}
if (integrity_lost) {
if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
pe_warn("YOUR RESOURCES ARE NOW LIKELY COMPROMISED");
pe_err("ENABLE STONITH TO KEEP YOUR RESOURCES SAFE");
} else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE) {
crm_notice("Cannot fence unclean nodes until quorum is"
" attained (or no-quorum-policy is set to ignore)");
}
}
if (dc_down != NULL) {
GListPtr gIter = NULL;
crm_trace("Ordering shutdowns before %s on %s (DC)",
dc_down->task, dc_down->node->details->uname);
add_hash_param(dc_down->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *node_stop = (action_t *) gIter->data;
if (safe_str_neq(CRM_OP_SHUTDOWN, node_stop->task)) {
continue;
} else if (node_stop->node->details->is_dc) {
continue;
}
crm_debug("Ordering shutdown on %s before %s on %s",
node_stop->node->details->uname,
dc_down->task, dc_down->node->details->uname);
order_actions(node_stop, dc_down, pe_order_optional);
}
if (last_stonith) {
if (dc_down != last_stonith) {
order_actions(last_stonith, dc_down, pe_order_optional);
}
} else {
GListPtr gIter2 = NULL;
for (gIter2 = stonith_ops; gIter2 != NULL; gIter2 = gIter2->next) {
stonith_op = (action_t *) gIter2->data;
if (dc_down != stonith_op) {
order_actions(stonith_op, dc_down, pe_order_optional);
}
}
}
}
if (dc_fence) {
order_actions(dc_down, done, pe_order_implies_then);
} else if (last_stonith) {
order_actions(last_stonith, done, pe_order_implies_then);
}
order_actions(done, all_stopped, pe_order_implies_then);
g_list_free(stonith_ops);
return TRUE;
}
/*
* Determine the sets of independent actions and the correct order for the
* actions in each set.
*
* Mark dependencies of un-runnable actions un-runnable
*
*/
static GListPtr
find_actions_by_task(GListPtr actions, resource_t * rsc, const char *original_key)
{
GListPtr list = NULL;
list = find_actions(actions, original_key, NULL);
if (list == NULL) {
/* we're potentially searching a child of the original resource */
char *key = NULL;
char *tmp = NULL;
char *task = NULL;
int interval = 0;
if (parse_op_key(original_key, &tmp, &task, &interval)) {
key = generate_op_key(rsc->id, task, interval);
/* crm_err("looking up %s instead of %s", key, original_key); */
/* slist_iter(action, action_t, actions, lpc, */
/* crm_err(" - %s", action->uuid)); */
list = find_actions(actions, key, NULL);
} else {
crm_err("search key: %s", original_key);
}
free(key);
free(tmp);
free(task);
}
return list;
}
static void
rsc_order_then(action_t * lh_action, resource_t * rsc, order_constraint_t * order)
{
GListPtr gIter = NULL;
GListPtr rh_actions = NULL;
action_t *rh_action = NULL;
enum pe_ordering type = order->type;
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(order != NULL, return);
rh_action = order->rh_action;
crm_trace("Processing RH of ordering constraint %d", order->id);
if (rh_action != NULL) {
rh_actions = g_list_prepend(NULL, rh_action);
} else if (rsc != NULL) {
rh_actions = find_actions_by_task(rsc->actions, rsc, order->rh_action_task);
}
if (rh_actions == NULL) {
pe_rsc_trace(rsc, "No RH-Side (%s/%s) found for constraint..."
" ignoring", rsc->id, order->rh_action_task);
if (lh_action) {
pe_rsc_trace(rsc, "LH-Side was: %s", lh_action->uuid);
}
return;
}
if (lh_action && lh_action->rsc == rsc && is_set(lh_action->flags, pe_action_dangle)) {
pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid,
order->rh_action_task);
clear_bit(type, pe_order_implies_then);
}
gIter = rh_actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *rh_action_iter = (action_t *) gIter->data;
if (lh_action) {
order_actions(lh_action, rh_action_iter, type);
} else if (type & pe_order_implies_then) {
update_action_flags(rh_action_iter, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type);
} else {
crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type);
}
}
g_list_free(rh_actions);
}
static void
rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
GListPtr lh_actions = NULL;
action_t *lh_action = order->lh_action;
resource_t *rh_rsc = order->rh_rsc;
crm_trace("Processing LH of ordering constraint %d", order->id);
CRM_ASSERT(lh_rsc != NULL);
if (lh_action != NULL) {
lh_actions = g_list_prepend(NULL, lh_action);
} else if (lh_action == NULL) {
lh_actions = find_actions_by_task(lh_rsc->actions, lh_rsc, order->lh_action_task);
}
if (lh_actions == NULL && lh_rsc != rh_rsc) {
char *key = NULL;
char *rsc_id = NULL;
char *op_type = NULL;
int interval = 0;
parse_op_key(order->lh_action_task, &rsc_id, &op_type, &interval);
key = generate_op_key(lh_rsc->id, op_type, interval);
if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED && safe_str_eq(op_type, RSC_STOP)) {
free(key);
pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
} else if (lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_SLAVE && safe_str_eq(op_type, RSC_DEMOTE)) {
free(key);
pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - ignoring",
lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
} else {
pe_rsc_trace(lh_rsc, "No LH-Side (%s/%s) found for constraint %d with %s - creating",
lh_rsc->id, order->lh_action_task, order->id, order->rh_action_task);
lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set);
lh_actions = g_list_prepend(NULL, lh_action);
}
free(op_type);
free(rsc_id);
}
gIter = lh_actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *lh_action_iter = (action_t *) gIter->data;
if (rh_rsc == NULL && order->rh_action) {
rh_rsc = order->rh_action->rsc;
}
if (rh_rsc) {
rsc_order_then(lh_action_iter, rh_rsc, order);
} else if (order->rh_action) {
order_actions(lh_action_iter, order->rh_action, order->type);
}
}
g_list_free(lh_actions);
}
extern gboolean update_action(action_t * action);
extern void update_colo_start_chain(action_t * action);
static int
is_recurring_action(action_t *action)
{
const char *interval_s = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL);
int interval = crm_parse_int(interval_s, "0");
if(interval > 0) {
return TRUE;
}
return FALSE;
}
static void
apply_container_ordering(action_t *action, pe_working_set_t *data_set)
{
/* VMs are also classified as containers for these purposes... in
* that they both involve a 'thing' running on a real or remote
* cluster node.
*
* This allows us to be smarter about the type and extent of
* recovery actions required in various scenarios
*/
resource_t *remote_rsc = NULL;
resource_t *container = NULL;
enum action_tasks task = text2task(action->task);
CRM_ASSERT(action->rsc);
CRM_ASSERT(action->node);
CRM_ASSERT(is_remote_node(action->node));
remote_rsc = action->node->details->remote_rsc;
CRM_ASSERT(remote_rsc);
container = remote_rsc->container;
CRM_ASSERT(container);
if(is_set(container->flags, pe_rsc_failed)) {
pe_fence_node(data_set, action->node, "container failed");
}
crm_trace("Order %s action %s relative to %s%s for %s%s",
action->task, action->uuid,
is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
remote_rsc->id,
is_set(container->flags, pe_rsc_failed)? "failed " : "",
container->id);
if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
|| safe_str_eq(action->task, CRMD_ACTION_MIGRATE)) {
/* Migration ops map to "no_action", but we need to apply the same
* ordering as for stop or demote (see get_router_node()).
*/
task = stop_rsc;
}
switch (task) {
case start_rsc:
case action_promote:
/* Force resource recovery if the container is recovered */
order_start_then_action(container, action, pe_order_implies_then,
data_set);
/* Wait for the connection resource to be up too */
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
break;
case stop_rsc:
case action_demote:
if (is_set(container->flags, pe_rsc_failed)) {
/* When the container representing a guest node fails, any stop
* or demote actions for resources running on the guest node
* are implied by the container stopping. This is similar to
* how fencing operations work for cluster nodes and remote
* nodes.
*/
} else {
/* Ensure the operation happens before the connection is brought
* down.
*
* If we really wanted to, we could order these after the
* connection start, IFF the container's current role was
* stopped (otherwise we re-introduce an ordering loop when the
* connection is restarting).
*/
order_action_then_stop(action, remote_rsc, pe_order_none,
data_set);
}
break;
default:
/* Wait for the connection resource to be up */
if (is_recurring_action(action)) {
/* In case we ever get the recovery logic wrong, force
* recurring monitors to be restarted, even if just
* the connection was re-established
*/
if(task != no_action) {
order_start_then_action(remote_rsc, action,
pe_order_implies_then, data_set);
}
} else {
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
}
break;
}
}
static enum remote_connection_state
get_remote_node_state(pe_node_t *node)
{
resource_t *remote_rsc = NULL;
node_t *cluster_node = NULL;
CRM_ASSERT(node);
remote_rsc = node->details->remote_rsc;
CRM_ASSERT(remote_rsc);
if(remote_rsc->running_on) {
cluster_node = remote_rsc->running_on->data;
}
/* If the cluster node the remote connection resource resides on
* is unclean or went offline, we can't process any operations
* on that remote node until after it starts elsewhere.
*/
if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
/* The connection resource is not going to run anywhere */
if (cluster_node && cluster_node->details->unclean) {
/* The remote connection is failed because its resource is on a
* failed node and can't be recovered elsewhere, so we must fence.
*/
return remote_state_failed;
}
if (is_not_set(remote_rsc->flags, pe_rsc_failed)) {
/* Connection resource is cleanly stopped */
return remote_state_stopped;
}
/* Connection resource is failed */
if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
&& remote_rsc->remote_reconnect_interval
&& node->details->remote_was_fenced) {
/* We won't know whether the connection is recoverable until the
* reconnect interval expires and we reattempt connection.
*/
return remote_state_unknown;
}
/* The remote connection is in a failed state. If there are any
* resources known to be active on it (stop) or in an unknown state
* (probe), we must assume the worst and fence it.
*/
return remote_state_failed;
} else if (cluster_node == NULL) {
/* Connection is recoverable but not currently running anywhere, see if we can recover it first */
return remote_state_unknown;
} else if(cluster_node->details->unclean == TRUE
|| cluster_node->details->online == FALSE) {
/* Connection is running on a dead node, see if we can recover it first */
return remote_state_resting;
} else if (g_list_length(remote_rsc->running_on) > 1
&& remote_rsc->partial_migration_source
&& remote_rsc->partial_migration_target) {
/* We're in the middle of migrating a connection resource,
* wait until after the resource migrates before performing
* any actions.
*/
return remote_state_resting;
}
return remote_state_alive;
}
static void
apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
{
resource_t *remote_rsc = NULL;
node_t *cluster_node = NULL;
enum action_tasks task = text2task(action->task);
enum remote_connection_state state = get_remote_node_state(action->node);
enum pe_ordering order_opts = pe_order_none;
if (action->rsc == NULL) {
return;
}
CRM_ASSERT(action->node);
CRM_ASSERT(is_remote_node(action->node));
remote_rsc = action->node->details->remote_rsc;
CRM_ASSERT(remote_rsc);
if(remote_rsc->running_on) {
cluster_node = remote_rsc->running_on->data;
}
crm_trace("Order %s action %s relative to %s%s (state %d)",
action->task, action->uuid,
is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
remote_rsc->id, state);
if (safe_str_eq(action->task, CRMD_ACTION_MIGRATE)
|| safe_str_eq(action->task, CRMD_ACTION_MIGRATE)) {
/* Migration ops map to "no_action", but we need to apply the same
* ordering as for stop or demote (see get_router_node()).
*/
task = stop_rsc;
}
switch (task) {
case start_rsc:
case action_promote:
order_opts = pe_order_none;
if (state == remote_state_failed) {
/* Force recovery, by making this action required */
order_opts |= pe_order_implies_then;
}
/* Ensure connection is up before running this action */
order_start_then_action(remote_rsc, action, order_opts, data_set);
break;
case stop_rsc:
/* Handle special case with remote node where stop actions need to be
* ordered after the connection resource starts somewhere else.
*/
if(state == remote_state_resting) {
/* Wait for the connection resource to be up and assume everything is as we left it */
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
} else {
if(state == remote_state_failed) {
/* We would only be here if the resource is
* running on the remote node. Since we have no
* way to stop it, it is necessary to fence the
* node.
*/
pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable");
}
order_action_then_stop(action, remote_rsc,
pe_order_implies_first, data_set);
}
break;
case action_demote:
/* Only order this demote relative to the connection start if the
* connection isn't being torn down. Otherwise, the demote would be
* blocked because the connection start would not be allowed.
*/
if(state == remote_state_resting || state == remote_state_unknown) {
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
} /* Otherwise we can rely on the stop ordering */
break;
default:
/* Wait for the connection resource to be up */
if (is_recurring_action(action)) {
/* In case we ever get the recovery logic wrong, force
* recurring monitors to be restarted, even if just
* the connection was re-established
*/
order_start_then_action(remote_rsc, action,
pe_order_implies_then, data_set);
} else {
if(task == monitor_rsc && state == remote_state_failed) {
/* We would only be here if we do not know the
* state of the resource on the remote node.
* Since we have no way to find out, it is
* necessary to fence the node.
*/
pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable");
}
if(cluster_node && state == remote_state_stopped) {
/* The connection is currently up, but is going
* down permanently.
*
* Make sure we check services are actually
* stopped _before_ we let the connection get
* closed
*/
order_action_then_stop(action, remote_rsc,
pe_order_runnable_left, data_set);
} else {
order_start_then_action(remote_rsc, action, pe_order_none,
data_set);
}
}
break;
}
}
static void
apply_remote_node_ordering(pe_working_set_t *data_set)
{
if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
return;
}
for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
resource_t *remote = NULL;
// We are only interested in resource actions
if (action->rsc == NULL) {
continue;
}
/* Special case: If we are clearing the failcount of an actual
* remote connection resource, then make sure this happens before
* any start of the resource in this transition.
*/
if (action->rsc->is_remote_node &&
safe_str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT)) {
custom_action_order(action->rsc,
NULL,
action,
action->rsc,
generate_op_key(action->rsc->id, RSC_START, 0),
NULL,
pe_order_optional,
data_set);
continue;
}
// We are only interested in actions allocated to a node
if (action->node == NULL) {
continue;
}
if (is_remote_node(action->node) == FALSE) {
continue;
}
/* We are only interested in real actions.
*
* @TODO This is probably wrong; pseudo-actions might be converted to
* real actions and vice versa later in update_actions() at the end of
* stage7().
*/
if (is_set(action->flags, pe_action_pseudo)) {
continue;
}
remote = action->node->details->remote_rsc;
if (remote == NULL) {
// Orphaned
continue;
}
/* The action occurs across a remote connection, so create
* ordering constraints that guarantee the action occurs while the node
* is active (after start, before stop ... things like that).
*
* This is somewhat brittle in that we need to make sure the results of
* this ordering are compatible with the result of get_router_node().
* It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
* of this logic rather than action2xml().
*/
if (remote->container) {
crm_trace("Container ordering for %s", action->uuid);
apply_container_ordering(action, data_set);
} else {
crm_trace("Remote ordering for %s", action->uuid);
apply_remote_ordering(action, data_set);
}
}
}
static void
order_probes(pe_working_set_t * data_set)
{
#if 0
GListPtr gIter = NULL;
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
/* Given "A then B", we would prefer to wait for A to be
* started before probing B.
*
* If A was a filesystem on which the binaries and data for B
* lived, it would have been useful if the author of B's agent
* could assume that A is running before B.monitor will be
* called.
*
* However we can't _only_ probe once A is running, otherwise
* we'd not detect the state of B if A could not be started
* for some reason.
*
* In practice however, we cannot even do an opportunistic
* version of this because B may be moving:
*
* B.probe -> B.start
* B.probe -> B.stop
* B.stop -> B.start
* A.stop -> A.start
* A.start -> B.probe
*
* So far so good, but if we add the result of this code:
*
* B.stop -> A.stop
*
* Then we get a loop:
*
* B.probe -> B.stop -> A.stop -> A.start -> B.probe
*
* We could kill the 'B.probe -> B.stop' dependency, but that
* could mean stopping B "too" soon, because B.start must wait
* for the probes to complete.
*
* Another option is to allow it only if A is a non-unique
* clone with clone-max == node-max (since we'll never be
* moving it). However, we could still be stopping one
* instance at the same time as starting another.
* The complexity of checking for allowed conditions combined
* with the ever narrowing usecase suggests that this code
* should remain disabled until someone gets smarter.
*/
action_t *start = NULL;
GListPtr actions = NULL;
GListPtr probes = NULL;
char *key = NULL;
key = start_key(rsc);
actions = find_actions(rsc->actions, key, NULL);
free(key);
if (actions) {
start = actions->data;
g_list_free(actions);
}
if(start == NULL) {
crm_err("No start action for %s", rsc->id);
continue;
}
key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0);
probes = find_actions(rsc->actions, key, NULL);
free(key);
for (actions = start->actions_before; actions != NULL; actions = actions->next) {
action_wrapper_t *before = (action_wrapper_t *) actions->data;
GListPtr pIter = NULL;
action_t *first = before->action;
resource_t *first_rsc = first->rsc;
if(first->required_runnable_before) {
GListPtr clone_actions = NULL;
for (clone_actions = first->actions_before; clone_actions != NULL; clone_actions = clone_actions->next) {
before = (action_wrapper_t *) clone_actions->data;
crm_trace("Testing %s -> %s (%p) for %s", first->uuid, before->action->uuid, before->action->rsc, start->uuid);
CRM_ASSERT(before->action->rsc);
first_rsc = before->action->rsc;
break;
}
} else if(safe_str_neq(first->task, RSC_START)) {
crm_trace("Not a start op %s for %s", first->uuid, start->uuid);
}
if(first_rsc == NULL) {
continue;
} else if(uber_parent(first_rsc) == uber_parent(start->rsc)) {
crm_trace("Same parent %s for %s", first_rsc->id, start->uuid);
continue;
} else if(FALSE && pe_rsc_is_clone(uber_parent(first_rsc)) == FALSE) {
crm_trace("Not a clone %s for %s", first_rsc->id, start->uuid);
continue;
}
crm_err("Applying %s before %s %d", first->uuid, start->uuid, uber_parent(first_rsc)->variant);
for (pIter = probes; pIter != NULL; pIter = pIter->next) {
action_t *probe = (action_t *) pIter->data;
crm_err("Ordering %s before %s", first->uuid, probe->uuid);
order_actions(first, probe, pe_order_optional);
}
}
}
#endif
}
gboolean
stage7(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
crm_trace("Applying ordering constraints");
/* Don't ask me why, but apparently they need to be processed in
* the order they were created in... go figure
*
* Also g_list_append() has horrendous performance characteristics
* So we need to use g_list_prepend() and then reverse the list here
*/
data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints);
for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) {
order_constraint_t *order = (order_constraint_t *) gIter->data;
resource_t *rsc = order->lh_rsc;
crm_trace("Applying ordering constraint: %d", order->id);
if (rsc != NULL) {
crm_trace("rsc_action-to-*");
rsc_order_first(rsc, order, data_set);
continue;
}
rsc = order->rh_rsc;
if (rsc != NULL) {
crm_trace("action-to-rsc_action");
rsc_order_then(order->lh_action, rsc, order);
} else {
crm_trace("action-to-action");
order_actions(order->lh_action, order->rh_action, order->type);
}
}
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
update_colo_start_chain(action);
}
crm_trace("Ordering probes");
order_probes(data_set);
crm_trace("Updating %d actions", g_list_length(data_set->actions));
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
update_action(action);
}
LogNodeActions(data_set, FALSE);
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
LogActions(rsc, data_set, FALSE);
}
return TRUE;
}
int transition_id = -1;
/*
* Create a dependency graph to send to the transitioner (via the CRMd)
*/
gboolean
stage8(pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
const char *value = NULL;
transition_id++;
crm_trace("Creating transition graph %d.", transition_id);
data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
value = pe_pref(data_set->config_hash, "cluster-delay");
crm_xml_add(data_set->graph, "cluster-delay", value);
value = pe_pref(data_set->config_hash, "stonith-timeout");
crm_xml_add(data_set->graph, "stonith-timeout", value);
crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
if (is_set(data_set->flags, pe_flag_start_failure_fatal)) {
crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
} else {
crm_xml_add(data_set->graph, "failed-start-offset", "1");
}
value = pe_pref(data_set->config_hash, "batch-limit");
crm_xml_add(data_set->graph, "batch-limit", value);
crm_xml_add_int(data_set->graph, "transition_id", transition_id);
value = pe_pref(data_set->config_hash, "migration-limit");
if (crm_int_helper(value, NULL) > 0) {
crm_xml_add(data_set->graph, "migration-limit", value);
}
/* errors...
slist_iter(action, action_t, action_list, lpc,
if(action->optional == FALSE && action->runnable == FALSE) {
print_action("Ignoring", action, TRUE);
}
);
*/
gIter = data_set->resources;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
pe_rsc_trace(rsc, "processing actions for rsc=%s", rsc->id);
rsc->cmds->expand(rsc, data_set);
}
crm_log_xml_trace(data_set->graph, "created resource-driven action list");
/* pseudo action to distribute list of nodes with maintenance state update */
add_maintenance_update(data_set);
/* catch any non-resource specific actions */
crm_trace("processing non-resource actions");
gIter = data_set->actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
if (action->rsc
&& action->node
&& action->node->details->shutdown
&& is_not_set(action->rsc->flags, pe_rsc_maintenance)
&& is_not_set(action->flags, pe_action_optional)
&& is_not_set(action->flags, pe_action_runnable)
&& crm_str_eq(action->task, RSC_STOP, TRUE)
) {
/* Eventually we should just ignore the 'fence' case
* But for now it's the best way to detect (in CTS) when
* CIB resource updates are being lost
*/
if (is_set(data_set->flags, pe_flag_have_quorum)
|| data_set->no_quorum_policy == no_quorum_ignore) {
crm_crit("Cannot %s node '%s' because of %s:%s%s (%s)",
action->node->details->unclean ? "fence" : "shut down",
action->node->details->uname, action->rsc->id,
is_not_set(action->rsc->flags, pe_rsc_managed) ? " unmanaged" : " blocked",
is_set(action->rsc->flags, pe_rsc_failed) ? " failed" : "",
action->uuid);
}
}
graph_element_from_action(action, data_set);
}
crm_log_xml_trace(data_set->graph, "created generic action list");
crm_trace("Created transition graph %d.", transition_id);
return TRUE;
}
void
LogNodeActions(pe_working_set_t * data_set, gboolean terminal)
{
GListPtr gIter = NULL;
for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
char *node_name = NULL;
char *task = NULL;
action_t *action = (action_t *) gIter->data;
if (action->rsc != NULL) {
continue;
} else if (is_set(action->flags, pe_action_optional)) {
continue;
}
if (is_container_remote_node(action->node)) {
node_name = crm_strdup_printf("%s (resource: %s)", action->node->details->uname, action->node->details->remote_rsc->container->id);
} else if(action->node) {
node_name = crm_strdup_printf("%s", action->node->details->uname);
}
if (safe_str_eq(action->task, CRM_OP_SHUTDOWN)) {
task = strdup("Shutdown");
} else if (safe_str_eq(action->task, CRM_OP_FENCE)) {
const char *op = g_hash_table_lookup(action->meta, "stonith_action");
task = crm_strdup_printf("Fence (%s)", op);
}
if(task == NULL) {
/* Nothing to report */
} else if(terminal && action->reason) {
printf(" * %s %s '%s'\n", task, node_name, action->reason);
} else if(terminal) {
printf(" * %s %s\n", task, node_name);
} else if(action->reason) {
crm_notice(" * %s %s '%s'\n", task, node_name, action->reason);
} else {
crm_notice(" * %s %s\n", task, node_name);
}
free(node_name);
free(task);
}
}
void
cleanup_alloc_calculations(pe_working_set_t * data_set)
{
if (data_set == NULL) {
return;
}
crm_trace("deleting %d order cons: %p",
g_list_length(data_set->ordering_constraints), data_set->ordering_constraints);
pe_free_ordering(data_set->ordering_constraints);
data_set->ordering_constraints = NULL;
crm_trace("deleting %d node cons: %p",
g_list_length(data_set->placement_constraints), data_set->placement_constraints);
pe_free_rsc_to_node(data_set->placement_constraints);
data_set->placement_constraints = NULL;
crm_trace("deleting %d inter-resource cons: %p",
g_list_length(data_set->colocation_constraints), data_set->colocation_constraints);
g_list_free_full(data_set->colocation_constraints, free);
data_set->colocation_constraints = NULL;
crm_trace("deleting %d ticket deps: %p",
g_list_length(data_set->ticket_constraints), data_set->ticket_constraints);
g_list_free_full(data_set->ticket_constraints, free);
data_set->ticket_constraints = NULL;
cleanup_calculations(data_set);
}
diff --git a/pengine/regression.sh b/pengine/regression.sh
index 1742172228..43f210a4b4 100755
--- a/pengine/regression.sh
+++ b/pengine/regression.sh
@@ -1,895 +1,895 @@
#!/bin/bash
# Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
core=`dirname $0`
. $core/regression.core.sh || exit 1
DO_VERSIONED_TESTS=0
create_mode="true"
info Generating test outputs for these tests...
# do_test file description
info Done.
echo ""
info Performing the following tests from $io_dir
create_mode="false"
echo ""
do_test simple1 "Offline "
do_test simple2 "Start "
do_test simple3 "Start 2 "
do_test simple4 "Start Failed"
do_test simple6 "Stop Start "
do_test simple7 "Shutdown "
#do_test simple8 "Stonith "
#do_test simple9 "Lower version"
#do_test simple10 "Higher version"
do_test simple11 "Priority (ne)"
do_test simple12 "Priority (eq)"
do_test simple8 "Stickiness"
echo ""
do_test group1 "Group "
do_test group2 "Group + Native "
do_test group3 "Group + Group "
do_test group4 "Group + Native (nothing)"
do_test group5 "Group + Native (move) "
do_test group6 "Group + Group (move) "
do_test group7 "Group colocation"
do_test group13 "Group colocation (cant run)"
do_test group8 "Group anti-colocation"
do_test group9 "Group recovery"
do_test group10 "Group partial recovery"
do_test group11 "Group target_role"
do_test group14 "Group stop (graph terminated)"
do_test group15 "-ve group colocation"
do_test bug-1573 "Partial stop of a group with two children"
do_test bug-1718 "Mandatory group ordering - Stop group_FUN"
do_test bug-lf-2613 "Move group on failure"
do_test bug-lf-2619 "Move group on clone failure"
do_test group-fail "Ensure stop order is preserved for partially active groups"
do_test group-unmanaged "No need to restart r115 because r114 is unmanaged"
do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails"
do_test group-dependents "Account for the location preferences of things colocated with a group"
echo ""
do_test rsc_dep1 "Must not "
do_test rsc_dep3 "Must "
do_test rsc_dep5 "Must not 3 "
do_test rsc_dep7 "Must 3 "
do_test rsc_dep10 "Must (but cant)"
do_test rsc_dep2 "Must (running) "
do_test rsc_dep8 "Must (running : alt) "
do_test rsc_dep4 "Must (running + move)"
do_test asymmetric "Asymmetric - require explicit location constraints"
echo ""
do_test orphan-0 "Orphan ignore"
do_test orphan-1 "Orphan stop"
do_test orphan-2 "Orphan stop, remove failcount"
echo ""
do_test params-0 "Params: No change"
do_test params-1 "Params: Changed"
do_test params-2 "Params: Resource definition"
do_test params-4 "Params: Reload"
do_test params-5 "Params: Restart based on probe digest"
do_test novell-251689 "Resource definition change + target_role=stopped"
do_test bug-lf-2106 "Restart all anonymous clone instances after config change"
do_test params-6 "Params: Detect reload in previously migrated resource"
do_test nvpair-id-ref "Support id-ref in nvpair with optional name"
do_test not-reschedule-unneeded-monitor "Do not reschedule unneeded monitors while resource definitions have changed"
do_test reload-becomes-restart "Cancel reload if restart becomes required"
echo ""
do_test target-0 "Target Role : baseline"
do_test target-1 "Target Role : master"
do_test target-2 "Target Role : invalid"
echo ""
do_test base-score "Set a node's default score for all nodes"
echo ""
do_test date-1 "Dates" -t "2005-020"
do_test date-2 "Date Spec - Pass" -t "2005-020T12:30"
do_test date-3 "Date Spec - Fail" -t "2005-020T11:30"
do_test origin "Timing of recurring operations" -t "2014-05-07 00:28:00"
do_test probe-0 "Probe (anon clone)"
do_test probe-1 "Pending Probe"
do_test probe-2 "Correctly re-probe cloned groups"
do_test probe-3 "Probe (pending node)"
do_test probe-4 "Probe (pending node + stopped resource)"
do_test standby "Standby"
do_test comments "Comments"
echo ""
do_test one-or-more-0 "Everything starts"
do_test one-or-more-1 "Nothing starts because of A"
do_test one-or-more-2 "D can start because of C"
do_test one-or-more-3 "D cannot start because of B and C"
do_test one-or-more-4 "D cannot start because of target-role"
do_test one-or-more-5 "Start A and F even though C and D are stopped"
do_test one-or-more-6 "Leave A running even though B is stopped"
do_test one-or-more-7 "Leave A running even though C is stopped"
do_test bug-5140-require-all-false "Allow basegrp:0 to stop"
do_test clone-require-all-1 "clone B starts node 3 and 4"
do_test clone-require-all-2 "clone B remains stopped everywhere"
do_test clone-require-all-3 "clone B stops everywhere because A stops everywhere"
do_test clone-require-all-4 "clone B remains on node 3 and 4 with only one instance of A remaining."
do_test clone-require-all-5 "clone B starts on node 1 3 and 4"
do_test clone-require-all-6 "clone B remains active after shutting down instances of A"
do_test clone-require-all-7 "clone A and B both start at the same time. all instances of A start before B."
do_test clone-require-all-no-interleave-1 "C starts everywhere after A and B"
do_test clone-require-all-no-interleave-2 "C starts on nodes 1, 2, and 4 with only one active instance of B"
do_test clone-require-all-no-interleave-3 "C remains active when instance of B is stopped on one node and started on another."
-do_test one-or-more-unrunnnable-instances "Avoid dependencies on instances that won't ever be started"
+do_test one-or-more-unrunnable-instances "Avoid dependencies on instances that won't ever be started"
echo ""
do_test order1 "Order start 1 "
do_test order2 "Order start 2 "
do_test order3 "Order stop "
do_test order4 "Order (multiple) "
do_test order5 "Order (move) "
do_test order6 "Order (move w/ restart) "
do_test order7 "Order (mandatory) "
do_test order-optional "Order (score=0) "
do_test order-required "Order (score=INFINITY) "
do_test bug-lf-2171 "Prevent group start when clone is stopped"
do_test order-clone "Clone ordering should be able to prevent startup of dependent clones"
do_test order-sets "Ordering for resource sets"
do_test order-serialize "Serialize resources without inhibiting migration"
do_test order-serialize-set "Serialize a set of resources without inhibiting migration"
do_test clone-order-primitive "Order clone start after a primitive"
do_test clone-order-16instances "Verify ordering of 16 cloned resources"
do_test order-optional-keyword "Order (optional keyword)"
do_test order-mandatory "Order (mandatory keyword)"
do_test bug-lf-2493 "Don't imply colocation requirements when applying ordering constraints with clones"
do_test ordered-set-basic-startup "Constraint set with default order settings."
do_test ordered-set-natural "Allow natural set ordering"
do_test order-wrong-kind "Order (error)"
echo ""
do_test coloc-loop "Colocation - loop"
do_test coloc-many-one "Colocation - many-to-one"
do_test coloc-list "Colocation - many-to-one with list"
do_test coloc-group "Colocation - groups"
do_test coloc-slave-anti "Anti-colocation with slave shouldn't prevent master colocation"
do_test coloc-attr "Colocation based on node attributes"
do_test coloc-negative-group "Negative colocation with a group"
do_test coloc-intra-set "Intra-set colocation"
do_test bug-lf-2435 "Colocation sets with a negative score"
do_test coloc-clone-stays-active "Ensure clones don't get stopped/demoted because a dependent must stop"
do_test coloc_fp_logic "Verify floating point calculations in colocation are working"
do_test colo_master_w_native "cl#5070 - Verify promotion order is affected when colocating master to native rsc."
do_test colo_slave_w_native "cl#5070 - Verify promotion order is affected when colocating slave to native rsc."
do_test anti-colocation-order "cl#5187 - Prevent resources in an anti-colocation from even temporarily running on a same node"
do_test anti-colocation-master "Organize order of actions for master resources in anti-colocations"
do_test anti-colocation-slave "Organize order of actions for slave resources in anti-colocations"
do_test enforce-colo1 "Always enforce B with A INFINITY."
do_test complex_enforce_colo "Always enforce B with A INFINITY. (make sure heat-engine stops)"
echo ""
do_test rsc-sets-seq-true "Resource Sets - sequential=false"
do_test rsc-sets-seq-false "Resource Sets - sequential=true"
do_test rsc-sets-clone "Resource Sets - Clone"
do_test rsc-sets-master "Resource Sets - Master"
do_test rsc-sets-clone-1 "Resource Sets - Clone (lf#2404)"
#echo ""
#do_test agent1 "version: lt (empty)"
#do_test agent2 "version: eq "
#do_test agent3 "version: gt "
echo ""
do_test attrs1 "string: eq (and) "
do_test attrs2 "string: lt / gt (and)"
do_test attrs3 "string: ne (or) "
do_test attrs4 "string: exists "
do_test attrs5 "string: not_exists "
do_test attrs6 "is_dc: true "
do_test attrs7 "is_dc: false "
do_test attrs8 "score_attribute "
do_test per-node-attrs "Per node resource parameters"
echo ""
do_test mon-rsc-1 "Schedule Monitor - start"
do_test mon-rsc-2 "Schedule Monitor - move "
do_test mon-rsc-3 "Schedule Monitor - pending start "
do_test mon-rsc-4 "Schedule Monitor - move/pending start"
echo ""
do_test rec-rsc-0 "Resource Recover - no start "
do_test rec-rsc-1 "Resource Recover - start "
do_test rec-rsc-2 "Resource Recover - monitor "
do_test rec-rsc-3 "Resource Recover - stop - ignore"
do_test rec-rsc-4 "Resource Recover - stop - block "
do_test rec-rsc-5 "Resource Recover - stop - fence "
do_test rec-rsc-6 "Resource Recover - multiple - restart"
do_test rec-rsc-7 "Resource Recover - multiple - stop "
do_test rec-rsc-8 "Resource Recover - multiple - block "
do_test rec-rsc-9 "Resource Recover - group/group"
do_test monitor-recovery "on-fail=block + resource recovery detected by recurring monitor"
do_test stop-failure-no-quorum "Stop failure without quorum"
do_test stop-failure-no-fencing "Stop failure without fencing available"
do_test stop-failure-with-fencing "Stop failure with fencing available"
do_test multiple-active-block-group "Support of multiple-active=block for resource groups"
do_test multiple-monitor-one-failed "Consider resource failed if any of the configured monitor operations failed"
echo ""
do_test quorum-1 "No quorum - ignore"
do_test quorum-2 "No quorum - freeze"
do_test quorum-3 "No quorum - stop "
do_test quorum-4 "No quorum - start anyway"
do_test quorum-5 "No quorum - start anyway (group)"
do_test quorum-6 "No quorum - start anyway (clone)"
do_test bug-cl-5212 "No promotion with no-quorum-policy=freeze"
do_test suicide-needed-inquorate "no-quorum-policy=suicide: suicide necessary"
do_test suicide-not-needed-initial-quorum "no-quorum-policy=suicide: suicide not necessary at initial quorum"
do_test suicide-not-needed-never-quorate "no-quorum-policy=suicide: suicide not necessary if never quorate"
do_test suicide-not-needed-quorate "no-quorum-policy=suicide: suicide necessary if quorate"
echo ""
do_test rec-node-1 "Node Recover - Startup - no fence"
do_test rec-node-2 "Node Recover - Startup - fence "
do_test rec-node-3 "Node Recover - HA down - no fence"
do_test rec-node-4 "Node Recover - HA down - fence "
do_test rec-node-5 "Node Recover - CRM down - no fence"
do_test rec-node-6 "Node Recover - CRM down - fence "
do_test rec-node-7 "Node Recover - no quorum - ignore "
do_test rec-node-8 "Node Recover - no quorum - freeze "
do_test rec-node-9 "Node Recover - no quorum - stop "
do_test rec-node-10 "Node Recover - no quorum - stop w/fence"
do_test rec-node-11 "Node Recover - CRM down w/ group - fence "
do_test rec-node-12 "Node Recover - nothing active - fence "
do_test rec-node-13 "Node Recover - failed resource + shutdown - fence "
do_test rec-node-15 "Node Recover - unknown lrm section"
do_test rec-node-14 "Serialize all stonith's"
echo ""
do_test multi1 "Multiple Active (stop/start)"
echo ""
do_test migrate-begin "Normal migration"
do_test migrate-success "Completed migration"
do_test migrate-partial-1 "Completed migration, missing stop on source"
do_test migrate-partial-2 "Successful migrate_to only"
do_test migrate-partial-3 "Successful migrate_to only, target down"
do_test migrate-partial-4 "Migrate from the correct host after migrate_to+migrate_from"
do_test bug-5186-partial-migrate "Handle partial migration when src node loses membership"
do_test migrate-fail-2 "Failed migrate_from"
do_test migrate-fail-3 "Failed migrate_from + stop on source"
do_test migrate-fail-4 "Failed migrate_from + stop on target - ideally we wouldn't need to re-stop on target"
do_test migrate-fail-5 "Failed migrate_from + stop on source and target"
do_test migrate-fail-6 "Failed migrate_to"
do_test migrate-fail-7 "Failed migrate_to + stop on source"
do_test migrate-fail-8 "Failed migrate_to + stop on target - ideally we wouldn't need to re-stop on target"
do_test migrate-fail-9 "Failed migrate_to + stop on source and target"
do_test migrate-stop "Migration in a stopping stack"
do_test migrate-start "Migration in a starting stack"
do_test migrate-stop_start "Migration in a restarting stack"
do_test migrate-stop-complex "Migration in a complex stopping stack"
do_test migrate-start-complex "Migration in a complex starting stack"
do_test migrate-stop-start-complex "Migration in a complex moving stack"
do_test migrate-shutdown "Order the post-migration 'stop' before node shutdown"
do_test migrate-1 "Migrate (migrate)"
do_test migrate-2 "Migrate (stable)"
do_test migrate-3 "Migrate (failed migrate_to)"
do_test migrate-4 "Migrate (failed migrate_from)"
do_test novell-252693 "Migration in a stopping stack"
do_test novell-252693-2 "Migration in a starting stack"
do_test novell-252693-3 "Non-Migration in a starting and stopping stack"
do_test bug-1820 "Migration in a group"
do_test bug-1820-1 "Non-migration in a group"
do_test migrate-5 "Primitive migration with a clone"
do_test migrate-fencing "Migration after Fencing"
do_test migrate-both-vms "Migrate two VMs that have no colocation"
do_test migration-behind-migrating-remote "Migrate resource behind migrating remote connection"
do_test 1-a-then-bm-move-b "Advanced migrate logic. A then B. migrate B."
do_test 2-am-then-b-move-a "Advanced migrate logic, A then B, migrate A without stopping B"
do_test 3-am-then-bm-both-migrate "Advanced migrate logic. A then B. migrate both"
do_test 4-am-then-bm-b-not-migratable "Advanced migrate logic, A then B, B not migratable"
do_test 5-am-then-bm-a-not-migratable "Advanced migrate logic. A then B. move both, a not migratable"
do_test 6-migrate-group "Advanced migrate logic, migrate a group"
do_test 7-migrate-group-one-unmigratable "Advanced migrate logic, migrate group mixed with allow-migrate true/false"
do_test 8-am-then-bm-a-migrating-b-stopping "Advanced migrate logic, A then B, A migrating, B stopping"
do_test 9-am-then-bm-b-migrating-a-stopping "Advanced migrate logic, A then B, B migrate, A stopping"
do_test 10-a-then-bm-b-move-a-clone "Advanced migrate logic, A clone then B, migrate B while stopping A"
do_test 11-a-then-bm-b-move-a-clone-starting "Advanced migrate logic, A clone then B, B moving while A is start/stopping"
do_test a-promote-then-b-migrate "A promote then B start. migrate B"
do_test a-demote-then-b-migrate "A demote then B stop. migrate B"
if [ $DO_VERSIONED_TESTS -eq 1 ]; then
do_test migrate-versioned "Disable migration for versioned resources"
fi
#echo ""
#do_test complex1 "Complex "
do_test bug-lf-2422 "Dependency on partially active group - stop ocfs:*"
echo ""
do_test clone-anon-probe-1 "Probe the correct (anonymous) clone instance for each node"
do_test clone-anon-probe-2 "Avoid needless re-probing of anonymous clones"
do_test clone-anon-failcount "Merge failcounts for anonymous clones"
do_test inc0 "Incarnation start"
do_test inc1 "Incarnation start order"
do_test inc2 "Incarnation silent restart, stop, move"
do_test inc3 "Inter-incarnation ordering, silent restart, stop, move"
do_test inc4 "Inter-incarnation ordering, silent restart, stop, move (ordered)"
do_test inc5 "Inter-incarnation ordering, silent restart, stop, move (restart 1)"
do_test inc6 "Inter-incarnation ordering, silent restart, stop, move (restart 2)"
do_test inc7 "Clone colocation"
do_test inc8 "Clone anti-colocation"
do_test inc9 "Non-unique clone"
do_test inc10 "Non-unique clone (stop)"
do_test inc11 "Primitive colocation with clones"
do_test inc12 "Clone shutdown"
do_test cloned-group "Make sure only the correct number of cloned groups are started"
do_test cloned-group-stop "Ensure stopping qpidd also stops glance and cinder"
do_test clone-no-shuffle "Don't prioritize allocation of instances that must be moved"
do_test clone-max-zero "Orphan processing with clone-max=0"
do_test clone-anon-dup "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node"
do_test bug-lf-2160 "Don't shuffle clones due to colocation"
do_test bug-lf-2213 "clone-node-max enforcement for cloned groups"
do_test bug-lf-2153 "Clone ordering constraints"
do_test bug-lf-2361 "Ensure clones observe mandatory ordering constraints if the LHS is unrunnable"
do_test bug-lf-2317 "Avoid needless restart of primitive depending on a clone"
do_test clone-colocate-instance-1 "Colocation with a specific clone instance (negative example)"
do_test clone-colocate-instance-2 "Colocation with a specific clone instance"
do_test clone-order-instance "Ordering with specific clone instances"
do_test bug-lf-2453 "Enforce mandatory clone ordering without colocation"
do_test bug-lf-2508 "Correctly reconstruct the status of anonymous cloned groups"
do_test bug-lf-2544 "Balanced clone placement"
do_test bug-lf-2445 "Redistribute clones with node-max > 1 and stickiness = 0"
do_test bug-lf-2574 "Avoid clone shuffle"
do_test bug-lf-2581 "Avoid group restart due to unrelated clone (re)start"
do_test bug-cl-5168 "Don't shuffle clones"
do_test bug-cl-5170 "Prevent clone from starting with on-fail=block"
do_test clone-fail-block-colocation "Move colocated group when failed clone has on-fail=block"
do_test clone-interleave-1 "Clone-3 cannot start on pcmk-1 due to interleaved ordering (no colocation)"
do_test clone-interleave-2 "Clone-3 must stop on pcmk-1 due to interleaved ordering (no colocation)"
do_test clone-interleave-3 "Clone-3 must be recovered on pcmk-1 due to interleaved ordering (no colocation)"
echo ""
do_test cloned_start_one "order first clone then clone... first clone_min=2"
do_test cloned_start_two "order first clone then clone... first clone_min=2"
do_test cloned_stop_one "order first clone then clone... first clone_min=2"
do_test cloned_stop_two "order first clone then clone... first clone_min=2"
do_test clone_min_interleave_start_one "order first clone then clone... first clone_min=2 and then has interleave=true"
do_test clone_min_interleave_start_two "order first clone then clone... first clone_min=2 and then has interleave=true"
do_test clone_min_interleave_stop_one "order first clone then clone... first clone_min=2 and then has interleave=true"
do_test clone_min_interleave_stop_two "order first clone then clone... first clone_min=2 and then has interleave=true"
do_test clone_min_start_one "order first clone then primitive... first clone_min=2"
do_test clone_min_start_two "order first clone then primitive... first clone_min=2"
do_test clone_min_stop_all "order first clone then primitive... first clone_min=2"
do_test clone_min_stop_one "order first clone then primitive... first clone_min=2"
do_test clone_min_stop_two "order first clone then primitive... first clone_min=2"
echo ""
do_test unfence-startup "Clean unfencing"
do_test unfence-definition "Unfencing when the agent changes"
do_test unfence-parameters "Unfencing when the agent parameters changes"
echo ""
do_test master-0 "Stopped -> Slave"
do_test master-1 "Stopped -> Promote"
do_test master-2 "Stopped -> Promote : notify"
do_test master-3 "Stopped -> Promote : master location"
do_test master-4 "Started -> Promote : master location"
do_test master-5 "Promoted -> Promoted"
do_test master-6 "Promoted -> Promoted (2)"
do_test master-7 "Promoted -> Fenced"
do_test master-8 "Promoted -> Fenced -> Moved"
do_test master-9 "Stopped + Promotable + No quorum"
do_test master-10 "Stopped -> Promotable : notify with monitor"
do_test master-11 "Stopped -> Promote : colocation"
do_test novell-239082 "Demote/Promote ordering"
do_test novell-239087 "Stable master placement"
do_test master-12 "Promotion based solely on rsc_location constraints"
do_test master-13 "Include preferences of colocated resources when placing master"
do_test master-demote "Ordering when actions depends on demoting a slave resource"
do_test master-ordering "Prevent resources from starting that need a master"
do_test bug-1765 "Master-Master Colocation (dont stop the slaves)"
do_test master-group "Promotion of cloned groups"
do_test bug-lf-1852 "Don't shuffle master/slave instances unnecessarily"
do_test master-failed-demote "Don't retry failed demote actions"
do_test master-failed-demote-2 "Don't retry failed demote actions (notify=false)"
do_test master-depend "Ensure resources that depend on the master don't get allocated until the master does"
do_test master-reattach "Re-attach to a running master"
do_test master-allow-start "Don't include master score if it would prevent allocation"
do_test master-colocation "Allow master instances placemaker to be influenced by colocation constraints"
do_test master-pseudo "Make sure promote/demote pseudo actions are created correctly"
do_test master-role "Prevent target-role from promoting more than master-max instances"
do_test bug-lf-2358 "Master-Master anti-colocation"
do_test master-promotion-constraint "Mandatory master colocation constraints"
do_test unmanaged-master "Ensure role is preserved for unmanaged resources"
do_test master-unmanaged-monitor "Start the correct monitor operation for unmanaged masters"
do_test master-demote-2 "Demote does not clear past failure"
do_test master-move "Move master based on failure of colocated group"
do_test master-probed-score "Observe the promotion score of probed resources"
do_test colocation_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by colocation constraint"
do_test colocation_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by colocation constraint"
do_test order_constraint_stops_master "cl#5054 - Ensure master is demoted when stopped by order constraint"
do_test order_constraint_stops_slave "cl#5054 - Ensure slave is not demoted when stopped by order constraint"
do_test master_monitor_restart "cl#5072 - Ensure master monitor operation will start after promotion."
do_test bug-rh-880249 "Handle replacement of an m/s resource with a primitive"
do_test bug-5143-ms-shuffle "Prevent master shuffling due to promotion score"
do_test master-demote-block "Block promotion if demote fails with on-fail=block"
do_test master-dependent-ban "Don't stop instances from being active because a dependent is banned from that host"
do_test master-stop "Stop instances due to location constraint with role=Started"
do_test master-partially-demoted-group "Allow partially demoted group to finish demoting"
do_test bug-cl-5213 "Ensure role colocation with -INFINITY is enforced"
do_test bug-cl-5219 "Allow unrelated resources with a common colocation target to remain promoted"
do_test master-asymmetrical-order "Fix the behaviors of multi-state resources with asymmetrical ordering"
do_test master-notify "Master promotion with notifies"
do_test master-score-startup "Use permanent master scores without LRM history"
echo ""
do_test history-1 "Correctly parse stateful-1 resource state"
echo ""
do_test managed-0 "Managed (reference)"
do_test managed-1 "Not managed - down "
do_test managed-2 "Not managed - up "
do_test bug-5028 "Shutdown should block if anything depends on an unmanaged resource"
do_test bug-5028-detach "Ensure detach still works"
do_test bug-5028-bottom "Ensure shutdown still blocks if the blocked resource is at the bottom of the stack"
do_test unmanaged-stop-1 "cl#5155 - Block the stop of resources if any depending resource is unmanaged "
do_test unmanaged-stop-2 "cl#5155 - Block the stop of resources if the first resource in a mandatory stop order is unmanaged "
do_test unmanaged-stop-3 "cl#5155 - Block the stop of resources if any depending resource in a group is unmanaged "
do_test unmanaged-stop-4 "cl#5155 - Block the stop of resources if any depending resource in the middle of a group is unmanaged "
do_test unmanaged-block-restart "Block restart of resources if any dependent resource in a group is unmanaged"
echo ""
do_test interleave-0 "Interleave (reference)"
do_test interleave-1 "coloc - not interleaved"
do_test interleave-2 "coloc - interleaved "
do_test interleave-3 "coloc - interleaved (2)"
do_test interleave-pseudo-stop "Interleaved clone during stonith"
do_test interleave-stop "Interleaved clone during stop"
do_test interleave-restart "Interleaved clone during dependency restart"
echo ""
do_test notify-0 "Notify reference"
do_test notify-1 "Notify simple"
do_test notify-2 "Notify simple, confirm"
do_test notify-3 "Notify move, confirm"
do_test novell-239079 "Notification priority"
#do_test notify-2 "Notify - 764"
echo ""
do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition"
do_test 662 "OSDL #662 - Two resources start on one node when incarnation_node_max = 1"
do_test 696 "OSDL #696 - CRM starts stonith RA without monitor"
do_test 726 "OSDL #726 - Attempting to schedule rsc_posic041_monitor_5000 _after_ a stop"
do_test 735 "OSDL #735 - Correctly detect that rsc_hadev1 is stopped on hadev3"
do_test 764 "OSDL #764 - Missing monitor op for DoFencing:child_DoFencing:1"
do_test 797 "OSDL #797 - Assert triggered: task_id_i > max_call_id"
do_test 829 "OSDL #829"
do_test 994 "OSDL #994 - Stopping the last resource in a resource group causes the entire group to be restarted"
do_test 994-2 "OSDL #994 - with a dependent resource"
do_test 1360 "OSDL #1360 - Clone stickiness"
do_test 1484 "OSDL #1484 - on_fail=stop"
do_test 1494 "OSDL #1494 - Clone stability"
do_test unrunnable-1 "Unrunnable"
do_test unrunnable-2 "Unrunnable 2"
do_test stonith-0 "Stonith loop - 1"
do_test stonith-1 "Stonith loop - 2"
do_test stonith-2 "Stonith loop - 3"
do_test stonith-3 "Stonith startup"
do_test stonith-4 "Stonith node state"
do_test bug-1572-1 "Recovery of groups depending on master/slave"
do_test bug-1572-2 "Recovery of groups depending on master/slave when the master is never re-promoted"
do_test bug-1685 "Depends-on-master ordering"
do_test bug-1822 "Don't promote partially active groups"
do_test bug-pm-11 "New resource added to a m/s group"
do_test bug-pm-12 "Recover only the failed portion of a cloned group"
do_test bug-n-387749 "Don't shuffle clone instances"
do_test bug-n-385265 "Don't ignore the failure stickiness of group children - resource_idvscommon should stay stopped"
do_test bug-n-385265-2 "Ensure groups are migrated instead of remaining partially active on the current node"
do_test bug-lf-1920 "Correctly handle probes that find active resources"
do_test bnc-515172 "Location constraint with multiple expressions"
do_test colocate-primitive-with-clone "Optional colocation with a clone"
do_test use-after-free-merge "Use-after-free in native_merge_weights"
do_test bug-lf-2551 "STONITH ordering for stop"
do_test bug-lf-2606 "Stonith implies demote"
do_test bug-lf-2474 "Ensure resource op timeout takes precedence over op_defaults"
do_test bug-suse-707150 "Prevent vm-01 from starting due to colocation/ordering"
do_test bug-5014-A-start-B-start "Verify when A starts B starts using symmetrical=false"
do_test bug-5014-A-stop-B-started "Verify when A stops B does not stop if it has already started using symmetric=false"
do_test bug-5014-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using symmetric=false"
do_test bug-5014-CthenAthenB-C-stopped "Verify when C then A is symmetrical=true, A then B is symmetric=false, and C is stopped that nothing starts."
do_test bug-5014-CLONE-A-start-B-start "Verify when A starts B starts using clone resources with symmetric=false"
do_test bug-5014-CLONE-A-stop-B-started "Verify when A stops B does not stop if it has already started using clone resources with symmetric=false."
do_test bug-5014-GROUP-A-start-B-start "Verify when A starts B starts when using group resources with symmetric=false."
do_test bug-5014-GROUP-A-stopped-B-started "Verify when A stops B does not stop if it has already started using group resources with symmetric=false."
do_test bug-5014-GROUP-A-stopped-B-stopped "Verify when A is stopped and B has not started, B does not start before A using group resources with symmetric=false."
do_test bug-5014-ordered-set-symmetrical-false "Verify ordered sets work with symmetrical=false"
do_test bug-5014-ordered-set-symmetrical-true "Verify ordered sets work with symmetrical=true"
do_test bug-5007-masterslave_colocation "Verify use of colocation scores other than INFINITY and -INFINITY work on multi-state resources."
do_test bug-5038 "Prevent restart of anonymous clones when clone-max decreases"
do_test bug-5025-1 "Automatically clean up failcount after resource config change with reload"
do_test bug-5025-2 "Make sure clear failcount action isn't set when config does not change."
do_test bug-5025-3 "Automatically clean up failcount after resource config change with restart"
do_test bug-5025-4 "Clear failcount when last failure is a start op and rsc attributes changed."
do_test failcount "Ensure failcounts are correctly expired"
do_test failcount-block "Ensure failcounts are not expired when on-fail=block is present"
do_test per-op-failcount "Ensure per-operation failcount is handled and not passed to fence agent"
do_test monitor-onfail-restart "bug-5058 - Monitor failure with on-fail set to restart"
do_test monitor-onfail-stop "bug-5058 - Monitor failure wiht on-fail set to stop"
do_test bug-5059 "No need to restart p_stateful1:*"
do_test bug-5069-op-enabled "Test on-fail=ignore with failure when monitor is enabled."
do_test bug-5069-op-disabled "Test on-fail-ignore with failure when monitor is disabled."
do_test obsolete-lrm-resource "cl#5115 - Do not use obsolete lrm_resource sections"
do_test expire-non-blocked-failure "Ignore failure-timeout only if the failed operation has on-fail=block"
do_test asymmetrical-order-move "Respect asymmetrical ordering when trying to move resources"
do_test start-then-stop-with-unfence "Avoid graph loop with start-then-stop constraint plus unfencing"
do_test ignore_stonith_rsc_order1 "cl#5056- Ignore order constraint between stonith and non-stonith rsc."
do_test ignore_stonith_rsc_order2 "cl#5056- Ignore order constraint with group rsc containing mixed stonith and non-stonith."
do_test ignore_stonith_rsc_order3 "cl#5056- Ignore order constraint, stonith clone and mixed group"
do_test ignore_stonith_rsc_order4 "cl#5056- Ignore order constraint, stonith clone and clone with nested mixed group"
do_test honor_stonith_rsc_order1 "cl#5056- Honor order constraint, stonith clone and pure stonith group(single rsc)."
do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone and pure stonith group(multiple rsc)"
do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group."
do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs."
do_test probe-timeout "cl#5099 - Default probe timeout"
do_test concurrent-fencing "Allow performing fencing operations in parallel"
echo ""
do_test systemhealth1 "System Health () #1"
do_test systemhealth2 "System Health () #2"
do_test systemhealth3 "System Health () #3"
do_test systemhealthn1 "System Health (None) #1"
do_test systemhealthn2 "System Health (None) #2"
do_test systemhealthn3 "System Health (None) #3"
do_test systemhealthm1 "System Health (Migrate On Red) #1"
do_test systemhealthm2 "System Health (Migrate On Red) #2"
do_test systemhealthm3 "System Health (Migrate On Red) #3"
do_test systemhealtho1 "System Health (Only Green) #1"
do_test systemhealtho2 "System Health (Only Green) #2"
do_test systemhealtho3 "System Health (Only Green) #3"
do_test systemhealthp1 "System Health (Progessive) #1"
do_test systemhealthp2 "System Health (Progessive) #2"
do_test systemhealthp3 "System Health (Progessive) #3"
echo ""
do_test utilization "Placement Strategy - utilization"
do_test minimal "Placement Strategy - minimal"
do_test balanced "Placement Strategy - balanced"
echo ""
do_test placement-stickiness "Optimized Placement Strategy - stickiness"
do_test placement-priority "Optimized Placement Strategy - priority"
do_test placement-location "Optimized Placement Strategy - location"
do_test placement-capacity "Optimized Placement Strategy - capacity"
echo ""
do_test utilization-order1 "Utilization Order - Simple"
do_test utilization-order2 "Utilization Order - Complex"
do_test utilization-order3 "Utilization Order - Migrate"
do_test utilization-order4 "Utilization Order - Live Mirgration (bnc#695440)"
do_test utilization-shuffle "Don't displace prmExPostgreSQLDB2 on act2, Start prmExPostgreSQLDB1 on act3"
do_test load-stopped-loop "Avoid transition loop due to load_stopped (cl#5044)"
do_test load-stopped-loop-2 "cl#5235 - Prevent graph loops that can be introduced by load_stopped -> migrate_to ordering"
echo ""
do_test colocated-utilization-primitive-1 "Colocated Utilization - Primitive"
do_test colocated-utilization-primitive-2 "Colocated Utilization - Choose the most capable node"
do_test colocated-utilization-group "Colocated Utilization - Group"
do_test colocated-utilization-clone "Colocated Utilization - Clone"
do_test utilization-check-allowed-nodes "Only check the capacities of the nodes that can run the resource"
echo ""
do_test reprobe-target_rc "Ensure correct target_rc for reprobe of inactive resources"
do_test node-maintenance-1 "cl#5128 - Node maintenance"
do_test node-maintenance-2 "cl#5128 - Node maintenance (coming out of maintenance mode)"
do_test shutdown-maintenance-node "Do not fence a maintenance node if it shuts down cleanly"
do_test rsc-maintenance "Per-resource maintenance"
echo ""
do_test not-installed-agent "The resource agent is missing"
do_test not-installed-tools "Something the resource agent needs is missing"
echo ""
do_test stopped-monitor-00 "Stopped Monitor - initial start"
do_test stopped-monitor-01 "Stopped Monitor - failed started"
do_test stopped-monitor-02 "Stopped Monitor - started multi-up"
do_test stopped-monitor-03 "Stopped Monitor - stop started"
do_test stopped-monitor-04 "Stopped Monitor - failed stop"
do_test stopped-monitor-05 "Stopped Monitor - start unmanaged"
do_test stopped-monitor-06 "Stopped Monitor - unmanaged multi-up"
do_test stopped-monitor-07 "Stopped Monitor - start unmanaged multi-up"
do_test stopped-monitor-08 "Stopped Monitor - migrate"
do_test stopped-monitor-09 "Stopped Monitor - unmanage started"
do_test stopped-monitor-10 "Stopped Monitor - unmanaged started multi-up"
do_test stopped-monitor-11 "Stopped Monitor - stop unmanaged started"
do_test stopped-monitor-12 "Stopped Monitor - unmanaged started multi-up (targer-role="Stopped")"
do_test stopped-monitor-20 "Stopped Monitor - initial stop"
do_test stopped-monitor-21 "Stopped Monitor - stopped single-up"
do_test stopped-monitor-22 "Stopped Monitor - stopped multi-up"
do_test stopped-monitor-23 "Stopped Monitor - start stopped"
do_test stopped-monitor-24 "Stopped Monitor - unmanage stopped"
do_test stopped-monitor-25 "Stopped Monitor - unmanaged stopped multi-up"
do_test stopped-monitor-26 "Stopped Monitor - start unmanaged stopped"
do_test stopped-monitor-27 "Stopped Monitor - unmanaged stopped multi-up (target-role="Started")"
do_test stopped-monitor-30 "Stopped Monitor - new node started"
do_test stopped-monitor-31 "Stopped Monitor - new node stopped"
echo""
do_test ticket-primitive-1 "Ticket - Primitive (loss-policy=stop, initial)"
do_test ticket-primitive-2 "Ticket - Primitive (loss-policy=stop, granted)"
do_test ticket-primitive-3 "Ticket - Primitive (loss-policy-stop, revoked)"
do_test ticket-primitive-4 "Ticket - Primitive (loss-policy=demote, initial)"
do_test ticket-primitive-5 "Ticket - Primitive (loss-policy=demote, granted)"
do_test ticket-primitive-6 "Ticket - Primitive (loss-policy=demote, revoked)"
do_test ticket-primitive-7 "Ticket - Primitive (loss-policy=fence, initial)"
do_test ticket-primitive-8 "Ticket - Primitive (loss-policy=fence, granted)"
do_test ticket-primitive-9 "Ticket - Primitive (loss-policy=fence, revoked)"
do_test ticket-primitive-10 "Ticket - Primitive (loss-policy=freeze, initial)"
do_test ticket-primitive-11 "Ticket - Primitive (loss-policy=freeze, granted)"
do_test ticket-primitive-12 "Ticket - Primitive (loss-policy=freeze, revoked)"
do_test ticket-primitive-13 "Ticket - Primitive (loss-policy=stop, standby, granted)"
do_test ticket-primitive-14 "Ticket - Primitive (loss-policy=stop, granted, standby)"
do_test ticket-primitive-15 "Ticket - Primitive (loss-policy=stop, standby, revoked)"
do_test ticket-primitive-16 "Ticket - Primitive (loss-policy=demote, standby, granted)"
do_test ticket-primitive-17 "Ticket - Primitive (loss-policy=demote, granted, standby)"
do_test ticket-primitive-18 "Ticket - Primitive (loss-policy=demote, standby, revoked)"
do_test ticket-primitive-19 "Ticket - Primitive (loss-policy=fence, standby, granted)"
do_test ticket-primitive-20 "Ticket - Primitive (loss-policy=fence, granted, standby)"
do_test ticket-primitive-21 "Ticket - Primitive (loss-policy=fence, standby, revoked)"
do_test ticket-primitive-22 "Ticket - Primitive (loss-policy=freeze, standby, granted)"
do_test ticket-primitive-23 "Ticket - Primitive (loss-policy=freeze, granted, standby)"
do_test ticket-primitive-24 "Ticket - Primitive (loss-policy=freeze, standby, revoked)"
echo""
do_test ticket-group-1 "Ticket - Group (loss-policy=stop, initial)"
do_test ticket-group-2 "Ticket - Group (loss-policy=stop, granted)"
do_test ticket-group-3 "Ticket - Group (loss-policy-stop, revoked)"
do_test ticket-group-4 "Ticket - Group (loss-policy=demote, initial)"
do_test ticket-group-5 "Ticket - Group (loss-policy=demote, granted)"
do_test ticket-group-6 "Ticket - Group (loss-policy=demote, revoked)"
do_test ticket-group-7 "Ticket - Group (loss-policy=fence, initial)"
do_test ticket-group-8 "Ticket - Group (loss-policy=fence, granted)"
do_test ticket-group-9 "Ticket - Group (loss-policy=fence, revoked)"
do_test ticket-group-10 "Ticket - Group (loss-policy=freeze, initial)"
do_test ticket-group-11 "Ticket - Group (loss-policy=freeze, granted)"
do_test ticket-group-12 "Ticket - Group (loss-policy=freeze, revoked)"
do_test ticket-group-13 "Ticket - Group (loss-policy=stop, standby, granted)"
do_test ticket-group-14 "Ticket - Group (loss-policy=stop, granted, standby)"
do_test ticket-group-15 "Ticket - Group (loss-policy=stop, standby, revoked)"
do_test ticket-group-16 "Ticket - Group (loss-policy=demote, standby, granted)"
do_test ticket-group-17 "Ticket - Group (loss-policy=demote, granted, standby)"
do_test ticket-group-18 "Ticket - Group (loss-policy=demote, standby, revoked)"
do_test ticket-group-19 "Ticket - Group (loss-policy=fence, standby, granted)"
do_test ticket-group-20 "Ticket - Group (loss-policy=fence, granted, standby)"
do_test ticket-group-21 "Ticket - Group (loss-policy=fence, standby, revoked)"
do_test ticket-group-22 "Ticket - Group (loss-policy=freeze, standby, granted)"
do_test ticket-group-23 "Ticket - Group (loss-policy=freeze, granted, standby)"
do_test ticket-group-24 "Ticket - Group (loss-policy=freeze, standby, revoked)"
echo""
do_test ticket-clone-1 "Ticket - Clone (loss-policy=stop, initial)"
do_test ticket-clone-2 "Ticket - Clone (loss-policy=stop, granted)"
do_test ticket-clone-3 "Ticket - Clone (loss-policy-stop, revoked)"
do_test ticket-clone-4 "Ticket - Clone (loss-policy=demote, initial)"
do_test ticket-clone-5 "Ticket - Clone (loss-policy=demote, granted)"
do_test ticket-clone-6 "Ticket - Clone (loss-policy=demote, revoked)"
do_test ticket-clone-7 "Ticket - Clone (loss-policy=fence, initial)"
do_test ticket-clone-8 "Ticket - Clone (loss-policy=fence, granted)"
do_test ticket-clone-9 "Ticket - Clone (loss-policy=fence, revoked)"
do_test ticket-clone-10 "Ticket - Clone (loss-policy=freeze, initial)"
do_test ticket-clone-11 "Ticket - Clone (loss-policy=freeze, granted)"
do_test ticket-clone-12 "Ticket - Clone (loss-policy=freeze, revoked)"
do_test ticket-clone-13 "Ticket - Clone (loss-policy=stop, standby, granted)"
do_test ticket-clone-14 "Ticket - Clone (loss-policy=stop, granted, standby)"
do_test ticket-clone-15 "Ticket - Clone (loss-policy=stop, standby, revoked)"
do_test ticket-clone-16 "Ticket - Clone (loss-policy=demote, standby, granted)"
do_test ticket-clone-17 "Ticket - Clone (loss-policy=demote, granted, standby)"
do_test ticket-clone-18 "Ticket - Clone (loss-policy=demote, standby, revoked)"
do_test ticket-clone-19 "Ticket - Clone (loss-policy=fence, standby, granted)"
do_test ticket-clone-20 "Ticket - Clone (loss-policy=fence, granted, standby)"
do_test ticket-clone-21 "Ticket - Clone (loss-policy=fence, standby, revoked)"
do_test ticket-clone-22 "Ticket - Clone (loss-policy=freeze, standby, granted)"
do_test ticket-clone-23 "Ticket - Clone (loss-policy=freeze, granted, standby)"
do_test ticket-clone-24 "Ticket - Clone (loss-policy=freeze, standby, revoked)"
echo""
do_test ticket-master-1 "Ticket - Master (loss-policy=stop, initial)"
do_test ticket-master-2 "Ticket - Master (loss-policy=stop, granted)"
do_test ticket-master-3 "Ticket - Master (loss-policy-stop, revoked)"
do_test ticket-master-4 "Ticket - Master (loss-policy=demote, initial)"
do_test ticket-master-5 "Ticket - Master (loss-policy=demote, granted)"
do_test ticket-master-6 "Ticket - Master (loss-policy=demote, revoked)"
do_test ticket-master-7 "Ticket - Master (loss-policy=fence, initial)"
do_test ticket-master-8 "Ticket - Master (loss-policy=fence, granted)"
do_test ticket-master-9 "Ticket - Master (loss-policy=fence, revoked)"
do_test ticket-master-10 "Ticket - Master (loss-policy=freeze, initial)"
do_test ticket-master-11 "Ticket - Master (loss-policy=freeze, granted)"
do_test ticket-master-12 "Ticket - Master (loss-policy=freeze, revoked)"
do_test ticket-master-13 "Ticket - Master (loss-policy=stop, standby, granted)"
do_test ticket-master-14 "Ticket - Master (loss-policy=stop, granted, standby)"
do_test ticket-master-15 "Ticket - Master (loss-policy=stop, standby, revoked)"
do_test ticket-master-16 "Ticket - Master (loss-policy=demote, standby, granted)"
do_test ticket-master-17 "Ticket - Master (loss-policy=demote, granted, standby)"
do_test ticket-master-18 "Ticket - Master (loss-policy=demote, standby, revoked)"
do_test ticket-master-19 "Ticket - Master (loss-policy=fence, standby, granted)"
do_test ticket-master-20 "Ticket - Master (loss-policy=fence, granted, standby)"
do_test ticket-master-21 "Ticket - Master (loss-policy=fence, standby, revoked)"
do_test ticket-master-22 "Ticket - Master (loss-policy=freeze, standby, granted)"
do_test ticket-master-23 "Ticket - Master (loss-policy=freeze, granted, standby)"
do_test ticket-master-24 "Ticket - Master (loss-policy=freeze, standby, revoked)"
echo ""
do_test ticket-rsc-sets-1 "Ticket - Resource sets (1 ticket, initial)"
do_test ticket-rsc-sets-2 "Ticket - Resource sets (1 ticket, granted)"
do_test ticket-rsc-sets-3 "Ticket - Resource sets (1 ticket, revoked)"
do_test ticket-rsc-sets-4 "Ticket - Resource sets (2 tickets, initial)"
do_test ticket-rsc-sets-5 "Ticket - Resource sets (2 tickets, granted)"
do_test ticket-rsc-sets-6 "Ticket - Resource sets (2 tickets, granted)"
do_test ticket-rsc-sets-7 "Ticket - Resource sets (2 tickets, revoked)"
do_test ticket-rsc-sets-8 "Ticket - Resource sets (1 ticket, standby, granted)"
do_test ticket-rsc-sets-9 "Ticket - Resource sets (1 ticket, granted, standby)"
do_test ticket-rsc-sets-10 "Ticket - Resource sets (1 ticket, standby, revoked)"
do_test ticket-rsc-sets-11 "Ticket - Resource sets (2 tickets, standby, granted)"
do_test ticket-rsc-sets-12 "Ticket - Resource sets (2 tickets, standby, granted)"
do_test ticket-rsc-sets-13 "Ticket - Resource sets (2 tickets, granted, standby)"
do_test ticket-rsc-sets-14 "Ticket - Resource sets (2 tickets, standby, revoked)"
do_test cluster-specific-params "Cluster-specific instance attributes based on rules"
do_test site-specific-params "Site-specific instance attributes based on rules"
echo ""
do_test template-1 "Template - 1"
do_test template-2 "Template - 2"
do_test template-3 "Template - 3 (merge operations)"
do_test template-coloc-1 "Template - Colocation 1"
do_test template-coloc-2 "Template - Colocation 2"
do_test template-coloc-3 "Template - Colocation 3"
do_test template-order-1 "Template - Order 1"
do_test template-order-2 "Template - Order 2"
do_test template-order-3 "Template - Order 3"
do_test template-ticket "Template - Ticket"
do_test template-rsc-sets-1 "Template - Resource Sets 1"
do_test template-rsc-sets-2 "Template - Resource Sets 2"
do_test template-rsc-sets-3 "Template - Resource Sets 3"
do_test template-rsc-sets-4 "Template - Resource Sets 4"
do_test template-clone-primitive "Cloned primitive from template"
do_test template-clone-group "Cloned group from template"
do_test location-sets-templates "Resource sets and templates - Location"
do_test tags-coloc-order-1 "Tags - Colocation and Order (Simple)"
do_test tags-coloc-order-2 "Tags - Colocation and Order (Resource Sets with Templates)"
do_test tags-location "Tags - Location"
do_test tags-ticket "Tags - Ticket"
echo ""
do_test container-1 "Container - initial"
do_test container-2 "Container - monitor failed"
do_test container-3 "Container - stop failed"
do_test container-4 "Container - reached migration-threshold"
do_test container-group-1 "Container in group - initial"
do_test container-group-2 "Container in group - monitor failed"
do_test container-group-3 "Container in group - stop failed"
do_test container-group-4 "Container in group - reached migration-threshold"
do_test container-is-remote-node "Place resource within container when container is remote-node"
do_test bug-rh-1097457 "Kill user defined container/contents ordering"
do_test bug-cl-5247 "Graph loop when recovering m/s resource in a container"
do_test bundle-order-startup "Bundle startup ordering"
do_test bundle-order-partial-start "Bundle startup ordering when some dependancies are already running"
do_test bundle-order-partial-start-2 "Bundle startup ordering when some dependancies and the container are already running"
do_test bundle-order-stop "Bundle stop ordering"
do_test bundle-order-partial-stop "Bundle startup ordering when some dependancies are already stopped"
do_test bundle-order-startup-clone "Prevent startup because bundle isn't promoted"
do_test bundle-order-startup-clone-2 "Bundle startup with clones"
do_test bundle-order-stop-clone "Stop bundle because clone is stopping"
do_test bundle-nested-colocation "Colocation of nested connection resources"
do_test bundle-order-fencing "Order pseudo bundle fencing after parent node fencing if both are happening"
echo ""
do_test whitebox-fail1 "Fail whitebox container rsc."
do_test whitebox-fail2 "Fail whitebox container rsc lrmd connection."
do_test whitebox-fail3 "Failed containers should not run nested on remote nodes."
do_test whitebox-start "Start whitebox container with resources assigned to it"
do_test whitebox-stop "Stop whitebox container with resources assigned to it"
do_test whitebox-move "Move whitebox container with resources assigned to it"
do_test whitebox-asymmetric "Verify connection rsc opts-in based on container resource"
do_test whitebox-ms-ordering "Verify promote/demote can not occur before connection is established"
do_test whitebox-ms-ordering-move "Stop/Start cycle within a moving container"
do_test whitebox-orphaned "Properly shutdown orphaned whitebox container"
do_test whitebox-orphan-ms "Properly tear down orphan ms resources on remote-nodes"
do_test whitebox-unexpectedly-running "Recover container nodes the cluster did not start."
do_test whitebox-migrate1 "Migrate both container and connection resource"
do_test whitebox-imply-stop-on-fence "imply stop action on container node rsc when host node is fenced"
do_test whitebox-nested-group "Verify guest remote-node works nested in a group"
do_test guest-node-host-dies "Verify guest node is recovered if host goes away"
echo ""
do_test remote-startup-probes "Baremetal remote-node startup probes"
do_test remote-startup "Startup a newly discovered remote-nodes with no status."
do_test remote-fence-unclean "Fence unclean baremetal remote-node"
do_test remote-fence-unclean2 "Fence baremetal remote-node after cluster node fails and connection can not be recovered"
do_test remote-fence-unclean-3 "Probe failed remote nodes (triggers fencing)"
do_test remote-move "Move remote-node connection resource"
do_test remote-disable "Disable a baremetal remote-node"
do_test remote-probe-disable "Probe then stop a baremetal remote-node"
do_test remote-orphaned "Properly shutdown orphaned connection resource"
do_test remote-orphaned2 "verify we can handle orphaned remote connections with active resources on the remote"
do_test remote-recover "Recover connection resource after cluster-node fails."
do_test remote-stale-node-entry "Make sure we properly handle leftover remote-node entries in the node section"
do_test remote-partial-migrate "Make sure partial migrations are handled before ops on the remote node."
do_test remote-partial-migrate2 "Make sure partial migration target is prefered for remote connection."
do_test remote-recover-fail "Make sure start failure causes fencing if rsc are active on remote."
do_test remote-start-fail "Make sure a start failure does not result in fencing if no active resources are on remote."
do_test remote-unclean2 "Make monitor failure always results in fencing, even if no rsc are active on remote."
do_test remote-fence-before-reconnect "Fence before clearing recurring monitor failure"
do_test remote-recovery "Recover remote connections before attempting demotion"
do_test remote-recover-connection "Optimistically recovery of only the connection"
do_test remote-recover-all "Fencing when the connection has no home"
do_test remote-recover-no-resources "Fencing when the connection has no home and no active resources"
do_test remote-recover-unknown "Fencing when the connection has no home and the remote has no operation history"
do_test remote-reconnect-delay "Waiting for remote reconnect interval to expire"
do_test remote-connection-unrecoverable "Remote connection host must be fenced, with connection unrecoverable"
echo ""
do_test resource-discovery "Exercises resource-discovery location constraint option."
do_test rsc-discovery-per-node "Disable resource discovery per node"
echo ""
do_test isolation-start-all "Start docker isolated resources."
do_test isolation-restart-all "Restart docker isolated resources."
do_test isolation-clone "Cloned isolated primitive."
if [ $DO_VERSIONED_TESTS -eq 1 ]; then
echo ""
do_test versioned-resources "Start resources with #ra-version rules"
do_test restart-versioned "Restart resources on #ra-version change"
do_test reload-versioned "Reload resources on #ra-version change"
echo ""
do_test versioned-operations-1 "Use #ra-version to configure operations of native resources"
do_test versioned-operations-2 "Use #ra-version to configure operations of stonith resources"
do_test versioned-operations-3 "Use #ra-version to configure operations of master/slave resources"
do_test versioned-operations-4 "Use #ra-version to configure operations of groups of the resources"
fi
echo ""
test_results
diff --git a/pengine/test10/one-or-more-unrunnnable-instances.dot b/pengine/test10/one-or-more-unrunnable-instances.dot
similarity index 100%
rename from pengine/test10/one-or-more-unrunnnable-instances.dot
rename to pengine/test10/one-or-more-unrunnable-instances.dot
diff --git a/pengine/test10/one-or-more-unrunnnable-instances.exp b/pengine/test10/one-or-more-unrunnable-instances.exp
similarity index 100%
rename from pengine/test10/one-or-more-unrunnnable-instances.exp
rename to pengine/test10/one-or-more-unrunnable-instances.exp
diff --git a/pengine/test10/one-or-more-unrunnnable-instances.scores b/pengine/test10/one-or-more-unrunnable-instances.scores
similarity index 100%
rename from pengine/test10/one-or-more-unrunnnable-instances.scores
rename to pengine/test10/one-or-more-unrunnable-instances.scores
diff --git a/pengine/test10/one-or-more-unrunnnable-instances.summary b/pengine/test10/one-or-more-unrunnable-instances.summary
similarity index 100%
rename from pengine/test10/one-or-more-unrunnnable-instances.summary
rename to pengine/test10/one-or-more-unrunnable-instances.summary
diff --git a/pengine/test10/one-or-more-unrunnnable-instances.xml b/pengine/test10/one-or-more-unrunnable-instances.xml
similarity index 100%
rename from pengine/test10/one-or-more-unrunnnable-instances.xml
rename to pengine/test10/one-or-more-unrunnable-instances.xml
diff --git a/pengine/utils.c b/pengine/utils.c
index cae42a800f..dd32d498ad 100644
--- a/pengine/utils.c
+++ b/pengine/utils.c
@@ -1,436 +1,442 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <allocate.h>
#include <utils.h>
void
pe_free_ordering(GListPtr constraints)
{
GListPtr iterator = constraints;
while (iterator != NULL) {
order_constraint_t *order = iterator->data;
iterator = iterator->next;
free(order->lh_action_task);
free(order->rh_action_task);
free(order);
}
if (constraints != NULL) {
g_list_free(constraints);
}
}
void
pe_free_rsc_to_node(GListPtr constraints)
{
GListPtr iterator = constraints;
while (iterator != NULL) {
rsc_to_node_t *cons = iterator->data;
iterator = iterator->next;
g_list_free_full(cons->node_list_rh, free);
free(cons->id);
free(cons);
}
if (constraints != NULL) {
g_list_free(constraints);
}
}
rsc_to_node_t *
rsc2node_new(const char *id, resource_t * rsc,
int node_weight, const char *discover_mode,
node_t * foo_node, pe_working_set_t * data_set)
{
rsc_to_node_t *new_con = NULL;
if (rsc == NULL || id == NULL) {
pe_err("Invalid constraint %s for rsc=%p", crm_str(id), rsc);
return NULL;
} else if (foo_node == NULL) {
CRM_CHECK(node_weight == 0, return NULL);
}
new_con = calloc(1, sizeof(rsc_to_node_t));
if (new_con != NULL) {
new_con->id = strdup(id);
new_con->rsc_lh = rsc;
new_con->node_list_rh = NULL;
new_con->role_filter = RSC_ROLE_UNKNOWN;
if (discover_mode == NULL || safe_str_eq(discover_mode, "always")) {
new_con->discover_mode = pe_discover_always;
} else if (safe_str_eq(discover_mode, "never")) {
new_con->discover_mode = pe_discover_never;
} else if (safe_str_eq(discover_mode, "exclusive")) {
new_con->discover_mode = pe_discover_exclusive;
rsc->exclusive_discover = TRUE;
} else {
pe_err("Invalid %s value %s in location constraint", XML_LOCATION_ATTR_DISCOVERY, discover_mode);
}
if (foo_node != NULL) {
node_t *copy = node_copy(foo_node);
copy->weight = node_weight;
new_con->node_list_rh = g_list_prepend(NULL, copy);
}
data_set->placement_constraints = g_list_prepend(data_set->placement_constraints, new_con);
rsc->rsc_location = g_list_prepend(rsc->rsc_location, new_con);
}
return new_con;
}
gboolean
can_run_resources(const node_t * node)
{
if (node == NULL) {
return FALSE;
}
#if 0
if (node->weight < 0) {
return FALSE;
}
#endif
if (node->details->online == FALSE
|| node->details->shutdown || node->details->unclean
|| node->details->standby || node->details->maintenance) {
crm_trace("%s: online=%d, unclean=%d, standby=%d, maintenance=%d",
node->details->uname, node->details->online,
node->details->unclean, node->details->standby, node->details->maintenance);
return FALSE;
}
return TRUE;
}
/* return -1 if 'a' is more preferred
* return 1 if 'b' is more preferred
*/
gint
sort_node_weight(gconstpointer a, gconstpointer b, gpointer data)
{
const node_t *node1 = (const node_t *)a;
const node_t *node2 = (const node_t *)b;
const node_t *active = (node_t *) data;
int node1_weight = 0;
int node2_weight = 0;
int result = 0;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
node1_weight = node1->weight;
node2_weight = node2->weight;
if (can_run_resources(node1) == FALSE) {
node1_weight = -INFINITY;
}
if (can_run_resources(node2) == FALSE) {
node2_weight = -INFINITY;
}
if (node1_weight > node2_weight) {
crm_trace("%s (%d) > %s (%d) : weight",
node1->details->uname, node1_weight, node2->details->uname, node2_weight);
return -1;
}
if (node1_weight < node2_weight) {
crm_trace("%s (%d) < %s (%d) : weight",
node1->details->uname, node1_weight, node2->details->uname, node2_weight);
return 1;
}
crm_trace("%s (%d) == %s (%d) : weight",
node1->details->uname, node1_weight, node2->details->uname, node2_weight);
if (safe_str_eq(pe_dataset->placement_strategy, "minimal")) {
goto equal;
}
if (safe_str_eq(pe_dataset->placement_strategy, "balanced")) {
result = compare_capacity(node1, node2);
- if (result != 0) {
- return result;
+ if (result < 0) {
+ crm_trace("%s > %s : capacity (%d)",
+ node1->details->uname, node2->details->uname, result);
+ return -1;
+ } else if (result > 0) {
+ crm_trace("%s < %s : capacity (%d)",
+ node1->details->uname, node2->details->uname, result);
+ return 1;
}
}
/* now try to balance resources across the cluster */
if (node1->details->num_resources < node2->details->num_resources) {
- crm_trace("%s (%d) < %s (%d) : resources",
+ crm_trace("%s (%d) > %s (%d) : resources",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return -1;
} else if (node1->details->num_resources > node2->details->num_resources) {
- crm_trace("%s (%d) > %s (%d) : resources",
+ crm_trace("%s (%d) < %s (%d) : resources",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return 1;
}
if (active && active->details == node1->details) {
crm_trace("%s (%d) > %s (%d) : active",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return -1;
} else if (active && active->details == node2->details) {
- crm_trace("%s (%d) > %s (%d) : active",
+ crm_trace("%s (%d) < %s (%d) : active",
node1->details->uname, node1->details->num_resources,
node2->details->uname, node2->details->num_resources);
return 1;
}
equal:
crm_trace("%s = %s", node1->details->uname, node2->details->uname);
return strcmp(node1->details->uname, node2->details->uname);
}
void
native_deallocate(resource_t * rsc)
{
if (rsc->allocated_to) {
node_t *old = rsc->allocated_to;
crm_info("Deallocating %s from %s", rsc->id, old->details->uname);
set_bit(rsc->flags, pe_rsc_provisional);
rsc->allocated_to = NULL;
old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, rsc);
old->details->num_resources--;
/* old->count--; */
calculate_utilization(old->details->utilization, rsc->utilization, TRUE);
free(old);
}
}
gboolean
native_assign_node(resource_t * rsc, GListPtr nodes, node_t * chosen, gboolean force)
{
CRM_ASSERT(rsc->variant == pe_native);
if (force == FALSE && chosen != NULL) {
bool unset = FALSE;
if(chosen->weight < 0) {
unset = TRUE;
// Allow the graph to assume that the remote resource will come up
} else if(can_run_resources(chosen) == FALSE && !is_container_remote_node(chosen)) {
unset = TRUE;
}
if(unset) {
crm_debug("All nodes for resource %s are unavailable"
", unclean or shutting down (%s: %d, %d)",
rsc->id, chosen->details->uname, can_run_resources(chosen), chosen->weight);
rsc->next_role = RSC_ROLE_STOPPED;
chosen = NULL;
}
}
/* todo: update the old node for each resource to reflect its
* new resource count
*/
native_deallocate(rsc);
clear_bit(rsc->flags, pe_rsc_provisional);
if (chosen == NULL) {
GListPtr gIter = NULL;
char *rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING);
crm_debug("Could not allocate a node for %s", rsc->id);
rsc->next_role = RSC_ROLE_STOPPED;
for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
action_t *op = (action_t *) gIter->data;
const char *interval = g_hash_table_lookup(op->meta, XML_LRM_ATTR_INTERVAL);
crm_debug("Processing %s", op->uuid);
if(safe_str_eq(RSC_STOP, op->task)) {
update_action_flags(op, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__);
} else if(safe_str_eq(RSC_START, op->task)) {
update_action_flags(op, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
/* set_bit(rsc->flags, pe_rsc_block); */
} else if(interval && safe_str_neq(interval, "0")) {
if(safe_str_eq(rc_inactive, g_hash_table_lookup(op->meta, XML_ATTR_TE_TARGET_RC))) {
/* This is a recurring monitor for the stopped state, leave it alone */
} else {
/* Normal monitor operation, cancel it */
update_action_flags(op, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
}
}
}
free(rc_inactive);
return FALSE;
}
crm_debug("Assigning %s to %s", chosen->details->uname, rsc->id);
rsc->allocated_to = node_copy(chosen);
chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, rsc);
chosen->details->num_resources++;
chosen->count++;
calculate_utilization(chosen->details->utilization, rsc->utilization, FALSE);
dump_rsc_utilization(show_utilization ? 0 : utilization_log_level, __FUNCTION__, rsc, chosen);
return TRUE;
}
void
log_action(unsigned int log_level, const char *pre_text, action_t * action, gboolean details)
{
const char *node_uname = NULL;
const char *node_uuid = NULL;
if (action == NULL) {
crm_trace("%s%s: <NULL>", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ");
return;
}
if (is_set(action->flags, pe_action_pseudo)) {
node_uname = NULL;
node_uuid = NULL;
} else if (action->node != NULL) {
node_uname = action->node->details->uname;
node_uuid = action->node->details->id;
} else {
node_uname = "<none>";
node_uuid = NULL;
}
switch (text2task(action->task)) {
case stonith_node:
case shutdown_crm:
crm_trace("%s%s%sAction %d: %s%s%s%s%s%s",
pre_text == NULL ? "" : pre_text,
pre_text == NULL ? "" : ": ",
is_set(action->flags,
pe_action_pseudo) ? "Pseudo " : is_set(action->flags,
pe_action_optional) ?
"Optional " : is_set(action->flags,
pe_action_runnable) ? is_set(action->flags,
pe_action_processed)
? "" : "(Provisional) " : "!!Non-Startable!! ", action->id,
action->uuid, node_uname ? "\ton " : "",
node_uname ? node_uname : "", node_uuid ? "\t\t(" : "",
node_uuid ? node_uuid : "", node_uuid ? ")" : "");
break;
default:
crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s",
pre_text == NULL ? "" : pre_text,
pre_text == NULL ? "" : ": ",
is_set(action->flags,
pe_action_optional) ? "Optional " : is_set(action->flags,
pe_action_pseudo)
? "Pseudo " : is_set(action->flags,
pe_action_runnable) ? is_set(action->flags,
pe_action_processed)
? "" : "(Provisional) " : "!!Non-Startable!! ", action->id,
action->uuid, action->rsc ? action->rsc->id : "<none>",
node_uname ? "\ton " : "", node_uname ? node_uname : "",
node_uuid ? "\t\t(" : "", node_uuid ? node_uuid : "", node_uuid ? ")" : "");
break;
}
if (details) {
GListPtr gIter = NULL;
crm_trace("\t\t====== Preceding Actions");
gIter = action->actions_before;
for (; gIter != NULL; gIter = gIter->next) {
action_wrapper_t *other = (action_wrapper_t *) gIter->data;
log_action(log_level + 1, "\t\t", other->action, FALSE);
}
crm_trace("\t\t====== Subsequent Actions");
gIter = action->actions_after;
for (; gIter != NULL; gIter = gIter->next) {
action_wrapper_t *other = (action_wrapper_t *) gIter->data;
log_action(log_level + 1, "\t\t", other->action, FALSE);
}
crm_trace("\t\t====== End");
} else {
crm_trace("\t\t(seen=%d, before=%d, after=%d)",
action->seen_count,
g_list_length(action->actions_before), g_list_length(action->actions_after));
}
}
gboolean
can_run_any(GHashTable * nodes)
{
GHashTableIter iter;
node_t *node = NULL;
if (nodes == NULL) {
return FALSE;
}
g_hash_table_iter_init(&iter, nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (can_run_resources(node) && node->weight >= 0) {
return TRUE;
}
}
return FALSE;
}
pe_action_t *
create_pseudo_resource_op(resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set)
{
pe_action_t *action = custom_action(rsc, generate_op_key(rsc->id, task, 0), task, NULL, optional, TRUE, data_set);
update_action_flags(action, pe_action_pseudo, __FUNCTION__, __LINE__);
update_action_flags(action, pe_action_runnable, __FUNCTION__, __LINE__);
if(runnable) {
update_action_flags(action, pe_action_runnable, __FUNCTION__, __LINE__);
}
return action;
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 23, 12:39 PM (21 h, 58 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1018727
Default Alt Text
(522 KB)

Event Timeline