Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 4124555f70..32976ad159 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -1,596 +1,596 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/msg_xml.h>
#include "pacemaker-attrd.h"
static xmlNode *
attrd_confirmation(int callid)
{
xmlNode *node = create_xml_node(NULL, __func__);
crm_xml_add(node, F_TYPE, T_ATTRD);
crm_xml_add(node, F_ORIG, get_local_node_name());
crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM);
crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid);
return node;
}
static void
attrd_peer_message(crm_node_t *peer, xmlNode *xml)
{
const char *election_op = crm_element_value(xml, F_CRM_TASK);
if (election_op) {
attrd_handle_election_op(peer, xml);
return;
}
if (attrd_shutting_down(false)) {
/* If we're shutting down, we want to continue responding to election
* ops as long as we're a cluster member (because our vote may be
* needed). Ignore all other messages.
*/
return;
} else {
pcmk__request_t request = {
.ipc_client = NULL,
.ipc_id = 0,
.ipc_flags = 0,
.peer = peer->uname,
.xml = xml,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK);
CRM_CHECK(request.op != NULL, return);
attrd_handle_request(&request);
/* Having finished handling the request, check to see if the originating
* peer requested confirmation. If so, send that confirmation back now.
*/
if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) &&
!pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
int callid = 0;
xmlNode *reply = NULL;
/* Add the confirmation ID for the message we are confirming to the
* response so the originating peer knows what they're a confirmation
* for.
*/
crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid);
reply = attrd_confirmation(callid);
/* And then send the confirmation back to the originating peer. This
* ends up right back in this same function (attrd_peer_message) on the
* peer where it will have to do something with a PCMK__XA_CONFIRM type
* message.
*/
crm_debug("Sending %s a confirmation", peer->uname);
attrd_send_message(peer, reply, false);
free_xml(reply);
}
pcmk__reset_request(&request);
}
}
static void
attrd_cpg_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = string2xml(data);
}
if (xml == NULL) {
crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data);
} else {
crm_node_t *peer = crm_get_peer(nodeid, from);
attrd_peer_message(peer, xml);
}
free_xml(xml);
free(data);
}
static void
attrd_cpg_destroy(gpointer unused)
{
if (attrd_shutting_down(false)) {
crm_info("Disconnected from Corosync process group");
} else {
crm_crit("Lost connection to Corosync process group, shutting down");
attrd_exit_status = CRM_EX_DISCONNECT;
attrd_shutdown(0);
}
}
/*!
* \internal
* \brief Override an attribute sync with a local value
*
* Broadcast the local node's value for an attribute that's different from the
* value provided in a peer's attribute synchronization response. This ensures a
* node's values for itself take precedence and all peers are kept in sync.
*
* \param[in] a Attribute entry to override
*
* \return Local instance of attribute value
*/
static attribute_value_t *
broadcast_local_value(const attribute_t *a)
{
attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname);
xmlNode *sync = create_xml_node(NULL, __func__);
crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
attrd_add_value_xml(sync, a, v, false);
attrd_send_message(NULL, sync, false);
free_xml(sync);
return v;
}
#define state_text(state) pcmk__s((state), "in unknown state")
/*!
* \internal
* \brief Return a node's value from hash table (creating one if needed)
*
* \param[in,out] values Hash table of values
* \param[in] node_name Name of node to look up
* \param[in] xml XML describing the attribute
*
* \return Pointer to new or existing hash table entry
*/
static attribute_value_t *
attrd_lookup_or_create_value(GHashTable *values, const char *node_name,
const xmlNode *xml)
{
attribute_value_t *v = g_hash_table_lookup(values, node_name);
int is_remote = 0;
if (v == NULL) {
v = calloc(1, sizeof(attribute_value_t));
CRM_ASSERT(v != NULL);
pcmk__str_update(&v->nodename, node_name);
g_hash_table_replace(values, v->nodename, v);
}
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
if (is_remote) {
attrd_set_value_flags(v, attrd_value_remote);
CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
}
return(v);
}
static void
attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
{
bool gone = false;
bool is_remote = pcmk_is_set(peer->flags, crm_remote_node);
switch (kind) {
case crm_status_uname:
crm_debug("%s node %s is now %s",
(is_remote? "Remote" : "Cluster"),
peer->uname, state_text(peer->state));
break;
case crm_status_processes:
if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
gone = true;
}
crm_debug("Node %s is %s a peer",
peer->uname, (gone? "no longer" : "now"));
break;
case crm_status_nstate:
crm_debug("%s node %s is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
peer->uname, state_text(peer->state), state_text(data));
if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
/* If we're the writer, send new peers a list of all attributes
* (unless it's a remote node, which doesn't run its own attrd)
*/
if (attrd_election_won()
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
attrd_peer_sync(peer, NULL);
}
} else {
// Remove all attribute values associated with lost nodes
attrd_peer_remove(peer->uname, false, "loss");
gone = true;
}
break;
}
// Remove votes from cluster nodes that leave, in case election in progress
if (gone && !is_remote) {
attrd_remove_voter(peer);
attrd_remove_peer_protocol_ver(peer->uname);
attrd_do_not_expect_from_peer(peer->uname);
}
}
static void
record_peer_nodeid(attribute_value_t *v, const char *host)
{
crm_node_t *known_peer = crm_get_peer(v->nodeid, host);
crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid);
if (attrd_election_won()) {
attrd_write_attributes(attrd_write_changed);
}
}
static void
update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
const char *attr, const char *value, const char *host,
bool filter, int is_force_write)
{
attribute_value_t *v = NULL;
v = attrd_lookup_or_create_value(a->values, host, xml);
if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei)
&& pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) {
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
attr, host, v->current, value, peer->uname);
v = broadcast_local_value(a);
} else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
CRM_XS " from %s with %s write delay",
attr, host, a->set_type ? " in " : "",
pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"),
pcmk__s(value, "(unset)"), peer->uname,
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
pcmk__str_update(&v->current, value);
a->changed = true;
if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)
&& pcmk__str_eq(attr, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) {
if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) {
attrd_set_requesting_shutdown();
} else {
attrd_clear_requesting_shutdown();
}
}
// Write out new value or start dampening timer
if (a->timeout_ms && a->timer) {
crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr);
mainloop_timer_start(a->timer);
} else {
attrd_write_or_elect_attribute(a);
}
} else {
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Save forced writing and set change flag. */
/* The actual attribute is written by Writer after election. */
crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)",
attr, host, peer->uname, value);
a->force_write = TRUE;
} else {
crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value);
}
}
// This allows us to later detect local values that peer doesn't know about
attrd_set_value_flags(v, attrd_value_from_peer);
/* If this is a cluster node whose node ID we are learning, remember it */
if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote)
&& (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID,
(int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
record_peer_nodeid(v, host);
}
}
static void
attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter)
{
attribute_t *a = NULL;
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
int is_force_write = 0;
if (attr == NULL) {
crm_warn("Could not update attribute: peer did not specify name");
return;
}
crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write);
a = attrd_populate_attribute(xml, attr);
if (a == NULL) {
return;
}
if (host == NULL) {
// If no host was specified, update all hosts
GHashTableIter vIter;
crm_debug("Setting %s for all hosts to %s", attr, value);
xml_remove_prop(xml, PCMK__XA_ATTR_NODE_ID);
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write);
}
} else {
// Update attribute value for the given host
update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write);
}
/* If this is a message from some attrd instance broadcasting its protocol
* version, check to see if it's a new minimum version.
*/
if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) {
attrd_update_minimum_protocol_ver(peer->uname, value);
}
}
static void
broadcast_unseen_local_values(void)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
if (!pcmk_is_set(v->flags, attrd_value_from_peer)
&& pcmk__str_eq(v->nodename, attrd_cluster->uname,
pcmk__str_casei)) {
if (sync == NULL) {
sync = create_xml_node(NULL, __func__);
crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
}
attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer);
}
}
}
if (sync != NULL) {
crm_debug("Broadcasting local-only values");
attrd_send_message(NULL, sync, false);
free_xml(sync);
}
}
int
attrd_cluster_connect(void)
{
attrd_cluster = pcmk_cluster_new();
attrd_cluster->destroy = attrd_cpg_destroy;
attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch;
attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
crm_set_status_callback(&attrd_peer_change_cb);
if (crm_cluster_connect(attrd_cluster) == FALSE) {
crm_err("Cluster connection failed");
return -ENOTCONN;
}
return pcmk_ok;
}
void
attrd_peer_clear_failure(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
const char *op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION);
const char *interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL);
guint interval_ms = 0U;
char *attr = NULL;
GHashTableIter iter;
regex_t regex;
crm_node_t *peer = crm_get_peer(0, request->peer);
- pcmk__parse_interval_spec(interval_spec, &interval_ms);
+ pcmk_parse_interval_spec(interval_spec, &interval_ms);
if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) {
crm_info("Ignoring invalid request to clear failures for %s",
pcmk__s(rsc, "all resources"));
return;
}
crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE);
/* Make sure value is not set, so we delete */
xml_remove_prop(xml, PCMK__XA_ATTR_VALUE);
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) {
if (regexec(&regex, attr, 0, NULL, 0) == 0) {
crm_trace("Matched %s when clearing %s",
attr, pcmk__s(rsc, "all resources"));
crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr);
attrd_peer_update(peer, xml, host, false);
}
}
regfree(&regex);
}
/*!
* \internal
* \brief Load attributes from a peer sync response
*
* \param[in] peer Peer that sent clear request
* \param[in] peer_won Whether peer is the attribute writer
* \param[in,out] xml Request XML
*/
void
attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml)
{
crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
peer->uname);
if (peer_won) {
/* Initialize the "seen" flag for all attributes to cleared, so we can
* detect attributes that local node has but the writer doesn't.
*/
attrd_clear_value_seen();
}
// Process each attribute update in the sync response
for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL;
child = pcmk__xml_next(child)) {
attrd_peer_update(peer, child,
crm_element_value(child, PCMK__XA_ATTR_NODE_NAME),
true);
}
if (peer_won) {
/* If any attributes are still not marked as seen, the writer doesn't
* know about them, so send all peers an update with them.
*/
broadcast_unseen_local_values();
}
}
/*!
* \internal
* \brief Remove all attributes and optionally peer cache entries for a node
*
* \param[in] host Name of node to purge
* \param[in] uncache If true, remove node from peer caches
* \param[in] source Who requested removal (only used for logging)
*/
void
attrd_peer_remove(const char *host, bool uncache, const char *source)
{
attribute_t *a = NULL;
GHashTableIter aIter;
CRM_CHECK(host != NULL, return);
crm_notice("Removing all %s attributes for node %s "
CRM_XS " %s reaping node from cache",
host, source, (uncache? "and" : "without"));
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
if(g_hash_table_remove(a->values, host)) {
crm_debug("Removed %s[%s] for peer %s", a->id, host, source);
}
}
if (uncache) {
pcmk__purge_node_from_cache(host, 0);
}
}
void
attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = create_xml_node(NULL, __func__);
crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
attrd_add_value_xml(sync, a, v, false);
}
}
crm_debug("Syncing values to %s", peer?peer->uname:"everyone");
attrd_send_message(peer, sync, false);
free_xml(sync);
}
void
attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host,
bool filter)
{
bool handle_sync_point = false;
CRM_CHECK((peer != NULL) && (xml != NULL), return);
if (xml->children != NULL) {
for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL;
child = crm_next_same_xml(child)) {
attrd_copy_xml_attributes(xml, child);
attrd_peer_update_one(peer, child, filter);
if (attrd_request_has_sync_point(child)) {
handle_sync_point = true;
}
}
} else {
attrd_peer_update_one(peer, xml, filter);
if (attrd_request_has_sync_point(xml)) {
handle_sync_point = true;
}
}
/* If the update XML specified that the client wanted to wait for a sync
* point, process that now.
*/
if (handle_sync_point) {
crm_trace("Hit local sync point for attribute update");
attrd_ack_waitlist_clients(attrd_sync_point_local, xml);
}
}
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
index 67defffb41..11a120a95a 100644
--- a/daemons/attrd/attrd_ipc.c
+++ b/daemons/attrd/attrd_ipc.c
@@ -1,630 +1,630 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/acl_internal.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/util.h>
#include "pacemaker-attrd.h"
static qb_ipcs_service_t *ipcs = NULL;
/*!
* \internal
* \brief Build the XML reply to a client query
*
* param[in] attr Name of requested attribute
* param[in] host Name of requested host (or NULL for all hosts)
*
* \return New XML reply
* \note Caller is responsible for freeing the resulting XML
*/
static xmlNode *build_query_reply(const char *attr, const char *host)
{
xmlNode *reply = create_xml_node(NULL, __func__);
attribute_t *a;
if (reply == NULL) {
return NULL;
}
crm_xml_add(reply, F_TYPE, T_ATTRD);
crm_xml_add(reply, F_SUBTYPE, PCMK__ATTRD_CMD_QUERY);
crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);
/* If desired attribute exists, add its value(s) to the reply */
a = g_hash_table_lookup(attributes, attr);
if (a) {
attribute_value_t *v;
xmlNode *host_value;
crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr);
/* Allow caller to use "localhost" to refer to local node */
if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) {
host = attrd_cluster->uname;
crm_trace("Mapped localhost to %s", host);
}
/* If a specific node was requested, add its value */
if (host) {
v = g_hash_table_lookup(a->values, host);
host_value = create_xml_node(reply, XML_CIB_TAG_NODE);
if (host_value == NULL) {
free_xml(reply);
return NULL;
}
pcmk__xe_add_node(host_value, host, 0);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE,
(v? v->current : NULL));
/* Otherwise, add all nodes' values */
} else {
GHashTableIter iter;
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
host_value = create_xml_node(reply, XML_CIB_TAG_NODE);
if (host_value == NULL) {
free_xml(reply);
return NULL;
}
pcmk__xe_add_node(host_value, v->nodename, 0);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current);
}
}
}
return reply;
}
xmlNode *
attrd_client_clear_failure(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
const char *rsc, *op, *interval_spec;
if (minimum_protocol_version >= 2) {
/* Propagate to all peers (including ourselves).
* This ends up at attrd_peer_message().
*/
attrd_send_message(NULL, xml, false);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION);
interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL);
/* Map this to an update */
crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE);
/* Add regular expression matching desired attributes */
if (rsc) {
char *pattern;
if (op == NULL) {
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
} else {
guint interval_ms = 0U;
- pcmk__parse_interval_spec(interval_spec, &interval_ms);
+ pcmk_parse_interval_spec(interval_spec, &interval_ms);
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
rsc, op, interval_ms);
}
crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, pattern);
free(pattern);
} else {
crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, ATTRD_RE_CLEAR_ALL);
}
/* Make sure attribute and value are not set, so we delete via regex */
xml_remove_prop(xml, PCMK__XA_ATTR_NAME);
xml_remove_prop(xml, PCMK__XA_ATTR_VALUE);
return attrd_client_update(request);
}
xmlNode *
attrd_client_peer_remove(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
// Host and ID are not used in combination, rather host has precedence
const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
char *host_alloc = NULL;
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
if (host == NULL) {
int nodeid = 0;
crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid);
if (nodeid > 0) {
crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL,
NULL);
char *host_alloc = NULL;
if (node && node->uname) {
// Use cached name if available
host = node->uname;
} else {
// Otherwise ask cluster layer
host_alloc = get_node_name(nodeid);
host = host_alloc;
}
pcmk__xe_add_node(xml, host, 0);
}
}
if (host) {
crm_info("Client %s is requesting all values for %s be removed",
pcmk__client_name(request->ipc_client), host);
attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
free(host_alloc);
} else {
crm_info("Ignoring request by client %s to remove all peer values without specifying peer",
pcmk__client_name(request->ipc_client));
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
xmlNode *
attrd_client_query(pcmk__request_t *request)
{
xmlNode *query = request->xml;
xmlNode *reply = NULL;
const char *attr = NULL;
crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client));
/* Request must specify attribute name to query */
attr = crm_element_value(query, PCMK__XA_ATTR_NAME);
if (attr == NULL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Ignoring malformed query from %s (no attribute name given)",
pcmk__client_name(request->ipc_client));
return NULL;
}
/* Build the XML reply */
reply = build_query_reply(attr, crm_element_value(query,
PCMK__XA_ATTR_NODE_NAME));
if (reply == NULL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Could not respond to query from %s: could not create XML reply",
pcmk__client_name(request->ipc_client));
return NULL;
} else {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
request->ipc_client->request_id = 0;
return reply;
}
xmlNode *
attrd_client_refresh(pcmk__request_t *request)
{
crm_info("Updating all attributes");
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
attrd_write_attributes(attrd_write_all|attrd_write_no_delay);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
static void
handle_missing_host(xmlNode *xml)
{
const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
if (host == NULL) {
crm_trace("Inferring host");
pcmk__xe_add_node(xml, attrd_cluster->uname, attrd_cluster->nodeid);
}
}
/* Convert a single IPC message with a regex into one with multiple children, one
* for each regex match.
*/
static int
expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex)
{
if (attr == NULL && regex) {
bool matched = false;
GHashTableIter aIter;
regex_t r_patt;
crm_debug("Setting %s to %s", regex, value);
if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) {
return EINVAL;
}
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
int status = regexec(&r_patt, attr, 0, NULL, 0);
if (status == 0) {
xmlNode *child = create_xml_node(xml, XML_ATTR_OP);
crm_trace("Matched %s with %s", attr, regex);
matched = true;
/* Copy all the attributes from the parent over, but remove the
* regex and replace it with the name.
*/
attrd_copy_xml_attributes(xml, child);
xml_remove_prop(child, PCMK__XA_ATTR_PATTERN);
crm_xml_add(child, PCMK__XA_ATTR_NAME, attr);
}
}
regfree(&r_patt);
/* Return a code if we never matched anything. This should not be treated
* as an error. It indicates there was a regex, and it was a valid regex,
* but simply did not match anything and the caller should not continue
* doing any regex-related processing.
*/
if (!matched) {
return pcmk_rc_op_unsatisfied;
}
} else if (attr == NULL) {
return pcmk_rc_bad_nvpair;
}
return pcmk_rc_ok;
}
static int
handle_regexes(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
int rc = pcmk_rc_ok;
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN);
rc = expand_regexes(xml, attr, value, regex);
if (rc == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Bad regex '%s' for update from client %s", regex,
pcmk__client_name(request->ipc_client));
} else if (rc == pcmk_rc_bad_nvpair) {
crm_err("Update request did not specify attribute or regular expression");
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Client %s update request did not specify attribute or regular expression",
pcmk__client_name(request->ipc_client));
}
return rc;
}
static int
handle_value_expansion(const char **value, xmlNode *xml, const char *op,
const char *attr)
{
attribute_t *a = g_hash_table_lookup(attributes, attr);
if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
return EINVAL;
}
if (*value && attrd_value_needs_expansion(*value)) {
int int_value;
attribute_value_t *v = NULL;
if (a) {
const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
v = g_hash_table_lookup(a->values, host);
}
int_value = attrd_expand_value(*value, (v? v->current : NULL));
crm_info("Expanded %s=%s to %d", attr, *value, int_value);
crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value);
/* Replacing the value frees the previous memory, so re-query it */
*value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
}
return pcmk_rc_ok;
}
static void
send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml)
{
if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) {
/* The client is waiting on the cluster-wide sync point. In this case,
* the response ACK is not sent until this attrd broadcasts the update
* and receives its own confirmation back from all peers.
*/
attrd_expect_confirmations(request, attrd_cluster_sync_point_update);
attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */
} else {
/* The client is either waiting on the local sync point or was not
* waiting on any sync point at all. For the local sync point, the
* response ACK is sent in attrd_peer_update. For clients not
* waiting on any sync point, the response ACK is sent in
* handle_update_request immediately before this function was called.
*/
attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
}
}
static int
send_child_update(xmlNode *child, void *data)
{
pcmk__request_t *request = (pcmk__request_t *) data;
/* Calling pcmk__set_result is handled by one of these calls to
* attrd_client_update, so no need to do it again here.
*/
request->xml = child;
attrd_client_update(request);
return pcmk_rc_ok;
}
xmlNode *
attrd_client_update(pcmk__request_t *request)
{
xmlNode *xml = NULL;
const char *attr, *value, *regex;
CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL);
xml = request->xml;
/* If the message has children, that means it is a message from a newer
* client that supports sending multiple operations at a time. There are
* two ways we can handle that.
*/
if (xml->children != NULL) {
if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
/* First, if all peers support a certain protocol version, we can
* just broadcast the big message and they'll handle it. However,
* we also need to apply all the transformations in this function
* to the children since they don't happen anywhere else.
*/
for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL;
child = crm_next_same_xml(child)) {
attr = crm_element_value(child, PCMK__XA_ATTR_NAME);
value = crm_element_value(child, PCMK__XA_ATTR_VALUE);
handle_missing_host(child);
if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
}
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
/* Save the original xml node pointer so it can be restored after iterating
* over all the children.
*/
xmlNode *orig_xml = request->xml;
/* Second, if they do not support that protocol version, split it
* up into individual messages and call attrd_client_update on
* each one.
*/
pcmk__xe_foreach_child(xml, XML_ATTR_OP, send_child_update, request);
request->xml = orig_xml;
}
return NULL;
}
attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN);
if (handle_regexes(request) != pcmk_rc_ok) {
/* Error handling was already dealt with in handle_regexes, so just return. */
return NULL;
} else if (regex) {
/* Recursively call attrd_client_update on the new message with regexes
* expanded. If supported by the attribute daemon, this means that all
* matches can also be handled atomically.
*/
return attrd_client_update(request);
}
handle_missing_host(xml);
if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME),
value, (attrd_election_won()? " (writer)" : ""));
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
/*!
* \internal
* \brief Accept a new client IPC connection
*
* \param[in,out] c New connection
* \param[in] uid Client user id
* \param[in] gid Client group id
*
* \return pcmk_ok on success, -errno otherwise
*/
static int32_t
attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("New client connection %p", c);
if (attrd_shutting_down(false)) {
crm_info("Ignoring new connection from pid %d during shutdown",
pcmk__client_pid(c));
return -EPERM;
}
if (pcmk__new_client(c, uid, gid) == NULL) {
return -EIO;
}
return pcmk_ok;
}
/*!
* \internal
* \brief Destroy a client IPC connection
*
* \param[in] c Connection to destroy
*
* \return FALSE (i.e. do not re-run this callback)
*/
static int32_t
attrd_ipc_closed(qb_ipcs_connection_t *c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
crm_trace("Ignoring request to clean up unknown connection %p", c);
} else {
crm_trace("Cleaning up closed client connection %p", c);
/* Remove the client from the sync point waitlist if it's present. */
attrd_remove_client_from_waitlist(client);
/* And no longer wait for confirmations from any peers. */
attrd_do_not_wait_for_client(client);
pcmk__free_client(client);
}
return FALSE;
}
/*!
* \internal
* \brief Destroy a client IPC connection
*
* \param[in,out] c Connection to destroy
*
* \note We handle a destroyed connection the same as a closed one,
* but we need a separate handler because the return type is different.
*/
static void
attrd_ipc_destroy(qb_ipcs_connection_t *c)
{
crm_trace("Destroying client connection %p", c);
attrd_ipc_closed(c);
}
static int32_t
attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *xml = NULL;
// Sanity-check, and parse XML from IPC data
CRM_CHECK((c != NULL) && (client != NULL), return 0);
if (data == NULL) {
crm_debug("No IPC data from PID %d", pcmk__client_pid(c));
return 0;
}
xml = pcmk__client_data2xml(client, data, &id, &flags);
if (xml == NULL) {
crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c));
pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
return 0;
} else {
pcmk__request_t request = {
.ipc_client = client,
.ipc_id = id,
.ipc_flags = flags,
.peer = NULL,
.xml = xml,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
CRM_ASSERT(client->user != NULL);
pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user);
request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK);
CRM_CHECK(request.op != NULL, return 0);
attrd_handle_request(&request);
pcmk__reset_request(&request);
}
free_xml(xml);
return 0;
}
static struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = attrd_ipc_accept,
.connection_created = NULL,
.msg_process = attrd_ipc_dispatch,
.connection_closed = attrd_ipc_closed,
.connection_destroyed = attrd_ipc_destroy
};
void
attrd_ipc_fini(void)
{
if (ipcs != NULL) {
pcmk__drop_all_clients(ipcs);
qb_ipcs_destroy(ipcs);
ipcs = NULL;
}
}
/*!
* \internal
* \brief Set up attrd IPC communication
*/
void
attrd_init_ipc(void)
{
pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks);
}
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index fd77af8394..0a7f799637 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,872 +1,872 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static qb_ipcs_service_t *ipcs = NULL;
static crm_trigger_t *config_read_trigger = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
void crm_shutdown(int nsig);
static gboolean crm_read_options(gpointer user_data);
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static crm_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = pcmk_cluster_new();
}
if (action & A_HA_DISCONNECT) {
crm_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
#if SUPPORT_COROSYNC
if (is_corosync_cluster()) {
registered = crm_connect_corosync(cluster);
}
#endif // SUPPORT_COROSYNC
if (registered) {
controld_election_init(cluster->uname);
controld_globals.our_nodename = cluster->uname;
controld_globals.our_uuid = cluster->uuid;
if(cluster->uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
}
if (!registered) {
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __func__);
controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
controld_set_fsa_input_flags(R_SHUTDOWN);
controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
controld_set_fsa_input_flags(R_SHUTDOWN);
//controld_set_fsa_input_flags(R_STAYDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
pcmk__s(controld_globals.dc_name, "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
void
crmd_fast_exit(crm_exit_t exit_code)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
exit_code, CRM_EX_FATAL);
exit_code = CRM_EX_FATAL;
} else if ((exit_code == CRM_EX_OK)
&& pcmk_is_set(controld_globals.fsa_input_register,
R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = CRM_EX_ERROR;
}
if (controld_globals.logger_out != NULL) {
controld_globals.logger_out->finish(controld_globals.logger_out,
exit_code, true, NULL);
pcmk__output_free(controld_globals.logger_out);
controld_globals.logger_out = NULL;
}
crm_exit(exit_code);
}
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GMainLoop *mloop = controld_globals.mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
controld_shutdown_schedulerd_ipc();
controld_disconnect_fencer(TRUE);
if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
iter = iter->next) {
fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(controld_globals.fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
controld_clear_fsa_input_flags(R_MEMBERSHIP);
g_list_free(controld_globals.fsa_message_queue);
controld_globals.fsa_message_queue = NULL;
controld_free_node_pending_timers();
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
controld_disconnect_cib_manager();
verify_stopped(controld_globals.fsa_state, LOG_WARNING);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_destroy_all();
mainloop_destroy_trigger(config_read_trigger);
config_read_trigger = NULL;
controld_destroy_fsa_trigger();
controld_destroy_transition_trigger();
pcmk__client_cleanup();
crm_peer_destroy();
controld_free_fsa_timers();
te_cleanup_stonith_history_sync(NULL, TRUE);
controld_free_sched_timer();
free(controld_globals.our_nodename);
controld_globals.our_nodename = NULL;
free(controld_globals.our_uuid);
controld_globals.our_uuid = NULL;
free(controld_globals.dc_name);
controld_globals.dc_name = NULL;
free(controld_globals.dc_version);
controld_globals.dc_version = NULL;
free(controld_globals.cluster_name);
controld_globals.cluster_name = NULL;
free(controld_globals.te_uuid);
controld_globals.te_uuid = NULL;
free_max_generation();
controld_destroy_failed_sync_table();
controld_destroy_outside_events_table();
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
/* Don't re-enter this block */
controld_globals.mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(controld_globals.cib_conn);
controld_globals.cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_exit_t exit_code = CRM_EX_OK;
if (pcmk_is_set(action, A_EXIT_1)) {
exit_code = CRM_EX_ERROR;
crm_err("Exiting now due to errors");
}
verify_stopped(cur_state, LOG_ERR);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
crm_read_options, NULL);
controld_init_fsa_trigger();
controld_init_transition_trigger();
crm_debug("Creating CIB manager and executor objects");
controld_globals.cib_conn = cib_new();
lrm_state_init_local();
if (controld_init_fsa_timers() == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("Accepting new IPC client connection");
if (pcmk__new_client(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
if (msg == NULL) {
pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
return 0;
}
pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
CRM_ASSERT(client->user != NULL);
pcmk__update_acl_user(msg, F_CRM_USER, client->user);
crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
if (controld_authorize_ipc_message(msg, client, NULL)) {
crm_trace("Processing IPC message from client %s",
pcmk__client_name(client));
route_message(C_IPC_MESSAGE, msg);
}
controld_trigger_fsa();
free_xml(msg);
return 0;
}
static int32_t
ipc_client_disconnected(qb_ipcs_connection_t *c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client) {
crm_trace("Disconnecting %sregistered client %s (%p/%p)",
(client->userdata? "" : "un"), pcmk__client_name(client),
c, client);
free(client->userdata);
pcmk__free_client(client);
controld_trigger_fsa();
}
return 0;
}
static void
ipc_connection_destroyed(qb_ipcs_connection_t *c)
{
crm_trace("Connection %p", c);
ipc_client_disconnected(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = accept_controller_client,
.connection_created = NULL,
.msg_process = dispatch_controller_ipc,
.connection_closed = ipc_client_disconnected,
.connection_destroyed = ipc_connection_destroyed
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_MEMBERSHIP)) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_LRM_CONNECTED)) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_CIB_CONNECTED)) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_READ_CONFIG)) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else {
crm_notice("Pacemaker controller successfully started and accepting connections");
}
controld_set_fsa_input_flags(R_ST_REQUIRED);
controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
controld_clear_fsa_input_flags(R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
controld_set_fsa_input_flags(R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
static pcmk__cluster_option_t controller_options[] = {
/* name, old name, type, allowed values,
* default value, validator,
* short description,
* long description
*/
{
PCMK_OPT_DC_VERSION, NULL, "string", NULL,
PCMK__VALUE_NONE, NULL,
N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
N_("Includes a hash which identifies the exact changeset the code was "
"built from. Used for diagnostic purposes.")
},
{
PCMK_OPT_CLUSTER_INFRASTRUCTURE, NULL, "string", NULL,
"corosync", NULL,
N_("The messaging stack on which Pacemaker is currently running"),
N_("Used for informational and diagnostic purposes.")
},
{
PCMK_OPT_CLUSTER_NAME, NULL, "string", NULL,
NULL, NULL,
N_("An arbitrary name for the cluster"),
N_("This optional value is mostly for users' convenience as desired "
"in administration, but may also be used in Pacemaker "
"configuration rules via the #cluster-name node attribute, and "
"by higher-level tools and resource agents.")
},
{
PCMK_OPT_DC_DEADTIME, NULL, "time", NULL,
"20s", pcmk__valid_interval_spec,
N_("How long to wait for a response from other nodes during start-up"),
N_("The optimal value will depend on the speed and load of your network "
"and the type of switches used.")
},
{
PCMK_OPT_CLUSTER_RECHECK_INTERVAL, NULL, "time",
N_("Zero disables polling, while positive values are an interval in "
"seconds (unless other units are specified, for example \"5min\")"),
"15min", pcmk__valid_interval_spec,
N_("Polling interval to recheck cluster state and evaluate rules "
"with date specifications"),
N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
"recheck cluster state for failure timeouts and most time-based "
"rules. However, it will also recheck the cluster after this "
"amount of inactivity, to evaluate rules with date specifications "
"and serve as a fail-safe for certain types of scheduler bugs.")
},
{
PCMK_OPT_LOAD_THRESHOLD, NULL, "percentage", NULL,
"80%", pcmk__valid_percentage,
N_("Maximum amount of system load that should be used by cluster nodes"),
N_("The cluster will slow down its recovery process when the amount of "
"system resources used (currently CPU) approaches this limit"),
},
{
PCMK_OPT_NODE_ACTION_LIMIT, NULL, "integer", NULL,
"0", pcmk__valid_int,
N_("Maximum number of jobs that can be scheduled per node "
"(defaults to 2x cores)")
},
{
PCMK_OPT_FENCE_REACTION, NULL, "select", "stop, panic",
"stop", NULL,
N_("How a cluster node should react if notified of its own fencing"),
N_("A cluster node may receive notification of its own fencing if fencing "
"is misconfigured, or if fabric fencing is in use that doesn't cut "
"cluster communication. Use \"stop\" to attempt to immediately "
"stop Pacemaker and stay stopped, or \"panic\" to attempt to "
"immediately reboot the local node, falling back to stop on "
"failure.")
},
{
PCMK_OPT_ELECTION_TIMEOUT, NULL, "time", NULL,
"2min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("Declare an election failed if it is not decided within this much "
"time. If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
PCMK_OPT_SHUTDOWN_ESCALATION, NULL, "time", NULL,
"20min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("Exit immediately if shutdown does not complete within this much "
"time. If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
PCMK_OPT_JOIN_INTEGRATION_TIMEOUT, "crmd-integration-timeout", "time",
NULL,
"3min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
PCMK_OPT_JOIN_FINALIZATION_TIMEOUT, "crmd-finalization-timeout",
"time", NULL,
"30min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
PCMK_OPT_TRANSITION_DELAY, "crmd-transition-delay", "time", NULL,
"0s", pcmk__valid_interval_spec,
N_("*** Advanced Use Only *** Enabling this option will slow down "
"cluster recovery under all conditions"),
N_("Delay cluster recovery for this much time to allow for additional "
"events to occur. Useful if your configuration is sensitive to "
"the order in which ping updates arrive.")
},
{
PCMK_OPT_STONITH_WATCHDOG_TIMEOUT, NULL, "time", NULL,
"0", controld_verify_stonith_watchdog_timeout,
N_("How long before nodes can be assumed to be safely down when "
"watchdog-based self-fencing via SBD is in use"),
N_("If this is set to a positive value, lost nodes are assumed to "
"self-fence using watchdog-based SBD within this much time. This "
"does not require a fencing resource to be explicitly configured, "
"though a fence_watchdog resource can be configured, to limit use "
"to specific nodes. If this is set to 0 (the default), the cluster "
"will never assume watchdog-based self-fencing. If this is set to a "
"negative value, the cluster will use twice the local value of the "
"`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
"or otherwise treat this as 0. WARNING: When used, this timeout "
"must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
"watchdog-based SBD, and Pacemaker will refuse to start on any of "
"those nodes where this is not true for the local value or SBD is "
"not active. When this is set to a negative value, "
"`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
"that use SBD, otherwise data corruption or loss could occur.")
},
{
PCMK_OPT_STONITH_MAX_ATTEMPTS, NULL, "integer", NULL,
"10", pcmk__valid_positive_int,
N_("How many times fencing can fail before it will no longer be "
"immediately re-attempted on a target")
},
// Already documented in libpe_status (other values must be kept identical)
{
PCMK_OPT_NO_QUORUM_POLICY, NULL, "select",
"stop, freeze, ignore, demote, suicide",
"stop", pcmk__valid_no_quorum_policy,
N_("What to do when the cluster does not have quorum"), NULL
},
{
PCMK_OPT_SHUTDOWN_LOCK, NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether to lock resources to a cleanly shut down node"),
N_("When true, resources active on a node when it is cleanly shut down "
"are kept \"locked\" to that node (not allowed to run elsewhere) "
"until they start again on that node after it rejoins (or for at "
"most shutdown-lock-limit, if set). Stonith resources and "
"Pacemaker Remote connections are never locked. Clone and bundle "
"instances and the promoted role of promotable clones are "
"currently never locked, though support could be added in a future "
"release.")
},
{
PCMK_OPT_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
"0", pcmk__valid_interval_spec,
N_("Do not lock resources to a cleanly shut down node longer than "
"this"),
N_("If shutdown-lock is true and this is set to a nonzero time "
"duration, shutdown locks will expire after this much time has "
"passed since the shutdown was initiated, even if the node has not "
"rejoined.")
},
{
PCMK_OPT_NODE_PENDING_TIMEOUT, NULL, "time", NULL,
"0", pcmk__valid_interval_spec,
N_("How long to wait for a node that has joined the cluster to join "
"the controller process group"),
N_("Fence nodes that do not join the controller process group within "
"this much time after joining the cluster, to allow the cluster "
"to continue managing resources. A value of 0 means never fence "
"pending nodes. Setting the value to 2h means fence nodes after "
"2 hours.")
},
};
void
crmd_metadata(void)
{
const char *desc_short = "Pacemaker controller options";
const char *desc_long = "Cluster options used by Pacemaker's controller";
gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
desc_long, controller_options,
PCMK__NELEM(controller_options));
printf("%s", s);
g_free(s);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
controld_set_fsa_input_flags(R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig != NULL)
&& !pcmk__xe_is(crmconfig, XML_CIB_TAG_CRMCONFIG)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = pcmk__strkey_table(free, free);
pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
// Validate all options, and use defaults if not already present in hash
pcmk__validate_cluster_options(config_hash, controller_options,
PCMK__NELEM(controller_options));
value = g_hash_table_lookup(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
controld_set_global_flags(controld_no_quorum_suicide);
}
value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK);
if (crm_is_true(value)) {
controld_set_global_flags(controld_shutdown_lock_enabled);
} else {
controld_clear_global_flags(controld_shutdown_lock_enabled);
}
value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
- pcmk__parse_interval_spec(value, &controld_globals.shutdown_lock_limit);
+ pcmk_parse_interval_spec(value, &controld_globals.shutdown_lock_limit);
controld_globals.shutdown_lock_limit /= 1000;
value = g_hash_table_lookup(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
- pcmk__parse_interval_spec(value, &controld_globals.node_pending_timeout);
+ pcmk_parse_interval_spec(value, &controld_globals.node_pending_timeout);
controld_globals.node_pending_timeout /= 1000;
value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME);
pcmk__str_update(&(controld_globals.cluster_name), value);
// Let subcomponents initialize their own static variables
controld_configure_election(config_hash);
controld_configure_fencing(config_hash);
controld_configure_fsa_timers(config_hash);
controld_configure_throttle(config_hash);
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
controld_set_fsa_input_flags(R_READ_CONFIG);
controld_trigger_fsa();
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
/*!
* \internal
* \brief Trigger read and processing of the configuration
*
* \param[in] fn Calling function name
* \param[in] line Line number where call occurred
*/
void
controld_trigger_config_as(const char *fn, int line)
{
if (config_read_trigger != NULL) {
crm_trace("%s:%d - Triggered config processing", fn, line);
mainloop_set_trigger(config_read_trigger);
}
}
gboolean
crm_read_options(gpointer user_data)
{
cib_t *cib_conn = controld_globals.cib_conn;
int call_id = cib_conn->cmds->query(cib_conn,
"//" XML_CIB_TAG_CRMCONFIG
" | //" XML_CIB_TAG_ALERTS,
NULL, cib_xpath|cib_scope_local);
fsa_register_cib_callback(call_id, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
controld_trigger_config();
}
void
crm_shutdown(int nsig)
{
const char *value = NULL;
guint default_period_ms = 0;
if ((controld_globals.mainloop == NULL)
|| !g_main_loop_is_running(controld_globals.mainloop)) {
crmd_exit(CRM_EX_OK);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
return;
}
controld_set_fsa_input_flags(R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
/* If shutdown timer doesn't have a period set, use the default
*
* @TODO: Evaluate whether this is still necessary. As long as
* config_query_callback() has been run at least once, it doesn't look like
* anything could have changed the timer period since then.
*/
value = pcmk__cluster_option(NULL, controller_options,
PCMK__NELEM(controller_options),
PCMK_OPT_SHUTDOWN_ESCALATION);
- pcmk__parse_interval_spec(value, &default_period_ms);
+ pcmk_parse_interval_spec(value, &default_period_ms);
controld_shutdown_start_countdown(default_period_ms);
}
diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c
index 448808053c..4dc5fb41fc 100644
--- a/daemons/controld/controld_election.c
+++ b/daemons/controld/controld_election.c
@@ -1,287 +1,287 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/crm.h>
#include <pacemaker-controld.h>
static election_t *fsa_election = NULL;
static gboolean
election_win_cb(gpointer data)
{
register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
return FALSE;
}
void
controld_election_init(const char *uname)
{
fsa_election = election_init("DC", uname, 60000 /*60s*/, election_win_cb);
}
/*!
* \internal
* \brief Configure election options based on the CIB
*
* \param[in,out] options Name/value pairs for configured options
*/
void
controld_configure_election(GHashTable *options)
{
const char *value = g_hash_table_lookup(options, PCMK_OPT_ELECTION_TIMEOUT);
guint interval_ms = 0U;
- pcmk__parse_interval_spec(value, &interval_ms);
+ pcmk_parse_interval_spec(value, &interval_ms);
election_timeout_set_period(fsa_election, interval_ms);
}
void
controld_remove_voter(const char *uname)
{
election_remove(fsa_election, uname);
if (pcmk__str_eq(uname, controld_globals.dc_name, pcmk__str_casei)) {
/* Clear any election dampening in effect. Otherwise, if the lost DC had
* just won, an immediate new election could fizzle out with no new DC.
*/
election_clear_dampening(fsa_election);
}
}
void
controld_election_fini(void)
{
election_fini(fsa_election);
fsa_election = NULL;
}
void
controld_stop_current_election_timeout(void)
{
election_timeout_stop(fsa_election);
}
/* A_ELECTION_VOTE */
void
do_election_vote(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean not_voting = FALSE;
/* don't vote if we're in one of these states or wanting to shut down */
switch (cur_state) {
case S_STARTING:
case S_RECOVERY:
case S_STOPPING:
case S_TERMINATE:
crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state));
not_voting = TRUE;
break;
case S_ELECTION:
case S_INTEGRATION:
case S_RELEASE_DC:
break;
default:
crm_err("Broken? Voting in state %s", fsa_state2string(cur_state));
break;
}
if (not_voting == FALSE) {
if (pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) {
not_voting = TRUE;
}
}
if (not_voting) {
if (AM_I_DC) {
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
} else {
register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
}
return;
}
election_vote(fsa_election);
return;
}
void
do_election_check(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (controld_globals.fsa_state == S_ELECTION) {
election_check(fsa_election);
} else {
crm_debug("Ignoring election check because we are not in an election");
}
}
/* A_ELECTION_COUNT */
void
do_election_count_vote(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
enum election_result rc = 0;
ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);
if(crm_peer_cache == NULL) {
if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_err("Internal error, no peer cache");
}
return;
}
rc = election_count_vote(fsa_election, vote->msg, cur_state != S_STARTING);
switch(rc) {
case election_start:
election_reset(fsa_election);
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
break;
case election_lost:
update_dc(NULL);
if (pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)) {
cib_t *cib_conn = controld_globals.cib_conn;
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
cib_conn->cmds->set_secondary(cib_conn, cib_scope_local);
} else if (cur_state != S_STARTING) {
register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
}
break;
default:
crm_trace("Election message resulted in state %d", rc);
}
}
static void
feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_notice("Feature update failed: %s "CRM_XS" rc=%d",
pcmk_strerror(rc), rc);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
/*!
* \internal
* \brief Update a node attribute in the CIB during a DC takeover
*
* \param[in] name Name of attribute to update
* \param[in] value New attribute value
*/
#define dc_takeover_update_attr(name, value) do { \
cib__update_node_attr(controld_globals.logger_out, \
controld_globals.cib_conn, cib_none, \
XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, \
name, value, NULL, NULL); \
} while (0)
/* A_DC_TAKEOVER */
void
do_dc_takeover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *cib = NULL;
const char *cluster_type = name_for_cluster_type(get_cluster_type());
pid_t watchdog = pcmk__locate_sbd();
crm_info("Taking over DC status for this partition");
controld_set_fsa_input_flags(R_THE_DC);
execute_stonith_cleanup();
election_reset(fsa_election);
controld_set_fsa_input_flags(R_JOIN_OK|R_INVOKE_PE);
controld_globals.cib_conn->cmds->set_primary(controld_globals.cib_conn,
cib_scope_local);
cib = create_xml_node(NULL, XML_TAG_CIB);
crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
controld_update_cib(XML_TAG_CIB, cib, cib_none, feature_update_callback);
dc_takeover_update_attr(PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));
dc_takeover_update_attr(PCMK_OPT_DC_VERSION,
PACEMAKER_VERSION "-" BUILD_VERSION);
dc_takeover_update_attr(PCMK_OPT_CLUSTER_INFRASTRUCTURE, cluster_type);
#if SUPPORT_COROSYNC
if ((controld_globals.cluster_name == NULL) && is_corosync_cluster()) {
char *cluster_name = pcmk__corosync_cluster_name();
if (cluster_name != NULL) {
dc_takeover_update_attr(PCMK_OPT_CLUSTER_NAME, cluster_name);
}
free(cluster_name);
}
#endif
controld_trigger_config();
free_xml(cib);
}
/* A_DC_RELEASE */
void
do_dc_release(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (action & A_DC_RELEASE) {
crm_debug("Releasing the role of DC");
controld_clear_fsa_input_flags(R_THE_DC);
controld_expect_sched_reply(NULL);
} else if (action & A_DC_RELEASED) {
crm_info("DC role released");
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
xmlNode *update = NULL;
crm_node_t *node = crm_get_peer(0, controld_globals.our_nodename);
pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
update = create_node_state_update(node, node_update_expected, NULL,
__func__);
/* Don't need a based response because controld will stop. */
fsa_cib_anon_update_discard_reply(XML_CIB_TAG_STATUS, update);
free_xml(update);
}
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL);
} else {
crm_err("Unknown DC action %s", fsa_action2string(action));
}
crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO);
}
diff --git a/daemons/controld/controld_timers.c b/daemons/controld/controld_timers.c
index a978e56787..66e8196bc7 100644
--- a/daemons/controld/controld_timers.c
+++ b/daemons/controld/controld_timers.c
@@ -1,509 +1,509 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <time.h>
#include <stdlib.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <pacemaker-controld.h>
//! FSA mainloop timer type
typedef struct fsa_timer_s {
guint source_id; //!< Timer source ID
guint period_ms; //!< Timer period
enum crmd_fsa_input fsa_input; //!< Input to register if timer pops
gboolean (*callback) (gpointer data); //!< What do if timer pops
bool log_error; //!< Timer popping indicates error
int counter; //!< For detecting loops
} fsa_timer_t;
//! Wait before retrying a failed cib or executor connection
static fsa_timer_t *wait_timer = NULL;
//! Periodically re-run scheduler (for date_spec evaluation and as a failsafe)
static fsa_timer_t *recheck_timer = NULL;
//! Wait at start-up, or after an election, for DC to make contact
static fsa_timer_t *election_timer = NULL;
//! Delay start of new transition with expectation something else might happen
static fsa_timer_t *transition_timer = NULL;
//! \c PCMK_OPT_JOIN_INTEGRATION_TIMEOUT
static fsa_timer_t *integration_timer = NULL;
//! \c PCMK_OPT_JOIN_FINALIZATION_TIMEOUT
static fsa_timer_t *finalization_timer = NULL;
// Wait for DC to stop all resources and give us the all-clear to shut down
fsa_timer_t *shutdown_escalation_timer = NULL;
//! Cluster recheck interval (from configuration)
static guint recheck_interval_ms = 0;
static const char *
get_timer_desc(fsa_timer_t * timer)
{
if (timer == election_timer) {
return "Election Trigger";
} else if (timer == shutdown_escalation_timer) {
return "Shutdown Escalation";
} else if (timer == integration_timer) {
return "Integration Timer";
} else if (timer == finalization_timer) {
return "Finalization Timer";
} else if (timer == transition_timer) {
return "New Transition Timer";
} else if (timer == wait_timer) {
return "Wait Timer";
} else if (timer == recheck_timer) {
return "Cluster Recheck Timer";
}
return "Unknown Timer";
}
/*!
* \internal
* \brief Stop an FSA timer
*
* \param[in,out] timer Timer to stop
*
* \return true if the timer was running, or false otherwise
*/
static bool
controld_stop_timer(fsa_timer_t *timer)
{
CRM_CHECK(timer != NULL, return false);
if (timer->source_id != 0) {
crm_trace("Stopping %s (would inject %s if popped after %ums, src=%d)",
get_timer_desc(timer), fsa_input2string(timer->fsa_input),
timer->period_ms, timer->source_id);
g_source_remove(timer->source_id);
timer->source_id = 0;
} else {
crm_trace("%s already stopped (would inject %s if popped after %ums)",
get_timer_desc(timer), fsa_input2string(timer->fsa_input),
timer->period_ms);
return false;
}
return true;
}
/*!
* \internal
* \brief Start an FSA timer
*
* \param[in,out] timer Timer to start
*/
static void
controld_start_timer(fsa_timer_t *timer)
{
if (timer->source_id == 0 && timer->period_ms > 0) {
timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer);
CRM_ASSERT(timer->source_id != 0);
crm_debug("Started %s (inject %s if pops after %ums, source=%d)",
get_timer_desc(timer), fsa_input2string(timer->fsa_input),
timer->period_ms, timer->source_id);
} else {
crm_debug("%s already running (inject %s if pops after %ums, source=%d)",
get_timer_desc(timer), fsa_input2string(timer->fsa_input),
timer->period_ms, timer->source_id);
}
}
/* A_DC_TIMER_STOP, A_DC_TIMER_START,
* A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START
* A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START
*/
void
do_timer_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean timer_op_ok = TRUE;
if (action & A_DC_TIMER_STOP) {
timer_op_ok = controld_stop_timer(election_timer);
} else if (action & A_FINALIZE_TIMER_STOP) {
timer_op_ok = controld_stop_timer(finalization_timer);
} else if (action & A_INTEGRATE_TIMER_STOP) {
timer_op_ok = controld_stop_timer(integration_timer);
}
/* don't start a timer that wasn't already running */
if (action & A_DC_TIMER_START && timer_op_ok) {
controld_start_timer(election_timer);
if (AM_I_DC) {
/* there can be only one */
register_fsa_input(cause, I_ELECTION, NULL);
}
} else if (action & A_FINALIZE_TIMER_START) {
controld_start_timer(finalization_timer);
} else if (action & A_INTEGRATE_TIMER_START) {
controld_start_timer(integration_timer);
}
}
static gboolean
crm_timer_popped(gpointer data)
{
fsa_timer_t *timer = (fsa_timer_t *) data;
if (timer->log_error) {
crm_err("%s just popped in state %s! " CRM_XS " input=%s time=%ums",
get_timer_desc(timer),
fsa_state2string(controld_globals.fsa_state),
fsa_input2string(timer->fsa_input), timer->period_ms);
} else {
crm_info("%s just popped " CRM_XS " input=%s time=%ums",
get_timer_desc(timer), fsa_input2string(timer->fsa_input),
timer->period_ms);
timer->counter++;
}
if ((timer == election_timer) && (election_timer->counter > 5)) {
crm_notice("We appear to be in an election loop, something may be wrong");
crm_write_blackbox(0, NULL);
election_timer->counter = 0;
}
controld_stop_timer(timer); // Make timer _not_ go off again
if (timer->fsa_input == I_INTEGRATED) {
crm_info("Welcomed: %d, Integrated: %d",
crmd_join_phase_count(crm_join_welcomed),
crmd_join_phase_count(crm_join_integrated));
if (crmd_join_phase_count(crm_join_welcomed) == 0) {
// If we don't even have ourselves, start again
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL,
__func__);
} else {
register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL);
}
} else if ((timer == recheck_timer)
&& (controld_globals.fsa_state != S_IDLE)) {
crm_debug("Discarding %s event in state: %s",
fsa_input2string(timer->fsa_input),
fsa_state2string(controld_globals.fsa_state));
} else if ((timer == finalization_timer)
&& (controld_globals.fsa_state != S_FINALIZE_JOIN)) {
crm_debug("Discarding %s event in state: %s",
fsa_input2string(timer->fsa_input),
fsa_state2string(controld_globals.fsa_state));
} else if (timer->fsa_input != I_NULL) {
register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL);
}
controld_trigger_fsa();
return TRUE;
}
bool
controld_init_fsa_timers(void)
{
transition_timer = calloc(1, sizeof(fsa_timer_t));
if (transition_timer == NULL) {
return FALSE;
}
integration_timer = calloc(1, sizeof(fsa_timer_t));
if (integration_timer == NULL) {
return FALSE;
}
finalization_timer = calloc(1, sizeof(fsa_timer_t));
if (finalization_timer == NULL) {
return FALSE;
}
election_timer = calloc(1, sizeof(fsa_timer_t));
if (election_timer == NULL) {
return FALSE;
}
shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
if (shutdown_escalation_timer == NULL) {
return FALSE;
}
wait_timer = calloc(1, sizeof(fsa_timer_t));
if (wait_timer == NULL) {
return FALSE;
}
recheck_timer = calloc(1, sizeof(fsa_timer_t));
if (recheck_timer == NULL) {
return FALSE;
}
election_timer->source_id = 0;
election_timer->period_ms = 0;
election_timer->fsa_input = I_DC_TIMEOUT;
election_timer->callback = crm_timer_popped;
election_timer->log_error = FALSE;
transition_timer->source_id = 0;
transition_timer->period_ms = 0;
transition_timer->fsa_input = I_PE_CALC;
transition_timer->callback = crm_timer_popped;
transition_timer->log_error = FALSE;
integration_timer->source_id = 0;
integration_timer->period_ms = 0;
integration_timer->fsa_input = I_INTEGRATED;
integration_timer->callback = crm_timer_popped;
integration_timer->log_error = TRUE;
finalization_timer->source_id = 0;
finalization_timer->period_ms = 0;
finalization_timer->fsa_input = I_FINALIZED;
finalization_timer->callback = crm_timer_popped;
finalization_timer->log_error = FALSE;
/* We can't use I_FINALIZED here, because that creates a bug in the join
* process where a joining node can be stuck in S_PENDING while we think it
* is in S_NOT_DC. This created an infinite transition loop in which we
* continually send probes which the node NACKs because it's pending.
*
* If we have nodes where the cluster layer is active but the controller is
* not, we can avoid this causing an election/join loop, in the integration
* phase.
*/
finalization_timer->fsa_input = I_ELECTION;
shutdown_escalation_timer->source_id = 0;
shutdown_escalation_timer->period_ms = 0;
shutdown_escalation_timer->fsa_input = I_STOP;
shutdown_escalation_timer->callback = crm_timer_popped;
shutdown_escalation_timer->log_error = TRUE;
wait_timer->source_id = 0;
wait_timer->period_ms = 2000;
wait_timer->fsa_input = I_NULL;
wait_timer->callback = crm_timer_popped;
wait_timer->log_error = FALSE;
recheck_timer->source_id = 0;
recheck_timer->period_ms = 0;
recheck_timer->fsa_input = I_PE_CALC;
recheck_timer->callback = crm_timer_popped;
recheck_timer->log_error = FALSE;
return TRUE;
}
/*!
* \internal
* \brief Configure timers based on the CIB
*
* \param[in,out] options Name/value pairs for configured options
*/
void
controld_configure_fsa_timers(GHashTable *options)
{
const char *value = NULL;
// Election timer
value = g_hash_table_lookup(options, PCMK_OPT_DC_DEADTIME);
- pcmk__parse_interval_spec(value, &(election_timer->period_ms));
+ pcmk_parse_interval_spec(value, &(election_timer->period_ms));
// Integration timer
value = g_hash_table_lookup(options, PCMK_OPT_JOIN_INTEGRATION_TIMEOUT);
- pcmk__parse_interval_spec(value, &(integration_timer->period_ms));
+ pcmk_parse_interval_spec(value, &(integration_timer->period_ms));
// Finalization timer
value = g_hash_table_lookup(options, PCMK_OPT_JOIN_FINALIZATION_TIMEOUT);
- pcmk__parse_interval_spec(value, &(finalization_timer->period_ms));
+ pcmk_parse_interval_spec(value, &(finalization_timer->period_ms));
// Shutdown escalation timer
value = g_hash_table_lookup(options, PCMK_OPT_SHUTDOWN_ESCALATION);
- pcmk__parse_interval_spec(value, &(shutdown_escalation_timer->period_ms));
+ pcmk_parse_interval_spec(value, &(shutdown_escalation_timer->period_ms));
crm_debug("Shutdown escalation occurs if DC has not responded to request "
"in %ums", shutdown_escalation_timer->period_ms);
// Transition timer
value = g_hash_table_lookup(options, PCMK_OPT_TRANSITION_DELAY);
- pcmk__parse_interval_spec(value, &(transition_timer->period_ms));
+ pcmk_parse_interval_spec(value, &(transition_timer->period_ms));
// Recheck interval
value = g_hash_table_lookup(options, PCMK_OPT_CLUSTER_RECHECK_INTERVAL);
- pcmk__parse_interval_spec(value, &recheck_interval_ms);
+ pcmk_parse_interval_spec(value, &recheck_interval_ms);
crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
}
void
controld_free_fsa_timers(void)
{
controld_stop_timer(transition_timer);
controld_stop_timer(integration_timer);
controld_stop_timer(finalization_timer);
controld_stop_timer(election_timer);
controld_stop_timer(shutdown_escalation_timer);
controld_stop_timer(wait_timer);
controld_stop_timer(recheck_timer);
free(transition_timer); transition_timer = NULL;
free(integration_timer); integration_timer = NULL;
free(finalization_timer); finalization_timer = NULL;
free(election_timer); election_timer = NULL;
free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
free(wait_timer); wait_timer = NULL;
free(recheck_timer); recheck_timer = NULL;
}
/*!
* \internal
* \brief Check whether the transition timer is started
* \return true if the transition timer is started, or false otherwise
*/
bool
controld_is_started_transition_timer(void)
{
return (transition_timer->period_ms > 0)
&& (transition_timer->source_id != 0);
}
/*!
* \internal
* \brief Start the recheck timer
*/
void
controld_start_recheck_timer(void)
{
// Default to recheck interval configured in CIB (if any)
guint period_ms = recheck_interval_ms;
// If scheduler supplied a "recheck by" time, check whether that's sooner
if (controld_globals.transition_graph->recheck_by > 0) {
time_t diff_seconds = controld_globals.transition_graph->recheck_by
- time(NULL);
if (diff_seconds < 1) {
// We're already past the desired time
period_ms = 500;
} else {
period_ms = (guint) diff_seconds * 1000;
}
// Use "recheck by" only if it's sooner than interval from CIB
if (period_ms > recheck_interval_ms) {
period_ms = recheck_interval_ms;
}
}
if (period_ms > 0) {
recheck_timer->period_ms = period_ms;
controld_start_timer(recheck_timer);
}
}
/*!
* \internal
* \brief Start the wait timer
*/
void
controld_start_wait_timer(void)
{
controld_start_timer(wait_timer);
}
/*!
* \internal
* \brief Stop the recheck timer
*
* \return true if the recheck timer was running, or false otherwise
*/
bool
controld_stop_recheck_timer(void)
{
return controld_stop_timer(recheck_timer);
}
/*!
* \brief Get the transition timer's configured period
* \return The transition_timer's period
*/
guint
controld_get_period_transition_timer(void)
{
return transition_timer->period_ms;
}
/*!
* \internal
* \brief Reset the election timer's counter to 0
*/
void
controld_reset_counter_election_timer(void)
{
election_timer->counter = 0;
}
/*!
* \internal
* \brief Stop the transition timer
*
* \return true if the transition timer was running, or false otherwise
*/
bool
controld_stop_transition_timer(void)
{
return controld_stop_timer(transition_timer);
}
/*!
* \internal
* \brief Start the transition timer
*/
void
controld_start_transition_timer(void)
{
controld_start_timer(transition_timer);
}
/*!
* \internal
* \brief Start the countdown sequence for a shutdown
*
* \param[in] default_period_ms Period to use if the shutdown escalation
* timer's period is 0
*/
void
controld_shutdown_start_countdown(guint default_period_ms)
{
if (shutdown_escalation_timer->period_ms == 0) {
shutdown_escalation_timer->period_ms = default_period_ms;
}
crm_notice("Initiating controller shutdown sequence " CRM_XS " limit=%ums",
shutdown_escalation_timer->period_ms);
controld_start_timer(shutdown_escalation_timer);
}
diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c
index 3220d2836d..ac214a7d8b 100644
--- a/daemons/execd/cts-exec-helper.c
+++ b/daemons/execd/cts-exec-helper.c
@@ -1,626 +1,626 @@
/*
* Copyright 2012-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <unistd.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/mainloop.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <crm/cib.h>
#include <crm/cib/internal.h>
#include <crm/lrmd.h>
#define SUMMARY "cts-exec-helper - inject commands into the Pacemaker executor and watch for events"
static int exec_call_id = 0;
static gboolean start_test(gpointer user_data);
static void try_connect(void);
static char *key = NULL;
static char *val = NULL;
static struct {
int verbose;
int quiet;
guint interval_ms;
int timeout;
int start_delay;
int cancel_call_id;
gboolean no_wait;
gboolean is_running;
gboolean no_connect;
int exec_call_opts;
const char *api_call;
const char *rsc_id;
const char *provider;
const char *class;
const char *type;
const char *action;
const char *listen;
gboolean use_tls;
lrmd_key_value_t *params;
} options;
static gboolean
interval_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
- return pcmk__parse_interval_spec(optarg,
- &options.interval_ms) == pcmk_rc_ok;
+ return pcmk_parse_interval_spec(optarg,
+ &options.interval_ms) == pcmk_rc_ok;
}
static gboolean
notify_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (pcmk__str_any_of(option_name, "--notify-orig", "-n", NULL)) {
options.exec_call_opts = lrmd_opt_notify_orig_only;
} else if (pcmk__str_any_of(option_name, "--notify-changes", "-o", NULL)) {
options.exec_call_opts = lrmd_opt_notify_changes_only;
}
return TRUE;
}
static gboolean
param_key_val_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (pcmk__str_any_of(option_name, "--param-key", "-k", NULL)) {
pcmk__str_update(&key, optarg);
} else if (pcmk__str_any_of(option_name, "--param-val", "-v", NULL)) {
pcmk__str_update(&val, optarg);
}
if (key != NULL && val != NULL) {
options.params = lrmd_key_value_add(options.params, key, val);
pcmk__str_update(&key, NULL);
pcmk__str_update(&val, NULL);
}
return TRUE;
}
static GOptionEntry basic_entries[] = {
{ "api-call", 'c', 0, G_OPTION_ARG_STRING, &options.api_call,
"Directly relates to executor API functions",
NULL },
{ "is-running", 'R', 0, G_OPTION_ARG_NONE, &options.is_running,
"Determine if a resource is registered and running",
NULL },
{ "listen", 'l', 0, G_OPTION_ARG_STRING, &options.listen,
"Listen for a specific event string",
NULL },
{ "no-wait", 'w', 0, G_OPTION_ARG_NONE, &options.no_wait,
"Make api call and do not wait for result",
NULL },
{ "notify-changes", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb,
"Only notify client changes to recurring operations",
NULL },
{ "notify-orig", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb,
"Only notify this client of the results of an API action",
NULL },
{ "tls", 'S', 0, G_OPTION_ARG_NONE, &options.use_tls,
"Use TLS backend for local connection",
NULL },
{ NULL }
};
static GOptionEntry api_call_entries[] = {
{ "action", 'a', 0, G_OPTION_ARG_STRING, &options.action,
NULL, NULL },
{ "cancel-call-id", 'x', 0, G_OPTION_ARG_INT, &options.cancel_call_id,
NULL, NULL },
{ "class", 'C', 0, G_OPTION_ARG_STRING, &options.class,
NULL, NULL },
{ "interval", 'i', 0, G_OPTION_ARG_CALLBACK, interval_cb,
NULL, NULL },
{ "param-key", 'k', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb,
NULL, NULL },
{ "param-val", 'v', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb,
NULL, NULL },
{ "provider", 'P', 0, G_OPTION_ARG_STRING, &options.provider,
NULL, NULL },
{ "rsc-id", 'r', 0, G_OPTION_ARG_STRING, &options.rsc_id,
NULL, NULL },
{ "start-delay", 's', 0, G_OPTION_ARG_INT, &options.start_delay,
NULL, NULL },
{ "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout,
NULL, NULL },
{ "type", 'T', 0, G_OPTION_ARG_STRING, &options.type,
NULL, NULL },
{ NULL }
};
static GMainLoop *mainloop = NULL;
static lrmd_t *lrmd_conn = NULL;
static char event_buf_v0[1024];
static crm_exit_t
test_exit(crm_exit_t exit_code)
{
lrmd_api_delete(lrmd_conn);
return crm_exit(exit_code);
}
#define print_result(fmt, args...) \
if (!options.quiet) { \
printf(fmt "\n" , ##args); \
}
#define report_event(event) \
snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \
lrmd_event_type2str(event->type), \
event->rsc_id, \
event->op_type ? event->op_type : "none", \
services_ocf_exitcode_str(event->rc), \
pcmk_exec_status_str(event->op_status)); \
crm_info("%s", event_buf_v0);
static void
test_shutdown(int nsig)
{
lrmd_api_delete(lrmd_conn);
lrmd_conn = NULL;
}
static void
read_events(lrmd_event_data_t * event)
{
report_event(event);
if (options.listen) {
if (pcmk__str_eq(options.listen, event_buf_v0, pcmk__str_casei)) {
print_result("LISTEN EVENT SUCCESSFUL");
test_exit(CRM_EX_OK);
}
}
if (exec_call_id && (event->call_id == exec_call_id)) {
if (event->op_status == 0 && event->rc == 0) {
print_result("API-CALL SUCCESSFUL for 'exec'");
} else {
print_result("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s",
event->rc, pcmk_exec_status_str(event->op_status));
test_exit(CRM_EX_ERROR);
}
if (!options.listen) {
test_exit(CRM_EX_OK);
}
}
}
static gboolean
timeout_err(gpointer data)
{
print_result("LISTEN EVENT FAILURE - timeout occurred, never found");
test_exit(CRM_EX_TIMEOUT);
return FALSE;
}
static void
connection_events(lrmd_event_data_t * event)
{
int rc = event->connection_rc;
if (event->type != lrmd_event_connect) {
/* ignore */
return;
}
if (!rc) {
crm_info("Executor client connection established");
start_test(NULL);
return;
} else {
sleep(1);
try_connect();
crm_notice("Executor client connection failed");
}
}
static void
try_connect(void)
{
int tries = 10;
static int num_tries = 0;
int rc = 0;
lrmd_conn->cmds->set_callback(lrmd_conn, connection_events);
for (; num_tries < tries; num_tries++) {
rc = lrmd_conn->cmds->connect_async(lrmd_conn, crm_system_name, 3000);
if (!rc) {
return; /* we'll hear back in async callback */
}
sleep(1);
}
print_result("API CONNECTION FAILURE");
test_exit(CRM_EX_ERROR);
}
static gboolean
start_test(gpointer user_data)
{
int rc = 0;
if (!options.no_connect) {
if (!lrmd_conn->cmds->is_connected(lrmd_conn)) {
try_connect();
/* async connect -- this function will get called back into */
return 0;
}
}
lrmd_conn->cmds->set_callback(lrmd_conn, read_events);
if (options.timeout) {
g_timeout_add(options.timeout, timeout_err, NULL);
}
if (!options.api_call) {
return 0;
}
if (pcmk__str_eq(options.api_call, "exec", pcmk__str_casei)) {
rc = lrmd_conn->cmds->exec(lrmd_conn,
options.rsc_id,
options.action,
NULL,
options.interval_ms,
options.timeout,
options.start_delay,
options.exec_call_opts,
options.params);
if (rc > 0) {
exec_call_id = rc;
print_result("API-CALL 'exec' action pending, waiting on response");
}
} else if (pcmk__str_eq(options.api_call, "register_rsc", pcmk__str_casei)) {
rc = lrmd_conn->cmds->register_rsc(lrmd_conn,
options.rsc_id,
options.class, options.provider, options.type, 0);
} else if (pcmk__str_eq(options.api_call, "get_rsc_info", pcmk__str_casei)) {
lrmd_rsc_info_t *rsc_info;
rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0);
if (rsc_info) {
print_result("RSC_INFO: id:%s class:%s provider:%s type:%s",
rsc_info->id, rsc_info->standard,
(rsc_info->provider? rsc_info->provider : "<none>"),
rsc_info->type);
lrmd_free_rsc_info(rsc_info);
rc = pcmk_ok;
} else {
rc = -1;
}
} else if (pcmk__str_eq(options.api_call, "unregister_rsc", pcmk__str_casei)) {
rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0);
} else if (pcmk__str_eq(options.api_call, "cancel", pcmk__str_casei)) {
rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action,
options.interval_ms);
} else if (pcmk__str_eq(options.api_call, "metadata", pcmk__str_casei)) {
char *output = NULL;
rc = lrmd_conn->cmds->get_metadata(lrmd_conn,
options.class,
options.provider, options.type, &output, 0);
if (rc == pcmk_ok) {
print_result("%s", output);
free(output);
}
} else if (pcmk__str_eq(options.api_call, "list_agents", pcmk__str_casei)) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider);
if (rc > 0) {
print_result("%d agents found", rc);
for (iter = list; iter != NULL; iter = iter->next) {
print_result("%s", iter->val);
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result("API_CALL FAILURE - no agents found");
rc = -1;
}
} else if (pcmk__str_eq(options.api_call, "list_ocf_providers", pcmk__str_casei)) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list);
if (rc > 0) {
print_result("%d providers found", rc);
for (iter = list; iter != NULL; iter = iter->next) {
print_result("%s", iter->val);
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result("API_CALL FAILURE - no providers found");
rc = -1;
}
} else if (pcmk__str_eq(options.api_call, "list_standards", pcmk__str_casei)) {
lrmd_list_t *list = NULL;
lrmd_list_t *iter = NULL;
rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list);
if (rc > 0) {
print_result("%d standards found", rc);
for (iter = list; iter != NULL; iter = iter->next) {
print_result("%s", iter->val);
}
lrmd_list_freeall(list);
rc = 0;
} else {
print_result("API_CALL FAILURE - no providers found");
rc = -1;
}
} else if (pcmk__str_eq(options.api_call, "get_recurring_ops", pcmk__str_casei)) {
GList *op_list = NULL;
GList *op_item = NULL;
rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0,
&op_list);
for (op_item = op_list; op_item != NULL; op_item = op_item->next) {
lrmd_op_info_t *op_info = op_item->data;
print_result("RECURRING_OP: %s_%s_%s timeout=%sms",
op_info->rsc_id, op_info->action,
op_info->interval_ms_s, op_info->timeout_ms_s);
lrmd_free_op_info(op_info);
}
g_list_free(op_list);
} else if (options.api_call) {
print_result("API-CALL FAILURE unknown action '%s'", options.action);
test_exit(CRM_EX_ERROR);
}
if (rc < 0) {
print_result("API-CALL FAILURE for '%s' api_rc:%d",
options.api_call, rc);
test_exit(CRM_EX_ERROR);
}
if (options.api_call && rc == pcmk_ok) {
print_result("API-CALL SUCCESSFUL for '%s'", options.api_call);
if (!options.listen) {
test_exit(CRM_EX_OK);
}
}
if (options.no_wait) {
/* just make the call and exit regardless of anything else. */
test_exit(CRM_EX_OK);
}
return 0;
}
/*!
* \internal
* \brief Generate resource parameters from CIB if none explicitly given
*
* \return Standard Pacemaker return code
*/
static int
generate_params(void)
{
int rc = pcmk_rc_ok;
pcmk_scheduler_t *scheduler = NULL;
xmlNode *cib_xml_copy = NULL;
pcmk_resource_t *rsc = NULL;
GHashTable *params = NULL;
GHashTable *meta = NULL;
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
if (options.params != NULL) {
return pcmk_rc_ok; // User specified parameters explicitly
}
// Retrieve and update CIB
rc = cib__signon_query(NULL, NULL, &cib_xml_copy);
if (rc != pcmk_rc_ok) {
return rc;
}
if (!cli_config_update(&cib_xml_copy, NULL, FALSE)) {
crm_err("Could not update CIB");
return pcmk_rc_cib_corrupt;
}
// Calculate cluster status
scheduler = pe_new_working_set();
if (scheduler == NULL) {
crm_crit("Could not allocate scheduler data");
return ENOMEM;
}
pe__set_working_set_flags(scheduler,
pcmk_sched_no_counts|pcmk_sched_no_compat);
scheduler->input = cib_xml_copy;
scheduler->now = crm_time_new(NULL);
cluster_status(scheduler);
// Find resource in CIB
rsc = pe_find_resource_with_flags(scheduler->resources, options.rsc_id,
pcmk_rsc_match_history
|pcmk_rsc_match_basename);
if (rsc == NULL) {
crm_err("Resource does not exist in config");
pe_free_working_set(scheduler);
return EINVAL;
}
// Add resource instance parameters to options.params
params = pe_rsc_params(rsc, NULL, scheduler);
if (params != NULL) {
g_hash_table_iter_init(&iter, params);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
options.params = lrmd_key_value_add(options.params, key, value);
}
}
// Add resource meta-attributes to options.params
meta = pcmk__strkey_table(free, free);
get_meta_attributes(meta, rsc, NULL, scheduler);
g_hash_table_iter_init(&iter, meta);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
char *crm_name = crm_meta_name(key);
options.params = lrmd_key_value_add(options.params, crm_name, value);
free(crm_name);
}
g_hash_table_destroy(meta);
pe_free_working_set(scheduler);
return rc;
}
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
GOptionContext *context = NULL;
context = pcmk__build_arg_context(args, NULL, group, NULL);
pcmk__add_main_args(context, basic_entries);
pcmk__add_arg_group(context, "api-call", "API Call Options:",
"Parameters for api-call option", api_call_entries);
return context;
}
int
main(int argc, char **argv)
{
GError *error = NULL;
crm_exit_t exit_code = CRM_EX_OK;
crm_trigger_t *trig = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
/* Typically we'd pass all the single character options that take an argument
* as the second parameter here (and there's a bunch of those in this tool).
* However, we control how this program is called so we can just not call it
* in a way where the preprocessing ever matters.
*/
gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
GOptionContext *context = build_arg_context(args, NULL);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
/* We have to use crm_log_init here to set up the logging because there's
* different handling for daemons vs. command line programs, and
* pcmk__cli_init_logging is set up to only handle the latter.
*/
crm_log_init(NULL, LOG_INFO, TRUE, (args->verbosity? TRUE : FALSE), argc,
argv, FALSE);
for (int i = 0; i < args->verbosity; i++) {
crm_bump_log_level(argc, argv);
}
if (!options.listen && pcmk__strcase_any_of(options.api_call, "metadata", "list_agents",
"list_standards", "list_ocf_providers", NULL)) {
options.no_connect = TRUE;
}
if (options.is_running) {
int rc = pcmk_rc_ok;
if (options.rsc_id == NULL) {
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"--is-running requires --rsc-id");
goto done;
}
options.interval_ms = 0;
if (options.timeout == 0) {
options.timeout = 30000;
}
rc = generate_params();
if (rc != pcmk_rc_ok) {
exit_code = pcmk_rc2exitc(rc);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Can not determine resource status: "
"unable to get parameters from CIB");
goto done;
}
options.api_call = "exec";
options.action = PCMK_ACTION_MONITOR;
options.exec_call_opts = lrmd_opt_notify_orig_only;
}
if (!options.api_call && !options.listen) {
exit_code = CRM_EX_USAGE;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Must specify at least one of --api-call, --listen, "
"or --is-running");
goto done;
}
if (options.use_tls) {
lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0);
} else {
lrmd_conn = lrmd_api_new();
}
trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL);
mainloop_set_trigger(trig);
mainloop_add_signal(SIGTERM, test_shutdown);
crm_info("Starting");
mainloop = g_main_loop_new(NULL, FALSE);
g_main_loop_run(mainloop);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
free(key);
free(val);
pcmk__output_and_clear_error(&error, NULL);
return test_exit(exit_code);
}
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index f6a02a080d..ed2c43ccb2 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1,3683 +1,3683 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <pacemaker-fenced.h>
GHashTable *device_list = NULL;
GHashTable *topology = NULL;
static GList *cmd_list = NULL;
static GHashTable *fenced_handlers = NULL;
struct device_search_s {
/* target of fence action */
char *host;
/* requested fence action */
char *action;
/* timeout to use if a device is queried dynamically for possible targets */
int per_device_timeout;
/* number of registered fencing devices at time of request */
int replies_needed;
/* number of device replies received so far */
int replies_received;
/* whether the target is eligible to perform requested action (or off) */
bool allow_suicide;
/* private data to pass to search callback function */
void *user_data;
/* function to call when all replies have been received */
void (*callback) (GList * devices, void *user_data);
/* devices capable of performing requested action (or off if remapping) */
GList *capable;
/* Whether to perform searches that support the action */
uint32_t support_action_only;
};
static gboolean stonith_device_dispatch(gpointer user_data);
static void st_child_done(int pid, const pcmk__action_result_t *result,
void *user_data);
static void search_devices_record_result(struct device_search_s *search, const char *device,
gboolean can_fence);
static int get_agent_metadata(const char *agent, xmlNode **metadata);
static void read_action_metadata(stonith_device_t *device);
static enum fenced_target_by unpack_level_kind(const xmlNode *level);
typedef struct async_command_s {
int id;
int pid;
int fd_stdout;
int options;
int default_timeout; /* seconds */
int timeout; /* seconds */
int start_delay; // seconds (-1 means disable static/random fencing delays)
int delay_id;
char *op;
char *origin;
char *client;
char *client_name;
char *remote_op_id;
char *target;
uint32_t target_nodeid;
char *action;
char *device;
GList *device_list;
GList *next_device_iter; // device_list entry for next device to execute
void *internal_user_data;
void (*done_cb) (int pid, const pcmk__action_result_t *result,
void *user_data);
guint timer_sigterm;
guint timer_sigkill;
/*! If the operation timed out, this is the last signal
* we sent to the process to get it to terminate */
int last_timeout_signo;
stonith_device_t *active_on;
stonith_device_t *activating_on;
} async_command_t;
static xmlNode *construct_async_reply(const async_command_t *cmd,
const pcmk__action_result_t *result);
static gboolean
is_action_required(const char *action, const stonith_device_t *device)
{
return (device != NULL) && device->automatic_unfencing
&& pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none);
}
static int
get_action_delay_max(const stonith_device_t *device, const char *action)
{
const char *value = NULL;
guint delay_max = 0U;
if (!pcmk__is_fencing_action(action)) {
return 0;
}
value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
if (value) {
- pcmk__parse_interval_spec(value, &delay_max);
+ pcmk_parse_interval_spec(value, &delay_max);
delay_max /= 1000;
}
return (int) delay_max;
}
static int
get_action_delay_base(const stonith_device_t *device, const char *action,
const char *target)
{
char *hash_value = NULL;
guint delay_base = 0U;
if (!pcmk__is_fencing_action(action)) {
return 0;
}
hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);
if (hash_value) {
char *value = strdup(hash_value);
char *valptr = value;
CRM_ASSERT(value != NULL);
if (target != NULL) {
for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) {
char *mapval = strchr(val, ':');
if (mapval == NULL || mapval[1] == 0) {
crm_err("pcmk_delay_base: empty value in mapping", val);
continue;
}
if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) {
value = mapval + 1;
crm_debug("pcmk_delay_base mapped to %s for %s",
value, target);
break;
}
}
}
if (strchr(value, ':') == 0) {
- pcmk__parse_interval_spec(value, &delay_base);
+ pcmk_parse_interval_spec(value, &delay_base);
delay_base /= 1000;
}
free(valptr);
}
return (int) delay_base;
}
/*!
* \internal
* \brief Override STONITH timeout with pcmk_*_timeout if available
*
* \param[in] device STONITH device to use
* \param[in] action STONITH action name
* \param[in] default_timeout Timeout to use if device does not have
* a pcmk_*_timeout parameter for action
*
* \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
* \note For consistency, it would be nice if reboot/off/on timeouts could be
* set the same way as start/stop/monitor timeouts, i.e. with an
* <operation> entry in the fencing resource configuration. However that
* is insufficient because fencing devices may be registered directly via
* the fencer's register_device() API instead of going through the CIB
* (e.g. stonith_admin uses it for its -R option, and the executor uses it
* to ensure a device is registered when a command is issued). As device
* properties, pcmk_*_timeout parameters can be grabbed by the fencer when
* the device is registered, whether by CIB change or API call.
*/
static int
get_action_timeout(const stonith_device_t *device, const char *action,
int default_timeout)
{
if (action && device && device->params) {
char buffer[64] = { 0, };
const char *value = NULL;
/* If "reboot" was requested but the device does not support it,
* we will remap to "off", so check timeout for "off" instead
*/
if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)
&& !pcmk_is_set(device->flags, st_device_supports_reboot)) {
crm_trace("%s doesn't support reboot, using timeout for off instead",
device->id);
action = PCMK_ACTION_OFF;
}
/* If the device config specified an action-specific timeout, use it */
snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
value = g_hash_table_lookup(device->params, buffer);
if (value) {
return atoi(value);
}
}
return default_timeout;
}
/*!
* \internal
* \brief Get the currently executing device for a fencing operation
*
* \param[in] cmd Fencing operation to check
*
* \return Currently executing device for \p cmd if any, otherwise NULL
*/
static stonith_device_t *
cmd_device(const async_command_t *cmd)
{
if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) {
return NULL;
}
return g_hash_table_lookup(device_list, cmd->device);
}
/*!
* \internal
* \brief Return the configured reboot action for a given device
*
* \param[in] device_id Device ID
*
* \return Configured reboot action for \p device_id
*/
const char *
fenced_device_reboot_action(const char *device_id)
{
const char *action = NULL;
if ((device_list != NULL) && (device_id != NULL)) {
stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
if ((device != NULL) && (device->params != NULL)) {
action = g_hash_table_lookup(device->params, "pcmk_reboot_action");
}
}
return pcmk__s(action, PCMK_ACTION_REBOOT);
}
/*!
* \internal
* \brief Check whether a given device supports the "on" action
*
* \param[in] device_id Device ID
*
* \return true if \p device_id supports "on", otherwise false
*/
bool
fenced_device_supports_on(const char *device_id)
{
if ((device_list != NULL) && (device_id != NULL)) {
stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
if (device != NULL) {
return pcmk_is_set(device->flags, st_device_supports_on);
}
}
return false;
}
static void
free_async_command(async_command_t * cmd)
{
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
cmd_list = g_list_remove(cmd_list, cmd);
g_list_free_full(cmd->device_list, free);
free(cmd->device);
free(cmd->action);
free(cmd->target);
free(cmd->remote_op_id);
free(cmd->client);
free(cmd->client_name);
free(cmd->origin);
free(cmd->op);
free(cmd);
}
/*!
* \internal
* \brief Create a new asynchronous fencing operation from request XML
*
* \param[in] msg Fencing request XML (from IPC or CPG)
*
* \return Newly allocated fencing operation on success, otherwise NULL
*
* \note This asserts on memory errors, so a NULL return indicates an
* unparseable message.
*/
static async_command_t *
create_async_command(xmlNode *msg)
{
xmlNode *op = NULL;
async_command_t *cmd = NULL;
if (msg == NULL) {
return NULL;
}
op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
if (op == NULL) {
return NULL;
}
cmd = calloc(1, sizeof(async_command_t));
CRM_ASSERT(cmd != NULL);
// All messages must include these
cmd->action = crm_element_value_copy(op, F_STONITH_ACTION);
cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) {
free_async_command(cmd);
return NULL;
}
crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
cmd->timeout = cmd->default_timeout;
cmd->origin = crm_element_value_copy(msg, F_ORIG);
cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
cmd->target = crm_element_value_copy(op, F_STONITH_TARGET);
cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE);
cmd->done_cb = st_child_done;
// Track in global command list
cmd_list = g_list_append(cmd_list, cmd);
return cmd;
}
static int
get_action_limit(stonith_device_t * device)
{
const char *value = NULL;
int action_limit = 1;
value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT);
if ((value == NULL)
|| (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
|| (action_limit == 0)) {
action_limit = 1;
}
return action_limit;
}
static int
get_active_cmds(stonith_device_t * device)
{
int counter = 0;
GList *gIter = NULL;
GList *gIterNext = NULL;
CRM_CHECK(device != NULL, return 0);
for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
async_command_t *cmd = gIter->data;
gIterNext = gIter->next;
if (cmd->active_on == device) {
counter++;
}
}
return counter;
}
static void
fork_cb(int pid, void *user_data)
{
async_command_t *cmd = (async_command_t *) user_data;
stonith_device_t * device =
/* in case of a retry we've done the move from
activating_on to active_on already
*/
cmd->activating_on?cmd->activating_on:cmd->active_on;
CRM_ASSERT(device);
crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
cmd->action, pid,
((cmd->target == NULL)? "" : " targeting "),
pcmk__s(cmd->target, ""), device->id, cmd->timeout);
cmd->active_on = device;
cmd->activating_on = NULL;
}
static int
get_agent_metadata_cb(gpointer data) {
stonith_device_t *device = data;
guint period_ms;
switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
case pcmk_rc_ok:
if (device->agent_metadata) {
read_action_metadata(device);
stonith__device_parameter_flags(&(device->flags), device->id,
device->agent_metadata);
}
return G_SOURCE_REMOVE;
case EAGAIN:
period_ms = pcmk__mainloop_timer_get_period(device->timer);
if (period_ms < 160 * 1000) {
mainloop_timer_set_period(device->timer, 2 * period_ms);
}
return G_SOURCE_CONTINUE;
default:
return G_SOURCE_REMOVE;
}
}
/*!
* \internal
* \brief Call a command's action callback for an internal (not library) result
*
* \param[in,out] cmd Command to report result for
* \param[in] execution_status Execution status to use for result
* \param[in] exit_status Exit status to use for result
* \param[in] exit_reason Exit reason to use for result
*/
static void
report_internal_result(async_command_t *cmd, int exit_status,
int execution_status, const char *exit_reason)
{
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
pcmk__set_result(&result, exit_status, execution_status, exit_reason);
cmd->done_cb(0, &result, cmd);
pcmk__reset_result(&result);
}
static gboolean
stonith_device_execute(stonith_device_t * device)
{
int exec_rc = 0;
const char *action_str = NULL;
const char *host_arg = NULL;
async_command_t *cmd = NULL;
stonith_action_t *action = NULL;
int active_cmds = 0;
int action_limit = 0;
GList *gIter = NULL;
GList *gIterNext = NULL;
CRM_CHECK(device != NULL, return FALSE);
active_cmds = get_active_cmds(device);
action_limit = get_action_limit(device);
if (action_limit > -1 && active_cmds >= action_limit) {
crm_trace("%s is over its action limit of %d (%u active action%s)",
device->id, action_limit, active_cmds,
pcmk__plural_s(active_cmds));
return TRUE;
}
for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
async_command_t *pending_op = gIter->data;
gIterNext = gIter->next;
if (pending_op && pending_op->delay_id) {
crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
"waiting for start delay of %ds",
pending_op->action,
((pending_op->target == NULL)? "" : " targeting "),
pcmk__s(pending_op->target, ""),
device->id, pending_op->start_delay);
continue;
}
device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
g_list_free_1(gIter);
cmd = pending_op;
break;
}
if (cmd == NULL) {
crm_trace("No actions using %s are needed", device->id);
return TRUE;
}
if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
if (pcmk__is_fencing_action(cmd->action)) {
if (node_does_watchdog_fencing(stonith_our_uname)) {
pcmk__panic(__func__);
goto done;
}
} else {
crm_info("Faking success for %s watchdog operation", cmd->action);
report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
goto done;
}
}
#if SUPPORT_CIBSECRETS
exec_rc = pcmk__substitute_secrets(device->id, device->params);
if (exec_rc != pcmk_rc_ok) {
if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) {
crm_info("Proceeding with stop operation for %s "
"despite being unable to load CIB secrets (%s)",
device->id, pcmk_rc_str(exec_rc));
} else {
crm_err("Considering %s unconfigured "
"because unable to load CIB secrets: %s",
device->id, pcmk_rc_str(exec_rc));
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
"Failed to get CIB secrets");
goto done;
}
}
#endif
action_str = cmd->action;
if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none)
&& !pcmk_is_set(device->flags, st_device_supports_reboot)) {
crm_notice("Remapping 'reboot' action%s%s using %s to 'off' "
"because agent '%s' does not support reboot",
((cmd->target == NULL)? "" : " targeting "),
pcmk__s(cmd->target, ""), device->id, device->agent);
action_str = PCMK_ACTION_OFF;
}
if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
host_arg = "port";
} else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
host_arg = "plug";
}
action = stonith__action_create(device->agent, action_str, cmd->target,
cmd->target_nodeid, cmd->timeout,
device->params, device->aliases, host_arg);
/* for async exec, exec_rc is negative for early error exit
otherwise handling of success/errors is done via callbacks */
cmd->activating_on = device;
exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
fork_cb);
if (exec_rc < 0) {
cmd->activating_on = NULL;
cmd->done_cb(0, stonith__action_result(action), cmd);
stonith__destroy_action(action);
}
done:
/* Device might get triggered to work by multiple fencing commands
* simultaneously. Trigger the device again to make sure any
* remaining concurrent commands get executed. */
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
return TRUE;
}
static gboolean
stonith_device_dispatch(gpointer user_data)
{
return stonith_device_execute(user_data);
}
static gboolean
start_delay_helper(gpointer data)
{
async_command_t *cmd = data;
stonith_device_t *device = cmd_device(cmd);
cmd->delay_id = 0;
if (device) {
mainloop_set_trigger(device->work);
}
return FALSE;
}
static void
schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
{
int delay_max = 0;
int delay_base = 0;
int requested_delay = cmd->start_delay;
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(device != NULL, return);
if (cmd->device) {
free(cmd->device);
}
if (device->include_nodeid && (cmd->target != NULL)) {
crm_node_t *node = crm_get_peer(0, cmd->target);
cmd->target_nodeid = node->id;
}
cmd->device = strdup(device->id);
cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
if (cmd->remote_op_id) {
crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
"with op id %.8s and timeout %ds",
cmd->action,
(cmd->target == NULL)? "" : " targeting ",
pcmk__s(cmd->target, ""),
device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
} else {
crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
cmd->action,
(cmd->target == NULL)? "" : " targeting ",
pcmk__s(cmd->target, ""),
device->id, cmd->client, cmd->timeout);
}
device->pending_ops = g_list_append(device->pending_ops, cmd);
mainloop_set_trigger(device->work);
// Value -1 means disable any static/random fencing delays
if (requested_delay < 0) {
return;
}
delay_max = get_action_delay_max(device, cmd->action);
delay_base = get_action_delay_base(device, cmd->action, cmd->target);
if (delay_max == 0) {
delay_max = delay_base;
}
if (delay_max < delay_base) {
crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
"(limiting to maximum delay)",
delay_base, delay_max, cmd->action, device->id);
delay_base = delay_max;
}
if (delay_max > 0) {
// coverity[dontcall] It doesn't matter here if rand() is predictable
cmd->start_delay +=
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
+ delay_base;
}
if (cmd->start_delay > 0) {
crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS
" timeout=%ds requested_delay=%ds base=%ds max=%ds",
cmd->action,
(cmd->target == NULL)? "" : " targeting ",
pcmk__s(cmd->target, ""),
device->id, cmd->start_delay, cmd->timeout,
requested_delay, delay_base, delay_max);
cmd->delay_id =
g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
}
}
static void
free_device(gpointer data)
{
GList *gIter = NULL;
stonith_device_t *device = data;
g_hash_table_destroy(device->params);
g_hash_table_destroy(device->aliases);
for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
async_command_t *cmd = gIter->data;
crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Device was removed before action could be executed");
}
g_list_free(device->pending_ops);
g_list_free_full(device->targets, free);
if (device->timer) {
mainloop_timer_stop(device->timer);
mainloop_timer_del(device->timer);
}
mainloop_destroy_trigger(device->work);
free_xml(device->agent_metadata);
free(device->namespace);
if (device->on_target_actions != NULL) {
g_string_free(device->on_target_actions, TRUE);
}
free(device->agent);
free(device->id);
free(device);
}
void free_device_list(void)
{
if (device_list != NULL) {
g_hash_table_destroy(device_list);
device_list = NULL;
}
}
void
init_device_list(void)
{
if (device_list == NULL) {
device_list = pcmk__strkey_table(NULL, free_device);
}
}
static GHashTable *
build_port_aliases(const char *hostmap, GList ** targets)
{
char *name = NULL;
int last = 0, lpc = 0, max = 0, added = 0;
GHashTable *aliases = pcmk__strikey_table(free, free);
if (hostmap == NULL) {
return aliases;
}
max = strlen(hostmap);
for (; lpc <= max; lpc++) {
switch (hostmap[lpc]) {
/* Skip escaped chars */
case '\\':
lpc++;
break;
/* Assignment chars */
case '=':
case ':':
if (lpc > last) {
free(name);
name = calloc(1, 1 + lpc - last);
memcpy(name, hostmap + last, lpc - last);
}
last = lpc + 1;
break;
/* Delimeter chars */
/* case ',': Potentially used to specify multiple ports */
case 0:
case ';':
case ' ':
case '\t':
if (name) {
char *value = NULL;
int k = 0;
value = calloc(1, 1 + lpc - last);
memcpy(value, hostmap + last, lpc - last);
for (int i = 0; value[i] != '\0'; i++) {
if (value[i] != '\\') {
value[k++] = value[i];
}
}
value[k] = '\0';
crm_debug("Adding alias '%s'='%s'", name, value);
g_hash_table_replace(aliases, name, value);
if (targets) {
*targets = g_list_append(*targets, strdup(value));
}
value = NULL;
name = NULL;
added++;
} else if (lpc > last) {
crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
}
last = lpc + 1;
break;
}
if (hostmap[lpc] == 0) {
break;
}
}
if (added == 0) {
crm_info("No host mappings detected in '%s'", hostmap);
}
free(name);
return aliases;
}
GHashTable *metadata_cache = NULL;
void
free_metadata_cache(void) {
if (metadata_cache != NULL) {
g_hash_table_destroy(metadata_cache);
metadata_cache = NULL;
}
}
static void
init_metadata_cache(void) {
if (metadata_cache == NULL) {
metadata_cache = pcmk__strkey_table(free, free);
}
}
int
get_agent_metadata(const char *agent, xmlNode ** metadata)
{
char *buffer = NULL;
if (metadata == NULL) {
return EINVAL;
}
*metadata = NULL;
if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
return pcmk_rc_ok;
}
init_metadata_cache();
buffer = g_hash_table_lookup(metadata_cache, agent);
if (buffer == NULL) {
stonith_t *st = stonith_api_new();
int rc;
if (st == NULL) {
crm_warn("Could not get agent meta-data: "
"API memory allocation failed");
return EAGAIN;
}
rc = st->cmds->metadata(st, st_opt_sync_call, agent,
NULL, &buffer, 10);
stonith_api_delete(st);
if (rc || !buffer) {
crm_err("Could not retrieve metadata for fencing agent %s", agent);
return EAGAIN;
}
g_hash_table_replace(metadata_cache, strdup(agent), buffer);
}
*metadata = string2xml(buffer);
return pcmk_rc_ok;
}
static gboolean
is_nodeid_required(xmlNode * xml)
{
xmlXPathObjectPtr xpath = NULL;
if (stand_alone) {
return FALSE;
}
if (!xml) {
return FALSE;
}
xpath = xpath_search(xml, "//parameter[@name='nodeid']");
if (numXpathResults(xpath) <= 0) {
freeXpathObject(xpath);
return FALSE;
}
freeXpathObject(xpath);
return TRUE;
}
static void
read_action_metadata(stonith_device_t *device)
{
xmlXPathObjectPtr xpath = NULL;
int max = 0;
int lpc = 0;
if (device->agent_metadata == NULL) {
return;
}
xpath = xpath_search(device->agent_metadata, "//action");
max = numXpathResults(xpath);
if (max <= 0) {
freeXpathObject(xpath);
return;
}
for (lpc = 0; lpc < max; lpc++) {
const char *action = NULL;
xmlNode *match = getXpathResult(xpath, lpc);
CRM_LOG_ASSERT(match != NULL);
if(match == NULL) { continue; };
action = crm_element_value(match, "name");
if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
stonith__set_device_flags(device->flags, device->id,
st_device_supports_list);
} else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) {
stonith__set_device_flags(device->flags, device->id,
st_device_supports_status);
} else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
stonith__set_device_flags(device->flags, device->id,
st_device_supports_reboot);
} else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
/* "automatic" means the cluster will unfence node when it joins */
/* "required" is a deprecated synonym for "automatic" */
if (pcmk__xe_attr_is_true(match, "automatic") || pcmk__xe_attr_is_true(match, "required")) {
device->automatic_unfencing = TRUE;
}
stonith__set_device_flags(device->flags, device->id,
st_device_supports_on);
}
if ((action != NULL) && pcmk__xe_attr_is_true(match, "on_target")) {
pcmk__add_word(&(device->on_target_actions), 64, action);
}
}
freeXpathObject(xpath);
}
/*!
* \internal
* \brief Set a pcmk_*_action parameter if not already set
*
* \param[in,out] params Device parameters
* \param[in] action Name of action
* \param[in] value Value to use if action is not already set
*/
static void
map_action(GHashTable *params, const char *action, const char *value)
{
char *key = crm_strdup_printf("pcmk_%s_action", action);
if (g_hash_table_lookup(params, key)) {
crm_warn("Ignoring %s='%s', see %s instead",
STONITH_ATTR_ACTION_OP, value, key);
free(key);
} else {
crm_warn("Mapping %s='%s' to %s='%s'",
STONITH_ATTR_ACTION_OP, value, key, value);
g_hash_table_insert(params, key, strdup(value));
}
}
/*!
* \internal
* \brief Create device parameter table from XML
*
* \param[in] name Device name (used for logging only)
* \param[in] dev XML containing device parameters
*/
static GHashTable *
xml2device_params(const char *name, const xmlNode *dev)
{
GHashTable *params = xml2list(dev);
const char *value;
/* Action should never be specified in the device configuration,
* but we support it for users who are familiar with other software
* that worked that way.
*/
value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
if (value != NULL) {
crm_warn("%s has '%s' parameter, which should never be specified in configuration",
name, STONITH_ATTR_ACTION_OP);
if (*value == '\0') {
crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) {
crm_warn("Ignoring %s='reboot' (see " PCMK_OPT_STONITH_ACTION
" cluster property instead)",
STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, PCMK_ACTION_OFF) == 0) {
map_action(params, PCMK_ACTION_REBOOT, value);
} else {
map_action(params, PCMK_ACTION_OFF, value);
map_action(params, PCMK_ACTION_REBOOT, value);
}
g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
}
return params;
}
static const char *
target_list_type(stonith_device_t * dev)
{
const char *check_type = NULL;
check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK);
if (check_type == NULL) {
if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) {
check_type = "static-list";
} else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) {
check_type = "static-list";
} else if (pcmk_is_set(dev->flags, st_device_supports_list)) {
check_type = "dynamic-list";
} else if (pcmk_is_set(dev->flags, st_device_supports_status)) {
check_type = "status";
} else {
check_type = PCMK__VALUE_NONE;
}
}
return check_type;
}
static stonith_device_t *
build_device_from_xml(xmlNode *dev)
{
const char *value;
stonith_device_t *device = NULL;
char *agent = crm_element_value_copy(dev, "agent");
CRM_CHECK(agent != NULL, return device);
device = calloc(1, sizeof(stonith_device_t));
CRM_CHECK(device != NULL, {free(agent); return device;});
device->id = crm_element_value_copy(dev, XML_ATTR_ID);
device->agent = agent;
device->namespace = crm_element_value_copy(dev, "namespace");
device->params = xml2device_params(device->id, dev);
value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
if (value) {
device->targets = stonith__parse_targets(value);
}
value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
device->aliases = build_port_aliases(value, &(device->targets));
value = target_list_type(device);
if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) {
/* Other than "static-list", dev-> targets is unnecessary. */
g_list_free_full(device->targets, free);
device->targets = NULL;
}
switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
case pcmk_rc_ok:
if (device->agent_metadata) {
read_action_metadata(device);
stonith__device_parameter_flags(&(device->flags), device->id,
device->agent_metadata);
}
break;
case EAGAIN:
if (device->timer == NULL) {
device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
TRUE, get_agent_metadata_cb, device);
}
if (!mainloop_timer_running(device->timer)) {
mainloop_timer_start(device->timer);
}
break;
default:
break;
}
value = g_hash_table_lookup(device->params, "nodeid");
if (!value) {
device->include_nodeid = is_nodeid_required(device->agent_metadata);
}
value = crm_element_value(dev, "rsc_provides");
if (pcmk__str_eq(value, PCMK__VALUE_UNFENCING, pcmk__str_casei)) {
device->automatic_unfencing = TRUE;
}
if (is_action_required(PCMK_ACTION_ON, device)) {
crm_info("Fencing device '%s' requires unfencing", device->id);
}
if (device->on_target_actions != NULL) {
crm_info("Fencing device '%s' requires actions (%s) to be executed "
"on target", device->id,
(const char *) device->on_target_actions->str);
}
device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
/* TODO: Hook up priority */
return device;
}
static void
schedule_internal_command(const char *origin,
stonith_device_t * device,
const char *action,
const char *target,
int timeout,
void *internal_user_data,
void (*done_cb) (int pid,
const pcmk__action_result_t *result,
void *user_data))
{
async_command_t *cmd = NULL;
cmd = calloc(1, sizeof(async_command_t));
cmd->id = -1;
cmd->default_timeout = timeout ? timeout : 60;
cmd->timeout = cmd->default_timeout;
cmd->action = strdup(action);
pcmk__str_update(&cmd->target, target);
cmd->device = strdup(device->id);
cmd->origin = strdup(origin);
cmd->client = strdup(crm_system_name);
cmd->client_name = strdup(crm_system_name);
cmd->internal_user_data = internal_user_data;
cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
schedule_stonith_command(cmd, device);
}
// Fence agent status commands use custom exit status codes
enum fence_status_code {
fence_status_invalid = -1,
fence_status_active = 0,
fence_status_unknown = 1,
fence_status_inactive = 2,
};
static void
status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd_device(cmd);
gboolean can = FALSE;
free_async_command(cmd);
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
if (result->execution_status != PCMK_EXEC_DONE) {
crm_warn("Assuming %s cannot fence %s "
"because status could not be executed: %s%s%s%s",
dev->id, search->host,
pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
search_devices_record_result(search, dev->id, FALSE);
return;
}
switch (result->exit_status) {
case fence_status_unknown:
crm_trace("%s reported it cannot fence %s", dev->id, search->host);
break;
case fence_status_active:
case fence_status_inactive:
crm_trace("%s reported it can fence %s", dev->id, search->host);
can = TRUE;
break;
default:
crm_warn("Assuming %s cannot fence %s "
"(status returned unknown code %d)",
dev->id, search->host, result->exit_status);
break;
}
search_devices_record_result(search, dev->id, can);
}
static void
dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
void *user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd_device(cmd);
gboolean can_fence = FALSE;
free_async_command(cmd);
/* Host/alias must be in the list output to be eligible to be fenced
*
* Will cause problems if down'd nodes aren't listed or (for virtual nodes)
* if the guest is still listed despite being moved to another machine
*/
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
if (pcmk__result_ok(result)) {
crm_info("Refreshing target list for %s", dev->id);
g_list_free_full(dev->targets, free);
dev->targets = stonith__parse_targets(result->action_stdout);
dev->targets_age = time(NULL);
} else if (dev->targets != NULL) {
if (result->execution_status == PCMK_EXEC_DONE) {
crm_info("Reusing most recent target list for %s "
"because list returned error code %d",
dev->id, result->exit_status);
} else {
crm_info("Reusing most recent target list for %s "
"because list could not be executed: %s%s%s%s",
dev->id, pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
}
} else { // We have never successfully executed list
if (result->execution_status == PCMK_EXEC_DONE) {
crm_warn("Assuming %s cannot fence %s "
"because list returned error code %d",
dev->id, search->host, result->exit_status);
} else {
crm_warn("Assuming %s cannot fence %s "
"because list could not be executed: %s%s%s%s",
dev->id, search->host,
pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
}
/* Fall back to pcmk_host_check="status" if the user didn't explicitly
* specify "dynamic-list".
*/
if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) {
crm_notice("Switching to pcmk_host_check='status' for %s", dev->id);
g_hash_table_replace(dev->params, strdup(PCMK_STONITH_HOST_CHECK),
strdup("status"));
}
}
if (dev->targets) {
const char *alias = g_hash_table_lookup(dev->aliases, search->host);
if (!alias) {
alias = search->host;
}
if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) {
can_fence = TRUE;
}
}
search_devices_record_result(search, dev->id, can_fence);
}
/*!
* \internal
* \brief Returns true if any key in first is not in second or second has a different value for key
*/
static int
device_params_diff(GHashTable *first, GHashTable *second) {
char *key = NULL;
char *value = NULL;
GHashTableIter gIter;
g_hash_table_iter_init(&gIter, first);
while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
if(strstr(key, "CRM_meta") == key) {
continue;
} else if(strcmp(key, "crm_feature_set") == 0) {
continue;
} else {
char *other_value = g_hash_table_lookup(second, key);
if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
crm_trace("Different value for %s: %s != %s", key, other_value, value);
return 1;
}
}
}
return 0;
}
/*!
* \internal
* \brief Checks to see if an identical device already exists in the device_list
*/
static stonith_device_t *
device_has_duplicate(const stonith_device_t *device)
{
stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
if (!dup) {
crm_trace("No match for %s", device->id);
return NULL;
} else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
crm_trace("Different agent: %s != %s", dup->agent, device->agent);
return NULL;
}
/* Use calculate_operation_digest() here? */
if (device_params_diff(device->params, dup->params) ||
device_params_diff(dup->params, device->params)) {
return NULL;
}
crm_trace("Match");
return dup;
}
int
stonith_device_register(xmlNode *dev, gboolean from_cib)
{
stonith_device_t *dup = NULL;
stonith_device_t *device = build_device_from_xml(dev);
guint ndevices = 0;
int rv = pcmk_ok;
CRM_CHECK(device != NULL, return -ENOMEM);
/* do we have a watchdog-device? */
if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
if (stonith_watchdog_timeout_ms <= 0) {
crm_err("Ignoring watchdog fence device without "
PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set.");
rv = -ENODEV;
/* fall through to cleanup & return */
} else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
crm_err("Ignoring watchdog fence device with unknown "
"agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
device->agent?device->agent:"");
rv = -ENODEV;
/* fall through to cleanup & return */
} else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
pcmk__str_none)) {
crm_err("Ignoring watchdog fence device "
"named %s !='"STONITH_WATCHDOG_ID"'.",
device->id?device->id:"");
rv = -ENODEV;
/* fall through to cleanup & return */
} else {
if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
pcmk__str_none)) {
/* this either has an empty list or the targets
configured for watchdog-fencing
*/
g_list_free_full(stonith_watchdog_targets, free);
stonith_watchdog_targets = device->targets;
device->targets = NULL;
}
if (node_does_watchdog_fencing(stonith_our_uname)) {
g_list_free_full(device->targets, free);
device->targets = stonith__parse_targets(stonith_our_uname);
g_hash_table_replace(device->params,
strdup(PCMK_STONITH_HOST_LIST),
strdup(stonith_our_uname));
/* proceed as with any other stonith-device */
break;
}
crm_debug("Skip registration of watchdog fence device on node not in host-list.");
/* cleanup and fall through to more cleanup and return */
device->targets = NULL;
stonith_device_remove(device->id, from_cib);
}
free_device(device);
return rv;
} while (0);
dup = device_has_duplicate(device);
if (dup) {
ndevices = g_hash_table_size(device_list);
crm_debug("Device '%s' already in device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
free_device(device);
device = dup;
dup = g_hash_table_lookup(device_list, device->id);
dup->dirty = FALSE;
} else {
stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
if (from_cib && old && old->api_registered) {
/* If the cib is writing over an entry that is shared with a stonith client,
* copy any pending ops that currently exist on the old entry to the new one.
* Otherwise the pending ops will be reported as failures
*/
crm_info("Overwriting existing entry for %s from CIB", device->id);
device->pending_ops = old->pending_ops;
device->api_registered = TRUE;
old->pending_ops = NULL;
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
}
g_hash_table_replace(device_list, device->id, device);
ndevices = g_hash_table_size(device_list);
crm_notice("Added '%s' to device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
}
if (from_cib) {
device->cib_registered = TRUE;
} else {
device->api_registered = TRUE;
}
return pcmk_ok;
}
void
stonith_device_remove(const char *id, bool from_cib)
{
stonith_device_t *device = g_hash_table_lookup(device_list, id);
guint ndevices = 0;
if (!device) {
ndevices = g_hash_table_size(device_list);
crm_info("Device '%s' not found (%d active device%s)",
id, ndevices, pcmk__plural_s(ndevices));
return;
}
if (from_cib) {
device->cib_registered = FALSE;
} else {
device->verified = FALSE;
device->api_registered = FALSE;
}
if (!device->cib_registered && !device->api_registered) {
g_hash_table_remove(device_list, id);
ndevices = g_hash_table_size(device_list);
crm_info("Removed '%s' from device list (%d active device%s)",
id, ndevices, pcmk__plural_s(ndevices));
} else {
crm_trace("Not removing '%s' from device list (%d active) because "
"still registered via:%s%s",
id, g_hash_table_size(device_list),
(device->cib_registered? " cib" : ""),
(device->api_registered? " api" : ""));
}
}
/*!
* \internal
* \brief Return the number of stonith levels registered for a node
*
* \param[in] tp Node's topology table entry
*
* \return Number of non-NULL levels in topology entry
* \note This function is used only for log messages.
*/
static int
count_active_levels(const stonith_topology_t *tp)
{
int lpc = 0;
int count = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
count++;
}
}
return count;
}
static void
free_topology_entry(gpointer data)
{
stonith_topology_t *tp = data;
int lpc = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
g_list_free_full(tp->levels[lpc], free);
}
}
free(tp->target);
free(tp->target_value);
free(tp->target_pattern);
free(tp->target_attribute);
free(tp);
}
void
free_topology_list(void)
{
if (topology != NULL) {
g_hash_table_destroy(topology);
topology = NULL;
}
}
void
init_topology_list(void)
{
if (topology == NULL) {
topology = pcmk__strkey_table(NULL, free_topology_entry);
}
}
char *
stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
{
if (mode == fenced_target_by_unknown) {
mode = unpack_level_kind(level);
}
switch (mode) {
case fenced_target_by_name:
return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET);
case fenced_target_by_pattern:
return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
case fenced_target_by_attribute:
return crm_strdup_printf("%s=%s",
crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE),
crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE));
default:
return crm_strdup_printf("unknown-%s", ID(level));
}
}
/*!
* \internal
* \brief Parse target identification from topology level XML
*
* \param[in] level Topology level XML to parse
*
* \return How to identify target of \p level
*/
static enum fenced_target_by
unpack_level_kind(const xmlNode *level)
{
if (crm_element_value(level, XML_ATTR_STONITH_TARGET) != NULL) {
return fenced_target_by_name;
}
if (crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN) != NULL) {
return fenced_target_by_pattern;
}
if (!stand_alone /* if standalone, there's no attribute manager */
&& (crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) != NULL)
&& (crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) != NULL)) {
return fenced_target_by_attribute;
}
return fenced_target_by_unknown;
}
static stonith_key_value_t *
parse_device_list(const char *devices)
{
int lpc = 0;
int max = 0;
int last = 0;
stonith_key_value_t *output = NULL;
if (devices == NULL) {
return output;
}
max = strlen(devices);
for (lpc = 0; lpc <= max; lpc++) {
if (devices[lpc] == ',' || devices[lpc] == 0) {
char *line = strndup(devices + last, lpc - last);
output = stonith_key_value_add(output, NULL, line);
free(line);
last = lpc + 1;
}
}
return output;
}
/*!
* \internal
* \brief Unpack essential information from topology request XML
*
* \param[in] xml Request XML to search
* \param[out] mode If not NULL, where to store level kind
* \param[out] target If not NULL, where to store representation of target
* \param[out] id If not NULL, where to store level number
* \param[out] desc If not NULL, where to store log-friendly level description
*
* \return Topology level XML from within \p xml, or NULL if not found
* \note The caller is responsible for freeing \p *target and \p *desc if set.
*/
static xmlNode *
unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
int *id, char **desc)
{
enum fenced_target_by local_mode = fenced_target_by_unknown;
char *local_target = NULL;
int local_id = 0;
/* The level element can be the top element or lower. If top level, don't
* search by xpath, because it might give multiple hits if the XML is the
* CIB.
*/
if ((xml != NULL) && !pcmk__xe_is(xml, XML_TAG_FENCING_LEVEL)) {
xml = get_xpath_object("//" XML_TAG_FENCING_LEVEL, xml, LOG_WARNING);
}
if (xml == NULL) {
if (desc != NULL) {
*desc = crm_strdup_printf("missing");
}
} else {
local_mode = unpack_level_kind(xml);
local_target = stonith_level_key(xml, local_mode);
crm_element_value_int(xml, XML_ATTR_STONITH_INDEX, &local_id);
if (desc != NULL) {
*desc = crm_strdup_printf("%s[%d]", local_target, local_id);
}
}
if (mode != NULL) {
*mode = local_mode;
}
if (id != NULL) {
*id = local_id;
}
if (target != NULL) {
*target = local_target;
} else {
free(local_target);
}
return xml;
}
/*!
* \internal
* \brief Register a fencing topology level for a target
*
* Given an XML request specifying the target name, level index, and device IDs
* for the level, this will create an entry for the target in the global topology
* table if one does not already exist, then append the specified device IDs to
* the entry's device list for the specified level.
*
* \param[in] msg XML request for STONITH level registration
* \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
* \param[out] result Where to set result of registration
*/
void
fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
{
int id = 0;
xmlNode *level;
enum fenced_target_by mode;
char *target;
stonith_topology_t *tp;
stonith_key_value_t *dIter = NULL;
stonith_key_value_t *devices = NULL;
CRM_CHECK((msg != NULL) && (result != NULL), return);
level = unpack_level_request(msg, &mode, &target, &id, desc);
if (level == NULL) {
fenced_set_protocol_error(result);
return;
}
// Ensure an ID was given (even the client API adds an ID)
if (pcmk__str_empty(ID(level))) {
crm_warn("Ignoring registration for topology level without ID");
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Topology level is invalid without ID");
return;
}
// Ensure a valid target was specified
if (mode == fenced_target_by_unknown) {
crm_warn("Ignoring registration for topology level '%s' "
"without valid target", ID(level));
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid target for topology level '%s'",
ID(level));
return;
}
// Ensure level ID is in allowed range
if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
crm_warn("Ignoring topology registration for %s with invalid level %d",
target, id);
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid level number '%s' for topology level '%s'",
pcmk__s(crm_element_value(level,
XML_ATTR_STONITH_INDEX),
""),
ID(level));
return;
}
/* Find or create topology table entry */
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
tp = calloc(1, sizeof(stonith_topology_t));
if (tp == NULL) {
pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
strerror(ENOMEM));
free(target);
return;
}
tp->kind = mode;
tp->target = target;
tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
g_hash_table_replace(topology, tp->target, tp);
crm_trace("Added %s (%d) to the topology (%d active entries)",
target, (int) mode, g_hash_table_size(topology));
} else {
free(target);
}
if (tp->levels[id] != NULL) {
crm_info("Adding to the existing %s[%d] topology entry",
tp->target, id);
}
devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES));
for (dIter = devices; dIter; dIter = dIter->next) {
const char *device = dIter->value;
crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
}
stonith_key_value_freeall(devices, 1, 1);
{
int nlevels = count_active_levels(tp);
crm_info("Target %s has %d active fencing level%s",
tp->target, nlevels, pcmk__plural_s(nlevels));
}
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
/*!
* \internal
* \brief Unregister a fencing topology level for a target
*
* Given an XML request specifying the target name and level index (or 0 for all
* levels), this will remove any corresponding entry for the target from the
* global topology table.
*
* \param[in] msg XML request for STONITH level registration
* \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
* \param[out] result Where to set result of unregistration
*/
void
fenced_unregister_level(xmlNode *msg, char **desc,
pcmk__action_result_t *result)
{
int id = -1;
stonith_topology_t *tp;
char *target;
xmlNode *level = NULL;
CRM_CHECK(result != NULL, return);
level = unpack_level_request(msg, NULL, &target, &id, desc);
if (level == NULL) {
fenced_set_protocol_error(result);
return;
}
// Ensure level ID is in allowed range
if ((id < 0) || (id >= ST_LEVEL_MAX)) {
crm_warn("Ignoring topology unregistration for %s with invalid level %d",
target, id);
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid level number '%s' for topology level %s",
pcmk__s(crm_element_value(level,
XML_ATTR_STONITH_INDEX),
"<null>"),
// Client API doesn't add ID to unregistration XML
pcmk__s(ID(level), ""));
return;
}
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
guint nentries = g_hash_table_size(topology);
crm_info("No fencing topology found for %s (%d active %s)",
target, nentries,
pcmk__plural_alt(nentries, "entry", "entries"));
} else if (id == 0 && g_hash_table_remove(topology, target)) {
guint nentries = g_hash_table_size(topology);
crm_info("Removed all fencing topology entries related to %s "
"(%d active %s remaining)", target, nentries,
pcmk__plural_alt(nentries, "entry", "entries"));
} else if (tp->levels[id] != NULL) {
guint nlevels;
g_list_free_full(tp->levels[id], free);
tp->levels[id] = NULL;
nlevels = count_active_levels(tp);
crm_info("Removed level %d from fencing topology for %s "
"(%d active level%s remaining)",
id, target, nlevels, pcmk__plural_s(nlevels));
}
free(target);
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
static char *
list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
{
int max = g_list_length(list);
size_t delim_len = delim?strlen(delim):0;
size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
char *rv;
GList *gIter;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
const char *value = (const char *) gIter->data;
alloc_size += strlen(value);
}
rv = calloc(alloc_size, sizeof(char));
if (rv) {
char *pos = rv;
const char *lead_delim = "";
for (gIter = list; gIter != NULL; gIter = gIter->next) {
const char *value = (const char *) gIter->data;
pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
lead_delim = delim;
}
if (max && terminate_with_delim) {
sprintf(pos, "%s", delim);
}
}
return rv;
}
/*!
* \internal
* \brief Execute a fence agent action directly (and asynchronously)
*
* Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
* directly on a specified device. Only list, monitor, and status actions are
* expected to use this call, though it should work with any agent command.
*
* \param[in] msg Request XML specifying action
* \param[out] result Where to store result of action
*
* \note If the action is monitor, the device must be registered via the API
* (CIB registration is not sufficient), because monitor should not be
* possible unless the device is "started" (API registered).
*/
static void
execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
{
xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
const char *id = crm_element_value(dev, F_STONITH_DEVICE);
const char *action = crm_element_value(op, F_STONITH_ACTION);
async_command_t *cmd = NULL;
stonith_device_t *device = NULL;
if ((id == NULL) || (action == NULL)) {
crm_info("Malformed API action request: device %s, action %s",
(id? id : "not specified"),
(action? action : "not specified"));
fenced_set_protocol_error(result);
return;
}
if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
// Watchdog agent actions are implemented internally
if (stonith_watchdog_timeout_ms <= 0) {
pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Watchdog fence device not configured");
return;
} else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_result_output(result,
list_to_string(stonith_watchdog_targets,
"\n", TRUE),
NULL);
return;
} else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) {
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return;
}
}
device = g_hash_table_lookup(device_list, id);
if (device == NULL) {
crm_info("Ignoring API '%s' action request because device %s not found",
action, id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"'%s' not found", id);
return;
} else if (!device->api_registered
&& (strcmp(action, PCMK_ACTION_MONITOR) == 0)) {
// Monitors may run only on "started" (API-registered) devices
crm_info("Ignoring API '%s' action request because device %s not active",
action, id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"'%s' not active", id);
return;
}
cmd = create_async_command(msg);
if (cmd == NULL) {
crm_log_xml_warn(msg, "invalid");
fenced_set_protocol_error(result);
return;
}
schedule_stonith_command(cmd, device);
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
static void
search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
{
search->replies_received++;
if (can_fence && device) {
if (search->support_action_only != st_device_supports_none) {
stonith_device_t *dev = g_hash_table_lookup(device_list, device);
if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) {
return;
}
}
search->capable = g_list_append(search->capable, strdup(device));
}
if (search->replies_needed == search->replies_received) {
guint ndevices = g_list_length(search->capable);
crm_debug("Search found %d device%s that can perform '%s' targeting %s",
ndevices, pcmk__plural_s(ndevices),
(search->action? search->action : "unknown action"),
(search->host? search->host : "any node"));
search->callback(search->capable, search->user_data);
free(search->host);
free(search->action);
free(search);
}
}
/*!
* \internal
* \brief Check whether the local host is allowed to execute a fencing action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Hostname of fence target
* \param[in] allow_suicide Whether self-fencing is allowed for this operation
*
* \return TRUE if local host is allowed to execute action, FALSE otherwise
*/
static gboolean
localhost_is_eligible(const stonith_device_t *device, const char *action,
const char *target, gboolean allow_suicide)
{
gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname,
pcmk__str_casei);
if ((device != NULL) && (action != NULL)
&& (device->on_target_actions != NULL)
&& (strstr((const char*) device->on_target_actions->str,
action) != NULL)) {
if (!localhost_is_target) {
crm_trace("Operation '%s' using %s can only be executed for local "
"host, not %s", action, device->id, target);
return FALSE;
}
} else if (localhost_is_target && !allow_suicide) {
crm_trace("'%s' operation does not support self-fencing", action);
return FALSE;
}
return TRUE;
}
/*!
* \internal
* \brief Check if local node is allowed to execute (possibly remapped) action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Node name of fence target
* \param[in] allow_self Whether self-fencing is allowed for this operation
*
* \return true if local node is allowed to execute \p action or any actions it
* might be remapped to, otherwise false
*/
static bool
localhost_is_eligible_with_remap(const stonith_device_t *device,
const char *action, const char *target,
gboolean allow_self)
{
// Check exact action
if (localhost_is_eligible(device, action, target, allow_self)) {
return true;
}
// Check potential remaps
if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
/* "reboot" might get remapped to "off" then "on", so even if reboot is
* disallowed, return true if either of those is allowed. We'll report
* the disallowed actions with the results. We never allow self-fencing
* for remapped "on" actions because the target is off at that point.
*/
if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self)
|| localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) {
return true;
}
}
return false;
}
static void
can_fence_host_with_device(stonith_device_t *dev,
struct device_search_s *search)
{
gboolean can = FALSE;
const char *check_type = "Internal bug";
const char *target = NULL;
const char *alias = NULL;
const char *dev_id = "Unspecified device";
const char *action = (search == NULL)? NULL : search->action;
CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results);
if (dev->id != NULL) {
dev_id = dev->id;
}
target = search->host;
if (target == NULL) {
can = TRUE;
check_type = "No target";
goto search_report_results;
}
/* Answer immediately if the device does not support the action
* or the local node is not allowed to perform it
*/
if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)
&& !pcmk_is_set(dev->flags, st_device_supports_on)) {
check_type = "Agent does not support 'on'";
goto search_report_results;
} else if (!localhost_is_eligible_with_remap(dev, action, target,
search->allow_suicide)) {
check_type = "This node is not allowed to execute action";
goto search_report_results;
}
// Check eligibility as specified by pcmk_host_check
check_type = target_list_type(dev);
alias = g_hash_table_lookup(dev->aliases, target);
if (pcmk__str_eq(check_type, PCMK__VALUE_NONE, pcmk__str_casei)) {
can = TRUE;
} else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) {
if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
can = TRUE;
} else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
&& g_hash_table_lookup(dev->aliases, target)) {
can = TRUE;
}
} else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) {
time_t now = time(NULL);
if (dev->targets == NULL || dev->targets_age + 60 < now) {
int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST,
search->per_device_timeout);
if (device_timeout > search->per_device_timeout) {
crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s "
"is larger than " PCMK_OPT_STONITH_TIMEOUT
" (%ds), timeout may occur",
device_timeout, dev_id, search->per_device_timeout);
}
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev_id, target, action);
schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL,
search->per_device_timeout, search, dynamic_list_search_cb);
/* we'll respond to this search request async in the cb */
return;
}
if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
pcmk__str_casei)) {
can = TRUE;
}
} else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) {
int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);
if (device_timeout > search->per_device_timeout) {
crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is "
"larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), "
"timeout may occur",
device_timeout, dev_id, search->per_device_timeout);
}
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev_id, target, action);
schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target,
search->per_device_timeout, search, status_search_cb);
/* we'll respond to this search request async in the cb */
return;
} else {
crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
}
search_report_results:
crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"),
pcmk__s(target, "unspecified target"),
(alias == NULL)? "" : " (as '", pcmk__s(alias, ""),
(alias == NULL)? "" : "')", check_type);
search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can);
}
static void
search_devices(gpointer key, gpointer value, gpointer user_data)
{
stonith_device_t *dev = value;
struct device_search_s *search = user_data;
can_fence_host_with_device(dev, search);
}
#define DEFAULT_QUERY_TIMEOUT 20
static void
get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
void (*callback) (GList * devices, void *user_data), uint32_t support_action_only)
{
struct device_search_s *search;
guint ndevices = g_hash_table_size(device_list);
if (ndevices == 0) {
callback(NULL, user_data);
return;
}
search = calloc(1, sizeof(struct device_search_s));
if (!search) {
crm_crit("Cannot search for capable fence devices: %s",
strerror(ENOMEM));
callback(NULL, user_data);
return;
}
pcmk__str_update(&search->host, host);
pcmk__str_update(&search->action, action);
search->per_device_timeout = timeout;
search->allow_suicide = suicide;
search->callback = callback;
search->user_data = user_data;
search->support_action_only = support_action_only;
/* We are guaranteed this many replies, even if a device is
* unregistered while the search is in progress.
*/
search->replies_needed = ndevices;
crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
ndevices, pcmk__plural_s(ndevices),
(search->action? search->action : "unknown action"),
(search->host? search->host : "any node"));
g_hash_table_foreach(device_list, search_devices, search);
}
struct st_query_data {
xmlNode *reply;
char *remote_peer;
char *client_id;
char *target;
char *action;
int call_options;
};
/*!
* \internal
* \brief Add action-specific attributes to query reply XML
*
* \param[in,out] xml XML to add attributes to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
*/
static void
add_action_specific_attributes(xmlNode *xml, const char *action,
const stonith_device_t *device,
const char *target)
{
int action_specific_timeout;
int delay_max;
int delay_base;
CRM_CHECK(xml && action && device, return);
if (is_action_required(action, device)) {
crm_trace("Action '%s' is required using %s", action, device->id);
crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1);
}
action_specific_timeout = get_action_timeout(device, action, 0);
if (action_specific_timeout) {
crm_trace("Action '%s' has timeout %dms using %s",
action, action_specific_timeout, device->id);
crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
}
delay_max = get_action_delay_max(device, action);
if (delay_max > 0) {
crm_trace("Action '%s' has maximum random delay %ds using %s",
action, delay_max, device->id);
crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max);
}
delay_base = get_action_delay_base(device, action, target);
if (delay_base > 0) {
crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base);
}
if ((delay_max > 0) && (delay_base == 0)) {
crm_trace("Action '%s' has maximum random delay %ds using %s",
action, delay_max, device->id);
} else if ((delay_max == 0) && (delay_base > 0)) {
crm_trace("Action '%s' has a static delay of %ds using %s",
action, delay_base, device->id);
} else if ((delay_max > 0) && (delay_base > 0)) {
crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen "
"maximum delay of %ds using %s",
action, delay_base, delay_max, device->id);
}
}
/*!
* \internal
* \brief Add "disallowed" attribute to query reply XML if appropriate
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device,
const char *target, gboolean allow_suicide)
{
if (!localhost_is_eligible(device, action, target, allow_suicide)) {
crm_trace("Action '%s' using %s is disallowed for local host",
action, device->id);
pcmk__xe_set_bool_attr(xml, F_STONITH_ACTION_DISALLOWED, true);
}
}
/*!
* \internal
* \brief Add child element with action-specific values to query reply XML
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_action_reply(xmlNode *xml, const char *action,
const stonith_device_t *device, const char *target,
gboolean allow_suicide)
{
xmlNode *child = create_xml_node(xml, F_STONITH_ACTION);
crm_xml_add(child, XML_ATTR_ID, action);
add_action_specific_attributes(child, action, device, target);
add_disallowed(child, action, device, target, allow_suicide);
}
/*!
* \internal
* \brief Send a reply to a CPG peer or IPC client
*
* \param[in] reply XML reply to send
* \param[in] call_options Send synchronously if st_opt_sync_call is set
* \param[in] remote_peer If not NULL, name of peer node to send CPG reply
* \param[in,out] client If not NULL, client to send IPC reply
*/
static void
stonith_send_reply(const xmlNode *reply, int call_options,
const char *remote_peer, pcmk__client_t *client)
{
CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
return);
if (remote_peer == NULL) {
do_local_reply(reply, client, call_options);
} else {
send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng,
reply, FALSE);
}
}
static void
stonith_query_capable_device_cb(GList * devices, void *user_data)
{
struct st_query_data *query = user_data;
int available_devices = 0;
xmlNode *dev = NULL;
xmlNode *list = NULL;
GList *lpc = NULL;
pcmk__client_t *client = NULL;
if (query->client_id != NULL) {
client = pcmk__find_client_by_id(query->client_id);
if ((client == NULL) && (query->remote_peer == NULL)) {
crm_trace("Skipping reply to %s: no longer a client",
query->client_id);
goto done;
}
}
/* Pack the results into XML */
list = create_xml_node(NULL, __func__);
crm_xml_add(list, F_STONITH_TARGET, query->target);
for (lpc = devices; lpc != NULL; lpc = lpc->next) {
stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
const char *action = query->action;
if (!device) {
/* It is possible the device got unregistered while
* determining who can fence the target */
continue;
}
available_devices++;
dev = create_xml_node(list, F_STONITH_DEVICE);
crm_xml_add(dev, XML_ATTR_ID, device->id);
crm_xml_add(dev, "namespace", device->namespace);
crm_xml_add(dev, "agent", device->agent);
crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified);
crm_xml_add_int(dev, F_STONITH_DEVICE_SUPPORT_FLAGS, device->flags);
/* If the originating fencer wants to reboot the node, and we have a
* capable device that doesn't support "reboot", remap to "off" instead.
*/
if (!pcmk_is_set(device->flags, st_device_supports_reboot)
&& pcmk__str_eq(query->action, PCMK_ACTION_REBOOT,
pcmk__str_none)) {
crm_trace("%s doesn't support reboot, using values for off instead",
device->id);
action = PCMK_ACTION_OFF;
}
/* Add action-specific values if available */
add_action_specific_attributes(dev, action, device, query->target);
if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
/* A "reboot" *might* get remapped to "off" then "on", so after
* sending the "reboot"-specific values in the main element, we add
* sub-elements for "off" and "on" values.
*
* We short-circuited earlier if "reboot", "off" and "on" are all
* disallowed for the local host. However if only one or two are
* disallowed, we send back the results and mark which ones are
* disallowed. If "reboot" is disallowed, this might cause problems
* with older fencer versions, which won't check for it. Older
* versions will ignore "off" and "on", so they are not a problem.
*/
add_disallowed(dev, action, device, query->target,
pcmk_is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, PCMK_ACTION_OFF, device, query->target,
pcmk_is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE);
}
/* A query without a target wants device parameters */
if (query->target == NULL) {
xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);
g_hash_table_foreach(device->params, hash2field, attrs);
}
}
crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices);
if (query->target) {
crm_debug("Found %d matching device%s for target '%s'",
available_devices, pcmk__plural_s(available_devices),
query->target);
} else {
crm_debug("%d device%s installed",
available_devices, pcmk__plural_s(available_devices));
}
if (list != NULL) {
crm_log_xml_trace(list, "Add query results");
add_message_xml(query->reply, F_STONITH_CALLDATA, list);
}
stonith_send_reply(query->reply, query->call_options, query->remote_peer,
client);
done:
free_xml(query->reply);
free(query->remote_peer);
free(query->client_id);
free(query->target);
free(query->action);
free(query);
free_xml(list);
g_list_free_full(devices, free);
}
/*!
* \internal
* \brief Log the result of an asynchronous command
*
* \param[in] cmd Command the result is for
* \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] next Alternate device that will be tried if command failed
* \param[in] op_merged Whether this command was merged with an earlier one
*/
static void
log_async_result(const async_command_t *cmd,
const pcmk__action_result_t *result,
int pid, const char *next, bool op_merged)
{
int log_level = LOG_ERR;
int output_log_level = LOG_NEVER;
guint devices_remaining = g_list_length(cmd->next_device_iter);
GString *msg = g_string_sized_new(80); // Reasonable starting size
// Choose log levels appropriately if we have a result
if (pcmk__result_ok(result)) {
log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) {
output_log_level = LOG_DEBUG;
}
next = NULL;
} else {
log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) {
output_log_level = LOG_WARNING;
}
}
// Build the log message piece by piece
pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
if (pid != 0) {
g_string_append_printf(msg, "[%d] ", pid);
}
if (cmd->target != NULL) {
pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
}
if (cmd->device != NULL) {
pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
}
// Add exit status or execution status as appropriate
if (result->execution_status == PCMK_EXEC_DONE) {
g_string_append_printf(msg, "returned %d", result->exit_status);
} else {
pcmk__g_strcat(msg, "could not be executed: ",
pcmk_exec_status_str(result->execution_status), NULL);
}
// Add exit reason and next device if appropriate
if (result->exit_reason != NULL) {
pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
}
if (next != NULL) {
pcmk__g_strcat(msg, ", retrying with ", next, NULL);
}
if (devices_remaining > 0) {
g_string_append_printf(msg, " (%u device%s remaining)",
(unsigned int) devices_remaining,
pcmk__plural_s(devices_remaining));
}
g_string_append_printf(msg, " " CRM_XS " %scall %d from %s",
(op_merged? "merged " : ""), cmd->id,
cmd->client_name);
// Log the result
do_crm_log(log_level, "%s", msg->str);
g_string_free(msg, TRUE);
// Log the output (which may have multiple lines), if appropriate
if (output_log_level != LOG_NEVER) {
char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
crm_log_output(output_log_level, prefix, result->action_stdout);
free(prefix);
}
}
/*!
* \internal
* \brief Reply to requester after asynchronous command completion
*
* \param[in] cmd Command that completed
* \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] merged If true, command was merged with another, not executed
*/
static void
send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
int pid, bool merged)
{
xmlNode *reply = NULL;
pcmk__client_t *client = NULL;
CRM_CHECK((cmd != NULL) && (result != NULL), return);
log_async_result(cmd, result, pid, NULL, merged);
if (cmd->client != NULL) {
client = pcmk__find_client_by_id(cmd->client);
if ((client == NULL) && (cmd->origin == NULL)) {
crm_trace("Skipping reply to %s: no longer a client", cmd->client);
return;
}
}
reply = construct_async_reply(cmd, result);
if (merged) {
pcmk__xe_set_bool_attr(reply, F_STONITH_MERGED, true);
}
if (!stand_alone && pcmk__is_fencing_action(cmd->action)
&& pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
/* The target was also the originator, so broadcast the result on its
* behalf (since it will be unable to).
*/
crm_trace("Broadcast '%s' result for %s (target was also originator)",
cmd->action, cmd->target);
crm_xml_add(reply, F_SUBTYPE, "broadcast");
crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
} else {
// Reply only to the originator
stonith_send_reply(reply, cmd->options, cmd->origin, client);
}
crm_log_xml_trace(reply, "Reply");
free_xml(reply);
if (stand_alone) {
/* Do notification with a clean data object */
xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
stonith__xe_set_result(notify_data, result);
crm_xml_add(notify_data, F_STONITH_TARGET, cmd->target);
crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device);
crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
}
}
static void
cancel_stonith_command(async_command_t * cmd)
{
stonith_device_t *device = cmd_device(cmd);
if (device) {
crm_trace("Cancel scheduled '%s' action using %s",
cmd->action, device->id);
device->pending_ops = g_list_remove(device->pending_ops, cmd);
}
}
/*!
* \internal
* \brief Cancel and reply to any duplicates of a just-completed operation
*
* Check whether any fencing operations are scheduled to do the same thing as
* one that just succeeded. If so, rather than performing the same operation
* twice, return the result of this operation for all matching pending commands.
*
* \param[in,out] cmd Fencing operation that just succeeded
* \param[in] result Result of \p cmd
* \param[in] pid If nonzero, process ID of agent invocation (for logs)
*
* \note Duplicate merging will do the right thing for either type of remapped
* reboot. If the executing fencer remapped an unsupported reboot to off,
* then cmd->action will be "reboot" and will be merged with any other
* reboot requests. If the originating fencer remapped a topology reboot
* to off then on, we will get here once with cmd->action "off" and once
* with "on", and they will be merged separately with similar requests.
*/
static void
reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
int pid)
{
GList *next = NULL;
for (GList *iter = cmd_list; iter != NULL; iter = next) {
async_command_t *cmd_other = iter->data;
next = iter->next; // We might delete this entry, so grab next now
if (cmd == cmd_other) {
continue;
}
/* A pending operation matches if:
* 1. The client connections are different.
* 2. The target is the same.
* 3. The fencing action is the same.
* 4. The device scheduled to execute the action is the same.
*/
if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
!pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) ||
!pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) ||
!pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
continue;
}
crm_notice("Merging fencing action '%s'%s%s originating from "
"client %s with identical fencing request from client %s",
cmd_other->action,
(cmd_other->target == NULL)? "" : " targeting ",
pcmk__s(cmd_other->target, ""), cmd_other->client_name,
cmd->client_name);
// Stop tracking the duplicate, send its result, and cancel it
cmd_list = g_list_remove_link(cmd_list, iter);
send_async_reply(cmd_other, result, pid, true);
cancel_stonith_command(cmd_other);
free_async_command(cmd_other);
g_list_free_1(iter);
}
}
/*!
* \internal
* \brief Return the next required device (if any) for an operation
*
* \param[in,out] cmd Fencing operation that just succeeded
*
* \return Next device required for action if any, otherwise NULL
*/
static stonith_device_t *
next_required_device(async_command_t *cmd)
{
for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) {
stonith_device_t *next_device = g_hash_table_lookup(device_list,
iter->data);
if (is_action_required(cmd->action, next_device)) {
/* This is only called for successful actions, so it's OK to skip
* non-required devices.
*/
cmd->next_device_iter = iter->next;
return next_device;
}
}
return NULL;
}
static void
st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
{
async_command_t *cmd = user_data;
stonith_device_t *device = NULL;
stonith_device_t *next_device = NULL;
CRM_CHECK(cmd != NULL, return);
device = cmd_device(cmd);
cmd->active_on = NULL;
/* The device is ready to do something else now */
if (device) {
if (!device->verified && pcmk__result_ok(result)
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST,
PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS,
NULL)) {
device->verified = TRUE;
}
mainloop_set_trigger(device->work);
}
if (pcmk__result_ok(result)) {
next_device = next_required_device(cmd);
} else if ((cmd->next_device_iter != NULL)
&& !is_action_required(cmd->action, device)) {
/* if this device didn't work out, see if there are any others we can try.
* if the failed device was 'required', we can't pick another device. */
next_device = g_hash_table_lookup(device_list,
cmd->next_device_iter->data);
cmd->next_device_iter = cmd->next_device_iter->next;
}
if (next_device == NULL) {
send_async_reply(cmd, result, pid, false);
if (pcmk__result_ok(result)) {
reply_to_duplicates(cmd, result, pid);
}
free_async_command(cmd);
} else { // This operation requires more fencing
log_async_result(cmd, result, pid, next_device->id, false);
schedule_stonith_command(cmd, next_device);
}
}
static gint
sort_device_priority(gconstpointer a, gconstpointer b)
{
const stonith_device_t *dev_a = a;
const stonith_device_t *dev_b = b;
if (dev_a->priority > dev_b->priority) {
return -1;
} else if (dev_a->priority < dev_b->priority) {
return 1;
}
return 0;
}
static void
stonith_fence_get_devices_cb(GList * devices, void *user_data)
{
async_command_t *cmd = user_data;
stonith_device_t *device = NULL;
guint ndevices = g_list_length(devices);
crm_info("Found %d matching device%s for target '%s'",
ndevices, pcmk__plural_s(ndevices), cmd->target);
if (devices != NULL) {
/* Order based on priority */
devices = g_list_sort(devices, sort_device_priority);
device = g_hash_table_lookup(device_list, devices->data);
}
if (device == NULL) { // No device found
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"No device configured for target '%s'",
cmd->target);
send_async_reply(cmd, &result, 0, false);
pcmk__reset_result(&result);
free_async_command(cmd);
g_list_free_full(devices, free);
} else { // Device found, schedule it for fencing
cmd->device_list = devices;
cmd->next_device_iter = devices->next;
schedule_stonith_command(cmd, device);
}
}
/*!
* \internal
* \brief Execute a fence action via the local node
*
* \param[in] msg Fencing request
* \param[out] result Where to store result of fence action
*/
static void
fence_locally(xmlNode *msg, pcmk__action_result_t *result)
{
const char *device_id = NULL;
stonith_device_t *device = NULL;
async_command_t *cmd = NULL;
xmlNode *dev = NULL;
CRM_CHECK((msg != NULL) && (result != NULL), return);
dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
cmd = create_async_command(msg);
if (cmd == NULL) {
crm_log_xml_warn(msg, "invalid");
fenced_set_protocol_error(result);
return;
}
device_id = crm_element_value(dev, F_STONITH_DEVICE);
if (device_id != NULL) {
device = g_hash_table_lookup(device_list, device_id);
if (device == NULL) {
crm_err("Requested device '%s' is not available", device_id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Requested device '%s' not found", device_id);
return;
}
schedule_stonith_command(cmd, device);
} else {
const char *host = crm_element_value(dev, F_STONITH_TARGET);
if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
int nodeid = 0;
crm_node_t *node = NULL;
pcmk__scan_min_int(host, &nodeid, 0);
node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
if (node != NULL) {
host = node->uname;
}
}
/* If we get to here, then self-fencing is implicitly allowed */
get_capable_devices(host, cmd->action, cmd->default_timeout,
TRUE, cmd, stonith_fence_get_devices_cb,
fenced_support_flag(cmd->action));
}
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
/*!
* \internal
* \brief Build an XML reply for a fencing operation
*
* \param[in] request Request that reply is for
* \param[in] data If not NULL, add to reply as call data
* \param[in] result Full result of fencing operation
*
* \return Newly created XML reply
* \note The caller is responsible for freeing the result.
* \note This has some overlap with construct_async_reply(), but that copies
* values from an async_command_t, whereas this one copies them from the
* request.
*/
xmlNode *
fenced_construct_reply(const xmlNode *request, xmlNode *data,
const pcmk__action_result_t *result)
{
xmlNode *reply = NULL;
reply = create_xml_node(NULL, T_STONITH_REPLY);
crm_xml_add(reply, "st_origin", __func__);
crm_xml_add(reply, F_TYPE, T_STONITH_NG);
stonith__xe_set_result(reply, result);
if (request == NULL) {
/* Most likely, this is the result of a stonith operation that was
* initiated before we came up. Unfortunately that means we lack enough
* information to provide clients with a full result.
*
* @TODO Maybe synchronize this information at start-up?
*/
crm_warn("Missing request information for client notifications for "
"operation with result '%s' (initiated before we came up?)",
pcmk_exec_status_str(result->execution_status));
} else {
const char *name = NULL;
const char *value = NULL;
// Attributes to copy from request to reply
const char *names[] = {
F_STONITH_OPERATION,
F_STONITH_CALLID,
F_STONITH_CLIENTID,
F_STONITH_CLIENTNAME,
F_STONITH_REMOTE_OP_ID,
F_STONITH_CALLOPTS
};
for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
name = names[lpc];
value = crm_element_value(request, name);
crm_xml_add(reply, name, value);
}
if (data != NULL) {
add_message_xml(reply, F_STONITH_CALLDATA, data);
}
}
return reply;
}
/*!
* \internal
* \brief Build an XML reply to an asynchronous fencing command
*
* \param[in] cmd Fencing command that reply is for
* \param[in] result Command result
*/
static xmlNode *
construct_async_reply(const async_command_t *cmd,
const pcmk__action_result_t *result)
{
xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY);
crm_xml_add(reply, "st_origin", __func__);
crm_xml_add(reply, F_TYPE, T_STONITH_NG);
crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client);
crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name);
crm_xml_add(reply, F_STONITH_TARGET, cmd->target);
crm_xml_add(reply, F_STONITH_ACTION, cmd->op);
crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
stonith__xe_set_result(reply, result);
return reply;
}
bool fencing_peer_active(crm_node_t *peer)
{
if (peer == NULL) {
return FALSE;
} else if (peer->uname == NULL) {
return FALSE;
} else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
return TRUE;
}
return FALSE;
}
void
set_fencing_completed(remote_fencing_op_t *op)
{
struct timespec tv;
qb_util_timespec_from_epoch_get(&tv);
op->completed = tv.tv_sec;
op->completed_nsec = tv.tv_nsec;
}
/*!
* \internal
* \brief Look for alternate node needed if local node shouldn't fence target
*
* \param[in] target Node that must be fenced
*
* \return Name of an alternate node that should fence \p target if any,
* or NULL otherwise
*/
static const char *
check_alternate_host(const char *target)
{
if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
GHashTableIter gIter;
crm_node_t *entry = NULL;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
if (fencing_peer_active(entry)
&& !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) {
crm_notice("Forwarding self-fencing request to %s",
entry->uname);
return entry->uname;
}
}
crm_warn("Will handle own fencing because no peer can");
}
return NULL;
}
static void
remove_relay_op(xmlNode * request)
{
xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE);
const char *relay_op_id = NULL;
const char *op_id = NULL;
const char *client_name = NULL;
const char *target = NULL;
remote_fencing_op_t *relay_op = NULL;
if (dev) {
target = crm_element_value(dev, F_STONITH_TARGET);
}
relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY);
op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
client_name = crm_element_value(request, F_STONITH_CLIENTNAME);
/* Delete RELAY operation. */
if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
if (relay_op) {
GHashTableIter iter;
remote_fencing_op_t *list_op = NULL;
g_hash_table_iter_init(&iter, stonith_remote_op_list);
/* If the operation to be deleted is registered as a duplicate, delete the registration. */
while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
GList *dup_iter = NULL;
if (list_op != relay_op) {
for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) {
remote_fencing_op_t *other = dup_iter->data;
if (other == relay_op) {
other->duplicates = g_list_remove(other->duplicates, relay_op);
break;
}
}
}
}
crm_debug("Deleting relay op %s ('%s'%s%s for %s), "
"replaced by op %s ('%s'%s%s for %s)",
relay_op->id, relay_op->action,
(relay_op->target == NULL)? "" : " targeting ",
pcmk__s(relay_op->target, ""),
relay_op->client_name, op_id, relay_op->action,
(target == NULL)? "" : " targeting ", pcmk__s(target, ""),
client_name);
g_hash_table_remove(stonith_remote_op_list, relay_op_id);
}
}
}
/*!
* \internal
* \brief Check whether an API request was sent by a privileged user
*
* API commands related to fencing configuration may be done only by privileged
* IPC users (i.e. root or hacluster), because all other users should go through
* the CIB to have ACLs applied. If no client was given, this is a peer request,
* which is always allowed.
*
* \param[in] c IPC client that sent request (or NULL if sent by CPG peer)
* \param[in] op Requested API operation (for logging only)
*
* \return true if sender is peer or privileged client, otherwise false
*/
static inline bool
is_privileged(const pcmk__client_t *c, const char *op)
{
if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) {
return true;
} else {
crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
pcmk__s(op, ""), pcmk__client_name(c));
return false;
}
}
// CRM_OP_REGISTER
static xmlNode *
handle_register_request(pcmk__request_t *request)
{
xmlNode *reply = create_xml_node(NULL, "reply");
CRM_ASSERT(request->ipc_client != NULL);
crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER);
crm_xml_add(reply, F_STONITH_CLIENTID, request->ipc_client->id);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_request_flags(request, pcmk__request_reuse_options);
return reply;
}
// STONITH_OP_EXEC
static xmlNode *
handle_agent_request(pcmk__request_t *request)
{
execute_agent_action(request->xml, &request->result);
if (request->result.execution_status == PCMK_EXEC_PENDING) {
return NULL;
}
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_TIMEOUT_UPDATE
static xmlNode *
handle_update_timeout_request(pcmk__request_t *request)
{
const char *call_id = crm_element_value(request->xml, F_STONITH_CALLID);
const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID);
int op_timeout = 0;
crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &op_timeout);
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
// STONITH_OP_QUERY
static xmlNode *
handle_query_request(pcmk__request_t *request)
{
int timeout = 0;
xmlNode *dev = NULL;
const char *action = NULL;
const char *target = NULL;
const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID);
struct st_query_data *query = NULL;
if (request->peer != NULL) {
// Record it for the future notification
create_remote_stonith_op(client_id, request->xml, TRUE);
}
/* Delete the DC node RELAY operation. */
remove_relay_op(request->xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
dev = get_xpath_object("//@" F_STONITH_ACTION, request->xml, LOG_NEVER);
if (dev != NULL) {
const char *device = crm_element_value(dev, F_STONITH_DEVICE);
if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
return NULL; // No query or reply necessary
}
target = crm_element_value(dev, F_STONITH_TARGET);
action = crm_element_value(dev, F_STONITH_ACTION);
}
crm_log_xml_trace(request->xml, "Query");
query = calloc(1, sizeof(struct st_query_data));
CRM_ASSERT(query != NULL);
query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
pcmk__str_update(&query->remote_peer, request->peer);
pcmk__str_update(&query->client_id, client_id);
pcmk__str_update(&query->target, target);
pcmk__str_update(&query->action, action);
query->call_options = request->call_options;
crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &timeout);
get_capable_devices(target, action, timeout,
pcmk_is_set(query->call_options, st_opt_allow_suicide),
query, stonith_query_capable_device_cb, st_device_supports_none);
return NULL;
}
// T_STONITH_NOTIFY
static xmlNode *
handle_notify_request(pcmk__request_t *request)
{
const char *flag_name = NULL;
CRM_ASSERT(request->ipc_client != NULL);
flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_ACTIVATE);
if (flag_name != NULL) {
crm_debug("Enabling %s callbacks for client %s",
flag_name, pcmk__request_origin(request));
pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name));
}
flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_DEACTIVATE);
if (flag_name != NULL) {
crm_debug("Disabling %s callbacks for client %s",
flag_name, pcmk__request_origin(request));
pcmk__clear_client_flags(request->ipc_client,
get_stonith_flag(flag_name));
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_request_flags(request, pcmk__request_reuse_options);
return pcmk__ipc_create_ack(request->ipc_flags, "ack", NULL, CRM_EX_OK);
}
// STONITH_OP_RELAY
static xmlNode *
handle_relay_request(pcmk__request_t *request)
{
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml,
LOG_TRACE);
crm_notice("Received forwarded fencing request from "
"%s %s to fence (%s) peer %s",
pcmk__request_origin_type(request),
pcmk__request_origin(request),
crm_element_value(dev, F_STONITH_ACTION),
crm_element_value(dev, F_STONITH_TARGET));
if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) {
fenced_set_protocol_error(&request->result);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
return NULL;
}
// STONITH_OP_FENCE
static xmlNode *
handle_fence_request(pcmk__request_t *request)
{
if ((request->peer != NULL) || stand_alone) {
fence_locally(request->xml, &request->result);
} else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) {
switch (fenced_handle_manual_confirmation(request->ipc_client,
request->xml)) {
case pcmk_rc_ok:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
break;
case EINPROGRESS:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
break;
default:
fenced_set_protocol_error(&request->result);
break;
}
} else {
const char *alternate_host = NULL;
xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml,
LOG_TRACE);
const char *target = crm_element_value(dev, F_STONITH_TARGET);
const char *action = crm_element_value(dev, F_STONITH_ACTION);
const char *device = crm_element_value(dev, F_STONITH_DEVICE);
if (request->ipc_client != NULL) {
int tolerance = 0;
crm_notice("Client %s wants to fence (%s) %s using %s",
pcmk__request_origin(request), action,
target, (device? device : "any device"));
crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
if (stonith_check_fence_tolerance(tolerance, target, action)) {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
return fenced_construct_reply(request->xml, NULL,
&request->result);
}
alternate_host = check_alternate_host(target);
} else {
crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
request->peer, action, target,
(device == NULL)? "(any)" : device);
}
if (alternate_host != NULL) {
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
if (request->ipc_client->id == 0) {
client_id = crm_element_value(request->xml, F_STONITH_CLIENTID);
} else {
client_id = request->ipc_client->id;
}
/* Create a duplicate fencing operation to relay with the client ID.
* When a query response is received, this operation should be
* deleted to avoid keeping the duplicate around.
*/
op = create_remote_stonith_op(client_id, request->xml, FALSE);
crm_xml_add(request->xml, F_STONITH_OPERATION, STONITH_OP_RELAY);
crm_xml_add(request->xml, F_STONITH_CLIENTID,
request->ipc_client->id);
crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id);
send_cluster_message(crm_get_peer(0, alternate_host),
crm_msg_stonith_ng, request->xml, FALSE);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
} else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
FALSE) == NULL) {
fenced_set_protocol_error(&request->result);
} else {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
}
}
if (request->result.execution_status == PCMK_EXEC_PENDING) {
return NULL;
}
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_FENCE_HISTORY
static xmlNode *
handle_history_request(pcmk__request_t *request)
{
xmlNode *reply = NULL;
xmlNode *data = NULL;
stonith_fence_history(request->xml, &data, request->peer,
request->call_options);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) {
/* When the local node broadcasts its history, it sets
* st_opt_discard_reply and doesn't need a reply.
*/
reply = fenced_construct_reply(request->xml, data, &request->result);
}
free_xml(data);
return reply;
}
// STONITH_OP_DEVICE_ADD
static xmlNode *
handle_device_add_request(pcmk__request_t *request)
{
const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml,
LOG_ERR);
if (is_privileged(request->ipc_client, op)) {
int rc = stonith_device_register(dev, FALSE);
pcmk__set_result(&request->result,
((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
stonith__legacy2status(rc),
((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
} else {
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must register device via CIB");
}
fenced_send_device_notification(op, &request->result,
(dev == NULL)? NULL : ID(dev));
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_DEVICE_DEL
static xmlNode *
handle_device_delete_request(pcmk__request_t *request)
{
xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml,
LOG_ERR);
const char *device_id = crm_element_value(dev, XML_ATTR_ID);
const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
if (is_privileged(request->ipc_client, op)) {
stonith_device_remove(device_id, false);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must delete device via CIB");
}
fenced_send_device_notification(op, &request->result, device_id);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_ADD
static xmlNode *
handle_level_add_request(pcmk__request_t *request)
{
char *desc = NULL;
const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
if (is_privileged(request->ipc_client, op)) {
fenced_register_level(request->xml, &desc, &request->result);
} else {
unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must add level via CIB");
}
fenced_send_level_notification(op, &request->result, desc);
free(desc);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_DEL
static xmlNode *
handle_level_delete_request(pcmk__request_t *request)
{
char *desc = NULL;
const char *op = crm_element_value(request->xml, F_STONITH_OPERATION);
if (is_privileged(request->ipc_client, op)) {
fenced_unregister_level(request->xml, &desc, &request->result);
} else {
unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must delete level via CIB");
}
fenced_send_level_notification(op, &request->result, desc);
free(desc);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// CRM_OP_RM_NODE_CACHE
static xmlNode *
handle_cache_request(pcmk__request_t *request)
{
int node_id = 0;
const char *name = NULL;
crm_element_value_int(request->xml, XML_ATTR_ID, &node_id);
name = crm_element_value(request->xml, XML_ATTR_UNAME);
reap_crm_member(node_id, name);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
static xmlNode *
handle_unknown_request(pcmk__request_t *request)
{
crm_err("Unknown IPC request %s from %s %s",
request->op, pcmk__request_origin_type(request),
pcmk__request_origin(request));
pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
"Unknown IPC request type '%s' (bug?)", request->op);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
static void
fenced_register_handlers(void)
{
pcmk__server_command_t handlers[] = {
{ CRM_OP_REGISTER, handle_register_request },
{ STONITH_OP_EXEC, handle_agent_request },
{ STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
{ STONITH_OP_QUERY, handle_query_request },
{ T_STONITH_NOTIFY, handle_notify_request },
{ STONITH_OP_RELAY, handle_relay_request },
{ STONITH_OP_FENCE, handle_fence_request },
{ STONITH_OP_FENCE_HISTORY, handle_history_request },
{ STONITH_OP_DEVICE_ADD, handle_device_add_request },
{ STONITH_OP_DEVICE_DEL, handle_device_delete_request },
{ STONITH_OP_LEVEL_ADD, handle_level_add_request },
{ STONITH_OP_LEVEL_DEL, handle_level_delete_request },
{ CRM_OP_RM_NODE_CACHE, handle_cache_request },
{ NULL, handle_unknown_request },
};
fenced_handlers = pcmk__register_handlers(handlers);
}
void
fenced_unregister_handlers(void)
{
if (fenced_handlers != NULL) {
g_hash_table_destroy(fenced_handlers);
fenced_handlers = NULL;
}
}
static void
handle_request(pcmk__request_t *request)
{
xmlNode *reply = NULL;
const char *reason = NULL;
if (fenced_handlers == NULL) {
fenced_register_handlers();
}
reply = pcmk__process_request(request, fenced_handlers);
if (reply != NULL) {
if (pcmk_is_set(request->flags, pcmk__request_reuse_options)
&& (request->ipc_client != NULL)) {
/* Certain IPC-only commands must reuse the call options from the
* original request rather than the ones set by stonith_send_reply()
* -> do_local_reply().
*/
pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
request->ipc_flags);
request->ipc_client->request_id = 0;
} else {
stonith_send_reply(reply, request->call_options,
request->peer, request->ipc_client);
}
free_xml(reply);
}
reason = request->result.exit_reason;
crm_debug("Processed %s request from %s %s: %s%s%s%s",
request->op, pcmk__request_origin_type(request),
pcmk__request_origin(request),
pcmk_exec_status_str(request->result.execution_status),
(reason == NULL)? "" : " (",
(reason == NULL)? "" : reason,
(reason == NULL)? "" : ")");
}
static void
handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
{
// Copy, because request might be freed before we want to log this
char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
process_remote_stonith_query(request);
} else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) {
fenced_process_fencing_reply(request);
} else {
crm_err("Ignoring unknown %s reply from %s %s",
pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"),
((client == NULL)? remote_peer : pcmk__client_name(client)));
crm_log_xml_warn(request, "UnknownOp");
free(op);
return;
}
crm_debug("Processed %s reply from %s %s",
op, ((client == NULL)? "peer" : "client"),
((client == NULL)? remote_peer : pcmk__client_name(client)));
free(op);
}
/*!
* \internal
* \brief Handle a message from an IPC client or CPG peer
*
* \param[in,out] client If not NULL, IPC client that sent message
* \param[in] id If from IPC client, IPC message ID
* \param[in] flags Message flags
* \param[in,out] message Message XML
* \param[in] remote_peer If not NULL, CPG peer that sent message
*/
void
stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
xmlNode *message, const char *remote_peer)
{
int call_options = st_opt_none;
bool is_reply = false;
CRM_CHECK(message != NULL, return);
if (get_xpath_object("//" T_STONITH_REPLY, message, LOG_NEVER) != NULL) {
is_reply = true;
}
crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options);
crm_debug("Processing %ssynchronous %s %s %u from %s %s",
pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
crm_element_value(message, F_STONITH_OPERATION),
(is_reply? "reply" : "request"), id,
((client == NULL)? "peer" : "client"),
((client == NULL)? remote_peer : pcmk__client_name(client)));
if (pcmk_is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
if (is_reply) {
handle_reply(client, message, remote_peer);
} else {
pcmk__request_t request = {
.ipc_client = client,
.ipc_id = id,
.ipc_flags = flags,
.peer = remote_peer,
.xml = message,
.call_options = call_options,
.result = PCMK__UNKNOWN_RESULT,
};
request.op = crm_element_value_copy(request.xml, F_STONITH_OPERATION);
CRM_CHECK(request.op != NULL, return);
if (pcmk_is_set(request.call_options, st_opt_sync_call)) {
pcmk__set_request_flags(&request, pcmk__request_sync);
}
handle_request(&request);
pcmk__reset_request(&request);
}
}
diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h
index b4d937e0eb..5f5be45e4e 100644
--- a/include/crm/common/options_internal.h
+++ b/include/crm/common/options_internal.h
@@ -1,151 +1,151 @@
/*
* Copyright 2006-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__OPTIONS_INTERNAL__H
# define PCMK__OPTIONS_INTERNAL__H
# ifndef PCMK__CONFIG_H
# define PCMK__CONFIG_H
# include <config.h> // _Noreturn
# endif
# include <glib.h> // GHashTable
# include <stdbool.h> // bool
-#include <crm/common/strings_internal.h> // pcmk__parse_interval_spec()
+#include <crm/common/util.h> // pcmk_parse_interval_spec()
_Noreturn void pcmk__cli_help(char cmd);
/*
* Environment variable option handling
*/
const char *pcmk__env_option(const char *option);
void pcmk__set_env_option(const char *option, const char *value, bool compat);
bool pcmk__env_option_enabled(const char *daemon, const char *option);
/*
* Cluster option handling
*/
typedef struct pcmk__cluster_option_s {
const char *name;
const char *alt_name;
const char *type;
const char *values;
const char *default_value;
bool (*is_valid)(const char *);
const char *description_short;
const char *description_long;
} pcmk__cluster_option_t;
const char *pcmk__cluster_option(GHashTable *options,
const pcmk__cluster_option_t *option_list,
int len, const char *name);
gchar *pcmk__format_option_metadata(const char *name, const char *desc_short,
const char *desc_long,
pcmk__cluster_option_t *option_list,
int len);
void pcmk__validate_cluster_options(GHashTable *options,
pcmk__cluster_option_t *option_list,
int len);
bool pcmk__valid_interval_spec(const char *value);
bool pcmk__valid_boolean(const char *value);
bool pcmk__valid_int(const char *value);
bool pcmk__valid_positive_int(const char *value);
bool pcmk__valid_no_quorum_policy(const char *value);
bool pcmk__valid_percentage(const char *value);
bool pcmk__valid_script(const char *value);
// from watchdog.c
long pcmk__get_sbd_timeout(void);
bool pcmk__get_sbd_sync_resource_startup(void);
long pcmk__auto_watchdog_timeout(void);
bool pcmk__valid_sbd_timeout(const char *value);
// Constants for environment variable names
#define PCMK__ENV_AUTHKEY_LOCATION "authkey_location"
#define PCMK__ENV_BLACKBOX "blackbox"
#define PCMK__ENV_CALLGRIND_ENABLED "callgrind_enabled"
#define PCMK__ENV_CLUSTER_TYPE "cluster_type"
#define PCMK__ENV_DEBUG "debug"
#define PCMK__ENV_DH_MAX_BITS "dh_max_bits"
#define PCMK__ENV_DH_MIN_BITS "dh_min_bits"
#define PCMK__ENV_FAIL_FAST "fail_fast"
#define PCMK__ENV_IPC_BUFFER "ipc_buffer"
#define PCMK__ENV_IPC_TYPE "ipc_type"
#define PCMK__ENV_LOGFACILITY "logfacility"
#define PCMK__ENV_LOGFILE "logfile"
#define PCMK__ENV_LOGFILE_MODE "logfile_mode"
#define PCMK__ENV_LOGPRIORITY "logpriority"
#define PCMK__ENV_NODE_ACTION_LIMIT "node_action_limit"
#define PCMK__ENV_NODE_START_STATE "node_start_state"
#define PCMK__ENV_PANIC_ACTION "panic_action"
#define PCMK__ENV_PHYSICAL_HOST "physical_host"
#define PCMK__ENV_REMOTE_ADDRESS "remote_address"
#define PCMK__ENV_REMOTE_SCHEMA_DIR "remote_schema_directory"
#define PCMK__ENV_REMOTE_PID1 "remote_pid1"
#define PCMK__ENV_REMOTE_PORT "remote_port"
#define PCMK__ENV_RESPAWNED "respawned"
#define PCMK__ENV_SCHEMA_DIRECTORY "schema_directory"
#define PCMK__ENV_SERVICE "service"
#define PCMK__ENV_STDERR "stderr"
#define PCMK__ENV_TLS_PRIORITIES "tls_priorities"
#define PCMK__ENV_TRACE_BLACKBOX "trace_blackbox"
#define PCMK__ENV_TRACE_FILES "trace_files"
#define PCMK__ENV_TRACE_FORMATS "trace_formats"
#define PCMK__ENV_TRACE_FUNCTIONS "trace_functions"
#define PCMK__ENV_TRACE_TAGS "trace_tags"
#define PCMK__ENV_VALGRIND_ENABLED "valgrind_enabled"
// @COMPAT Drop at 3.0.0; default is plenty
#define PCMK__ENV_CIB_TIMEOUT "cib_timeout"
// @COMPAT Drop at 3.0.0; likely last used in 1.1.24
#define PCMK__ENV_MCP "mcp"
// @COMPAT Drop at 3.0.0; added unused in 1.1.9
#define PCMK__ENV_QUORUM_TYPE "quorum_type"
/* @COMPAT Drop at 3.0.0; added to debug shutdown issues when Pacemaker is
* managed by systemd, but no longer useful.
*/
#define PCMK__ENV_SHUTDOWN_DELAY "shutdown_delay"
// @COMPAT Deprecated since 2.1.0
#define PCMK__OPT_REMOVE_AFTER_STOP "remove-after-stop"
// Constants for meta-attribute names
#define PCMK__META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes"
// Constants for enumerated values for various options
#define PCMK__VALUE_CLUSTER "cluster"
#define PCMK__VALUE_CUSTOM "custom"
#define PCMK__VALUE_FENCING "fencing"
#define PCMK__VALUE_GREEN "green"
#define PCMK__VALUE_LOCAL "local"
#define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red"
#define PCMK__VALUE_NONE "none"
#define PCMK__VALUE_NOTHING "nothing"
#define PCMK__VALUE_ONLY_GREEN "only-green"
#define PCMK__VALUE_PROGRESSIVE "progressive"
#define PCMK__VALUE_QUORUM "quorum"
#define PCMK__VALUE_RED "red"
#define PCMK__VALUE_UNFENCING "unfencing"
#define PCMK__VALUE_YELLOW "yellow"
#endif // PCMK__OPTIONS_INTERNAL__H
diff --git a/include/crm/common/strings_internal.h b/include/crm/common/strings_internal.h
index 5c5e36a1a1..e5e1758b0a 100644
--- a/include/crm/common/strings_internal.h
+++ b/include/crm/common/strings_internal.h
@@ -1,215 +1,214 @@
/*
* Copyright 2015-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__STRINGS_INTERNAL__H
#define PCMK__STRINGS_INTERNAL__H
#include <stdbool.h> // bool
#include <glib.h> // guint, GList, GHashTable
/* internal constants for generic string functions (from strings.c) */
#define PCMK__PARSE_INT_DEFAULT -1
#define PCMK__PARSE_DBL_DEFAULT -1.0
/* internal generic string functions (from strings.c) */
enum pcmk__str_flags {
pcmk__str_none = 0,
pcmk__str_casei = 1 << 0,
pcmk__str_null_matches = 1 << 1,
pcmk__str_regex = 1 << 2,
pcmk__str_star_matches = 1 << 3,
};
int pcmk__scan_double(const char *text, double *result,
const char *default_text, char **end_text);
int pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val,
guint *result);
-int pcmk__parse_interval_spec(const char *input, guint *result_ms);
bool pcmk__starts_with(const char *str, const char *prefix);
bool pcmk__ends_with(const char *s, const char *match);
bool pcmk__ends_with_ext(const char *s, const char *match);
char *pcmk__trim(char *str);
void pcmk__add_separated_word(GString **list, size_t init_size,
const char *word, const char *separator);
int pcmk__compress(const char *data, unsigned int length, unsigned int max,
char **result, unsigned int *result_len);
int pcmk__scan_ll(const char *text, long long *result, long long default_value);
int pcmk__scan_min_int(const char *text, int *result, int minimum);
int pcmk__scan_port(const char *text, int *port);
int pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end);
GHashTable *pcmk__strkey_table(GDestroyNotify key_destroy_func,
GDestroyNotify value_destroy_func);
GHashTable *pcmk__strikey_table(GDestroyNotify key_destroy_func,
GDestroyNotify value_destroy_func);
GHashTable *pcmk__str_table_dup(GHashTable *old_table);
/*!
* \internal
* \brief Get a string value with a default if NULL
*
* \param[in] s String to return if non-NULL
* \param[in] default_value String (or NULL) to return if \p s is NULL
*
* \return \p s if \p s is non-NULL, otherwise \p default_value
*/
static inline const char *
pcmk__s(const char *s, const char *default_value)
{
return (s == NULL)? default_value : s;
}
/*!
* \internal
* \brief Create a hash table with integer keys
*
* \param[in] value_destroy_func Function to free a value
*
* \return Newly allocated hash table
* \note It is the caller's responsibility to free the result, using
* g_hash_table_destroy().
*/
static inline GHashTable *
pcmk__intkey_table(GDestroyNotify value_destroy_func)
{
return g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,
value_destroy_func);
}
/*!
* \internal
* \brief Insert a value into a hash table with integer keys
*
* \param[in,out] hash_table Table to insert into
* \param[in] key Integer key to insert
* \param[in] value Value to insert
*
* \return Whether the key/value was already in the table
* \note This has the same semantics as g_hash_table_insert(). If the key
* already exists in the table, the old value is freed and replaced.
*/
static inline gboolean
pcmk__intkey_table_insert(GHashTable *hash_table, int key, gpointer value)
{
return g_hash_table_insert(hash_table, GINT_TO_POINTER(key), value);
}
/*!
* \internal
* \brief Look up a value in a hash table with integer keys
*
* \param[in] hash_table Table to check
* \param[in] key Integer key to look for
*
* \return Value in table for \key (or NULL if not found)
*/
static inline gpointer
pcmk__intkey_table_lookup(GHashTable *hash_table, int key)
{
return g_hash_table_lookup(hash_table, GINT_TO_POINTER(key));
}
/*!
* \internal
* \brief Remove a key/value from a hash table with integer keys
*
* \param[in,out] hash_table Table to modify
* \param[in] key Integer key of entry to remove
*
* \return Whether \p key was found and removed from \p hash_table
*/
static inline gboolean
pcmk__intkey_table_remove(GHashTable *hash_table, int key)
{
return g_hash_table_remove(hash_table, GINT_TO_POINTER(key));
}
gboolean pcmk__str_in_list(const gchar *s, const GList *lst, uint32_t flags);
bool pcmk__strcase_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED;
bool pcmk__str_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED;
bool pcmk__char_in_any_str(int ch, ...) G_GNUC_NULL_TERMINATED;
int pcmk__strcmp(const char *s1, const char *s2, uint32_t flags);
int pcmk__numeric_strcasecmp(const char *s1, const char *s2);
void pcmk__str_update(char **str, const char *value);
void pcmk__g_strcat(GString *buffer, ...) G_GNUC_NULL_TERMINATED;
static inline bool
pcmk__str_eq(const char *s1, const char *s2, uint32_t flags)
{
return pcmk__strcmp(s1, s2, flags) == 0;
}
// Like pcmk__add_separated_word() but using a space as separator
static inline void
pcmk__add_word(GString **list, size_t init_size, const char *word)
{
return pcmk__add_separated_word(list, init_size, word, " ");
}
/* Correctly displaying singular or plural is complicated; consider "1 node has"
* vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g.
*
* if (a == 1) {
* crm_info("singular message"):
* } else {
* crm_info("plural message");
* }
*
* though even that's not sufficient for all languages besides English (if we
* ever desire to do translations of output and log messages). But the following
* convenience macros are "good enough" and more concise for many cases.
*/
/* Example:
* crm_info("Found %d %s", nentries,
* pcmk__plural_alt(nentries, "entry", "entries"));
*/
#define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2))
// Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes));
#define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s")
static inline int
pcmk__str_empty(const char *s)
{
return (s == NULL) || (s[0] == '\0');
}
static inline char *
pcmk__itoa(int an_int)
{
return crm_strdup_printf("%d", an_int);
}
static inline char *
pcmk__ftoa(double a_float)
{
return crm_strdup_printf("%f", a_float);
}
static inline char *
pcmk__ttoa(time_t epoch_time)
{
return crm_strdup_printf("%lld", (long long) epoch_time);
}
// note this returns const not allocated
static inline const char *
pcmk__btoa(bool condition)
{
return condition? "true" : "false";
}
#endif /* PCMK__STRINGS_INTERNAL__H */
diff --git a/include/crm/common/util.h b/include/crm/common/util.h
index 24a93c92f6..5bc9eee8ef 100644
--- a/include/crm/common/util.h
+++ b/include/crm/common/util.h
@@ -1,129 +1,131 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_UTIL__H
# define PCMK__CRM_COMMON_UTIL__H
# include <sys/types.h> // gid_t, mode_t, size_t, time_t, uid_t
# include <stdlib.h>
# include <stdbool.h>
# include <stdint.h> // uint32_t
# include <limits.h>
# include <signal.h>
# include <glib.h>
# include <crm/common/acl.h>
# include <crm/common/actions.h>
# include <crm/common/agents.h>
# include <crm/common/results.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief Utility functions
* \ingroup core
*/
# define ONLINESTATUS "online" // Status of an online client
# define OFFLINESTATUS "offline" // Status of an offline client
/* public node attribute functions (from attrd_client.c) */
char *pcmk_promotion_score_name(const char *rsc_id);
/* public Pacemaker Remote functions (from remote.c) */
int crm_default_remote_port(void);
/* public score-related functions (from scores.c) */
const char *pcmk_readable_score(int score);
int char2score(const char *score);
int pcmk__add_scores(int score1, int score2);
/* public string functions (from strings.c) */
gboolean crm_is_true(const char *s);
int crm_str_to_boolean(const char *s, int *ret);
long long crm_get_msec(const char *input);
char * crm_strip_trailing_newline(char *str);
char *crm_strdup_printf(char const *format, ...) G_GNUC_PRINTF(1, 2);
+int pcmk_parse_interval_spec(const char *input, guint *result_ms);
+
int compare_version(const char *version1, const char *version2);
/* coverity[+kill] */
void crm_abort(const char *file, const char *function, int line,
const char *condition, gboolean do_core, gboolean do_fork);
/*!
* \brief Check whether any of specified flags are set in a flag group
*
* \param[in] flag_group The flag group being examined
* \param[in] flags_to_check Which flags in flag_group should be checked
*
* \return true if \p flags_to_check is nonzero and any of its flags are set in
* \p flag_group, or false otherwise
*/
static inline bool
pcmk_any_flags_set(uint64_t flag_group, uint64_t flags_to_check)
{
return (flag_group & flags_to_check) != 0;
}
/*!
* \brief Check whether all of specified flags are set in a flag group
*
* \param[in] flag_group The flag group being examined
* \param[in] flags_to_check Which flags in flag_group should be checked
*
* \return true if \p flags_to_check is zero or all of its flags are set in
* \p flag_group, or false otherwise
*/
static inline bool
pcmk_all_flags_set(uint64_t flag_group, uint64_t flags_to_check)
{
return (flag_group & flags_to_check) == flags_to_check;
}
/*!
* \brief Convenience alias for pcmk_all_flags_set(), to check single flag
*/
#define pcmk_is_set(g, f) pcmk_all_flags_set((g), (f))
char *crm_meta_name(const char *field);
const char *crm_meta_value(GHashTable * hash, const char *field);
char *crm_md5sum(const char *buffer);
char *crm_generate_uuid(void);
// This belongs in ipc.h but is here for backward compatibility
bool crm_is_daemon_name(const char *name);
int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid);
int pcmk_daemon_user(uid_t *uid, gid_t *gid);
#ifdef HAVE_GNUTLS_GNUTLS_H
void crm_gnutls_global_init(void);
#endif
char *pcmk_hostname(void);
bool pcmk_str_is_infinity(const char *s);
bool pcmk_str_is_minus_infinity(const char *s);
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/common/util_compat.h>
#endif
#ifdef __cplusplus
}
#endif
#endif
diff --git a/include/crm/common/util_compat.h b/include/crm/common/util_compat.h
index 8d988f516e..c3ab998eed 100644
--- a/include/crm/common/util_compat.h
+++ b/include/crm/common/util_compat.h
@@ -1,174 +1,174 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_UTIL_COMPAT__H
# define PCMK__CRM_COMMON_UTIL_COMPAT__H
# include <glib.h>
# include <libxml/tree.h>
# include <crm/common/util.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief Deprecated Pacemaker utilities
* \ingroup core
* \deprecated Do not include this header directly. The utilities in this
* header, and the header itself, will be removed in a future
* release.
*/
//! \deprecated Do not use
#define crm_get_interval crm_parse_interval_spec
//! \deprecated Do not use
#define CRM_DEFAULT_OP_TIMEOUT_S "20s"
//! \deprecated Use !pcmk_is_set() or !pcmk_all_flags_set() instead
static inline gboolean
is_not_set(long long word, long long bit)
{
return ((word & bit) == 0);
}
//! \deprecated Use pcmk_is_set() or pcmk_all_flags_set() instead
static inline gboolean
is_set(long long word, long long bit)
{
return ((word & bit) == bit);
}
//! \deprecated Use pcmk_any_flags_set() instead
static inline gboolean
is_set_any(long long word, long long bit)
{
return ((word & bit) != 0);
}
//! \deprecated Use strcmp() or strcasecmp() instead
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case);
//! \deprecated Use strcmp() instead
gboolean safe_str_neq(const char *a, const char *b);
//! \deprecated Use strcasecmp() instead
#define safe_str_eq(a, b) crm_str_eq(a, b, FALSE)
//! \deprecated Use snprintf() instead
char *crm_itoa_stack(int an_int, char *buf, size_t len);
//! \deprecated Use sscanf() instead
int pcmk_scan_nvpair(const char *input, char **name, char **value);
//! \deprecated Use a standard printf()-style function instead
char *pcmk_format_nvpair(const char *name, const char *value,
const char *units);
//! \deprecated Use \c crm_xml_add() or \c xml_remove_prop() instead
const char *crm_xml_replace(xmlNode *node, const char *name, const char *value);
//! \deprecated Use a standard printf()-style function instead
char *pcmk_format_named_time(const char *name, time_t epoch_time);
//! \deprecated Use strtoll() instead
long long crm_parse_ll(const char *text, const char *default_text);
//! \deprecated Use strtoll() instead
int crm_parse_int(const char *text, const char *default_text);
//! \deprecated Use strtoll() instead
# define crm_atoi(text, default_text) crm_parse_int(text, default_text)
//! \deprecated Use g_str_hash() instead
guint g_str_hash_traditional(gconstpointer v);
//! \deprecated Use g_str_hash() instead
#define crm_str_hash g_str_hash_traditional
//! \deprecated Do not use Pacemaker for generic string comparison
gboolean crm_strcase_equal(gconstpointer a, gconstpointer b);
//! \deprecated Do not use Pacemaker for generic string manipulation
guint crm_strcase_hash(gconstpointer v);
//! \deprecated Use g_hash_table_new_full() instead
static inline GHashTable *
crm_str_table_new(void)
{
return g_hash_table_new_full(crm_str_hash, g_str_equal, free, free);
}
//! \deprecated Use g_hash_table_new_full() instead
static inline GHashTable *
crm_strcase_table_new(void)
{
return g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal,
free, free);
}
//! \deprecated Do not use Pacemaker for generic hash table manipulation
GHashTable *crm_str_table_dup(GHashTable *old_table);
//! \deprecated Use g_hash_able_size() instead
static inline guint
crm_hash_table_size(GHashTable *hashtable)
{
if (hashtable == NULL) {
return 0;
}
return g_hash_table_size(hashtable);
}
//! \deprecated Don't use Pacemaker for string manipulation
char *crm_strip_trailing_newline(char *str);
//! \deprecated Don't use Pacemaker for string manipulation
int pcmk_numeric_strcasecmp(const char *s1, const char *s2);
//! \deprecated Don't use Pacemaker for string manipulation
static inline char *
crm_itoa(int an_int)
{
return crm_strdup_printf("%d", an_int);
}
//! \deprecated Don't use Pacemaker for string manipulation
static inline char *
crm_ftoa(double a_float)
{
return crm_strdup_printf("%f", a_float);
}
//! \deprecated Don't use Pacemaker for string manipulation
static inline char *
crm_ttoa(time_t epoch_time)
{
return crm_strdup_printf("%lld", (long long) epoch_time);
}
//! \deprecated Do not use Pacemaker libraries for generic I/O
void crm_build_path(const char *path_c, mode_t mode);
//! \deprecated Use pcmk_readable_score() instead
char *score2char(int score);
//! \deprecated Use pcmk_readable_score() instead
char *score2char_stack(int score, char *buf, size_t len);
-//! \deprecated Do not use
+//! \deprecated Use \c pcmk_parse_interval_spec() instead
guint crm_parse_interval_spec(const char *input);
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_COMMON_UTIL_COMPAT__H
diff --git a/lib/common/options.c b/lib/common/options.c
index 2a53c71a34..6ce92b31c5 100644
--- a/lib/common/options.c
+++ b/lib/common/options.c
@@ -1,580 +1,580 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <crm_internal.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
void
pcmk__cli_help(char cmd)
{
if (cmd == 'v' || cmd == '$') {
printf("Pacemaker %s\n", PACEMAKER_VERSION);
printf("Written by Andrew Beekhof and "
"the Pacemaker project contributors\n");
} else if (cmd == '!') {
printf("Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES);
}
crm_exit(CRM_EX_OK);
while(1); // above does not return
}
/*
* Environment variable option handling
*/
/*!
* \internal
* \brief Get the value of a Pacemaker environment variable option
*
* If an environment variable option is set, with either a PCMK_ or (for
* backward compatibility) HA_ prefix, log and return the value.
*
* \param[in] option Environment variable name (without prefix)
*
* \return Value of environment variable option, or NULL in case of
* option name too long or value not found
*/
const char *
pcmk__env_option(const char *option)
{
const char *const prefixes[] = {"PCMK_", "HA_"};
char env_name[NAME_MAX];
const char *value = NULL;
CRM_CHECK(!pcmk__str_empty(option), return NULL);
for (int i = 0; i < PCMK__NELEM(prefixes); i++) {
int rv = snprintf(env_name, NAME_MAX, "%s%s", prefixes[i], option);
if (rv < 0) {
crm_err("Failed to write %s%s to buffer: %s", prefixes[i], option,
strerror(errno));
return NULL;
}
if (rv >= sizeof(env_name)) {
crm_trace("\"%s%s\" is too long", prefixes[i], option);
continue;
}
value = getenv(env_name);
if (value != NULL) {
crm_trace("Found %s = %s", env_name, value);
return value;
}
}
crm_trace("Nothing found for %s", option);
return NULL;
}
/*!
* \brief Set or unset a Pacemaker environment variable option
*
* Set an environment variable option with a \c "PCMK_" prefix and optionally
* an \c "HA_" prefix for backward compatibility.
*
* \param[in] option Environment variable name (without prefix)
* \param[in] value New value (or NULL to unset)
* \param[in] compat If false and \p value is not \c NULL, set only
* \c "PCMK_<option>"; otherwise, set (or unset) both
* \c "PCMK_<option>" and \c "HA_<option>"
*
* \note \p compat is ignored when \p value is \c NULL. A \c NULL \p value
* means we're unsetting \p option. \c pcmk__get_env_option() checks for
* both prefixes, so we want to clear them both.
*/
void
pcmk__set_env_option(const char *option, const char *value, bool compat)
{
// @COMPAT Drop support for "HA_" options eventually
const char *const prefixes[] = {"PCMK_", "HA_"};
char env_name[NAME_MAX];
CRM_CHECK(!pcmk__str_empty(option) && (strchr(option, '=') == NULL),
return);
for (int i = 0; i < PCMK__NELEM(prefixes); i++) {
int rv = snprintf(env_name, NAME_MAX, "%s%s", prefixes[i], option);
if (rv < 0) {
crm_err("Failed to write %s%s to buffer: %s", prefixes[i], option,
strerror(errno));
return;
}
if (rv >= sizeof(env_name)) {
crm_trace("\"%s%s\" is too long", prefixes[i], option);
continue;
}
if (value != NULL) {
crm_trace("Setting %s to %s", env_name, value);
rv = setenv(env_name, value, 1);
} else {
crm_trace("Unsetting %s", env_name);
rv = unsetenv(env_name);
}
if (rv < 0) {
crm_err("Failed to %sset %s: %s", (value != NULL)? "" : "un",
env_name, strerror(errno));
}
if (!compat && (value != NULL)) {
// For set, don't proceed to HA_<option> unless compat is enabled
break;
}
}
}
/*!
* \internal
* \brief Check whether Pacemaker environment variable option is enabled
*
* Given a Pacemaker environment variable option that can either be boolean
* or a list of daemon names, return true if the option is enabled for a given
* daemon.
*
* \param[in] daemon Daemon name (can be NULL)
* \param[in] option Pacemaker environment variable name
*
* \return true if variable is enabled for daemon, otherwise false
*/
bool
pcmk__env_option_enabled(const char *daemon, const char *option)
{
const char *value = pcmk__env_option(option);
return (value != NULL)
&& (crm_is_true(value)
|| ((daemon != NULL) && (strstr(value, daemon) != NULL)));
}
/*
* Cluster option handling
*/
/*!
* \internal
* \brief Check whether a string represents a valid interval specification
*
* \param[in] value String to validate
*
* \return \c true if \p value is a valid interval specification, or \c false
* otherwise
*/
bool
pcmk__valid_interval_spec(const char *value)
{
- return pcmk__parse_interval_spec(value, NULL) == pcmk_rc_ok;
+ return pcmk_parse_interval_spec(value, NULL) == pcmk_rc_ok;
}
/*!
* \internal
* \brief Check whether a string represents a valid boolean value
*
* \param[in] value String to validate
*
* \return \c true if \p value is a valid boolean value, or \c false otherwise
*/
bool
pcmk__valid_boolean(const char *value)
{
return crm_str_to_boolean(value, NULL) == 1;
}
/*!
* \internal
* \brief Check whether a string represents a valid integer
*
* Valid values include \c INFINITY, \c -INFINITY, and all 64-bit integers.
*
* \param[in] value String to validate
*
* \return \c true if \p value is a valid integer, or \c false otherwise
*/
bool
pcmk__valid_int(const char *value)
{
return (value != NULL)
&& (pcmk_str_is_infinity(value)
|| pcmk_str_is_minus_infinity(value)
|| (pcmk__scan_ll(value, NULL, 0LL) == pcmk_rc_ok));
}
/*!
* \internal
* \brief Check whether a string represents a valid positive integer
*
* Valid values include \c INFINITY and all 64-bit positive integers.
*
* \param[in] value String to validate
*
* \return \c true if \p value is a valid positive integer, or \c false
* otherwise
*/
bool
pcmk__valid_positive_int(const char *value)
{
long long num = 0LL;
return pcmk_str_is_infinity(value)
|| ((pcmk__scan_ll(value, &num, 0LL) == pcmk_rc_ok)
&& (num > 0));
}
/*!
* \internal
* \brief Check whether a string represents a valid
* \c PCMK__OPT_NO_QUORUM_POLICY value
*
* \param[in] value String to validate
*
* \return \c true if \p value is a valid \c PCMK__OPT_NO_QUORUM_POLICY value,
* or \c false otherwise
*/
bool
pcmk__valid_no_quorum_policy(const char *value)
{
return pcmk__strcase_any_of(value,
"stop", "freeze", "ignore", "demote", "suicide",
NULL);
}
/*!
* \internal
* \brief Check whether a string represents a valid percentage
*
* Valid values include long integers, with an optional trailing string
* beginning with '%'.
*
* \param[in] value String to validate
*
* \return \c true if \p value is a valid percentage value, or \c false
* otherwise
*/
bool
pcmk__valid_percentage(const char *value)
{
char *end = NULL;
float number = strtof(value, &end);
return ((end == NULL) || (end[0] == '%')) && (number >= 0);
}
/*!
* \internal
* \brief Check whether a string represents a valid script
*
* Valid values include \c /dev/null and paths of executable regular files
*
* \param[in] value String to validate
*
* \return \c true if \p value is a valid script, or \c false otherwise
*/
bool
pcmk__valid_script(const char *value)
{
struct stat st;
if (pcmk__str_eq(value, "/dev/null", pcmk__str_none)) {
return true;
}
if (stat(value, &st) != 0) {
crm_err("Script %s does not exist", value);
return false;
}
if (S_ISREG(st.st_mode) == 0) {
crm_err("Script %s is not a regular file", value);
return false;
}
if ((st.st_mode & (S_IXUSR | S_IXGRP)) == 0) {
crm_err("Script %s is not executable", value);
return false;
}
return true;
}
/*!
* \internal
* \brief Check a table of configured options for a particular option
*
* \param[in,out] options Name/value pairs for configured options
* \param[in] validate If not NULL, validator function for option value
* \param[in] name Option name to look for
* \param[in] old_name Alternative option name to look for
* \param[in] def_value Default to use if option not configured
*
* \return Option value (from supplied options table or default value)
*/
static const char *
cluster_option_value(GHashTable *options, bool (*validate)(const char *),
const char *name, const char *old_name,
const char *def_value)
{
const char *value = NULL;
char *new_value = NULL;
CRM_ASSERT(name != NULL);
if (options) {
value = g_hash_table_lookup(options, name);
if ((value == NULL) && old_name) {
value = g_hash_table_lookup(options, old_name);
if (value != NULL) {
pcmk__config_warn("Support for legacy name '%s' for cluster "
"option '%s' is deprecated and will be "
"removed in a future release",
old_name, name);
// Inserting copy with current name ensures we only warn once
new_value = strdup(value);
g_hash_table_insert(options, strdup(name), new_value);
value = new_value;
}
}
if (value && validate && (validate(value) == FALSE)) {
pcmk__config_err("Using default value for cluster option '%s' "
"because '%s' is invalid", name, value);
value = NULL;
}
if (value) {
return value;
}
}
// No value found, use default
value = def_value;
if (value == NULL) {
crm_trace("No value or default provided for cluster option '%s'",
name);
return NULL;
}
if (validate) {
CRM_CHECK(validate(value) != FALSE,
crm_err("Bug: default value for cluster option '%s' is invalid", name);
return NULL);
}
crm_trace("Using default value '%s' for cluster option '%s'",
value, name);
if (options) {
new_value = strdup(value);
g_hash_table_insert(options, strdup(name), new_value);
value = new_value;
}
return value;
}
/*!
* \internal
* \brief Get the value of a cluster option
*
* \param[in,out] options Name/value pairs for configured options
* \param[in] option_list Possible cluster options
* \param[in] len Length of \p option_list
* \param[in] name (Primary) option name to look for
*
* \return Option value
*/
const char *
pcmk__cluster_option(GHashTable *options,
const pcmk__cluster_option_t *option_list,
int len, const char *name)
{
const char *value = NULL;
for (int lpc = 0; lpc < len; lpc++) {
if (pcmk__str_eq(name, option_list[lpc].name, pcmk__str_casei)) {
value = cluster_option_value(options, option_list[lpc].is_valid,
option_list[lpc].name,
option_list[lpc].alt_name,
option_list[lpc].default_value);
return value;
}
}
CRM_CHECK(FALSE, crm_err("Bug: looking for unknown option '%s'", name));
return NULL;
}
/*!
* \internal
* \brief Add a description element to a meta-data string
*
* \param[in,out] s Meta-data string to add to
* \param[in] tag Name of element to add ("longdesc" or "shortdesc")
* \param[in] desc Textual description to add
* \param[in] values If not \p NULL, the allowed values for the parameter
* \param[in] spaces If not \p NULL, spaces to insert at the beginning of
* each line
*/
static void
add_desc(GString *s, const char *tag, const char *desc, const char *values,
const char *spaces)
{
char *escaped_en = crm_xml_escape(desc);
if (spaces != NULL) {
g_string_append(s, spaces);
}
pcmk__g_strcat(s, "<", tag, " lang=\"en\">", escaped_en, NULL);
if (values != NULL) {
// Append a period if desc doesn't end in "." or ".)"
if (!pcmk__str_empty(escaped_en)
&& (s->str[s->len - 1] != '.')
&& ((s->str[s->len - 2] != '.') || (s->str[s->len - 1] != ')'))) {
g_string_append_c(s, '.');
}
pcmk__g_strcat(s, " Allowed values: ", values, NULL);
g_string_append_c(s, '.');
}
pcmk__g_strcat(s, "</", tag, ">\n", NULL);
#ifdef ENABLE_NLS
{
static const char *locale = NULL;
char *localized = crm_xml_escape(_(desc));
if (strcmp(escaped_en, localized) != 0) {
if (locale == NULL) {
locale = strtok(setlocale(LC_ALL, NULL), "_");
}
if (spaces != NULL) {
g_string_append(s, spaces);
}
pcmk__g_strcat(s, "<", tag, " lang=\"", locale, "\">", localized,
NULL);
if (values != NULL) {
pcmk__g_strcat(s, _(" Allowed values: "), _(values), NULL);
}
pcmk__g_strcat(s, "</", tag, ">\n", NULL);
}
free(localized);
}
#endif
free(escaped_en);
}
gchar *
pcmk__format_option_metadata(const char *name, const char *desc_short,
const char *desc_long,
pcmk__cluster_option_t *option_list, int len)
{
/* big enough to hold "pacemaker-schedulerd metadata" output */
GString *s = g_string_sized_new(13000);
pcmk__g_strcat(s,
"<?xml version=\"1.0\"?>\n"
"<resource-agent name=\"", name, "\" "
"version=\"" PACEMAKER_VERSION "\">\n"
" <version>" PCMK_OCF_VERSION "</version>\n", NULL);
add_desc(s, "longdesc", desc_long, NULL, " ");
add_desc(s, "shortdesc", desc_short, NULL, " ");
g_string_append(s, " <parameters>\n");
for (int lpc = 0; lpc < len; lpc++) {
const char *opt_name = option_list[lpc].name;
const char *opt_type = option_list[lpc].type;
const char *opt_values = option_list[lpc].values;
const char *opt_default = option_list[lpc].default_value;
const char *opt_desc_short = option_list[lpc].description_short;
const char *opt_desc_long = option_list[lpc].description_long;
// The standard requires long and short parameter descriptions
CRM_ASSERT((opt_desc_short != NULL) || (opt_desc_long != NULL));
if (opt_desc_short == NULL) {
opt_desc_short = opt_desc_long;
} else if (opt_desc_long == NULL) {
opt_desc_long = opt_desc_short;
}
// The standard requires a parameter type
CRM_ASSERT(opt_type != NULL);
pcmk__g_strcat(s, " <parameter name=\"", opt_name, "\">\n", NULL);
add_desc(s, "longdesc", opt_desc_long, opt_values, " ");
add_desc(s, "shortdesc", opt_desc_short, NULL, " ");
pcmk__g_strcat(s, " <content type=\"", opt_type, "\"", NULL);
if (opt_default != NULL) {
pcmk__g_strcat(s, " default=\"", opt_default, "\"", NULL);
}
if ((opt_values != NULL) && (strcmp(opt_type, "select") == 0)) {
char *str = strdup(opt_values);
const char *delim = ", ";
char *ptr = strtok(str, delim);
g_string_append(s, ">\n");
while (ptr != NULL) {
pcmk__g_strcat(s, " <option value=\"", ptr, "\" />\n",
NULL);
ptr = strtok(NULL, delim);
}
g_string_append_printf(s, " </content>\n");
free(str);
} else {
g_string_append(s, "/>\n");
}
g_string_append(s, " </parameter>\n");
}
g_string_append(s, " </parameters>\n</resource-agent>\n");
return g_string_free(s, FALSE);
}
void
pcmk__validate_cluster_options(GHashTable *options,
pcmk__cluster_option_t *option_list, int len)
{
for (int lpc = 0; lpc < len; lpc++) {
cluster_option_value(options, option_list[lpc].is_valid,
option_list[lpc].name,
option_list[lpc].alt_name,
option_list[lpc].default_value);
}
}
diff --git a/lib/common/strings.c b/lib/common/strings.c
index 7f36149230..62b836c571 100644
--- a/lib/common/strings.c
+++ b/lib/common/strings.c
@@ -1,1421 +1,1420 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include "crm/common/results.h"
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <regex.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <float.h> // DBL_MIN
#include <limits.h>
#include <bzlib.h>
#include <sys/types.h>
/*!
* \internal
* \brief Scan a long long integer from a string
*
* \param[in] text String to scan
* \param[out] result If not NULL, where to store scanned value
* \param[in] default_value Value to use if text is NULL or invalid
* \param[out] end_text If not NULL, where to store pointer to first
* non-integer character
*
* \return Standard Pacemaker return code (\c pcmk_rc_ok on success,
* \c EINVAL on failed string conversion due to invalid input,
* or \c EOVERFLOW on arithmetic overflow)
* \note Sets \c errno on error
*/
static int
scan_ll(const char *text, long long *result, long long default_value,
char **end_text)
{
long long local_result = default_value;
char *local_end_text = NULL;
int rc = pcmk_rc_ok;
errno = 0;
if (text != NULL) {
local_result = strtoll(text, &local_end_text, 10);
if (errno == ERANGE) {
rc = EOVERFLOW;
crm_warn("Integer parsed from '%s' was clipped to %lld",
text, local_result);
} else if (errno != 0) {
rc = errno;
local_result = default_value;
crm_warn("Could not parse integer from '%s' (using %lld instead): "
"%s", text, default_value, pcmk_rc_str(rc));
} else if (local_end_text == text) {
rc = EINVAL;
local_result = default_value;
crm_warn("Could not parse integer from '%s' (using %lld instead): "
"No digits found", text, default_value);
}
if ((end_text == NULL) && !pcmk__str_empty(local_end_text)) {
crm_warn("Characters left over after parsing '%s': '%s'",
text, local_end_text);
}
errno = rc;
}
if (end_text != NULL) {
*end_text = local_end_text;
}
if (result != NULL) {
*result = local_result;
}
return rc;
}
/*!
* \internal
* \brief Scan a long long integer value from a string
*
* \param[in] text The string to scan (may be NULL)
* \param[out] result Where to store result (or NULL to ignore)
* \param[in] default_value Value to use if text is NULL or invalid
*
* \return Standard Pacemaker return code
*/
int
pcmk__scan_ll(const char *text, long long *result, long long default_value)
{
long long local_result = default_value;
int rc = pcmk_rc_ok;
if (text != NULL) {
rc = scan_ll(text, &local_result, default_value, NULL);
if (rc != pcmk_rc_ok) {
local_result = default_value;
}
}
if (result != NULL) {
*result = local_result;
}
return rc;
}
/*!
* \internal
* \brief Scan an integer value from a string, constrained to a minimum
*
* \param[in] text The string to scan (may be NULL)
* \param[out] result Where to store result (or NULL to ignore)
* \param[in] minimum Value to use as default and minimum
*
* \return Standard Pacemaker return code
* \note If the value is larger than the maximum integer, EOVERFLOW will be
* returned and \p result will be set to the maximum integer.
*/
int
pcmk__scan_min_int(const char *text, int *result, int minimum)
{
int rc;
long long result_ll;
rc = pcmk__scan_ll(text, &result_ll, (long long) minimum);
if (result_ll < (long long) minimum) {
crm_warn("Clipped '%s' to minimum acceptable value %d", text, minimum);
result_ll = (long long) minimum;
} else if (result_ll > INT_MAX) {
crm_warn("Clipped '%s' to maximum integer %d", text, INT_MAX);
result_ll = (long long) INT_MAX;
rc = EOVERFLOW;
}
if (result != NULL) {
*result = (int) result_ll;
}
return rc;
}
/*!
* \internal
* \brief Scan a TCP port number from a string
*
* \param[in] text The string to scan
* \param[out] port Where to store result (or NULL to ignore)
*
* \return Standard Pacemaker return code
* \note \p port will be -1 if \p text is NULL or invalid
*/
int
pcmk__scan_port(const char *text, int *port)
{
long long port_ll;
int rc = pcmk__scan_ll(text, &port_ll, -1LL);
if ((text != NULL) && (rc == pcmk_rc_ok) // wasn't default or invalid
&& ((port_ll < 0LL) || (port_ll > 65535LL))) {
crm_warn("Ignoring port specification '%s' "
"not in valid range (0-65535)", text);
rc = (port_ll < 0LL)? pcmk_rc_before_range : pcmk_rc_after_range;
port_ll = -1LL;
}
if (port != NULL) {
*port = (int) port_ll;
}
return rc;
}
/*!
* \internal
* \brief Scan a double-precision floating-point value from a string
*
* \param[in] text The string to parse
* \param[out] result Parsed value on success, or
* \c PCMK__PARSE_DBL_DEFAULT on error
* \param[in] default_text Default string to parse if \p text is
* \c NULL
* \param[out] end_text If not \c NULL, where to store a pointer
* to the position immediately after the
* value
*
* \return Standard Pacemaker return code (\c pcmk_rc_ok on success,
* \c EINVAL on failed string conversion due to invalid input,
* \c EOVERFLOW on arithmetic overflow, \c pcmk_rc_underflow
* on arithmetic underflow, or \c errno from \c strtod() on
* other parse errors)
*/
int
pcmk__scan_double(const char *text, double *result, const char *default_text,
char **end_text)
{
int rc = pcmk_rc_ok;
char *local_end_text = NULL;
CRM_ASSERT(result != NULL);
*result = PCMK__PARSE_DBL_DEFAULT;
text = (text != NULL) ? text : default_text;
if (text == NULL) {
rc = EINVAL;
crm_debug("No text and no default conversion value supplied");
} else {
errno = 0;
*result = strtod(text, &local_end_text);
if (errno == ERANGE) {
/*
* Overflow: strtod() returns +/- HUGE_VAL and sets errno to
* ERANGE
*
* Underflow: strtod() returns "a value whose magnitude is
* no greater than the smallest normalized
* positive" double. Whether ERANGE is set is
* implementation-defined.
*/
const char *over_under;
if (QB_ABS(*result) > DBL_MIN) {
rc = EOVERFLOW;
over_under = "over";
} else {
rc = pcmk_rc_underflow;
over_under = "under";
}
crm_debug("Floating-point value parsed from '%s' would %sflow "
"(using %g instead)", text, over_under, *result);
} else if (errno != 0) {
rc = errno;
// strtod() set *result = 0 on parse failure
*result = PCMK__PARSE_DBL_DEFAULT;
crm_debug("Could not parse floating-point value from '%s' (using "
"%.1f instead): %s", text, PCMK__PARSE_DBL_DEFAULT,
pcmk_rc_str(rc));
} else if (local_end_text == text) {
// errno == 0, but nothing was parsed
rc = EINVAL;
*result = PCMK__PARSE_DBL_DEFAULT;
crm_debug("Could not parse floating-point value from '%s' (using "
"%.1f instead): No digits found", text,
PCMK__PARSE_DBL_DEFAULT);
} else if (QB_ABS(*result) <= DBL_MIN) {
/*
* errno == 0 and text was parsed, but value might have
* underflowed.
*
* ERANGE might not be set for underflow. Check magnitude
* of *result, but also make sure the input number is not
* actually zero (0 <= DBL_MIN is not underflow).
*
* This check must come last. A parse failure in strtod()
* also sets *result == 0, so a parse failure would match
* this test condition prematurely.
*/
for (const char *p = text; p != local_end_text; p++) {
if (strchr("0.eE", *p) == NULL) {
rc = pcmk_rc_underflow;
crm_debug("Floating-point value parsed from '%s' would "
"underflow (using %g instead)", text, *result);
break;
}
}
} else {
crm_trace("Floating-point value parsed successfully from "
"'%s': %g", text, *result);
}
if ((end_text == NULL) && !pcmk__str_empty(local_end_text)) {
crm_debug("Characters left over after parsing '%s': '%s'",
text, local_end_text);
}
}
if (end_text != NULL) {
*end_text = local_end_text;
}
return rc;
}
/*!
* \internal
* \brief Parse a guint from a string stored in a hash table
*
* \param[in] table Hash table to search
* \param[in] key Hash table key to use to retrieve string
* \param[in] default_val What to use if key has no entry in table
* \param[out] result If not NULL, where to store parsed integer
*
* \return Standard Pacemaker return code
*/
int
pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val,
guint *result)
{
const char *value;
long long value_ll;
int rc = pcmk_rc_ok;
CRM_CHECK((table != NULL) && (key != NULL), return EINVAL);
if (result != NULL) {
*result = default_val;
}
value = g_hash_table_lookup(table, key);
if (value == NULL) {
return pcmk_rc_ok;
}
rc = pcmk__scan_ll(value, &value_ll, 0LL);
if (rc != pcmk_rc_ok) {
return rc;
}
if ((value_ll < 0) || (value_ll > G_MAXUINT)) {
crm_warn("Could not parse non-negative integer from %s", value);
return ERANGE;
}
if (result != NULL) {
*result = (guint) value_ll;
}
return pcmk_rc_ok;
}
#ifndef NUMCHARS
# define NUMCHARS "0123456789."
#endif
#ifndef WHITESPACE
# define WHITESPACE " \t\n\r\f"
#endif
/*!
* \brief Parse a time+units string and return milliseconds equivalent
*
* \param[in] input String with a nonnegative number and optional unit
* (optionally with whitespace before and/or after the
* number). If missing, the unit defaults to seconds.
*
* \return Milliseconds corresponding to string expression, or
* \c PCMK__PARSE_INT_DEFAULT on error
*/
long long
crm_get_msec(const char *input)
{
const char *num_start = NULL;
const char *units;
long long multiplier = 1000;
long long divisor = 1;
long long msec = PCMK__PARSE_INT_DEFAULT;
size_t num_len = 0;
char *end_text = NULL;
if (input == NULL) {
return PCMK__PARSE_INT_DEFAULT;
}
num_start = input + strspn(input, WHITESPACE);
num_len = strspn(num_start, NUMCHARS);
if (num_len < 1) {
return PCMK__PARSE_INT_DEFAULT;
}
units = num_start + num_len;
units += strspn(units, WHITESPACE);
if (!strncasecmp(units, "ms", 2) || !strncasecmp(units, "msec", 4)) {
multiplier = 1;
divisor = 1;
} else if (!strncasecmp(units, "us", 2) || !strncasecmp(units, "usec", 4)) {
multiplier = 1;
divisor = 1000;
} else if (!strncasecmp(units, "s", 1) || !strncasecmp(units, "sec", 3)) {
multiplier = 1000;
divisor = 1;
} else if (!strncasecmp(units, "m", 1) || !strncasecmp(units, "min", 3)) {
multiplier = 60 * 1000;
divisor = 1;
} else if (!strncasecmp(units, "h", 1) || !strncasecmp(units, "hr", 2)) {
multiplier = 60 * 60 * 1000;
divisor = 1;
} else if ((*units != '\0') && (*units != '\n') && (*units != '\r')) {
return PCMK__PARSE_INT_DEFAULT;
}
scan_ll(num_start, &msec, PCMK__PARSE_INT_DEFAULT, &end_text);
if (msec > (LLONG_MAX / multiplier)) {
// Arithmetics overflow while multiplier/divisor mutually exclusive
return LLONG_MAX;
}
msec *= multiplier;
msec /= divisor;
return msec;
}
/*!
- * \internal
* \brief Parse milliseconds from a Pacemaker interval specification
*
* \param[in] input Pacemaker time interval specification (a bare number
* of seconds; a number with a unit, optionally with
* whitespace before and/or after the number; or an ISO
* 8601 duration)
* \param[out] result_ms Where to store milliseconds equivalent of \p input on
* success (limited to the range of an unsigned integer),
* or 0 if \p input is \c NULL or invalid
*
* \return Standard Pacemaker return code (specifically, \c pcmk_rc_ok if
* \p input is valid or \c NULL, and \c EINVAL otherwise)
*/
int
-pcmk__parse_interval_spec(const char *input, guint *result_ms)
+pcmk_parse_interval_spec(const char *input, guint *result_ms)
{
long long msec = PCMK__PARSE_INT_DEFAULT;
int rc = pcmk_rc_ok;
if (input == NULL) {
msec = 0;
goto done;
}
if (input[0] == 'P') {
crm_time_t *period_s = crm_time_parse_duration(input);
if (period_s != NULL) {
msec = 1000 * crm_time_get_seconds(period_s);
crm_time_free(period_s);
}
} else {
msec = crm_get_msec(input);
}
if (msec == PCMK__PARSE_INT_DEFAULT) {
crm_warn("Using 0 instead of invalid interval specification '%s'",
input);
msec = 0;
rc = EINVAL;
}
done:
if (result_ms != NULL) {
*result_ms = (msec >= G_MAXUINT)? G_MAXUINT : (guint) msec;
}
return rc;
}
gboolean
crm_is_true(const char *s)
{
gboolean ret = FALSE;
return (crm_str_to_boolean(s, &ret) < 0)? FALSE : ret;
}
int
crm_str_to_boolean(const char *s, int *ret)
{
if (s == NULL) {
return -1;
} else if (strcasecmp(s, "true") == 0
|| strcasecmp(s, "on") == 0
|| strcasecmp(s, "yes") == 0 || strcasecmp(s, "y") == 0 || strcasecmp(s, "1") == 0) {
if (ret != NULL) {
*ret = TRUE;
}
return 1;
} else if (strcasecmp(s, "false") == 0
|| strcasecmp(s, "off") == 0
|| strcasecmp(s, "no") == 0 || strcasecmp(s, "n") == 0 || strcasecmp(s, "0") == 0) {
if (ret != NULL) {
*ret = FALSE;
}
return 1;
}
return -1;
}
/*!
* \internal
* \brief Replace any trailing newlines in a string with \0's
*
* \param[in,out] str String to trim
*
* \return \p str
*/
char *
pcmk__trim(char *str)
{
int len;
if (str == NULL) {
return str;
}
for (len = strlen(str) - 1; len >= 0 && str[len] == '\n'; len--) {
str[len] = '\0';
}
return str;
}
/*!
* \brief Check whether a string starts with a certain sequence
*
* \param[in] str String to check
* \param[in] prefix Sequence to match against beginning of \p str
*
* \return \c true if \p str begins with match, \c false otherwise
* \note This is equivalent to !strncmp(s, prefix, strlen(prefix))
* but is likely less efficient when prefix is a string literal
* if the compiler optimizes away the strlen() at compile time,
* and more efficient otherwise.
*/
bool
pcmk__starts_with(const char *str, const char *prefix)
{
const char *s = str;
const char *p = prefix;
if (!s || !p) {
return false;
}
while (*s && *p) {
if (*s++ != *p++) {
return false;
}
}
return (*p == 0);
}
static inline bool
ends_with(const char *s, const char *match, bool as_extension)
{
if (pcmk__str_empty(match)) {
return true;
} else if (s == NULL) {
return false;
} else {
size_t slen, mlen;
/* Besides as_extension, we could also check
!strchr(&match[1], match[0]) but that would be inefficient.
*/
if (as_extension) {
s = strrchr(s, match[0]);
return (s == NULL)? false : !strcmp(s, match);
}
mlen = strlen(match);
slen = strlen(s);
return ((slen >= mlen) && !strcmp(s + slen - mlen, match));
}
}
/*!
* \internal
* \brief Check whether a string ends with a certain sequence
*
* \param[in] s String to check
* \param[in] match Sequence to match against end of \p s
*
* \return \c true if \p s ends case-sensitively with match, \c false otherwise
* \note pcmk__ends_with_ext() can be used if the first character of match
* does not recur in match.
*/
bool
pcmk__ends_with(const char *s, const char *match)
{
return ends_with(s, match, false);
}
/*!
* \internal
* \brief Check whether a string ends with a certain "extension"
*
* \param[in] s String to check
* \param[in] match Extension to match against end of \p s, that is,
* its first character must not occur anywhere
* in the rest of that very sequence (example: file
* extension where the last dot is its delimiter,
* e.g., ".html"); incorrect results may be
* returned otherwise.
*
* \return \c true if \p s ends (verbatim, i.e., case sensitively)
* with "extension" designated as \p match (including empty
* string), \c false otherwise
*
* \note Main incentive to prefer this function over \c pcmk__ends_with()
* where possible is the efficiency (at the cost of added
* restriction on \p match as stated; the complexity class
* remains the same, though: BigO(M+N) vs. BigO(M+2N)).
*/
bool
pcmk__ends_with_ext(const char *s, const char *match)
{
return ends_with(s, match, true);
}
/*!
* \internal
* \brief Create a hash of a string suitable for use with GHashTable
*
* \param[in] v String to hash
*
* \return A hash of \p v compatible with g_str_hash() before glib 2.28
* \note glib changed their hash implementation:
*
* https://gitlab.gnome.org/GNOME/glib/commit/354d655ba8a54b754cb5a3efb42767327775696c
*
* Note that the new g_str_hash is presumably a *better* hash (it's actually
* a correct implementation of DJB's hash), but we need to preserve existing
* behaviour, because the hash key ultimately determines the "sort" order
* when iterating through GHashTables, which affects allocation of scores to
* clone instances when iterating through rsc->allowed_nodes. It (somehow)
* also appears to have some minor impact on the ordering of a few
* pseudo_event IDs in the transition graph.
*/
static guint
pcmk__str_hash(gconstpointer v)
{
const signed char *p;
guint32 h = 0;
for (p = v; *p != '\0'; p++)
h = (h << 5) - h + *p;
return h;
}
/*!
* \internal
* \brief Create a hash table with case-sensitive strings as keys
*
* \param[in] key_destroy_func Function to free a key
* \param[in] value_destroy_func Function to free a value
*
* \return Newly allocated hash table
* \note It is the caller's responsibility to free the result, using
* g_hash_table_destroy().
*/
GHashTable *
pcmk__strkey_table(GDestroyNotify key_destroy_func,
GDestroyNotify value_destroy_func)
{
return g_hash_table_new_full(pcmk__str_hash, g_str_equal,
key_destroy_func, value_destroy_func);
}
/* used with hash tables where case does not matter */
static gboolean
pcmk__strcase_equal(gconstpointer a, gconstpointer b)
{
return pcmk__str_eq((const char *)a, (const char *)b, pcmk__str_casei);
}
static guint
pcmk__strcase_hash(gconstpointer v)
{
const signed char *p;
guint32 h = 0;
for (p = v; *p != '\0'; p++)
h = (h << 5) - h + g_ascii_tolower(*p);
return h;
}
/*!
* \internal
* \brief Create a hash table with case-insensitive strings as keys
*
* \param[in] key_destroy_func Function to free a key
* \param[in] value_destroy_func Function to free a value
*
* \return Newly allocated hash table
* \note It is the caller's responsibility to free the result, using
* g_hash_table_destroy().
*/
GHashTable *
pcmk__strikey_table(GDestroyNotify key_destroy_func,
GDestroyNotify value_destroy_func)
{
return g_hash_table_new_full(pcmk__strcase_hash, pcmk__strcase_equal,
key_destroy_func, value_destroy_func);
}
static void
copy_str_table_entry(gpointer key, gpointer value, gpointer user_data)
{
if (key && value && user_data) {
g_hash_table_insert((GHashTable*)user_data, strdup(key), strdup(value));
}
}
/*!
* \internal
* \brief Copy a hash table that uses dynamically allocated strings
*
* \param[in,out] old_table Hash table to duplicate
*
* \return New hash table with copies of everything in \p old_table
* \note This assumes the hash table uses dynamically allocated strings -- that
* is, both the key and value free functions are free().
*/
GHashTable *
pcmk__str_table_dup(GHashTable *old_table)
{
GHashTable *new_table = NULL;
if (old_table) {
new_table = pcmk__strkey_table(free, free);
g_hash_table_foreach(old_table, copy_str_table_entry, new_table);
}
return new_table;
}
/*!
* \internal
* \brief Add a word to a string list of words
*
* \param[in,out] list Pointer to current string list (may not be \p NULL)
* \param[in] init_size \p list will be initialized to at least this size,
* if it needs initialization (if 0, use GLib's default
* initial string size)
* \param[in] word String to add to \p list (\p list will be
* unchanged if this is \p NULL or the empty string)
* \param[in] separator String to separate words in \p list
* (a space will be used if this is NULL)
*
* \note \p word may contain \p separator, though that would be a bad idea if
* the string needs to be parsed later.
*/
void
pcmk__add_separated_word(GString **list, size_t init_size, const char *word,
const char *separator)
{
CRM_ASSERT(list != NULL);
if (pcmk__str_empty(word)) {
return;
}
if (*list == NULL) {
if (init_size > 0) {
*list = g_string_sized_new(init_size);
} else {
*list = g_string_new(NULL);
}
}
if ((*list)->len == 0) {
// Don't add a separator before the first word in the list
separator = "";
} else if (separator == NULL) {
// Default to space-separated
separator = " ";
}
g_string_append(*list, separator);
g_string_append(*list, word);
}
/*!
* \internal
* \brief Compress data
*
* \param[in] data Data to compress
* \param[in] length Number of characters of data to compress
* \param[in] max Maximum size of compressed data (or 0 to estimate)
* \param[out] result Where to store newly allocated compressed result
* \param[out] result_len Where to store actual compressed length of result
*
* \return Standard Pacemaker return code
*/
int
pcmk__compress(const char *data, unsigned int length, unsigned int max,
char **result, unsigned int *result_len)
{
int rc;
char *compressed = NULL;
char *uncompressed = strdup(data);
#ifdef CLOCK_MONOTONIC
struct timespec after_t;
struct timespec before_t;
#endif
if (max == 0) {
max = (length * 1.01) + 601; // Size guaranteed to hold result
}
#ifdef CLOCK_MONOTONIC
clock_gettime(CLOCK_MONOTONIC, &before_t);
#endif
compressed = calloc((size_t) max, sizeof(char));
CRM_ASSERT(compressed);
*result_len = max;
rc = BZ2_bzBuffToBuffCompress(compressed, result_len, uncompressed, length,
CRM_BZ2_BLOCKS, 0, CRM_BZ2_WORK);
rc = pcmk__bzlib2rc(rc);
free(uncompressed);
if (rc != pcmk_rc_ok) {
crm_err("Compression of %d bytes failed: %s " CRM_XS " rc=%d",
length, pcmk_rc_str(rc), rc);
free(compressed);
return rc;
}
#ifdef CLOCK_MONOTONIC
clock_gettime(CLOCK_MONOTONIC, &after_t);
crm_trace("Compressed %d bytes into %d (ratio %d:1) in %.0fms",
length, *result_len, length / (*result_len),
(after_t.tv_sec - before_t.tv_sec) * 1000 +
(after_t.tv_nsec - before_t.tv_nsec) / 1e6);
#else
crm_trace("Compressed %d bytes into %d (ratio %d:1)",
length, *result_len, length / (*result_len));
#endif
*result = compressed;
return pcmk_rc_ok;
}
char *
crm_strdup_printf(char const *format, ...)
{
va_list ap;
int len = 0;
char *string = NULL;
va_start(ap, format);
len = vasprintf (&string, format, ap);
CRM_ASSERT(len > 0);
va_end(ap);
return string;
}
int
pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end)
{
char *remainder = NULL;
int rc = pcmk_rc_ok;
CRM_ASSERT(start != NULL && end != NULL);
*start = PCMK__PARSE_INT_DEFAULT;
*end = PCMK__PARSE_INT_DEFAULT;
crm_trace("Attempting to decode: [%s]", srcstring);
if (pcmk__str_eq(srcstring, "", pcmk__str_null_matches)) {
return ENODATA;
} else if (pcmk__str_eq(srcstring, "-", pcmk__str_none)) {
return pcmk_rc_bad_input;
}
/* String starts with a dash, so this is either a range with
* no beginning or garbage.
* */
if (*srcstring == '-') {
int rc = scan_ll(srcstring+1, end, PCMK__PARSE_INT_DEFAULT, &remainder);
if (rc != pcmk_rc_ok || *remainder != '\0') {
return pcmk_rc_bad_input;
} else {
return pcmk_rc_ok;
}
}
rc = scan_ll(srcstring, start, PCMK__PARSE_INT_DEFAULT, &remainder);
if (rc != pcmk_rc_ok) {
return rc;
}
if (*remainder && *remainder == '-') {
if (*(remainder+1)) {
char *more_remainder = NULL;
int rc = scan_ll(remainder+1, end, PCMK__PARSE_INT_DEFAULT,
&more_remainder);
if (rc != pcmk_rc_ok) {
return rc;
} else if (*more_remainder != '\0') {
return pcmk_rc_bad_input;
}
}
} else if (*remainder && *remainder != '-') {
*start = PCMK__PARSE_INT_DEFAULT;
return pcmk_rc_bad_input;
} else {
/* The input string contained only one number. Set start and end
* to the same value and return pcmk_rc_ok. This gives the caller
* a way to tell this condition apart from a range with no end.
*/
*end = *start;
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Find a string in a list of strings
*
* \note This function takes the same flags and has the same behavior as
* pcmk__str_eq().
*
* \note No matter what input string or flags are provided, an empty
* list will always return FALSE.
*
* \param[in] s String to search for
* \param[in] lst List to search
* \param[in] flags A bitfield of pcmk__str_flags to modify operation
*
* \return \c TRUE if \p s is in \p lst, or \c FALSE otherwise
*/
gboolean
pcmk__str_in_list(const gchar *s, const GList *lst, uint32_t flags)
{
for (const GList *ele = lst; ele != NULL; ele = ele->next) {
if (pcmk__str_eq(s, ele->data, flags)) {
return TRUE;
}
}
return FALSE;
}
static bool
str_any_of(const char *s, va_list args, uint32_t flags)
{
if (s == NULL) {
return pcmk_is_set(flags, pcmk__str_null_matches);
}
while (1) {
const char *ele = va_arg(args, const char *);
if (ele == NULL) {
break;
} else if (pcmk__str_eq(s, ele, flags)) {
return true;
}
}
return false;
}
/*!
* \internal
* \brief Is a string a member of a list of strings?
*
* \param[in] s String to search for in \p ...
* \param[in] ... Strings to compare \p s against. The final string
* must be NULL.
*
* \note The comparison is done case-insensitively. The function name is
* meant to be reminiscent of strcasecmp.
*
* \return \c true if \p s is in \p ..., or \c false otherwise
*/
bool
pcmk__strcase_any_of(const char *s, ...)
{
va_list ap;
bool rc;
va_start(ap, s);
rc = str_any_of(s, ap, pcmk__str_casei);
va_end(ap);
return rc;
}
/*!
* \internal
* \brief Is a string a member of a list of strings?
*
* \param[in] s String to search for in \p ...
* \param[in] ... Strings to compare \p s against. The final string
* must be NULL.
*
* \note The comparison is done taking case into account.
*
* \return \c true if \p s is in \p ..., or \c false otherwise
*/
bool
pcmk__str_any_of(const char *s, ...)
{
va_list ap;
bool rc;
va_start(ap, s);
rc = str_any_of(s, ap, pcmk__str_none);
va_end(ap);
return rc;
}
/*!
* \internal
* \brief Check whether a character is in any of a list of strings
*
* \param[in] ch Character (ASCII) to search for
* \param[in] ... Strings to search. Final argument must be
* \c NULL.
*
* \return \c true if any of \p ... contain \p ch, \c false otherwise
* \note \p ... must contain at least one argument (\c NULL).
*/
bool
pcmk__char_in_any_str(int ch, ...)
{
bool rc = false;
va_list ap;
/*
* Passing a char to va_start() can generate compiler warnings,
* so ch is declared as an int.
*/
va_start(ap, ch);
while (1) {
const char *ele = va_arg(ap, const char *);
if (ele == NULL) {
break;
} else if (strchr(ele, ch) != NULL) {
rc = true;
break;
}
}
va_end(ap);
return rc;
}
/*!
* \internal
* \brief Sort strings, with numeric portions sorted numerically
*
* Sort two strings case-insensitively like strcasecmp(), but with any numeric
* portions of the string sorted numerically. This is particularly useful for
* node names (for example, "node10" will sort higher than "node9" but lower
* than "remotenode9").
*
* \param[in] s1 First string to compare (must not be NULL)
* \param[in] s2 Second string to compare (must not be NULL)
*
* \retval -1 \p s1 comes before \p s2
* \retval 0 \p s1 and \p s2 are equal
* \retval 1 \p s1 comes after \p s2
*/
int
pcmk__numeric_strcasecmp(const char *s1, const char *s2)
{
CRM_ASSERT((s1 != NULL) && (s2 != NULL));
while (*s1 && *s2) {
if (isdigit(*s1) && isdigit(*s2)) {
// If node names contain a number, sort numerically
char *end1 = NULL;
char *end2 = NULL;
long num1 = strtol(s1, &end1, 10);
long num2 = strtol(s2, &end2, 10);
// allow ordering e.g. 007 > 7
size_t len1 = end1 - s1;
size_t len2 = end2 - s2;
if (num1 < num2) {
return -1;
} else if (num1 > num2) {
return 1;
} else if (len1 < len2) {
return -1;
} else if (len1 > len2) {
return 1;
}
s1 = end1;
s2 = end2;
} else {
// Compare non-digits case-insensitively
int lower1 = tolower(*s1);
int lower2 = tolower(*s2);
if (lower1 < lower2) {
return -1;
} else if (lower1 > lower2) {
return 1;
}
++s1;
++s2;
}
}
if (!*s1 && *s2) {
return -1;
} else if (*s1 && !*s2) {
return 1;
}
return 0;
}
/*!
* \internal
* \brief Sort strings.
*
* This is your one-stop function for string comparison. By default, this
* function works like \p g_strcmp0. That is, like \p strcmp but a \p NULL
* string sorts before a non-<tt>NULL</tt> string.
*
* The \p pcmk__str_none flag produces the default behavior. Behavior can be
* changed with various flags:
*
* - \p pcmk__str_regex - The second string is a regular expression that the
* first string will be matched against.
* - \p pcmk__str_casei - By default, comparisons are done taking case into
* account. This flag makes comparisons case-
* insensitive. This can be combined with
* \p pcmk__str_regex.
* - \p pcmk__str_null_matches - If one string is \p NULL and the other is not,
* still return \p 0.
* - \p pcmk__str_star_matches - If one string is \p "*" and the other is not,
* still return \p 0.
*
* \param[in] s1 First string to compare
* \param[in] s2 Second string to compare, or a regular expression to
* match if \p pcmk__str_regex is set
* \param[in] flags A bitfield of \p pcmk__str_flags to modify operation
*
* \retval negative \p s1 is \p NULL or comes before \p s2
* \retval 0 \p s1 and \p s2 are equal, or \p s1 is found in \p s2 if
* \c pcmk__str_regex is set
* \retval positive \p s2 is \p NULL or \p s1 comes after \p s2, or \p s2
* is an invalid regular expression, or \p s1 was not found
* in \p s2 if \p pcmk__str_regex is set.
*/
int
pcmk__strcmp(const char *s1, const char *s2, uint32_t flags)
{
/* If this flag is set, the second string is a regex. */
if (pcmk_is_set(flags, pcmk__str_regex)) {
regex_t r_patt;
int reg_flags = REG_EXTENDED | REG_NOSUB;
int regcomp_rc = 0;
int rc = 0;
if (s1 == NULL || s2 == NULL) {
return 1;
}
if (pcmk_is_set(flags, pcmk__str_casei)) {
reg_flags |= REG_ICASE;
}
regcomp_rc = regcomp(&r_patt, s2, reg_flags);
if (regcomp_rc != 0) {
rc = 1;
crm_err("Bad regex '%s' for update: %s", s2, strerror(regcomp_rc));
} else {
rc = regexec(&r_patt, s1, 0, NULL, 0);
regfree(&r_patt);
if (rc != 0) {
rc = 1;
}
}
return rc;
}
/* If the strings are the same pointer, return 0 immediately. */
if (s1 == s2) {
return 0;
}
/* If this flag is set, return 0 if either (or both) of the input strings
* are NULL. If neither one is NULL, we need to continue and compare
* them normally.
*/
if (pcmk_is_set(flags, pcmk__str_null_matches)) {
if (s1 == NULL || s2 == NULL) {
return 0;
}
}
/* Handle the cases where one is NULL and the str_null_matches flag is not set.
* A NULL string always sorts to the beginning.
*/
if (s1 == NULL) {
return -1;
} else if (s2 == NULL) {
return 1;
}
/* If this flag is set, return 0 if either (or both) of the input strings
* are "*". If neither one is, we need to continue and compare them
* normally.
*/
if (pcmk_is_set(flags, pcmk__str_star_matches)) {
if (strcmp(s1, "*") == 0 || strcmp(s2, "*") == 0) {
return 0;
}
}
if (pcmk_is_set(flags, pcmk__str_casei)) {
return strcasecmp(s1, s2);
} else {
return strcmp(s1, s2);
}
}
/*!
* \internal
* \brief Update a dynamically allocated string with a new value
*
* Given a dynamically allocated string and a new value for it, if the string
* is different from the new value, free the string and replace it with either a
* newly allocated duplicate of the value or NULL as appropriate.
*
* \param[in,out] str Pointer to dynamically allocated string
* \param[in] value New value to duplicate (or NULL)
*
* \note The caller remains responsibile for freeing \p *str.
*/
void
pcmk__str_update(char **str, const char *value)
{
if ((str != NULL) && !pcmk__str_eq(*str, value, pcmk__str_none)) {
free(*str);
if (value == NULL) {
*str = NULL;
} else {
*str = strdup(value);
CRM_ASSERT(*str != NULL);
}
}
}
/*!
* \internal
* \brief Append a list of strings to a destination \p GString
*
* \param[in,out] buffer Where to append the strings (must not be \p NULL)
* \param[in] ... A <tt>NULL</tt>-terminated list of strings
*
* \note This tends to be more efficient than a single call to
* \p g_string_append_printf().
*/
void
pcmk__g_strcat(GString *buffer, ...)
{
va_list ap;
CRM_ASSERT(buffer != NULL);
va_start(ap, buffer);
while (true) {
const char *ele = va_arg(ap, const char *);
if (ele == NULL) {
break;
}
g_string_append(buffer, ele);
}
va_end(ap);
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/common/util_compat.h>
gboolean
safe_str_neq(const char *a, const char *b)
{
if (a == b) {
return FALSE;
} else if (a == NULL || b == NULL) {
return TRUE;
} else if (strcasecmp(a, b) == 0) {
return FALSE;
}
return TRUE;
}
gboolean
crm_str_eq(const char *a, const char *b, gboolean use_case)
{
if (use_case) {
return g_strcmp0(a, b) == 0;
/* TODO - Figure out which calls, if any, really need to be case independent */
} else if (a == b) {
return TRUE;
} else if (a == NULL || b == NULL) {
/* shouldn't be comparing NULLs */
return FALSE;
} else if (strcasecmp(a, b) == 0) {
return TRUE;
}
return FALSE;
}
char *
crm_itoa_stack(int an_int, char *buffer, size_t len)
{
if (buffer != NULL) {
snprintf(buffer, len, "%d", an_int);
}
return buffer;
}
guint
g_str_hash_traditional(gconstpointer v)
{
return pcmk__str_hash(v);
}
gboolean
crm_strcase_equal(gconstpointer a, gconstpointer b)
{
return pcmk__strcase_equal(a, b);
}
guint
crm_strcase_hash(gconstpointer v)
{
return pcmk__strcase_hash(v);
}
GHashTable *
crm_str_table_dup(GHashTable *old_table)
{
return pcmk__str_table_dup(old_table);
}
long long
crm_parse_ll(const char *text, const char *default_text)
{
long long result;
if (text == NULL) {
text = default_text;
if (text == NULL) {
crm_err("No default conversion value supplied");
errno = EINVAL;
return PCMK__PARSE_INT_DEFAULT;
}
}
scan_ll(text, &result, PCMK__PARSE_INT_DEFAULT, NULL);
return result;
}
int
crm_parse_int(const char *text, const char *default_text)
{
long long result = crm_parse_ll(text, default_text);
if (result < INT_MIN) {
// If errno is ERANGE, crm_parse_ll() has already logged a message
if (errno != ERANGE) {
crm_err("Conversion of %s was clipped: %lld", text, result);
errno = ERANGE;
}
return INT_MIN;
} else if (result > INT_MAX) {
// If errno is ERANGE, crm_parse_ll() has already logged a message
if (errno != ERANGE) {
crm_err("Conversion of %s was clipped: %lld", text, result);
errno = ERANGE;
}
return INT_MAX;
}
return (int) result;
}
char *
crm_strip_trailing_newline(char *str)
{
return pcmk__trim(str);
}
int
pcmk_numeric_strcasecmp(const char *s1, const char *s2)
{
return pcmk__numeric_strcasecmp(s1, s2);
}
// LCOV_EXCL_STOP
// End deprecated API
diff --git a/lib/pacemaker/pcmk_graph_consumer.c b/lib/pacemaker/pcmk_graph_consumer.c
index 9a266cc4a8..a009edfeff 100644
--- a/lib/pacemaker/pcmk_graph_consumer.c
+++ b/lib/pacemaker/pcmk_graph_consumer.c
@@ -1,882 +1,882 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
/*
* Functions for updating graph
*/
/*!
* \internal
* \brief Update synapse after completed prerequisite
*
* A synapse is ready to be executed once all its prerequisite actions (inputs)
* complete. Given a completed action, check whether it is an input for a given
* synapse, and if so, mark the input as confirmed, and mark the synapse as
* ready if appropriate.
*
* \param[in,out] synapse Transition graph synapse to update
* \param[in] action_id ID of an action that completed
*
* \note The only substantial effect here is confirming synapse inputs.
* should_fire_synapse() will recalculate pcmk__synapse_ready, so the only
* thing that uses the pcmk__synapse_ready from here is
* synapse_state_str().
*/
static void
update_synapse_ready(pcmk__graph_synapse_t *synapse, int action_id)
{
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
return; // All inputs have already been confirmed
}
// Presume ready until proven otherwise
pcmk__set_synapse_flags(synapse, pcmk__synapse_ready);
for (GList *lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
pcmk__graph_action_t *prereq = (pcmk__graph_action_t *) lpc->data;
if (prereq->id == action_id) {
crm_trace("Confirming input %d of synapse %d",
action_id, synapse->id);
pcmk__set_graph_action_flags(prereq, pcmk__graph_action_confirmed);
} else if (!pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed)) {
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
crm_trace("Synapse %d still not ready after action %d",
synapse->id, action_id);
}
}
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
crm_trace("Synapse %d is now ready to execute", synapse->id);
}
}
/*!
* \internal
* \brief Update action and synapse confirmation after action completion
*
* \param[in,out] synapse Transition graph synapse that action belongs to
* \param[in] action_id ID of action that completed
*/
static void
update_synapse_confirmed(pcmk__graph_synapse_t *synapse, int action_id)
{
bool all_confirmed = true;
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data;
if (action->id == action_id) {
crm_trace("Confirmed action %d of synapse %d",
action_id, synapse->id);
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
} else if (all_confirmed &&
!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
all_confirmed = false;
crm_trace("Synapse %d still not confirmed after action %d",
synapse->id, action_id);
}
}
if (all_confirmed
&& !pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
crm_trace("Confirmed synapse %d", synapse->id);
pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
}
}
/*!
* \internal
* \brief Update the transition graph with a completed action result
*
* \param[in,out] graph Transition graph to update
* \param[in] action Action that completed
*/
void
pcmk__update_graph(pcmk__graph_t *graph, const pcmk__graph_action_t *action)
{
for (GList *lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
if (pcmk_any_flags_set(synapse->flags,
pcmk__synapse_confirmed|pcmk__synapse_failed)) {
continue; // This synapse already completed
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
update_synapse_confirmed(synapse, action->id);
} else if (!pcmk_is_set(action->flags, pcmk__graph_action_failed)
|| (synapse->priority == INFINITY)) {
update_synapse_ready(synapse, action->id);
}
}
}
/*
* Functions for executing graph
*/
/* A transition graph consists of various types of actions. The library caller
* registers execution functions for each action type, which will be stored
* here.
*/
static pcmk__graph_functions_t *graph_fns = NULL;
/*!
* \internal
* \brief Set transition graph execution functions
*
* \param[in] Execution functions to use
*/
void
pcmk__set_graph_functions(pcmk__graph_functions_t *fns)
{
crm_debug("Setting custom functions for executing transition graphs");
graph_fns = fns;
CRM_ASSERT(graph_fns != NULL);
CRM_ASSERT(graph_fns->rsc != NULL);
CRM_ASSERT(graph_fns->cluster != NULL);
CRM_ASSERT(graph_fns->pseudo != NULL);
CRM_ASSERT(graph_fns->fence != NULL);
}
/*!
* \internal
* \brief Check whether a graph synapse is ready to be executed
*
* \param[in,out] graph Transition graph that synapse is part of
* \param[in,out] synapse Synapse to check
*
* \return true if synapse is ready, false otherwise
*/
static bool
should_fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse)
{
GList *lpc = NULL;
pcmk__set_synapse_flags(synapse, pcmk__synapse_ready);
for (lpc = synapse->inputs; lpc != NULL; lpc = lpc->next) {
pcmk__graph_action_t *prereq = (pcmk__graph_action_t *) lpc->data;
if (!(pcmk_is_set(prereq->flags, pcmk__graph_action_confirmed))) {
crm_trace("Input %d for synapse %d not yet confirmed",
prereq->id, synapse->id);
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
break;
} else if (pcmk_is_set(prereq->flags, pcmk__graph_action_failed)
&& !pcmk_is_set(prereq->flags,
pcmk__graph_action_can_fail)) {
crm_trace("Input %d for synapse %d confirmed but failed",
prereq->id, synapse->id);
pcmk__clear_synapse_flags(synapse, pcmk__synapse_ready);
break;
}
}
if (pcmk_is_set(synapse->flags, pcmk__synapse_ready)) {
crm_trace("Synapse %d is ready to execute", synapse->id);
} else {
return false;
}
for (lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
pcmk__graph_action_t *a = (pcmk__graph_action_t *) lpc->data;
if (a->type == pcmk__pseudo_graph_action) {
/* None of the below applies to pseudo ops */
} else if (synapse->priority < graph->abort_priority) {
crm_trace("Skipping synapse %d: priority %d is less than "
"abort priority %d",
synapse->id, synapse->priority, graph->abort_priority);
graph->skipped++;
return false;
} else if (graph_fns->allowed && !(graph_fns->allowed(graph, a))) {
crm_trace("Deferring synapse %d: not allowed", synapse->id);
return false;
}
}
return true;
}
/*!
* \internal
* \brief Initiate an action from a transition graph
*
* \param[in,out] graph Transition graph containing action
* \param[in,out] action Action to execute
*
* \return Standard Pacemaker return code
*/
static int
initiate_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *id = ID(action->xml);
CRM_CHECK(id != NULL, return EINVAL);
CRM_CHECK(!pcmk_is_set(action->flags, pcmk__graph_action_executed),
return pcmk_rc_already);
pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
switch (action->type) {
case pcmk__pseudo_graph_action:
crm_trace("Executing pseudo-action %d (%s)", action->id, id);
return graph_fns->pseudo(graph, action);
case pcmk__rsc_graph_action:
crm_trace("Executing resource action %d (%s)", action->id, id);
return graph_fns->rsc(graph, action);
case pcmk__cluster_graph_action:
if (pcmk__str_eq(crm_element_value(action->xml, XML_LRM_ATTR_TASK),
PCMK_ACTION_STONITH, pcmk__str_none)) {
crm_trace("Executing fencing action %d (%s)",
action->id, id);
return graph_fns->fence(graph, action);
}
crm_trace("Executing cluster action %d (%s)", action->id, id);
return graph_fns->cluster(graph, action);
default:
crm_err("Unsupported graph action type <%s " XML_ATTR_ID "='%s'> "
"(bug?)",
action->xml->name, id);
return EINVAL;
}
}
/*!
* \internal
* \brief Execute a graph synapse
*
* \param[in,out] graph Transition graph with synapse to execute
* \param[in,out] synapse Synapse to execute
*
* \return Standard Pacemaker return value
*/
static int
fire_synapse(pcmk__graph_t *graph, pcmk__graph_synapse_t *synapse)
{
pcmk__set_synapse_flags(synapse, pcmk__synapse_executed);
for (GList *lpc = synapse->actions; lpc != NULL; lpc = lpc->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc->data;
int rc = initiate_action(graph, action);
if (rc != pcmk_rc_ok) {
crm_err("Failed initiating <%s " XML_ATTR_ID "=%d> in synapse %d: "
"%s",
action->xml->name, action->id, synapse->id,
pcmk_rc_str(rc));
pcmk__set_synapse_flags(synapse, pcmk__synapse_confirmed);
pcmk__set_graph_action_flags(action,
pcmk__graph_action_confirmed
|pcmk__graph_action_failed);
return pcmk_rc_error;
}
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Dummy graph method that can be used with simulations
*
* \param[in,out] graph Transition graph containing action
* \param[in,out] action Graph action to be initiated
*
* \return Standard Pacemaker return code
* \note If the PE_fail environment variable is set to the action ID,
* then the graph action will be marked as failed.
*/
static int
pseudo_action_dummy(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
static int fail = -1;
if (fail < 0) {
long long fail_ll;
if ((pcmk__scan_ll(getenv("PE_fail"), &fail_ll, 0LL) == pcmk_rc_ok)
&& (fail_ll > 0LL) && (fail_ll <= INT_MAX)) {
fail = (int) fail_ll;
} else {
fail = 0;
}
}
if (action->id == fail) {
crm_err("Dummy event handler: pretending action %d failed", action->id);
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
graph->abort_priority = INFINITY;
} else {
crm_trace("Dummy event handler: action %d initiated", action->id);
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(graph, action);
return pcmk_rc_ok;
}
static pcmk__graph_functions_t default_fns = {
pseudo_action_dummy,
pseudo_action_dummy,
pseudo_action_dummy,
pseudo_action_dummy
};
/*!
* \internal
* \brief Execute all actions in a transition graph
*
* \param[in,out] graph Transition graph to execute
*
* \return Status of transition after execution
*/
enum pcmk__graph_status
pcmk__execute_graph(pcmk__graph_t *graph)
{
GList *lpc = NULL;
int log_level = LOG_DEBUG;
enum pcmk__graph_status pass_result = pcmk__graph_active;
const char *status = "In progress";
if (graph_fns == NULL) {
graph_fns = &default_fns;
}
if (graph == NULL) {
return pcmk__graph_complete;
}
graph->fired = 0;
graph->pending = 0;
graph->skipped = 0;
graph->completed = 0;
graph->incomplete = 0;
// Count completed and in-flight synapses
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
graph->completed++;
} else if (!pcmk_is_set(synapse->flags, pcmk__synapse_failed)
&& pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
graph->pending++;
}
}
crm_trace("Executing graph %d (%d synapses already completed, %d pending)",
graph->id, graph->completed, graph->pending);
// Execute any synapses that are ready
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
if ((graph->batch_limit > 0)
&& (graph->pending >= graph->batch_limit)) {
crm_debug("Throttling graph execution: batch limit (%d) reached",
graph->batch_limit);
break;
} else if (pcmk_is_set(synapse->flags, pcmk__synapse_failed)) {
graph->skipped++;
continue;
} else if (pcmk_any_flags_set(synapse->flags,
pcmk__synapse_confirmed
|pcmk__synapse_executed)) {
continue; // Already handled
} else if (should_fire_synapse(graph, synapse)) {
graph->fired++;
if (fire_synapse(graph, synapse) != pcmk_rc_ok) {
crm_err("Synapse %d failed to fire", synapse->id);
log_level = LOG_ERR;
graph->abort_priority = INFINITY;
graph->incomplete++;
graph->fired--;
}
if (!(pcmk_is_set(synapse->flags, pcmk__synapse_confirmed))) {
graph->pending++;
}
} else {
crm_trace("Synapse %d cannot fire", synapse->id);
graph->incomplete++;
}
}
if ((graph->pending == 0) && (graph->fired == 0)) {
graph->complete = true;
if ((graph->incomplete != 0) && (graph->abort_priority <= 0)) {
log_level = LOG_WARNING;
pass_result = pcmk__graph_terminated;
status = "Terminated";
} else if (graph->skipped != 0) {
log_level = LOG_NOTICE;
pass_result = pcmk__graph_complete;
status = "Stopped";
} else {
log_level = LOG_NOTICE;
pass_result = pcmk__graph_complete;
status = "Complete";
}
} else if (graph->fired == 0) {
pass_result = pcmk__graph_pending;
}
do_crm_log(log_level,
"Transition %d (Complete=%d, Pending=%d,"
" Fired=%d, Skipped=%d, Incomplete=%d, Source=%s): %s",
graph->id, graph->completed, graph->pending, graph->fired,
graph->skipped, graph->incomplete, graph->source, status);
return pass_result;
}
/*
* Functions for unpacking transition graph XML into structs
*/
/*!
* \internal
* \brief Unpack a transition graph action from XML
*
* \param[in] parent Synapse that action is part of
* \param[in] xml_action Action XML to unparse
*
* \return Newly allocated action on success, or NULL otherwise
*/
static pcmk__graph_action_t *
unpack_action(pcmk__graph_synapse_t *parent, xmlNode *xml_action)
{
enum pcmk__graph_action_type action_type;
pcmk__graph_action_t *action = NULL;
const char *value = ID(xml_action);
if (value == NULL) {
crm_err("Ignoring transition graph action without id (bug?)");
crm_log_xml_trace(xml_action, "invalid");
return NULL;
}
if (pcmk__xe_is(xml_action, XML_GRAPH_TAG_RSC_OP)) {
action_type = pcmk__rsc_graph_action;
} else if (pcmk__xe_is(xml_action, XML_GRAPH_TAG_PSEUDO_EVENT)) {
action_type = pcmk__pseudo_graph_action;
} else if (pcmk__xe_is(xml_action, XML_GRAPH_TAG_CRM_EVENT)) {
action_type = pcmk__cluster_graph_action;
} else {
crm_err("Ignoring transition graph action of unknown type '%s' (bug?)",
xml_action->name);
crm_log_xml_trace(xml_action, "invalid");
return NULL;
}
action = calloc(1, sizeof(pcmk__graph_action_t));
if (action == NULL) {
crm_perror(LOG_CRIT, "Cannot unpack transition graph action");
crm_log_xml_trace(xml_action, "lost");
return NULL;
}
pcmk__scan_min_int(value, &(action->id), -1);
action->type = pcmk__rsc_graph_action;
action->xml = copy_xml(xml_action);
action->synapse = parent;
action->type = action_type;
action->params = xml2list(action->xml);
value = g_hash_table_lookup(action->params, "CRM_meta_timeout");
pcmk__scan_min_int(value, &(action->timeout), 0);
/* Take start-delay into account for the timeout of the action timer */
value = g_hash_table_lookup(action->params, "CRM_meta_start_delay");
{
int start_delay;
pcmk__scan_min_int(value, &start_delay, 0);
action->timeout += start_delay;
}
if (pcmk__guint_from_hash(action->params,
CRM_META "_" XML_LRM_ATTR_INTERVAL, 0,
&(action->interval_ms)) != pcmk_rc_ok) {
action->interval_ms = 0;
}
value = g_hash_table_lookup(action->params, "CRM_meta_can_fail");
if (value != NULL) {
int can_fail = 0;
if ((crm_str_to_boolean(value, &can_fail) > 0) && (can_fail > 0)) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_can_fail);
} else {
pcmk__clear_graph_action_flags(action, pcmk__graph_action_can_fail);
}
#ifndef PCMK__COMPAT_2_0
if (pcmk_is_set(action->flags, pcmk__graph_action_can_fail)) {
crm_warn("Support for the can_fail meta-attribute is deprecated"
" and will be removed in a future release");
}
#endif
}
crm_trace("Action %d has timer set to %dms", action->id, action->timeout);
return action;
}
/*!
* \internal
* \brief Unpack transition graph synapse from XML
*
* \param[in,out] new_graph Transition graph that synapse is part of
* \param[in] xml_synapse Synapse XML
*
* \return Newly allocated synapse on success, or NULL otherwise
*/
static pcmk__graph_synapse_t *
unpack_synapse(pcmk__graph_t *new_graph, const xmlNode *xml_synapse)
{
const char *value = NULL;
xmlNode *action_set = NULL;
pcmk__graph_synapse_t *new_synapse = NULL;
crm_trace("Unpacking synapse %s", ID(xml_synapse));
new_synapse = calloc(1, sizeof(pcmk__graph_synapse_t));
if (new_synapse == NULL) {
return NULL;
}
pcmk__scan_min_int(ID(xml_synapse), &(new_synapse->id), 0);
value = crm_element_value(xml_synapse, XML_CIB_ATTR_PRIORITY);
pcmk__scan_min_int(value, &(new_synapse->priority), 0);
CRM_CHECK(new_synapse->id >= 0, free(new_synapse);
return NULL);
new_graph->num_synapses++;
crm_trace("Unpacking synapse %s action sets",
crm_element_value(xml_synapse, XML_ATTR_ID));
for (action_set = first_named_child(xml_synapse, "action_set");
action_set != NULL; action_set = crm_next_same_xml(action_set)) {
for (xmlNode *action = pcmk__xml_first_child(action_set);
action != NULL; action = pcmk__xml_next(action)) {
pcmk__graph_action_t *new_action = unpack_action(new_synapse,
action);
if (new_action == NULL) {
continue;
}
crm_trace("Adding action %d to synapse %d",
new_action->id, new_synapse->id);
new_graph->num_actions++;
new_synapse->actions = g_list_append(new_synapse->actions,
new_action);
}
}
crm_trace("Unpacking synapse %s inputs", ID(xml_synapse));
for (xmlNode *inputs = first_named_child(xml_synapse, "inputs");
inputs != NULL; inputs = crm_next_same_xml(inputs)) {
for (xmlNode *trigger = first_named_child(inputs, "trigger");
trigger != NULL; trigger = crm_next_same_xml(trigger)) {
for (xmlNode *input = pcmk__xml_first_child(trigger);
input != NULL; input = pcmk__xml_next(input)) {
pcmk__graph_action_t *new_input = unpack_action(new_synapse,
input);
if (new_input == NULL) {
continue;
}
crm_trace("Adding input %d to synapse %d",
new_input->id, new_synapse->id);
new_synapse->inputs = g_list_append(new_synapse->inputs,
new_input);
}
}
}
return new_synapse;
}
/*!
* \internal
* \brief Unpack transition graph XML
*
* \param[in] xml_graph Transition graph XML to unpack
* \param[in] reference Where the XML came from (for logging)
*
* \return Newly allocated transition graph on success, NULL otherwise
* \note The caller is responsible for freeing the return value using
* pcmk__free_graph().
* \note The XML is expected to be structured like:
<transition_graph ...>
<synapse id="0">
<action_set>
<rsc_op id="2" ...>
...
</action_set>
<inputs>
<rsc_op id="1" ...
...
</inputs>
</synapse>
...
</transition_graph>
*/
pcmk__graph_t *
pcmk__unpack_graph(const xmlNode *xml_graph, const char *reference)
{
pcmk__graph_t *new_graph = NULL;
new_graph = calloc(1, sizeof(pcmk__graph_t));
if (new_graph == NULL) {
return NULL;
}
new_graph->source = strdup((reference == NULL)? "unknown" : reference);
if (new_graph->source == NULL) {
free(new_graph);
return NULL;
}
new_graph->id = -1;
new_graph->abort_priority = 0;
new_graph->network_delay = 0;
new_graph->stonith_timeout = 0;
new_graph->completion_action = pcmk__graph_done;
// Parse top-level attributes from <transition_graph>
if (xml_graph != NULL) {
const char *buf = crm_element_value(xml_graph, "transition_id");
CRM_CHECK(buf != NULL, free(new_graph);
return NULL);
pcmk__scan_min_int(buf, &(new_graph->id), -1);
buf = crm_element_value(xml_graph, PCMK_OPT_CLUSTER_DELAY);
CRM_CHECK(buf != NULL, free(new_graph);
return NULL);
- pcmk__parse_interval_spec(buf, &(new_graph->network_delay));
+ pcmk_parse_interval_spec(buf, &(new_graph->network_delay));
buf = crm_element_value(xml_graph, PCMK_OPT_STONITH_TIMEOUT);
if (buf == NULL) {
new_graph->stonith_timeout = new_graph->network_delay;
} else {
- pcmk__parse_interval_spec(buf, &(new_graph->stonith_timeout));
+ pcmk_parse_interval_spec(buf, &(new_graph->stonith_timeout));
}
// Use 0 (dynamic limit) as default/invalid, -1 (no limit) as minimum
buf = crm_element_value(xml_graph, PCMK_OPT_BATCH_LIMIT);
if ((buf == NULL)
|| (pcmk__scan_min_int(buf, &(new_graph->batch_limit),
-1) != pcmk_rc_ok)) {
new_graph->batch_limit = 0;
}
buf = crm_element_value(xml_graph, PCMK_OPT_MIGRATION_LIMIT);
pcmk__scan_min_int(buf, &(new_graph->migration_limit), -1);
pcmk__str_update(&(new_graph->failed_stop_offset),
crm_element_value(xml_graph, "failed-stop-offset"));
pcmk__str_update(&(new_graph->failed_start_offset),
crm_element_value(xml_graph, "failed-start-offset"));
if (crm_element_value_epoch(xml_graph, "recheck-by",
&(new_graph->recheck_by)) != pcmk_ok) {
new_graph->recheck_by = 0;
}
}
// Unpack each child <synapse> element
for (const xmlNode *synapse_xml = first_named_child(xml_graph, "synapse");
synapse_xml != NULL; synapse_xml = crm_next_same_xml(synapse_xml)) {
pcmk__graph_synapse_t *new_synapse = unpack_synapse(new_graph,
synapse_xml);
if (new_synapse != NULL) {
new_graph->synapses = g_list_append(new_graph->synapses,
new_synapse);
}
}
crm_debug("Unpacked transition %d from %s: %d actions in %d synapses",
new_graph->id, new_graph->source, new_graph->num_actions,
new_graph->num_synapses);
return new_graph;
}
/*
* Functions for freeing transition graph objects
*/
/*!
* \internal
* \brief Free a transition graph action object
*
* \param[in,out] user_data Action to free
*/
static void
free_graph_action(gpointer user_data)
{
pcmk__graph_action_t *action = user_data;
if (action->timer != 0) {
crm_warn("Cancelling timer for graph action %d", action->id);
g_source_remove(action->timer);
}
if (action->params != NULL) {
g_hash_table_destroy(action->params);
}
free_xml(action->xml);
free(action);
}
/*!
* \internal
* \brief Free a transition graph synapse object
*
* \param[in,out] user_data Synapse to free
*/
static void
free_graph_synapse(gpointer user_data)
{
pcmk__graph_synapse_t *synapse = user_data;
g_list_free_full(synapse->actions, free_graph_action);
g_list_free_full(synapse->inputs, free_graph_action);
free(synapse);
}
/*!
* \internal
* \brief Free a transition graph object
*
* \param[in,out] graph Transition graph to free
*/
void
pcmk__free_graph(pcmk__graph_t *graph)
{
if (graph != NULL) {
g_list_free_full(graph->synapses, free_graph_synapse);
free(graph->source);
free(graph->failed_stop_offset);
free(graph->failed_start_offset);
free(graph);
}
}
/*
* Other transition graph utilities
*/
/*!
* \internal
* \brief Synthesize an executor event from a graph action
*
* \param[in] resource If not NULL, use greater call ID than in this XML
* \param[in] action Graph action
* \param[in] status What to use as event execution status
* \param[in] rc What to use as event exit status
* \param[in] exit_reason What to use as event exit reason
*
* \return Newly allocated executor event on success, or NULL otherwise
*/
lrmd_event_data_t *
pcmk__event_from_graph_action(const xmlNode *resource,
const pcmk__graph_action_t *action,
int status, int rc, const char *exit_reason)
{
lrmd_event_data_t *op = NULL;
GHashTableIter iter;
const char *name = NULL;
const char *value = NULL;
xmlNode *action_resource = NULL;
CRM_CHECK(action != NULL, return NULL);
CRM_CHECK(action->type == pcmk__rsc_graph_action, return NULL);
action_resource = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "invalid");
return NULL);
op = lrmd_new_event(ID(action_resource),
crm_element_value(action->xml, XML_LRM_ATTR_TASK),
action->interval_ms);
lrmd__set_result(op, rc, status, exit_reason);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
op->params = pcmk__strkey_table(free, free);
g_hash_table_iter_init(&iter, action->params);
while (g_hash_table_iter_next(&iter, (void **)&name, (void **)&value)) {
g_hash_table_insert(op->params, strdup(name), strdup(value));
}
for (xmlNode *xop = pcmk__xml_first_child(resource); xop != NULL;
xop = pcmk__xml_next(xop)) {
int tmp = 0;
crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp);
crm_debug("Got call_id=%d for %s", tmp, ID(resource));
if (tmp > op->call_id) {
op->call_id = tmp;
}
}
op->call_id++;
return op;
}
diff --git a/lib/pacemaker/pcmk_sched_recurring.c b/lib/pacemaker/pcmk_sched_recurring.c
index 9255f3bd1b..2beda213c7 100644
--- a/lib/pacemaker/pcmk_sched_recurring.c
+++ b/lib/pacemaker/pcmk_sched_recurring.c
@@ -1,737 +1,737 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <crm/common/scheduler_internal.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Information parsed from an operation history entry in the CIB
struct op_history {
// XML attributes
const char *id; // ID of history entry
const char *name; // Action name
// Parsed information
char *key; // Operation key for action
enum rsc_role_e role; // Action role (or pcmk_role_unknown for default)
guint interval_ms; // Action interval
};
/*!
* \internal
* \brief Parse an interval from XML
*
* \param[in] xml XML containing an interval attribute
*
* \return Interval parsed from XML (or 0 as default)
*/
static guint
xe_interval(const xmlNode *xml)
{
guint interval_ms = 0U;
- pcmk__parse_interval_spec(crm_element_value(xml, XML_LRM_ATTR_INTERVAL),
+ pcmk_parse_interval_spec(crm_element_value(xml, XML_LRM_ATTR_INTERVAL),
&interval_ms);
return interval_ms;
}
/*!
* \internal
* \brief Check whether an operation exists multiple times in resource history
*
* \param[in] rsc Resource with history to search
* \param[in] name Name of action to search for
* \param[in] interval_ms Interval (in milliseconds) of action to search for
*
* \return true if an operation with \p name and \p interval_ms exists more than
* once in the operation history of \p rsc, otherwise false
*/
static bool
is_op_dup(const pcmk_resource_t *rsc, const char *name, guint interval_ms)
{
const char *id = NULL;
for (xmlNode *op = first_named_child(rsc->ops_xml, "op");
op != NULL; op = crm_next_same_xml(op)) {
// Check whether action name and interval match
if (!pcmk__str_eq(crm_element_value(op, "name"), name, pcmk__str_none)
|| (xe_interval(op) != interval_ms)) {
continue;
}
if (ID(op) == NULL) {
continue; // Shouldn't be possible
}
if (id == NULL) {
id = ID(op); // First matching op
} else {
pcmk__config_err("Operation %s is duplicate of %s (do not use "
"same name and interval combination more "
"than once per resource)", ID(op), id);
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether an action name is one that can be recurring
*
* \param[in] name Action name to check
*
* \return true if \p name is an action known to be unsuitable as a recurring
* operation, otherwise false
*
* \note Pacemaker's current philosophy is to allow users to configure recurring
* operations except for a short list of actions known not to be suitable
* for that (as opposed to allowing only actions known to be suitable,
* which includes only monitor). Among other things, this approach allows
* users to define their own custom operations and make them recurring,
* though that use case is not well tested.
*/
static bool
op_cannot_recur(const char *name)
{
return pcmk__str_any_of(name, PCMK_ACTION_STOP, PCMK_ACTION_START,
PCMK_ACTION_DEMOTE, PCMK_ACTION_PROMOTE,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
NULL);
}
/*!
* \internal
* \brief Check whether a resource history entry is for a recurring action
*
* \param[in] rsc Resource that history entry is for
* \param[in] xml XML of resource history entry to check
* \param[out] op Where to store parsed info if recurring
*
* \return true if \p xml is for a recurring action, otherwise false
*/
static bool
is_recurring_history(const pcmk_resource_t *rsc, const xmlNode *xml,
struct op_history *op)
{
const char *role = NULL;
op->interval_ms = xe_interval(xml);
if (op->interval_ms == 0) {
return false; // Not recurring
}
op->id = ID(xml);
if (pcmk__str_empty(op->id)) {
pcmk__config_err("Ignoring resource history entry without ID");
return false; // Shouldn't be possible (unless CIB was manually edited)
}
op->name = crm_element_value(xml, "name");
if (op_cannot_recur(op->name)) {
pcmk__config_err("Ignoring %s because %s action cannot be recurring",
op->id, pcmk__s(op->name, "unnamed"));
return false;
}
// There should only be one recurring operation per action/interval
if (is_op_dup(rsc, op->name, op->interval_ms)) {
return false;
}
// Ensure role is valid if specified
role = crm_element_value(xml, "role");
if (role == NULL) {
op->role = pcmk_role_unknown;
} else {
op->role = text2role(role);
if (op->role == pcmk_role_unknown) {
pcmk__config_err("Ignoring %s role because %s is not a valid role",
op->id, role);
return false;
}
}
// Only actions that are still configured and enabled matter
if (pcmk__find_action_config(rsc, op->name, op->interval_ms,
false) == NULL) {
pcmk__rsc_trace(rsc,
"Ignoring %s (%s-interval %s for %s) because it is "
"disabled or no longer in configuration",
op->id, pcmk__readable_interval(op->interval_ms),
op->name, rsc->id);
return false;
}
op->key = pcmk__op_key(rsc->id, op->name, op->interval_ms);
return true;
}
/*!
* \internal
* \brief Check whether a recurring action for an active role should be optional
*
* \param[in] rsc Resource that recurring action is for
* \param[in] node Node that \p rsc will be active on (if any)
* \param[in] key Operation key for recurring action to check
* \param[in,out] start Start action for \p rsc
*
* \return true if recurring action should be optional, otherwise false
*/
static bool
active_recurring_should_be_optional(const pcmk_resource_t *rsc,
const pcmk_node_t *node, const char *key,
pcmk_action_t *start)
{
GList *possible_matches = NULL;
if (node == NULL) { // Should only be possible if unmanaged and stopped
pcmk__rsc_trace(rsc,
"%s will be mandatory because resource is unmanaged",
key);
return false;
}
if (!pcmk_is_set(rsc->cmds->action_flags(start, NULL),
pcmk_action_optional)) {
pcmk__rsc_trace(rsc, "%s will be mandatory because %s is",
key, start->uuid);
return false;
}
possible_matches = find_actions_exact(rsc->actions, key, node);
if (possible_matches == NULL) {
pcmk__rsc_trace(rsc,
"%s will be mandatory because it is not active on %s",
key, pe__node_name(node));
return false;
}
for (const GList *iter = possible_matches;
iter != NULL; iter = iter->next) {
const pcmk_action_t *op = (const pcmk_action_t *) iter->data;
if (pcmk_is_set(op->flags, pcmk_action_reschedule)) {
pcmk__rsc_trace(rsc,
"%s will be mandatory because "
"it needs to be rescheduled", key);
g_list_free(possible_matches);
return false;
}
}
g_list_free(possible_matches);
return true;
}
/*!
* \internal
* \brief Create recurring action from resource history entry for an active role
*
* \param[in,out] rsc Resource that resource history is for
* \param[in,out] start Start action for \p rsc on \p node
* \param[in] node Node that resource will be active on (if any)
* \param[in] op Resource history entry
*/
static void
recurring_op_for_active(pcmk_resource_t *rsc, pcmk_action_t *start,
const pcmk_node_t *node, const struct op_history *op)
{
pcmk_action_t *mon = NULL;
bool is_optional = true;
const bool is_default_role = (op->role == pcmk_role_unknown);
// We're only interested in recurring actions for active roles
if (op->role == pcmk_role_stopped) {
return;
}
is_optional = active_recurring_should_be_optional(rsc, node, op->key,
start);
if ((!is_default_role && (rsc->next_role != op->role))
|| (is_default_role && (rsc->next_role == pcmk_role_promoted))) {
// Configured monitor role doesn't match role resource will have
if (is_optional) { // It's running, so cancel it
char *after_key = NULL;
pcmk_action_t *cancel_op = pcmk__new_cancel_action(rsc, op->name,
op->interval_ms,
node);
switch (rsc->role) {
case pcmk_role_unpromoted:
case pcmk_role_started:
if (rsc->next_role == pcmk_role_promoted) {
after_key = promote_key(rsc);
} else if (rsc->next_role == pcmk_role_stopped) {
after_key = stop_key(rsc);
}
break;
case pcmk_role_promoted:
after_key = demote_key(rsc);
break;
default:
break;
}
if (after_key) {
pcmk__new_ordering(rsc, NULL, cancel_op, rsc, after_key, NULL,
pcmk__ar_unrunnable_first_blocks,
rsc->cluster);
}
}
do_crm_log((is_optional? LOG_INFO : LOG_TRACE),
"%s recurring action %s because %s configured for %s role "
"(not %s)",
(is_optional? "Cancelling" : "Ignoring"), op->key, op->id,
role2text(is_default_role? pcmk_role_unpromoted : op->role),
role2text(rsc->next_role));
return;
}
pcmk__rsc_trace(rsc,
"Creating %s recurring action %s for %s (%s %s on %s)",
(is_optional? "optional" : "mandatory"), op->key,
op->id, rsc->id, role2text(rsc->next_role),
pe__node_name(node));
mon = custom_action(rsc, strdup(op->key), op->name, node, is_optional,
rsc->cluster);
if (!pcmk_is_set(start->flags, pcmk_action_runnable)) {
pcmk__rsc_trace(rsc, "%s is unrunnable because start is", mon->uuid);
pe__clear_action_flags(mon, pcmk_action_runnable);
} else if ((node == NULL) || !node->details->online
|| node->details->unclean) {
pcmk__rsc_trace(rsc, "%s is unrunnable because no node is available",
mon->uuid);
pe__clear_action_flags(mon, pcmk_action_runnable);
} else if (!pcmk_is_set(mon->flags, pcmk_action_optional)) {
pcmk__rsc_info(rsc, "Start %s-interval %s for %s on %s",
pcmk__readable_interval(op->interval_ms), mon->task,
rsc->id, pe__node_name(node));
}
if (rsc->next_role == pcmk_role_promoted) {
pe__add_action_expected_result(mon, CRM_EX_PROMOTED);
}
// Order monitor relative to other actions
if ((node == NULL) || pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pcmk__new_ordering(rsc, start_key(rsc), NULL,
NULL, strdup(mon->uuid), mon,
pcmk__ar_first_implies_then
|pcmk__ar_unrunnable_first_blocks,
rsc->cluster);
pcmk__new_ordering(rsc, reload_key(rsc), NULL,
NULL, strdup(mon->uuid), mon,
pcmk__ar_first_implies_then
|pcmk__ar_unrunnable_first_blocks,
rsc->cluster);
if (rsc->next_role == pcmk_role_promoted) {
pcmk__new_ordering(rsc, promote_key(rsc), NULL,
rsc, NULL, mon,
pcmk__ar_ordered
|pcmk__ar_unrunnable_first_blocks,
rsc->cluster);
} else if (rsc->role == pcmk_role_promoted) {
pcmk__new_ordering(rsc, demote_key(rsc), NULL,
rsc, NULL, mon,
pcmk__ar_ordered
|pcmk__ar_unrunnable_first_blocks,
rsc->cluster);
}
}
}
/*!
* \internal
* \brief Cancel a recurring action if running on a node
*
* \param[in,out] rsc Resource that action is for
* \param[in] node Node to cancel action on
* \param[in] key Operation key for action
* \param[in] name Action name
* \param[in] interval_ms Action interval (in milliseconds)
*/
static void
cancel_if_running(pcmk_resource_t *rsc, const pcmk_node_t *node,
const char *key, const char *name, guint interval_ms)
{
GList *possible_matches = find_actions_exact(rsc->actions, key, node);
pcmk_action_t *cancel_op = NULL;
if (possible_matches == NULL) {
return; // Recurring action isn't running on this node
}
g_list_free(possible_matches);
cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node);
switch (rsc->next_role) {
case pcmk_role_started:
case pcmk_role_unpromoted:
/* Order starts after cancel. If the current role is
* stopped, this cancels the monitor before the resource
* starts; if the current role is started, then this cancels
* the monitor on a migration target before starting there.
*/
pcmk__new_ordering(rsc, NULL, cancel_op,
rsc, start_key(rsc), NULL,
pcmk__ar_unrunnable_first_blocks, rsc->cluster);
break;
default:
break;
}
pcmk__rsc_info(rsc,
"Cancelling %s-interval %s action for %s on %s because "
"configured for " PCMK__ROLE_STOPPED " role (not %s)",
pcmk__readable_interval(interval_ms), name, rsc->id,
pe__node_name(node), role2text(rsc->next_role));
}
/*!
* \internal
* \brief Order an action after all probes of a resource on a node
*
* \param[in,out] rsc Resource to check for probes
* \param[in] node Node to check for probes of \p rsc
* \param[in,out] action Action to order after probes of \p rsc on \p node
*/
static void
order_after_probes(pcmk_resource_t *rsc, const pcmk_node_t *node,
pcmk_action_t *action)
{
GList *probes = pe__resource_actions(rsc, node, PCMK_ACTION_MONITOR, FALSE);
for (GList *iter = probes; iter != NULL; iter = iter->next) {
order_actions((pcmk_action_t *) iter->data, action,
pcmk__ar_unrunnable_first_blocks);
}
g_list_free(probes);
}
/*!
* \internal
* \brief Order an action after all stops of a resource on a node
*
* \param[in,out] rsc Resource to check for stops
* \param[in] node Node to check for stops of \p rsc
* \param[in,out] action Action to order after stops of \p rsc on \p node
*/
static void
order_after_stops(pcmk_resource_t *rsc, const pcmk_node_t *node,
pcmk_action_t *action)
{
GList *stop_ops = pe__resource_actions(rsc, node, PCMK_ACTION_STOP, TRUE);
for (GList *iter = stop_ops; iter != NULL; iter = iter->next) {
pcmk_action_t *stop = (pcmk_action_t *) iter->data;
if (!pcmk_is_set(stop->flags, pcmk_action_optional)
&& !pcmk_is_set(action->flags, pcmk_action_optional)
&& !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pcmk__rsc_trace(rsc, "%s optional on %s: unmanaged",
action->uuid, pe__node_name(node));
pe__set_action_flags(action, pcmk_action_optional);
}
if (!pcmk_is_set(stop->flags, pcmk_action_runnable)) {
crm_debug("%s unrunnable on %s: stop is unrunnable",
action->uuid, pe__node_name(node));
pe__clear_action_flags(action, pcmk_action_runnable);
}
if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pcmk__new_ordering(rsc, stop_key(rsc), stop,
NULL, NULL, action,
pcmk__ar_first_implies_then
|pcmk__ar_unrunnable_first_blocks,
rsc->cluster);
}
}
g_list_free(stop_ops);
}
/*!
* \internal
* \brief Create recurring action from resource history entry for inactive role
*
* \param[in,out] rsc Resource that resource history is for
* \param[in] node Node that resource will be active on (if any)
* \param[in] op Resource history entry
*/
static void
recurring_op_for_inactive(pcmk_resource_t *rsc, const pcmk_node_t *node,
const struct op_history *op)
{
GList *possible_matches = NULL;
// We're only interested in recurring actions for the inactive role
if (op->role != pcmk_role_stopped) {
return;
}
if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
crm_notice("Ignoring %s (recurring monitors for " PCMK__ROLE_STOPPED
" role are not supported for anonymous clones)", op->id);
return; // @TODO add support
}
pcmk__rsc_trace(rsc,
"Creating recurring action %s for %s on nodes "
"where it should not be running", op->id, rsc->id);
for (GList *iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) {
pcmk_node_t *stop_node = (pcmk_node_t *) iter->data;
bool is_optional = true;
pcmk_action_t *stopped_mon = NULL;
// Cancel action on node where resource will be active
if ((node != NULL)
&& pcmk__str_eq(stop_node->details->uname, node->details->uname,
pcmk__str_casei)) {
cancel_if_running(rsc, node, op->key, op->name, op->interval_ms);
continue;
}
// Recurring action on this node is optional if it's already active here
possible_matches = find_actions_exact(rsc->actions, op->key, stop_node);
is_optional = (possible_matches != NULL);
g_list_free(possible_matches);
pcmk__rsc_trace(rsc,
"Creating %s recurring action %s for %s (%s "
PCMK__ROLE_STOPPED " on %s)",
(is_optional? "optional" : "mandatory"),
op->key, op->id, rsc->id, pe__node_name(stop_node));
stopped_mon = custom_action(rsc, strdup(op->key), op->name, stop_node,
is_optional, rsc->cluster);
pe__add_action_expected_result(stopped_mon, CRM_EX_NOT_RUNNING);
if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
order_after_probes(rsc, stop_node, stopped_mon);
}
/* The recurring action is for the inactive role, so it shouldn't be
* performed until the resource is inactive.
*/
order_after_stops(rsc, stop_node, stopped_mon);
if (!stop_node->details->online || stop_node->details->unclean) {
pcmk__rsc_debug(rsc, "%s unrunnable on %s: node unavailable)",
stopped_mon->uuid, pe__node_name(stop_node));
pe__clear_action_flags(stopped_mon, pcmk_action_runnable);
}
if (pcmk_is_set(stopped_mon->flags, pcmk_action_runnable)
&& !pcmk_is_set(stopped_mon->flags, pcmk_action_optional)) {
crm_notice("Start recurring %s-interval %s for "
PCMK__ROLE_STOPPED " %s on %s",
pcmk__readable_interval(op->interval_ms),
stopped_mon->task, rsc->id, pe__node_name(stop_node));
}
}
}
/*!
* \internal
* \brief Create recurring actions for a resource
*
* \param[in,out] rsc Resource to create recurring actions for
*/
void
pcmk__create_recurring_actions(pcmk_resource_t *rsc)
{
pcmk_action_t *start = NULL;
if (pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
pcmk__rsc_trace(rsc,
"Skipping recurring actions for blocked resource %s",
rsc->id);
return;
}
if (pcmk_is_set(rsc->flags, pcmk_rsc_maintenance)) {
pcmk__rsc_trace(rsc,
"Skipping recurring actions for %s "
"in maintenance mode", rsc->id);
return;
}
if (rsc->allocated_to == NULL) {
// Recurring actions for active roles not needed
} else if (rsc->allocated_to->details->maintenance) {
pcmk__rsc_trace(rsc,
"Skipping recurring actions for %s on %s "
"in maintenance mode",
rsc->id, pe__node_name(rsc->allocated_to));
} else if ((rsc->next_role != pcmk_role_stopped)
|| !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
// Recurring actions for active roles needed
start = start_action(rsc, rsc->allocated_to, TRUE);
}
pcmk__rsc_trace(rsc, "Creating any recurring actions needed for %s",
rsc->id);
for (xmlNode *op = first_named_child(rsc->ops_xml, "op");
op != NULL; op = crm_next_same_xml(op)) {
struct op_history op_history = { NULL, };
if (!is_recurring_history(rsc, op, &op_history)) {
continue;
}
if (start != NULL) {
recurring_op_for_active(rsc, start, rsc->allocated_to, &op_history);
}
recurring_op_for_inactive(rsc, rsc->allocated_to, &op_history);
free(op_history.key);
}
}
/*!
* \internal
* \brief Create an executor cancel action
*
* \param[in,out] rsc Resource of action to cancel
* \param[in] task Name of action to cancel
* \param[in] interval_ms Interval of action to cancel
* \param[in] node Node of action to cancel
*
* \return Created op
*/
pcmk_action_t *
pcmk__new_cancel_action(pcmk_resource_t *rsc, const char *task,
guint interval_ms, const pcmk_node_t *node)
{
pcmk_action_t *cancel_op = NULL;
char *key = NULL;
char *interval_ms_s = NULL;
CRM_ASSERT((rsc != NULL) && (task != NULL) && (node != NULL));
// @TODO dangerous if possible to schedule another action with this key
key = pcmk__op_key(rsc->id, task, interval_ms);
cancel_op = custom_action(rsc, key, PCMK_ACTION_CANCEL, node, FALSE,
rsc->cluster);
pcmk__str_update(&cancel_op->task, PCMK_ACTION_CANCEL);
pcmk__str_update(&cancel_op->cancel_task, task);
interval_ms_s = crm_strdup_printf("%u", interval_ms);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s);
free(interval_ms_s);
return cancel_op;
}
/*!
* \internal
* \brief Schedule cancellation of a recurring action
*
* \param[in,out] rsc Resource that action is for
* \param[in] call_id Action's call ID from history
* \param[in] task Action name
* \param[in] interval_ms Action interval
* \param[in] node Node that history entry is for
* \param[in] reason Short description of why action is cancelled
*/
void
pcmk__schedule_cancel(pcmk_resource_t *rsc, const char *call_id,
const char *task, guint interval_ms,
const pcmk_node_t *node, const char *reason)
{
pcmk_action_t *cancel = NULL;
CRM_CHECK((rsc != NULL) && (task != NULL)
&& (node != NULL) && (reason != NULL),
return);
crm_info("Recurring %s-interval %s for %s will be stopped on %s: %s",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node), reason);
cancel = pcmk__new_cancel_action(rsc, task, interval_ms, node);
add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
// Cancellations happen after stops
pcmk__new_ordering(rsc, stop_key(rsc), NULL, rsc, NULL, cancel,
pcmk__ar_ordered, rsc->cluster);
}
/*!
* \internal
* \brief Create a recurring action marked as needing rescheduling if active
*
* \param[in,out] rsc Resource that action is for
* \param[in] task Name of action being rescheduled
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in,out] node Node where action should be rescheduled
*/
void
pcmk__reschedule_recurring(pcmk_resource_t *rsc, const char *task,
guint interval_ms, pcmk_node_t *node)
{
pcmk_action_t *op = NULL;
trigger_unfencing(rsc, node, "Device parameters changed (reschedule)",
NULL, rsc->cluster);
op = custom_action(rsc, pcmk__op_key(rsc->id, task, interval_ms),
task, node, TRUE, rsc->cluster);
pe__set_action_flags(op, pcmk_action_reschedule);
}
/*!
* \internal
* \brief Check whether an action is recurring
*
* \param[in] action Action to check
*
* \return true if \p action has a nonzero interval, otherwise false
*/
bool
pcmk__action_is_recurring(const pcmk_action_t *action)
{
guint interval_ms = 0;
if (pcmk__guint_from_hash(action->meta,
XML_LRM_ATTR_INTERVAL_MS, 0,
&interval_ms) != pcmk_rc_ok) {
return false;
}
return (interval_ms > 0);
}
diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c
index dc7503c505..2535f7151a 100644
--- a/lib/pengine/complex.c
+++ b/lib/pengine/complex.c
@@ -1,1204 +1,1204 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/scheduler_internal.h>
#include "pe_status_private.h"
void populate_hash(xmlNode * nvpair_list, GHashTable * hash, const char **attrs, int attrs_length);
static pcmk_node_t *active_node(const pcmk_resource_t *rsc,
unsigned int *count_all,
unsigned int *count_clean);
pcmk_rsc_methods_t resource_class_functions[] = {
{
native_unpack,
native_find_rsc,
native_parameter,
native_print,
native_active,
native_resource_state,
native_location,
native_free,
pe__count_common,
pe__native_is_filtered,
active_node,
pe__primitive_max_per_node,
},
{
group_unpack,
native_find_rsc,
native_parameter,
group_print,
group_active,
group_resource_state,
native_location,
group_free,
pe__count_common,
pe__group_is_filtered,
active_node,
pe__group_max_per_node,
},
{
clone_unpack,
native_find_rsc,
native_parameter,
clone_print,
clone_active,
clone_resource_state,
native_location,
clone_free,
pe__count_common,
pe__clone_is_filtered,
active_node,
pe__clone_max_per_node,
},
{
pe__unpack_bundle,
native_find_rsc,
native_parameter,
pe__print_bundle,
pe__bundle_active,
pe__bundle_resource_state,
native_location,
pe__free_bundle,
pe__count_bundle,
pe__bundle_is_filtered,
pe__bundle_active_node,
pe__bundle_max_per_node,
}
};
static enum pe_obj_types
get_resource_type(const char *name)
{
if (pcmk__str_eq(name, XML_CIB_TAG_RESOURCE, pcmk__str_casei)) {
return pcmk_rsc_variant_primitive;
} else if (pcmk__str_eq(name, XML_CIB_TAG_GROUP, pcmk__str_casei)) {
return pcmk_rsc_variant_group;
} else if (pcmk__str_eq(name, XML_CIB_TAG_INCARNATION, pcmk__str_casei)) {
return pcmk_rsc_variant_clone;
} else if (pcmk__str_eq(name, PCMK_XE_PROMOTABLE_LEGACY, pcmk__str_casei)) {
// @COMPAT deprecated since 2.0.0
return pcmk_rsc_variant_clone;
} else if (pcmk__str_eq(name, XML_CIB_TAG_CONTAINER, pcmk__str_casei)) {
return pcmk_rsc_variant_bundle;
}
return pcmk_rsc_variant_unknown;
}
static void
dup_attr(gpointer key, gpointer value, gpointer user_data)
{
add_hash_param(user_data, key, value);
}
static void
expand_parents_fixed_nvpairs(pcmk_resource_t *rsc,
pe_rule_eval_data_t *rule_data,
GHashTable *meta_hash, pcmk_scheduler_t *scheduler)
{
GHashTable *parent_orig_meta = pcmk__strkey_table(free, free);
pcmk_resource_t *p = rsc->parent;
if (p == NULL) {
return ;
}
/* Search all parent resources, get the fixed value of "meta_attributes" set only in the original xml, and stack it in the hash table. */
/* The fixed value of the lower parent resource takes precedence and is not overwritten. */
while(p != NULL) {
/* A hash table for comparison is generated, including the id-ref. */
pe__unpack_dataset_nvpairs(p->xml, XML_TAG_META_SETS, rule_data,
parent_orig_meta, NULL, FALSE, scheduler);
p = p->parent;
}
/* If there is a fixed value of "meta_attributes" of the parent resource, it will be processed. */
if (parent_orig_meta != NULL) {
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
g_hash_table_iter_init(&iter, parent_orig_meta);
while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
/* Parameters set in the original xml of the parent resource will also try to overwrite the child resource. */
/* Attributes that already exist in the child lease are not updated. */
dup_attr(key, value, meta_hash);
}
}
if (parent_orig_meta != NULL) {
g_hash_table_destroy(parent_orig_meta);
}
return ;
}
void
get_meta_attributes(GHashTable * meta_hash, pcmk_resource_t * rsc,
pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
pe_rsc_eval_data_t rsc_rule_data = {
.standard = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS),
.provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER),
.agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE)
};
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = pcmk_role_unknown,
.now = scheduler->now,
.match_data = NULL,
.rsc_data = &rsc_rule_data,
.op_data = NULL
};
if (node) {
rule_data.node_hash = node->details->attrs;
}
for (xmlAttrPtr a = pcmk__xe_first_attr(rsc->xml); a != NULL; a = a->next) {
const char *prop_name = (const char *) a->name;
const char *prop_value = pcmk__xml_attr_value(a);
add_hash_param(meta_hash, prop_name, prop_value);
}
pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_META_SETS, &rule_data,
meta_hash, NULL, FALSE, scheduler);
/* Set the "meta_attributes" explicitly set in the parent resource to the hash table of the child resource. */
/* If it is already explicitly set as a child, it will not be overwritten. */
if (rsc->parent != NULL) {
expand_parents_fixed_nvpairs(rsc, &rule_data, meta_hash, scheduler);
}
/* check the defaults */
pe__unpack_dataset_nvpairs(scheduler->rsc_defaults, XML_TAG_META_SETS,
&rule_data, meta_hash, NULL, FALSE, scheduler);
/* If there is "meta_attributes" that the parent resource has not explicitly set, set a value that is not set from rsc_default either. */
/* The values already set up to this point will not be overwritten. */
if (rsc->parent) {
g_hash_table_foreach(rsc->parent->meta, dup_attr, meta_hash);
}
}
void
get_rsc_attributes(GHashTable *meta_hash, const pcmk_resource_t *rsc,
const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = pcmk_role_unknown,
.now = scheduler->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
if (node) {
rule_data.node_hash = node->details->attrs;
}
pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_ATTR_SETS, &rule_data,
meta_hash, NULL, FALSE, scheduler);
/* set anything else based on the parent */
if (rsc->parent != NULL) {
get_rsc_attributes(meta_hash, rsc->parent, node, scheduler);
} else {
/* and finally check the defaults */
pe__unpack_dataset_nvpairs(scheduler->rsc_defaults, XML_TAG_ATTR_SETS,
&rule_data, meta_hash, NULL, FALSE,
scheduler);
}
}
static char *
template_op_key(xmlNode * op)
{
const char *name = crm_element_value(op, "name");
const char *role = crm_element_value(op, "role");
char *key = NULL;
if ((role == NULL)
|| pcmk__strcase_any_of(role, PCMK__ROLE_STARTED, PCMK__ROLE_UNPROMOTED,
PCMK__ROLE_UNPROMOTED_LEGACY, NULL)) {
role = PCMK__ROLE_UNKNOWN;
}
key = crm_strdup_printf("%s-%s", name, role);
return key;
}
static gboolean
unpack_template(xmlNode *xml_obj, xmlNode **expanded_xml,
pcmk_scheduler_t *scheduler)
{
xmlNode *cib_resources = NULL;
xmlNode *template = NULL;
xmlNode *new_xml = NULL;
xmlNode *child_xml = NULL;
xmlNode *rsc_ops = NULL;
xmlNode *template_ops = NULL;
const char *template_ref = NULL;
const char *clone = NULL;
const char *id = NULL;
if (xml_obj == NULL) {
pcmk__config_err("No resource object for template unpacking");
return FALSE;
}
template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE);
if (template_ref == NULL) {
return TRUE;
}
id = ID(xml_obj);
if (id == NULL) {
pcmk__config_err("'%s' object must have a id", xml_obj->name);
return FALSE;
}
if (pcmk__str_eq(template_ref, id, pcmk__str_none)) {
pcmk__config_err("The resource object '%s' should not reference itself",
id);
return FALSE;
}
cib_resources = get_xpath_object("//" XML_CIB_TAG_RESOURCES,
scheduler->input, LOG_TRACE);
if (cib_resources == NULL) {
pcmk__config_err("No resources configured");
return FALSE;
}
template = pcmk__xe_match(cib_resources, XML_CIB_TAG_RSC_TEMPLATE,
XML_ATTR_ID, template_ref);
if (template == NULL) {
pcmk__config_err("No template named '%s'", template_ref);
return FALSE;
}
new_xml = copy_xml(template);
xmlNodeSetName(new_xml, xml_obj->name);
crm_xml_add(new_xml, XML_ATTR_ID, id);
clone = crm_element_value(xml_obj, XML_RSC_ATTR_INCARNATION);
if(clone) {
crm_xml_add(new_xml, XML_RSC_ATTR_INCARNATION, clone);
}
template_ops = find_xml_node(new_xml, "operations", FALSE);
for (child_xml = pcmk__xe_first_child(xml_obj); child_xml != NULL;
child_xml = pcmk__xe_next(child_xml)) {
xmlNode *new_child = NULL;
new_child = add_node_copy(new_xml, child_xml);
if (pcmk__str_eq((const char *)new_child->name, "operations", pcmk__str_none)) {
rsc_ops = new_child;
}
}
if (template_ops && rsc_ops) {
xmlNode *op = NULL;
GHashTable *rsc_ops_hash = pcmk__strkey_table(free, NULL);
for (op = pcmk__xe_first_child(rsc_ops); op != NULL;
op = pcmk__xe_next(op)) {
char *key = template_op_key(op);
g_hash_table_insert(rsc_ops_hash, key, op);
}
for (op = pcmk__xe_first_child(template_ops); op != NULL;
op = pcmk__xe_next(op)) {
char *key = template_op_key(op);
if (g_hash_table_lookup(rsc_ops_hash, key) == NULL) {
add_node_copy(rsc_ops, op);
}
free(key);
}
if (rsc_ops_hash) {
g_hash_table_destroy(rsc_ops_hash);
}
free_xml(template_ops);
}
/*free_xml(*expanded_xml); */
*expanded_xml = new_xml;
#if 0 /* Disable multi-level templates for now */
if (!unpack_template(new_xml, expanded_xml, scheduler)) {
free_xml(*expanded_xml);
*expanded_xml = NULL;
return FALSE;
}
#endif
return TRUE;
}
static gboolean
add_template_rsc(xmlNode *xml_obj, pcmk_scheduler_t *scheduler)
{
const char *template_ref = NULL;
const char *id = NULL;
if (xml_obj == NULL) {
pcmk__config_err("No resource object for processing resource list "
"of template");
return FALSE;
}
template_ref = crm_element_value(xml_obj, XML_CIB_TAG_RSC_TEMPLATE);
if (template_ref == NULL) {
return TRUE;
}
id = ID(xml_obj);
if (id == NULL) {
pcmk__config_err("'%s' object must have a id", xml_obj->name);
return FALSE;
}
if (pcmk__str_eq(template_ref, id, pcmk__str_none)) {
pcmk__config_err("The resource object '%s' should not reference itself",
id);
return FALSE;
}
if (add_tag_ref(scheduler->template_rsc_sets, template_ref, id) == FALSE) {
return FALSE;
}
return TRUE;
}
static bool
detect_promotable(pcmk_resource_t *rsc)
{
const char *promotable = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_PROMOTABLE);
if (crm_is_true(promotable)) {
return TRUE;
}
// @COMPAT deprecated since 2.0.0
if (pcmk__xe_is(rsc->xml, PCMK_XE_PROMOTABLE_LEGACY)) {
/* @TODO in some future version, pcmk__warn_once() here,
* then drop support in even later version
*/
g_hash_table_insert(rsc->meta, strdup(XML_RSC_ATTR_PROMOTABLE),
strdup(XML_BOOLEAN_TRUE));
return TRUE;
}
return FALSE;
}
static void
free_params_table(gpointer data)
{
g_hash_table_destroy((GHashTable *) data);
}
/*!
* \brief Get a table of resource parameters
*
* \param[in,out] rsc Resource to query
* \param[in] node Node for evaluating rules (NULL for defaults)
* \param[in,out] scheduler Scheduler data
*
* \return Hash table containing resource parameter names and values
* (or NULL if \p rsc or \p scheduler is NULL)
* \note The returned table will be destroyed when the resource is freed, so
* callers should not destroy it.
*/
GHashTable *
pe_rsc_params(pcmk_resource_t *rsc, const pcmk_node_t *node,
pcmk_scheduler_t *scheduler)
{
GHashTable *params_on_node = NULL;
/* A NULL node is used to request the resource's default parameters
* (not evaluated for node), but we always want something non-NULL
* as a hash table key.
*/
const char *node_name = "";
// Sanity check
if ((rsc == NULL) || (scheduler == NULL)) {
return NULL;
}
if ((node != NULL) && (node->details->uname != NULL)) {
node_name = node->details->uname;
}
// Find the parameter table for given node
if (rsc->parameter_cache == NULL) {
rsc->parameter_cache = pcmk__strikey_table(free, free_params_table);
} else {
params_on_node = g_hash_table_lookup(rsc->parameter_cache, node_name);
}
// If none exists yet, create one with parameters evaluated for node
if (params_on_node == NULL) {
params_on_node = pcmk__strkey_table(free, free);
get_rsc_attributes(params_on_node, rsc, node, scheduler);
g_hash_table_insert(rsc->parameter_cache, strdup(node_name),
params_on_node);
}
return params_on_node;
}
/*!
* \internal
* \brief Unpack a resource's "requires" meta-attribute
*
* \param[in,out] rsc Resource being unpacked
* \param[in] value Value of "requires" meta-attribute
* \param[in] is_default Whether \p value was selected by default
*/
static void
unpack_requires(pcmk_resource_t *rsc, const char *value, bool is_default)
{
if (pcmk__str_eq(value, PCMK__VALUE_NOTHING, pcmk__str_casei)) {
} else if (pcmk__str_eq(value, PCMK__VALUE_QUORUM, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pcmk_rsc_needs_quorum);
} else if (pcmk__str_eq(value, PCMK__VALUE_FENCING, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pcmk_rsc_needs_fencing);
if (!pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
pcmk__config_warn("%s requires fencing but fencing is disabled",
rsc->id);
}
} else if (pcmk__str_eq(value, PCMK__VALUE_UNFENCING, pcmk__str_casei)) {
if (pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)) {
pcmk__config_warn("Resetting \"" XML_RSC_ATTR_REQUIRES "\" for %s "
"to \"" PCMK__VALUE_QUORUM "\" because fencing "
"devices cannot require unfencing", rsc->id);
unpack_requires(rsc, PCMK__VALUE_QUORUM, true);
return;
} else if (!pcmk_is_set(rsc->cluster->flags,
pcmk_sched_fencing_enabled)) {
pcmk__config_warn("Resetting \"" XML_RSC_ATTR_REQUIRES "\" for %s "
"to \"" PCMK__VALUE_QUORUM "\" because fencing "
"is disabled", rsc->id);
unpack_requires(rsc, PCMK__VALUE_QUORUM, true);
return;
} else {
pe__set_resource_flags(rsc, pcmk_rsc_needs_fencing
|pcmk_rsc_needs_unfencing);
}
} else {
const char *orig_value = value;
if (pcmk_is_set(rsc->flags, pcmk_rsc_fence_device)) {
value = PCMK__VALUE_QUORUM;
} else if ((rsc->variant == pcmk_rsc_variant_primitive)
&& xml_contains_remote_node(rsc->xml)) {
value = PCMK__VALUE_QUORUM;
} else if (pcmk_is_set(rsc->cluster->flags,
pcmk_sched_enable_unfencing)) {
value = PCMK__VALUE_UNFENCING;
} else if (pcmk_is_set(rsc->cluster->flags,
pcmk_sched_fencing_enabled)) {
value = PCMK__VALUE_FENCING;
} else if (rsc->cluster->no_quorum_policy == pcmk_no_quorum_ignore) {
value = PCMK__VALUE_NOTHING;
} else {
value = PCMK__VALUE_QUORUM;
}
if (orig_value != NULL) {
pcmk__config_err("Resetting '" XML_RSC_ATTR_REQUIRES "' for %s "
"to '%s' because '%s' is not valid",
rsc->id, value, orig_value);
}
unpack_requires(rsc, value, true);
return;
}
pcmk__rsc_trace(rsc, "\tRequired to start: %s%s", value,
(is_default? " (default)" : ""));
}
#ifndef PCMK__COMPAT_2_0
static void
warn_about_deprecated_classes(pcmk_resource_t *rsc)
{
const char *std = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (pcmk__str_eq(std, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_none)) {
pcmk__warn_once(pcmk__wo_upstart,
"Support for Upstart resources (such as %s) is "
"deprecated and will be removed in a future release",
rsc->id);
} else if (pcmk__str_eq(std, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_none)) {
pcmk__warn_once(pcmk__wo_nagios,
"Support for Nagios resources (such as %s) is "
"deprecated and will be removed in a future release",
rsc->id);
}
}
#endif
/*!
* \internal
* \brief Unpack configuration XML for a given resource
*
* Unpack the XML object containing a resource's configuration into a new
* \c pcmk_resource_t object.
*
* \param[in] xml_obj XML node containing the resource's configuration
* \param[out] rsc Where to store the unpacked resource information
* \param[in] parent Resource's parent, if any
* \param[in,out] scheduler Scheduler data
*
* \return Standard Pacemaker return code
* \note If pcmk_rc_ok is returned, \p *rsc is guaranteed to be non-NULL, and
* the caller is responsible for freeing it using its variant-specific
* free() method. Otherwise, \p *rsc is guaranteed to be NULL.
*/
int
pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc,
pcmk_resource_t *parent, pcmk_scheduler_t *scheduler)
{
xmlNode *expanded_xml = NULL;
xmlNode *ops = NULL;
const char *value = NULL;
const char *id = NULL;
bool guest_node = false;
bool remote_node = false;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = pcmk_role_unknown,
.now = NULL,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
CRM_CHECK(rsc != NULL, return EINVAL);
CRM_CHECK((xml_obj != NULL) && (scheduler != NULL),
*rsc = NULL;
return EINVAL);
rule_data.now = scheduler->now;
crm_log_xml_trace(xml_obj, "[raw XML]");
id = crm_element_value(xml_obj, XML_ATTR_ID);
if (id == NULL) {
pcmk__config_err("Ignoring <%s> configuration without " XML_ATTR_ID,
xml_obj->name);
return pcmk_rc_unpack_error;
}
if (unpack_template(xml_obj, &expanded_xml, scheduler) == FALSE) {
return pcmk_rc_unpack_error;
}
*rsc = calloc(1, sizeof(pcmk_resource_t));
if (*rsc == NULL) {
pcmk__sched_err("Unable to allocate memory for resource '%s'", id);
return ENOMEM;
}
(*rsc)->cluster = scheduler;
if (expanded_xml) {
crm_log_xml_trace(expanded_xml, "[expanded XML]");
(*rsc)->xml = expanded_xml;
(*rsc)->orig_xml = xml_obj;
} else {
(*rsc)->xml = xml_obj;
(*rsc)->orig_xml = NULL;
}
/* Do not use xml_obj from here on, use (*rsc)->xml in case templates are involved */
(*rsc)->parent = parent;
ops = find_xml_node((*rsc)->xml, "operations", FALSE);
(*rsc)->ops_xml = expand_idref(ops, scheduler->input);
(*rsc)->variant = get_resource_type((const char *) (*rsc)->xml->name);
if ((*rsc)->variant == pcmk_rsc_variant_unknown) {
pcmk__config_err("Ignoring resource '%s' of unknown type '%s'",
id, (*rsc)->xml->name);
common_free(*rsc);
*rsc = NULL;
return pcmk_rc_unpack_error;
}
#ifndef PCMK__COMPAT_2_0
warn_about_deprecated_classes(*rsc);
#endif
(*rsc)->meta = pcmk__strkey_table(free, free);
(*rsc)->allowed_nodes = pcmk__strkey_table(NULL, free);
(*rsc)->known_on = pcmk__strkey_table(NULL, free);
value = crm_element_value((*rsc)->xml, XML_RSC_ATTR_INCARNATION);
if (value) {
(*rsc)->id = crm_strdup_printf("%s:%s", id, value);
add_hash_param((*rsc)->meta, XML_RSC_ATTR_INCARNATION, value);
} else {
(*rsc)->id = strdup(id);
}
(*rsc)->fns = &resource_class_functions[(*rsc)->variant];
get_meta_attributes((*rsc)->meta, *rsc, NULL, scheduler);
(*rsc)->parameters = pe_rsc_params(*rsc, NULL, scheduler); // \deprecated
(*rsc)->flags = 0;
pe__set_resource_flags(*rsc, pcmk_rsc_runnable|pcmk_rsc_unassigned);
if (!pcmk_is_set(scheduler->flags, pcmk_sched_in_maintenance)) {
pe__set_resource_flags(*rsc, pcmk_rsc_managed);
}
(*rsc)->rsc_cons = NULL;
(*rsc)->rsc_tickets = NULL;
(*rsc)->actions = NULL;
(*rsc)->role = pcmk_role_stopped;
(*rsc)->next_role = pcmk_role_unknown;
(*rsc)->recovery_type = pcmk_multiply_active_restart;
(*rsc)->stickiness = 0;
(*rsc)->migration_threshold = INFINITY;
(*rsc)->failure_timeout = 0;
value = g_hash_table_lookup((*rsc)->meta, XML_CIB_ATTR_PRIORITY);
(*rsc)->priority = char2score(value);
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_CRITICAL);
if ((value == NULL) || crm_is_true(value)) {
pe__set_resource_flags(*rsc, pcmk_rsc_critical);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_NOTIFY);
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pcmk_rsc_notify);
}
if (xml_contains_remote_node((*rsc)->xml)) {
(*rsc)->is_remote_node = TRUE;
if (g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_CONTAINER)) {
guest_node = true;
} else {
remote_node = true;
}
}
value = g_hash_table_lookup((*rsc)->meta, XML_OP_ATTR_ALLOW_MIGRATE);
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pcmk_rsc_migratable);
} else if ((value == NULL) && remote_node) {
/* By default, we want remote nodes to be able
* to float around the cluster without having to stop all the
* resources within the remote-node before moving. Allowing
* migration support enables this feature. If this ever causes
* problems, migration support can be explicitly turned off with
* allow-migrate=false.
*/
pe__set_resource_flags(*rsc, pcmk_rsc_migratable);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MANAGED);
if (value != NULL && !pcmk__str_eq("default", value, pcmk__str_casei)) {
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pcmk_rsc_managed);
} else {
pe__clear_resource_flags(*rsc, pcmk_rsc_managed);
}
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MAINTENANCE);
if (crm_is_true(value)) {
pe__clear_resource_flags(*rsc, pcmk_rsc_managed);
pe__set_resource_flags(*rsc, pcmk_rsc_maintenance);
}
if (pcmk_is_set(scheduler->flags, pcmk_sched_in_maintenance)) {
pe__clear_resource_flags(*rsc, pcmk_rsc_managed);
pe__set_resource_flags(*rsc, pcmk_rsc_maintenance);
}
if (pe_rsc_is_clone(pe__const_top_resource(*rsc, false))) {
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_UNIQUE);
if (crm_is_true(value)) {
pe__set_resource_flags(*rsc, pcmk_rsc_unique);
}
if (detect_promotable(*rsc)) {
pe__set_resource_flags(*rsc, pcmk_rsc_promotable);
}
} else {
pe__set_resource_flags(*rsc, pcmk_rsc_unique);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_RESTART);
if (pcmk__str_eq(value, "restart", pcmk__str_casei)) {
(*rsc)->restart_type = pe_restart_restart;
pcmk__rsc_trace(*rsc, "%s dependency restart handling: restart",
(*rsc)->id);
pcmk__warn_once(pcmk__wo_restart_type,
"Support for restart-type is deprecated "
"and will be removed in a future release");
} else {
(*rsc)->restart_type = pe_restart_ignore;
pcmk__rsc_trace(*rsc, "%s dependency restart handling: ignore",
(*rsc)->id);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_MULTIPLE);
if (pcmk__str_eq(value, "stop_only", pcmk__str_casei)) {
(*rsc)->recovery_type = pcmk_multiply_active_stop;
pcmk__rsc_trace(*rsc, "%s multiple running resource recovery: stop only",
(*rsc)->id);
} else if (pcmk__str_eq(value, "block", pcmk__str_casei)) {
(*rsc)->recovery_type = pcmk_multiply_active_block;
pcmk__rsc_trace(*rsc, "%s multiple running resource recovery: block",
(*rsc)->id);
} else if (pcmk__str_eq(value, "stop_unexpected", pcmk__str_casei)) {
(*rsc)->recovery_type = pcmk_multiply_active_unexpected;
pcmk__rsc_trace(*rsc,
"%s multiple running resource recovery: "
"stop unexpected instances",
(*rsc)->id);
} else { // "stop_start"
if (!pcmk__str_eq(value, "stop_start",
pcmk__str_casei|pcmk__str_null_matches)) {
pcmk__config_warn("%s is not a valid value for "
XML_RSC_ATTR_MULTIPLE
", using default of \"stop_start\"", value);
}
(*rsc)->recovery_type = pcmk_multiply_active_restart;
pcmk__rsc_trace(*rsc,
"%s multiple running resource recovery: stop/start",
(*rsc)->id);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_STICKINESS);
if (value != NULL && !pcmk__str_eq("default", value, pcmk__str_casei)) {
(*rsc)->stickiness = char2score(value);
}
value = g_hash_table_lookup((*rsc)->meta, PCMK_META_MIGRATION_THRESHOLD);
if (value != NULL && !pcmk__str_eq("default", value, pcmk__str_casei)) {
(*rsc)->migration_threshold = char2score(value);
if ((*rsc)->migration_threshold < 0) {
/* @TODO We use 1 here to preserve previous behavior, but this
* should probably use the default (INFINITY) or 0 (to disable)
* instead.
*/
pcmk__warn_once(pcmk__wo_neg_threshold,
PCMK_META_MIGRATION_THRESHOLD
" must be non-negative, using 1 instead");
(*rsc)->migration_threshold = 1;
}
}
if (pcmk__str_eq(crm_element_value((*rsc)->xml, XML_AGENT_ATTR_CLASS),
PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
pe__set_working_set_flags(scheduler, pcmk_sched_have_fencing);
pe__set_resource_flags(*rsc, pcmk_rsc_fence_device);
}
value = g_hash_table_lookup((*rsc)->meta, XML_RSC_ATTR_REQUIRES);
unpack_requires(*rsc, value, false);
value = g_hash_table_lookup((*rsc)->meta, PCMK_META_FAILURE_TIMEOUT);
if (value != NULL) {
guint interval_ms = 0U;
// Stored as seconds
- pcmk__parse_interval_spec(value, &interval_ms);
+ pcmk_parse_interval_spec(value, &interval_ms);
(*rsc)->failure_timeout = (int) (interval_ms / 1000);
}
if (remote_node) {
GHashTable *params = pe_rsc_params(*rsc, NULL, scheduler);
/* Grabbing the value now means that any rules based on node attributes
* will evaluate to false, so such rules should not be used with
* reconnect_interval.
*
* @TODO Evaluate per node before using
*/
value = g_hash_table_lookup(params, XML_REMOTE_ATTR_RECONNECT_INTERVAL);
if (value) {
/* reconnect delay works by setting failure_timeout and preventing the
* connection from starting until the failure is cleared. */
- pcmk__parse_interval_spec(value, &((*rsc)->remote_reconnect_ms));
+ pcmk_parse_interval_spec(value, &((*rsc)->remote_reconnect_ms));
/* we want to override any default failure_timeout in use when remote
* reconnect_interval is in use. */
(*rsc)->failure_timeout = (*rsc)->remote_reconnect_ms / 1000;
}
}
get_target_role(*rsc, &((*rsc)->next_role));
pcmk__rsc_trace(*rsc, "%s desired next state: %s", (*rsc)->id,
(*rsc)->next_role != pcmk_role_unknown? role2text((*rsc)->next_role) : "default");
if ((*rsc)->fns->unpack(*rsc, scheduler) == FALSE) {
(*rsc)->fns->free(*rsc);
*rsc = NULL;
return pcmk_rc_unpack_error;
}
if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) {
// This tag must stay exactly the same because it is tested elsewhere
resource_location(*rsc, NULL, 0, "symmetric_default", scheduler);
} else if (guest_node) {
/* remote resources tied to a container resource must always be allowed
* to opt-in to the cluster. Whether the connection resource is actually
* allowed to be placed on a node is dependent on the container resource */
resource_location(*rsc, NULL, 0, "remote_connection_default",
scheduler);
}
pcmk__rsc_trace(*rsc, "%s action notification: %s", (*rsc)->id,
pcmk_is_set((*rsc)->flags, pcmk_rsc_notify)? "required" : "not required");
(*rsc)->utilization = pcmk__strkey_table(free, free);
pe__unpack_dataset_nvpairs((*rsc)->xml, XML_TAG_UTILIZATION, &rule_data,
(*rsc)->utilization, NULL, FALSE, scheduler);
if (expanded_xml) {
if (add_template_rsc(xml_obj, scheduler) == FALSE) {
(*rsc)->fns->free(*rsc);
*rsc = NULL;
return pcmk_rc_unpack_error;
}
}
return pcmk_rc_ok;
}
gboolean
is_parent(pcmk_resource_t *child, pcmk_resource_t *rsc)
{
pcmk_resource_t *parent = child;
if (parent == NULL || rsc == NULL) {
return FALSE;
}
while (parent->parent != NULL) {
if (parent->parent == rsc) {
return TRUE;
}
parent = parent->parent;
}
return FALSE;
}
pcmk_resource_t *
uber_parent(pcmk_resource_t *rsc)
{
pcmk_resource_t *parent = rsc;
if (parent == NULL) {
return NULL;
}
while ((parent->parent != NULL)
&& (parent->parent->variant != pcmk_rsc_variant_bundle)) {
parent = parent->parent;
}
return parent;
}
/*!
* \internal
* \brief Get the topmost parent of a resource as a const pointer
*
* \param[in] rsc Resource to check
* \param[in] include_bundle If true, go all the way to bundle
*
* \return \p NULL if \p rsc is NULL, \p rsc if \p rsc has no parent,
* the bundle if \p rsc is bundled and \p include_bundle is true,
* otherwise the topmost parent of \p rsc up to a clone
*/
const pcmk_resource_t *
pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
{
const pcmk_resource_t *parent = rsc;
if (parent == NULL) {
return NULL;
}
while (parent->parent != NULL) {
if (!include_bundle
&& (parent->parent->variant == pcmk_rsc_variant_bundle)) {
break;
}
parent = parent->parent;
}
return parent;
}
void
common_free(pcmk_resource_t * rsc)
{
if (rsc == NULL) {
return;
}
pcmk__rsc_trace(rsc, "Freeing %s %d", rsc->id, rsc->variant);
g_list_free(rsc->rsc_cons);
g_list_free(rsc->rsc_cons_lhs);
g_list_free(rsc->rsc_tickets);
g_list_free(rsc->dangling_migrations);
if (rsc->parameter_cache != NULL) {
g_hash_table_destroy(rsc->parameter_cache);
}
if (rsc->meta != NULL) {
g_hash_table_destroy(rsc->meta);
}
if (rsc->utilization != NULL) {
g_hash_table_destroy(rsc->utilization);
}
if ((rsc->parent == NULL)
&& pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
free_xml(rsc->xml);
rsc->xml = NULL;
free_xml(rsc->orig_xml);
rsc->orig_xml = NULL;
/* if rsc->orig_xml, then rsc->xml is an expanded xml from a template */
} else if (rsc->orig_xml) {
free_xml(rsc->xml);
rsc->xml = NULL;
}
if (rsc->running_on) {
g_list_free(rsc->running_on);
rsc->running_on = NULL;
}
if (rsc->known_on) {
g_hash_table_destroy(rsc->known_on);
rsc->known_on = NULL;
}
if (rsc->actions) {
g_list_free(rsc->actions);
rsc->actions = NULL;
}
if (rsc->allowed_nodes) {
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = NULL;
}
g_list_free(rsc->fillers);
g_list_free(rsc->rsc_location);
pcmk__rsc_trace(rsc, "Resource freed");
free(rsc->id);
free(rsc->clone_name);
free(rsc->allocated_to);
free(rsc->variant_opaque);
free(rsc->pending_task);
free(rsc);
}
/*!
* \internal
* \brief Count a node and update most preferred to it as appropriate
*
* \param[in] rsc An active resource
* \param[in] node A node that \p rsc is active on
* \param[in,out] active This will be set to \p node if \p node is more
* preferred than the current value
* \param[in,out] count_all If not NULL, this will be incremented
* \param[in,out] count_clean If not NULL, this will be incremented if \p node
* is online and clean
*
* \return true if the count should continue, or false if sufficiently known
*/
bool
pe__count_active_node(const pcmk_resource_t *rsc, pcmk_node_t *node,
pcmk_node_t **active, unsigned int *count_all,
unsigned int *count_clean)
{
bool keep_looking = false;
bool is_happy = false;
CRM_CHECK((rsc != NULL) && (node != NULL) && (active != NULL),
return false);
is_happy = node->details->online && !node->details->unclean;
if (count_all != NULL) {
++*count_all;
}
if ((count_clean != NULL) && is_happy) {
++*count_clean;
}
if ((count_all != NULL) || (count_clean != NULL)) {
keep_looking = true; // We're counting, so go through entire list
}
if (rsc->partial_migration_source != NULL) {
if (node->details == rsc->partial_migration_source->details) {
*active = node; // This is the migration source
} else {
keep_looking = true;
}
} else if (!pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)) {
if (is_happy && ((*active == NULL) || !(*active)->details->online
|| (*active)->details->unclean)) {
*active = node; // This is the first clean node
} else {
keep_looking = true;
}
}
if (*active == NULL) {
*active = node; // This is the first node checked
}
return keep_looking;
}
// Shared implementation of pcmk_rsc_methods_t:active_node()
static pcmk_node_t *
active_node(const pcmk_resource_t *rsc, unsigned int *count_all,
unsigned int *count_clean)
{
pcmk_node_t *active = NULL;
if (count_all != NULL) {
*count_all = 0;
}
if (count_clean != NULL) {
*count_clean = 0;
}
if (rsc == NULL) {
return NULL;
}
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
if (!pe__count_active_node(rsc, (pcmk_node_t *) iter->data, &active,
count_all, count_clean)) {
break; // Don't waste time iterating if we don't have to
}
}
return active;
}
/*!
* \brief
* \internal Find and count active nodes according to "requires"
*
* \param[in] rsc Resource to check
* \param[out] count If not NULL, will be set to count of active nodes
*
* \return An active node (or NULL if resource is not active anywhere)
*
* \note This is a convenience wrapper for active_node() where the count of all
* active nodes or only clean active nodes is desired according to the
* "requires" meta-attribute.
*/
pcmk_node_t *
pe__find_active_requires(const pcmk_resource_t *rsc, unsigned int *count)
{
if (rsc == NULL) {
if (count != NULL) {
*count = 0;
}
return NULL;
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)) {
return rsc->fns->active_node(rsc, count, NULL);
} else {
return rsc->fns->active_node(rsc, NULL, count);
}
}
void
pe__count_common(pcmk_resource_t *rsc)
{
if (rsc->children != NULL) {
for (GList *item = rsc->children; item != NULL; item = item->next) {
((pcmk_resource_t *) item->data)->fns->count(item->data);
}
} else if (!pcmk_is_set(rsc->flags, pcmk_rsc_removed)
|| (rsc->role > pcmk_role_stopped)) {
rsc->cluster->ninstances++;
if (pe__resource_is_disabled(rsc)) {
rsc->cluster->disabled_resources++;
}
if (pcmk_is_set(rsc->flags, pcmk_rsc_blocked)) {
rsc->cluster->blocked_resources++;
}
}
}
/*!
* \internal
* \brief Update a resource's next role
*
* \param[in,out] rsc Resource to be updated
* \param[in] role Resource's new next role
* \param[in] why Human-friendly reason why role is changing (for logs)
*/
void
pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
{
CRM_ASSERT((rsc != NULL) && (why != NULL));
if (rsc->next_role != role) {
pcmk__rsc_trace(rsc, "Resetting next role for %s from %s to %s (%s)",
rsc->id, role2text(rsc->next_role), role2text(role),
why);
rsc->next_role = role;
}
}
diff --git a/lib/pengine/failcounts.c b/lib/pengine/failcounts.c
index 9c6d2b57e3..47e19b42a2 100644
--- a/lib/pengine/failcounts.c
+++ b/lib/pengine/failcounts.c
@@ -1,471 +1,471 @@
/*
* Copyright 2008-2024 the Pacemaker project contributors
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/types.h>
#include <regex.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <crm/pengine/internal.h>
static gboolean
is_matched_failure(const char *rsc_id, const xmlNode *conf_op_xml,
const xmlNode *lrm_op_xml)
{
gboolean matched = FALSE;
const char *conf_op_name = NULL;
const char *lrm_op_task = NULL;
const char *conf_op_interval_spec = NULL;
guint conf_op_interval_ms = 0;
guint lrm_op_interval_ms = 0;
const char *lrm_op_id = NULL;
char *last_failure_key = NULL;
if (rsc_id == NULL || conf_op_xml == NULL || lrm_op_xml == NULL) {
return FALSE;
}
// Get name and interval from configured op
conf_op_name = crm_element_value(conf_op_xml, "name");
conf_op_interval_spec = crm_element_value(conf_op_xml,
XML_LRM_ATTR_INTERVAL);
- pcmk__parse_interval_spec(conf_op_interval_spec, &conf_op_interval_ms);
+ pcmk_parse_interval_spec(conf_op_interval_spec, &conf_op_interval_ms);
// Get name and interval from op history entry
lrm_op_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK);
crm_element_value_ms(lrm_op_xml, XML_LRM_ATTR_INTERVAL_MS,
&lrm_op_interval_ms);
if ((conf_op_interval_ms != lrm_op_interval_ms)
|| !pcmk__str_eq(conf_op_name, lrm_op_task, pcmk__str_casei)) {
return FALSE;
}
lrm_op_id = ID(lrm_op_xml);
last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0);
if (pcmk__str_eq(last_failure_key, lrm_op_id, pcmk__str_casei)) {
matched = TRUE;
} else {
char *expected_op_key = pcmk__op_key(rsc_id, conf_op_name,
conf_op_interval_ms);
if (pcmk__str_eq(expected_op_key, lrm_op_id, pcmk__str_casei)) {
int rc = 0;
int target_rc = pe__target_rc_from_xml(lrm_op_xml);
crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc);
if (rc != target_rc) {
matched = TRUE;
}
}
free(expected_op_key);
}
free(last_failure_key);
return matched;
}
static gboolean
block_failure(const pcmk_node_t *node, pcmk_resource_t *rsc,
const xmlNode *xml_op)
{
char *xml_name = clone_strip(rsc->id);
/* @TODO This xpath search occurs after template expansion, but it is unable
* to properly detect on-fail in id-ref, operation meta-attributes, or
* op_defaults, or evaluate rules.
*
* Also, on-fail defaults to block (in unpack_operation()) for stop actions
* when stonith is disabled.
*
* Ideally, we'd unpack the operation before this point, and pass in a
* meta-attributes table that takes all that into consideration.
*/
char *xpath = crm_strdup_printf("//" XML_CIB_TAG_RESOURCE
"[@" XML_ATTR_ID "='%s']"
"//" XML_ATTR_OP
"[@" XML_OP_ATTR_ON_FAIL "='block']",
xml_name);
xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath);
gboolean should_block = FALSE;
free(xpath);
if (xpathObj) {
int max = numXpathResults(xpathObj);
int lpc = 0;
for (lpc = 0; lpc < max; lpc++) {
xmlNode *pref = getXpathResult(xpathObj, lpc);
if (xml_op) {
should_block = is_matched_failure(xml_name, pref, xml_op);
if (should_block) {
break;
}
} else {
const char *conf_op_name = NULL;
const char *conf_op_interval_spec = NULL;
guint conf_op_interval_ms = 0;
char *lrm_op_xpath = NULL;
xmlXPathObject *lrm_op_xpathObj = NULL;
// Get name and interval from configured op
conf_op_name = crm_element_value(pref, "name");
conf_op_interval_spec = crm_element_value(pref, XML_LRM_ATTR_INTERVAL);
- pcmk__parse_interval_spec(conf_op_interval_spec,
- &conf_op_interval_ms);
+ pcmk_parse_interval_spec(conf_op_interval_spec,
+ &conf_op_interval_ms);
#define XPATH_FMT "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"//" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \
"/" XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_TASK "='%s']" \
"[@" XML_LRM_ATTR_INTERVAL "='%u']"
lrm_op_xpath = crm_strdup_printf(XPATH_FMT,
node->details->uname, xml_name,
conf_op_name,
conf_op_interval_ms);
lrm_op_xpathObj = xpath_search(rsc->cluster->input, lrm_op_xpath);
free(lrm_op_xpath);
if (lrm_op_xpathObj) {
int max2 = numXpathResults(lrm_op_xpathObj);
int lpc2 = 0;
for (lpc2 = 0; lpc2 < max2; lpc2++) {
xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj,
lpc2);
should_block = is_matched_failure(xml_name, pref,
lrm_op_xml);
if (should_block) {
break;
}
}
}
freeXpathObject(lrm_op_xpathObj);
if (should_block) {
break;
}
}
}
}
free(xml_name);
freeXpathObject(xpathObj);
return should_block;
}
/*!
* \internal
* \brief Get resource name as used in failure-related node attributes
*
* \param[in] rsc Resource to check
*
* \return Newly allocated string containing resource's fail name
* \note The caller is responsible for freeing the result.
*/
static inline char *
rsc_fail_name(const pcmk_resource_t *rsc)
{
const char *name = (rsc->clone_name? rsc->clone_name : rsc->id);
return pcmk_is_set(rsc->flags, pcmk_rsc_unique)? strdup(name) : clone_strip(name);
}
/*!
* \internal
* \brief Compile regular expression to match a failure-related node attribute
*
* \param[in] prefix Attribute prefix to match
* \param[in] rsc_name Resource name to match as used in failure attributes
* \param[in] is_legacy Whether DC uses per-resource fail counts
* \param[in] is_unique Whether the resource is a globally unique clone
* \param[out] re Where to store resulting regular expression
*
* \return Standard Pacemaker return code
* \note Fail attributes are named like PREFIX-RESOURCE#OP_INTERVAL.
* The caller is responsible for freeing re with regfree().
*/
static int
generate_fail_regex(const char *prefix, const char *rsc_name,
gboolean is_legacy, gboolean is_unique, regex_t *re)
{
char *pattern;
/* @COMPAT DC < 1.1.17: Fail counts used to be per-resource rather than
* per-operation.
*/
const char *op_pattern = (is_legacy? "" : "#.+_[0-9]+");
/* Ignore instance numbers for anything other than globally unique clones.
* Anonymous clone fail counts could contain an instance number if the
* clone was initially unique, failed, then was converted to anonymous.
* @COMPAT Also, before 1.1.8, anonymous clone fail counts always contained
* clone instance numbers.
*/
const char *instance_pattern = (is_unique? "" : "(:[0-9]+)?");
pattern = crm_strdup_printf("^%s-%s%s%s$", prefix, rsc_name,
instance_pattern, op_pattern);
if (regcomp(re, pattern, REG_EXTENDED|REG_NOSUB) != 0) {
free(pattern);
return EINVAL;
}
free(pattern);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Compile regular expressions to match failure-related node attributes
*
* \param[in] rsc Resource being checked for failures
* \param[out] failcount_re Storage for regular expression for fail count
* \param[out] lastfailure_re Storage for regular expression for last failure
*
* \return Standard Pacemaker return code
* \note On success, the caller is responsible for freeing the expressions with
* regfree().
*/
static int
generate_fail_regexes(const pcmk_resource_t *rsc,
regex_t *failcount_re, regex_t *lastfailure_re)
{
int rc = pcmk_rc_ok;
char *rsc_name = rsc_fail_name(rsc);
const char *version = crm_element_value(rsc->cluster->input,
XML_ATTR_CRM_VERSION);
// @COMPAT Pacemaker <= 1.1.16 used a single fail count per resource
gboolean is_legacy = (compare_version(version, "3.0.13") < 0);
if (generate_fail_regex(PCMK__FAIL_COUNT_PREFIX, rsc_name, is_legacy,
pcmk_is_set(rsc->flags, pcmk_rsc_unique),
failcount_re) != pcmk_rc_ok) {
rc = EINVAL;
} else if (generate_fail_regex(PCMK__LAST_FAILURE_PREFIX, rsc_name,
is_legacy,
pcmk_is_set(rsc->flags, pcmk_rsc_unique),
lastfailure_re) != pcmk_rc_ok) {
rc = EINVAL;
regfree(failcount_re);
}
free(rsc_name);
return rc;
}
// Data for fail-count-related iterators
struct failcount_data {
const pcmk_node_t *node;// Node to check for fail count
pcmk_resource_t *rsc; // Resource to check for fail count
uint32_t flags; // Fail count flags
const xmlNode *xml_op; // History entry for expiration purposes (or NULL)
regex_t failcount_re; // Fail count regular expression to match
regex_t lastfailure_re; // Last failure regular expression to match
int failcount; // Fail count so far
time_t last_failure; // Time of most recent failure so far
};
/*!
* \internal
* \brief Update fail count and last failure appropriately for a node attribute
*
* \param[in] key Node attribute name
* \param[in] value Node attribute value
* \param[in] user_data Fail count data to update
*/
static void
update_failcount_for_attr(gpointer key, gpointer value, gpointer user_data)
{
struct failcount_data *fc_data = user_data;
// If this is a matching fail count attribute, update fail count
if (regexec(&(fc_data->failcount_re), (const char *) key, 0, NULL, 0) == 0) {
fc_data->failcount = pcmk__add_scores(fc_data->failcount,
char2score(value));
pcmk__rsc_trace(fc_data->rsc, "Added %s (%s) to %s fail count (now %s)",
(const char *) key, (const char *) value,
fc_data->rsc->id,
pcmk_readable_score(fc_data->failcount));
return;
}
// If this is a matching last failure attribute, update last failure
if (regexec(&(fc_data->lastfailure_re), (const char *) key, 0, NULL,
0) == 0) {
long long last_ll;
if (pcmk__scan_ll(value, &last_ll, 0LL) == pcmk_rc_ok) {
fc_data->last_failure = (time_t) QB_MAX(fc_data->last_failure,
last_ll);
}
}
}
/*!
* \internal
* \brief Update fail count and last failure appropriately for a filler resource
*
* \param[in] data Filler resource
* \param[in] user_data Fail count data to update
*/
static void
update_failcount_for_filler(gpointer data, gpointer user_data)
{
pcmk_resource_t *filler = data;
struct failcount_data *fc_data = user_data;
time_t filler_last_failure = 0;
fc_data->failcount += pe_get_failcount(fc_data->node, filler,
&filler_last_failure, fc_data->flags,
fc_data->xml_op);
fc_data->last_failure = QB_MAX(fc_data->last_failure, filler_last_failure);
}
/*!
* \internal
* \brief Get a resource's fail count on a node
*
* \param[in] node Node to check
* \param[in,out] rsc Resource to check
* \param[out] last_failure If not NULL, where to set time of most recent
* failure of \p rsc on \p node
* \param[in] flags Group of enum pcmk__fc_flags
* \param[in] xml_op If not NULL, consider only the action in this
* history entry when determining whether on-fail
* is configured as "blocked", otherwise consider
* all actions configured for \p rsc
*
* \return Fail count for \p rsc on \p node according to \p flags
*/
int
pe_get_failcount(const pcmk_node_t *node, pcmk_resource_t *rsc,
time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
{
struct failcount_data fc_data = {
.node = node,
.rsc = rsc,
.flags = flags,
.xml_op = xml_op,
.failcount = 0,
.last_failure = (time_t) 0,
};
// Calculate resource failcount as sum of all matching operation failcounts
CRM_CHECK(generate_fail_regexes(rsc, &fc_data.failcount_re,
&fc_data.lastfailure_re) == pcmk_rc_ok,
return 0);
g_hash_table_foreach(node->details->attrs, update_failcount_for_attr,
&fc_data);
regfree(&(fc_data.failcount_re));
regfree(&(fc_data.lastfailure_re));
// If failure blocks the resource, disregard any failure timeout
if ((fc_data.failcount > 0) && (rsc->failure_timeout > 0)
&& block_failure(node, rsc, xml_op)) {
pcmk__config_warn("Ignoring failure timeout %d for %s "
"because it conflicts with on-fail=block",
rsc->failure_timeout, rsc->id);
rsc->failure_timeout = 0;
}
// If all failures have expired, ignore fail count
if (pcmk_is_set(flags, pcmk__fc_effective) && (fc_data.failcount > 0)
&& (fc_data.last_failure > 0) && (rsc->failure_timeout != 0)) {
time_t now = get_effective_time(rsc->cluster);
if (now > (fc_data.last_failure + rsc->failure_timeout)) {
pcmk__rsc_debug(rsc, "Failcount for %s on %s expired after %ds",
rsc->id, pe__node_name(node), rsc->failure_timeout);
fc_data.failcount = 0;
}
}
/* Add the fail count of any filler resources, except that we never want the
* fail counts of a bundle container's fillers to count towards the
* container's fail count.
*
* Most importantly, a Pacemaker Remote connection to a bundle container
* is a filler of the container, but can reside on a different node than the
* container itself. Counting its fail count on its node towards the
* container's fail count on that node could lead to attempting to stop the
* container on the wrong node.
*/
if (pcmk_is_set(flags, pcmk__fc_fillers) && (rsc->fillers != NULL)
&& !pe_rsc_is_bundled(rsc)) {
g_list_foreach(rsc->fillers, update_failcount_for_filler, &fc_data);
if (fc_data.failcount > 0) {
pcmk__rsc_info(rsc,
"Container %s and the resources within it "
"have failed %s time%s on %s",
rsc->id, pcmk_readable_score(fc_data.failcount),
pcmk__plural_s(fc_data.failcount),
pe__node_name(node));
}
} else if (fc_data.failcount > 0) {
pcmk__rsc_info(rsc, "%s has failed %s time%s on %s",
rsc->id, pcmk_readable_score(fc_data.failcount),
pcmk__plural_s(fc_data.failcount), pe__node_name(node));
}
if (last_failure != NULL) {
if ((fc_data.failcount > 0) && (fc_data.last_failure > 0)) {
*last_failure = fc_data.last_failure;
} else {
*last_failure = 0;
}
}
return fc_data.failcount;
}
/*!
* \brief Schedule a controller operation to clear a fail count
*
* \param[in,out] rsc Resource with failure
* \param[in] node Node failure occurred on
* \param[in] reason Readable description why needed (for logging)
* \param[in,out] scheduler Scheduler data cluster
*
* \return Scheduled action
*/
pcmk_action_t *
pe__clear_failcount(pcmk_resource_t *rsc, const pcmk_node_t *node,
const char *reason, pcmk_scheduler_t *scheduler)
{
char *key = NULL;
pcmk_action_t *clear = NULL;
CRM_CHECK(rsc && node && reason && scheduler, return NULL);
key = pcmk__op_key(rsc->id, PCMK_ACTION_CLEAR_FAILCOUNT, 0);
clear = custom_action(rsc, key, PCMK_ACTION_CLEAR_FAILCOUNT, node, FALSE,
scheduler);
add_hash_param(clear->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s",
rsc->id, pe__node_name(node), reason, clear->uuid);
return clear;
}
diff --git a/lib/pengine/pe_actions.c b/lib/pengine/pe_actions.c
index 825f32fa2b..77664dc571 100644
--- a/lib/pengine/pe_actions.c
+++ b/lib/pengine/pe_actions.c
@@ -1,1888 +1,1888 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <stdbool.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/scheduler_internal.h>
#include <crm/pengine/internal.h>
#include <crm/common/xml_internal.h>
#include "pe_status_private.h"
static void unpack_operation(pcmk_action_t *action, const xmlNode *xml_obj,
guint interval_ms);
static void
add_singleton(pcmk_scheduler_t *scheduler, pcmk_action_t *action)
{
if (scheduler->singletons == NULL) {
scheduler->singletons = pcmk__strkey_table(NULL, NULL);
}
g_hash_table_insert(scheduler->singletons, action->uuid, action);
}
static pcmk_action_t *
lookup_singleton(pcmk_scheduler_t *scheduler, const char *action_uuid)
{
if (scheduler->singletons == NULL) {
return NULL;
}
return g_hash_table_lookup(scheduler->singletons, action_uuid);
}
/*!
* \internal
* \brief Find an existing action that matches arguments
*
* \param[in] key Action key to match
* \param[in] rsc Resource to match (if any)
* \param[in] node Node to match (if any)
* \param[in] scheduler Scheduler data
*
* \return Existing action that matches arguments (or NULL if none)
*/
static pcmk_action_t *
find_existing_action(const char *key, const pcmk_resource_t *rsc,
const pcmk_node_t *node, const pcmk_scheduler_t *scheduler)
{
GList *matches = NULL;
pcmk_action_t *action = NULL;
/* When rsc is NULL, it would be quicker to check scheduler->singletons,
* but checking all scheduler->actions takes the node into account.
*/
matches = find_actions(((rsc == NULL)? scheduler->actions : rsc->actions),
key, node);
if (matches == NULL) {
return NULL;
}
CRM_LOG_ASSERT(!pcmk__list_of_multiple(matches));
action = matches->data;
g_list_free(matches);
return action;
}
/*!
* \internal
* \brief Find the XML configuration corresponding to a specific action key
*
* \param[in] rsc Resource to find action configuration for
* \param[in] key "RSC_ACTION_INTERVAL" of action to find
* \param[in] include_disabled If false, do not return disabled actions
*
* \return XML configuration of desired action if any, otherwise NULL
*/
static xmlNode *
find_exact_action_config(const pcmk_resource_t *rsc, const char *action_name,
guint interval_ms, bool include_disabled)
{
for (xmlNode *operation = first_named_child(rsc->ops_xml, XML_ATTR_OP);
operation != NULL; operation = crm_next_same_xml(operation)) {
bool enabled = false;
const char *config_name = NULL;
const char *interval_spec = NULL;
guint tmp_ms = 0U;
// @TODO This does not consider rules, defaults, etc.
if (!include_disabled
&& (pcmk__xe_get_bool_attr(operation, "enabled",
&enabled) == pcmk_rc_ok) && !enabled) {
continue;
}
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
- pcmk__parse_interval_spec(interval_spec, &tmp_ms);
+ pcmk_parse_interval_spec(interval_spec, &tmp_ms);
if (tmp_ms != interval_ms) {
continue;
}
config_name = crm_element_value(operation, "name");
if (pcmk__str_eq(action_name, config_name, pcmk__str_none)) {
return operation;
}
}
return NULL;
}
/*!
* \internal
* \brief Find the XML configuration of a resource action
*
* \param[in] rsc Resource to find action configuration for
* \param[in] action_name Action name to search for
* \param[in] interval_ms Action interval (in milliseconds) to search for
* \param[in] include_disabled If false, do not return disabled actions
*
* \return XML configuration of desired action if any, otherwise NULL
*/
xmlNode *
pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name,
guint interval_ms, bool include_disabled)
{
xmlNode *action_config = NULL;
// Try requested action first
action_config = find_exact_action_config(rsc, action_name, interval_ms,
include_disabled);
// For migrate_to and migrate_from actions, retry with "migrate"
// @TODO This should be either documented or deprecated
if ((action_config == NULL)
&& pcmk__str_any_of(action_name, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
action_config = find_exact_action_config(rsc, "migrate", 0,
include_disabled);
}
return action_config;
}
/*!
* \internal
* \brief Create a new action object
*
* \param[in] key Action key
* \param[in] task Action name
* \param[in,out] rsc Resource that action is for (if any)
* \param[in] node Node that action is on (if any)
* \param[in] optional Whether action should be considered optional
* \param[in,out] scheduler Scheduler data
*
* \return Newly allocated action
* \note This function takes ownership of \p key. It is the caller's
* responsibility to free the return value with pe_free_action().
*/
static pcmk_action_t *
new_action(char *key, const char *task, pcmk_resource_t *rsc,
const pcmk_node_t *node, bool optional, pcmk_scheduler_t *scheduler)
{
pcmk_action_t *action = calloc(1, sizeof(pcmk_action_t));
CRM_ASSERT(action != NULL);
action->rsc = rsc;
action->task = strdup(task); CRM_ASSERT(action->task != NULL);
action->uuid = key;
if (node) {
action->node = pe__copy_node(node);
}
if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_casei)) {
// Resource history deletion for a node can be done on the DC
pe__set_action_flags(action, pcmk_action_on_dc);
}
pe__set_action_flags(action, pcmk_action_runnable);
if (optional) {
pe__set_action_flags(action, pcmk_action_optional);
} else {
pe__clear_action_flags(action, pcmk_action_optional);
}
if (rsc == NULL) {
action->meta = pcmk__strkey_table(free, free);
} else {
guint interval_ms = 0;
parse_op_key(key, NULL, NULL, &interval_ms);
action->op_entry = pcmk__find_action_config(rsc, task, interval_ms,
true);
/* If the given key is for one of the many notification pseudo-actions
* (pre_notify_promote, etc.), the actual action name is "notify"
*/
if ((action->op_entry == NULL) && (strstr(key, "_notify_") != NULL)) {
action->op_entry = find_exact_action_config(rsc, PCMK_ACTION_NOTIFY,
0, true);
}
unpack_operation(action, action->op_entry, interval_ms);
}
pcmk__rsc_trace(rsc, "Created %s action %d (%s): %s for %s on %s",
(optional? "optional" : "required"),
scheduler->action_id, key, task,
((rsc == NULL)? "no resource" : rsc->id),
pe__node_name(node));
action->id = scheduler->action_id++;
scheduler->actions = g_list_prepend(scheduler->actions, action);
if (rsc == NULL) {
add_singleton(scheduler, action);
} else {
rsc->actions = g_list_prepend(rsc->actions, action);
}
return action;
}
/*!
* \internal
* \brief Unpack a resource's action-specific instance parameters
*
* \param[in] action_xml XML of action's configuration in CIB (if any)
* \param[in,out] node_attrs Table of node attributes (for rule evaluation)
* \param[in,out] scheduler Cluster working set (for rule evaluation)
*
* \return Newly allocated hash table of action-specific instance parameters
*/
GHashTable *
pcmk__unpack_action_rsc_params(const xmlNode *action_xml,
GHashTable *node_attrs,
pcmk_scheduler_t *scheduler)
{
GHashTable *params = pcmk__strkey_table(free, free);
pe_rule_eval_data_t rule_data = {
.node_hash = node_attrs,
.role = pcmk_role_unknown,
.now = scheduler->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
pe__unpack_dataset_nvpairs(action_xml, XML_TAG_ATTR_SETS,
&rule_data, params, NULL,
FALSE, scheduler);
return params;
}
/*!
* \internal
* \brief Update an action's optional flag
*
* \param[in,out] action Action to update
* \param[in] optional Requested optional status
*/
static void
update_action_optional(pcmk_action_t *action, gboolean optional)
{
// Force a non-recurring action to be optional if its resource is unmanaged
if ((action->rsc != NULL) && (action->node != NULL)
&& !pcmk_is_set(action->flags, pcmk_action_pseudo)
&& !pcmk_is_set(action->rsc->flags, pcmk_rsc_managed)
&& (g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS) == NULL)) {
pcmk__rsc_debug(action->rsc,
"%s on %s is optional (%s is unmanaged)",
action->uuid, pe__node_name(action->node),
action->rsc->id);
pe__set_action_flags(action, pcmk_action_optional);
// We shouldn't clear runnable here because ... something
// Otherwise require the action if requested
} else if (!optional) {
pe__clear_action_flags(action, pcmk_action_optional);
}
}
static enum pe_quorum_policy
effective_quorum_policy(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
enum pe_quorum_policy policy = scheduler->no_quorum_policy;
if (pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
policy = pcmk_no_quorum_ignore;
} else if (scheduler->no_quorum_policy == pcmk_no_quorum_demote) {
switch (rsc->role) {
case pcmk_role_promoted:
case pcmk_role_unpromoted:
if (rsc->next_role > pcmk_role_unpromoted) {
pe__set_next_role(rsc, pcmk_role_unpromoted,
PCMK_OPT_NO_QUORUM_POLICY "=demote");
}
policy = pcmk_no_quorum_ignore;
break;
default:
policy = pcmk_no_quorum_stop;
break;
}
}
return policy;
}
/*!
* \internal
* \brief Update a resource action's runnable flag
*
* \param[in,out] action Action to update
* \param[in,out] scheduler Scheduler data
*
* \note This may also schedule fencing if a stop is unrunnable.
*/
static void
update_resource_action_runnable(pcmk_action_t *action,
pcmk_scheduler_t *scheduler)
{
if (pcmk_is_set(action->flags, pcmk_action_pseudo)) {
return;
}
if (action->node == NULL) {
pcmk__rsc_trace(action->rsc, "%s is unrunnable (unallocated)",
action->uuid);
pe__clear_action_flags(action, pcmk_action_runnable);
} else if (!pcmk_is_set(action->flags, pcmk_action_on_dc)
&& !(action->node->details->online)
&& (!pe__is_guest_node(action->node)
|| action->node->details->remote_requires_reset)) {
pe__clear_action_flags(action, pcmk_action_runnable);
do_crm_log(LOG_WARNING, "%s on %s is unrunnable (node is offline)",
action->uuid, pe__node_name(action->node));
if (pcmk_is_set(action->rsc->flags, pcmk_rsc_managed)
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei)
&& !(action->node->details->unclean)) {
pe_fence_node(scheduler, action->node, "stop is unrunnable", false);
}
} else if (!pcmk_is_set(action->flags, pcmk_action_on_dc)
&& action->node->details->pending) {
pe__clear_action_flags(action, pcmk_action_runnable);
do_crm_log(LOG_WARNING,
"Action %s on %s is unrunnable (node is pending)",
action->uuid, pe__node_name(action->node));
} else if (action->needs == pcmk_requires_nothing) {
pe_action_set_reason(action, NULL, TRUE);
if (pe__is_guest_node(action->node)
&& !pe_can_fence(scheduler, action->node)) {
/* An action that requires nothing usually does not require any
* fencing in order to be runnable. However, there is an exception:
* such an action cannot be completed if it is on a guest node whose
* host is unclean and cannot be fenced.
*/
pcmk__rsc_debug(action->rsc,
"%s on %s is unrunnable "
"(node's host cannot be fenced)",
action->uuid, pe__node_name(action->node));
pe__clear_action_flags(action, pcmk_action_runnable);
} else {
pcmk__rsc_trace(action->rsc,
"%s on %s does not require fencing or quorum",
action->uuid, pe__node_name(action->node));
pe__set_action_flags(action, pcmk_action_runnable);
}
} else {
switch (effective_quorum_policy(action->rsc, scheduler)) {
case pcmk_no_quorum_stop:
pcmk__rsc_debug(action->rsc,
"%s on %s is unrunnable (no quorum)",
action->uuid, pe__node_name(action->node));
pe__clear_action_flags(action, pcmk_action_runnable);
pe_action_set_reason(action, "no quorum", true);
break;
case pcmk_no_quorum_freeze:
if (!action->rsc->fns->active(action->rsc, TRUE)
|| (action->rsc->next_role > action->rsc->role)) {
pcmk__rsc_debug(action->rsc,
"%s on %s is unrunnable (no quorum)",
action->uuid, pe__node_name(action->node));
pe__clear_action_flags(action, pcmk_action_runnable);
pe_action_set_reason(action, "quorum freeze", true);
}
break;
default:
//pe_action_set_reason(action, NULL, TRUE);
pe__set_action_flags(action, pcmk_action_runnable);
break;
}
}
}
/*!
* \internal
* \brief Update a resource object's flags for a new action on it
*
* \param[in,out] rsc Resource that action is for (if any)
* \param[in] action New action
*/
static void
update_resource_flags_for_action(pcmk_resource_t *rsc,
const pcmk_action_t *action)
{
/* @COMPAT pcmk_rsc_starting and pcmk_rsc_stopping are deprecated and unused
* within Pacemaker, and will eventually be removed
*/
if (pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pcmk_rsc_stopping);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_casei)) {
if (pcmk_is_set(action->flags, pcmk_action_runnable)) {
pe__set_resource_flags(rsc, pcmk_rsc_starting);
} else {
pe__clear_resource_flags(rsc, pcmk_rsc_starting);
}
}
}
static bool
valid_stop_on_fail(const char *value)
{
return !pcmk__strcase_any_of(value, "standby", "demote", "stop", NULL);
}
/*!
* \internal
* \brief Validate (and possibly reset) resource action's on_fail meta-attribute
*
* \param[in] rsc Resource that action is for
* \param[in] action_name Action name
* \param[in] action_config Action configuration XML from CIB (if any)
* \param[in,out] meta Table of action meta-attributes
*/
static void
validate_on_fail(const pcmk_resource_t *rsc, const char *action_name,
const xmlNode *action_config, GHashTable *meta)
{
const char *name = NULL;
const char *role = NULL;
const char *interval_spec = NULL;
const char *value = g_hash_table_lookup(meta, XML_OP_ATTR_ON_FAIL);
char *key = NULL;
char *new_value = NULL;
guint interval_ms = 0U;
// Stop actions can only use certain on-fail values
if (pcmk__str_eq(action_name, PCMK_ACTION_STOP, pcmk__str_none)
&& !valid_stop_on_fail(value)) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop "
"action to default value because '%s' is not "
"allowed for stop", rsc->id, value);
g_hash_table_remove(meta, XML_OP_ATTR_ON_FAIL);
return;
}
/* Demote actions default on-fail to the on-fail value for the first
* recurring monitor for the promoted role (if any).
*/
if (pcmk__str_eq(action_name, PCMK_ACTION_DEMOTE, pcmk__str_none)
&& (value == NULL)) {
/* @TODO This does not consider promote options set in a meta-attribute
* block (which may have rules that need to be evaluated) rather than
* XML properties.
*/
for (xmlNode *operation = first_named_child(rsc->ops_xml, XML_ATTR_OP);
operation != NULL; operation = crm_next_same_xml(operation)) {
bool enabled = false;
const char *promote_on_fail = NULL;
/* We only care about explicit on-fail (if promote uses default, so
* can demote)
*/
promote_on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL);
if (promote_on_fail == NULL) {
continue;
}
// We only care about recurring monitors for the promoted role
name = crm_element_value(operation, "name");
role = crm_element_value(operation, "role");
if (!pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_none)
|| !pcmk__strcase_any_of(role, PCMK__ROLE_PROMOTED,
PCMK__ROLE_PROMOTED_LEGACY, NULL)) {
continue;
}
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
- pcmk__parse_interval_spec(interval_spec, &interval_ms);
+ pcmk_parse_interval_spec(interval_spec, &interval_ms);
if (interval_ms == 0U) {
continue;
}
// We only care about enabled monitors
if ((pcmk__xe_get_bool_attr(operation, "enabled",
&enabled) == pcmk_rc_ok) && !enabled) {
continue;
}
// Demote actions can't default to on-fail="demote"
if (pcmk__str_eq(promote_on_fail, "demote", pcmk__str_casei)) {
continue;
}
// Use value from first applicable promote action found
key = strdup(XML_OP_ATTR_ON_FAIL);
new_value = strdup(promote_on_fail);
CRM_ASSERT((key != NULL) && (new_value != NULL));
g_hash_table_insert(meta, key, new_value);
}
return;
}
if (pcmk__str_eq(action_name, PCMK_ACTION_LRM_DELETE, pcmk__str_none)
&& !pcmk__str_eq(value, "ignore", pcmk__str_casei)) {
key = strdup(XML_OP_ATTR_ON_FAIL);
new_value = strdup("ignore");
CRM_ASSERT((key != NULL) && (new_value != NULL));
g_hash_table_insert(meta, key, new_value);
return;
}
// on-fail="demote" is allowed only for certain actions
if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
name = crm_element_value(action_config, "name");
role = crm_element_value(action_config, "role");
interval_spec = crm_element_value(action_config,
XML_LRM_ATTR_INTERVAL);
- pcmk__parse_interval_spec(interval_spec, &interval_ms);
+ pcmk_parse_interval_spec(interval_spec, &interval_ms);
if (!pcmk__str_eq(name, PCMK_ACTION_PROMOTE, pcmk__str_none)
&& ((interval_ms == 0U)
|| !pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_none)
|| !pcmk__strcase_any_of(role, PCMK__ROLE_PROMOTED,
PCMK__ROLE_PROMOTED_LEGACY, NULL))) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s "
"action to default value because 'demote' is not "
"allowed for it", rsc->id, name);
g_hash_table_remove(meta, XML_OP_ATTR_ON_FAIL);
return;
}
}
}
static int
unpack_timeout(const char *value)
{
int timeout_ms = crm_get_msec(value);
if (timeout_ms < 0) {
timeout_ms = PCMK_DEFAULT_ACTION_TIMEOUT_MS;
}
return timeout_ms;
}
// true if value contains valid, non-NULL interval origin for recurring op
static bool
unpack_interval_origin(const char *value, const xmlNode *xml_obj,
guint interval_ms, const crm_time_t *now,
long long *start_delay)
{
long long result = 0;
guint interval_sec = interval_ms / 1000;
crm_time_t *origin = NULL;
// Ignore unspecified values and non-recurring operations
if ((value == NULL) || (interval_ms == 0) || (now == NULL)) {
return false;
}
// Parse interval origin from text
origin = crm_time_new(value);
if (origin == NULL) {
pcmk__config_err("Ignoring '" XML_OP_ATTR_ORIGIN "' for operation "
"'%s' because '%s' is not valid",
(ID(xml_obj)? ID(xml_obj) : "(missing ID)"), value);
return false;
}
// Get seconds since origin (negative if origin is in the future)
result = crm_time_get_seconds(now) - crm_time_get_seconds(origin);
crm_time_free(origin);
// Calculate seconds from closest interval to now
result = result % interval_sec;
// Calculate seconds remaining until next interval
result = ((result <= 0)? 0 : interval_sec) - result;
crm_info("Calculated a start delay of %llds for operation '%s'",
result,
(ID(xml_obj)? ID(xml_obj) : "(unspecified)"));
if (start_delay != NULL) {
*start_delay = result * 1000; // milliseconds
}
return true;
}
static int
unpack_start_delay(const char *value, GHashTable *meta)
{
int start_delay = 0;
if (value != NULL) {
start_delay = crm_get_msec(value);
if (start_delay < 0) {
start_delay = 0;
}
if (meta) {
g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY),
pcmk__itoa(start_delay));
}
}
return start_delay;
}
/*!
* \internal
* \brief Find a resource's most frequent recurring monitor
*
* \param[in] rsc Resource to check
*
* \return Operation XML configured for most frequent recurring monitor for
* \p rsc (if any)
*/
static xmlNode *
most_frequent_monitor(const pcmk_resource_t *rsc)
{
guint min_interval_ms = G_MAXUINT;
xmlNode *op = NULL;
for (xmlNode *operation = first_named_child(rsc->ops_xml, XML_ATTR_OP);
operation != NULL; operation = crm_next_same_xml(operation)) {
bool enabled = false;
guint interval_ms = 0U;
const char *interval_spec = crm_element_value(operation,
XML_LRM_ATTR_INTERVAL);
// We only care about enabled recurring monitors
if (!pcmk__str_eq(crm_element_value(operation, "name"),
PCMK_ACTION_MONITOR, pcmk__str_none)) {
continue;
}
- pcmk__parse_interval_spec(interval_spec, &interval_ms);
+ pcmk_parse_interval_spec(interval_spec, &interval_ms);
if (interval_ms == 0U) {
continue;
}
// @TODO This does not account for rules, defaults, etc.
if ((pcmk__xe_get_bool_attr(operation, "enabled",
&enabled) == pcmk_rc_ok) && !enabled) {
continue;
}
if (interval_ms < min_interval_ms) {
min_interval_ms = interval_ms;
op = operation;
}
}
return op;
}
/*!
* \internal
* \brief Unpack action meta-attributes
*
* \param[in,out] rsc Resource that action is for
* \param[in] node Node that action is on
* \param[in] action_name Action name
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] action_config Action XML configuration from CIB (if any)
*
* Unpack a resource action's meta-attributes (normalizing the interval,
* timeout, and start delay values as integer milliseconds) from its CIB XML
* configuration (including defaults).
*
* \return Newly allocated hash table with normalized action meta-attributes
*/
GHashTable *
pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node,
const char *action_name, guint interval_ms,
const xmlNode *action_config)
{
GHashTable *meta = NULL;
char *name = NULL;
char *value = NULL;
const char *timeout_spec = NULL;
const char *str = NULL;
pe_rsc_eval_data_t rsc_rule_data = {
.standard = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS),
.provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER),
.agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE),
};
pe_op_eval_data_t op_rule_data = {
.op_name = action_name,
.interval = interval_ms,
};
pe_rule_eval_data_t rule_data = {
.node_hash = (node == NULL)? NULL : node->details->attrs,
.role = pcmk_role_unknown,
.now = rsc->cluster->now,
.match_data = NULL,
.rsc_data = &rsc_rule_data,
.op_data = &op_rule_data,
};
meta = pcmk__strkey_table(free, free);
// Cluster-wide <op_defaults> <meta_attributes>
pe__unpack_dataset_nvpairs(rsc->cluster->op_defaults, XML_TAG_META_SETS,
&rule_data, meta, NULL, FALSE, rsc->cluster);
// Derive default timeout for probes from recurring monitor timeouts
if (pcmk_is_probe(action_name, interval_ms)) {
xmlNode *min_interval_mon = most_frequent_monitor(rsc);
if (min_interval_mon != NULL) {
/* @TODO This does not consider timeouts set in meta_attributes
* blocks (which may also have rules that need to be evaluated).
*/
timeout_spec = crm_element_value(min_interval_mon,
XML_ATTR_TIMEOUT);
if (timeout_spec != NULL) {
pcmk__rsc_trace(rsc,
"Setting default timeout for %s probe to "
"most frequent monitor's timeout '%s'",
rsc->id, timeout_spec);
name = strdup(XML_ATTR_TIMEOUT);
value = strdup(timeout_spec);
CRM_ASSERT((name != NULL) && (value != NULL));
g_hash_table_insert(meta, name, value);
}
}
}
if (action_config != NULL) {
// <op> <meta_attributes> take precedence over defaults
pe__unpack_dataset_nvpairs(action_config, XML_TAG_META_SETS, &rule_data,
meta, NULL, TRUE, rsc->cluster);
/* Anything set as an <op> XML property has highest precedence.
* This ensures we use the name and interval from the <op> tag.
* (See below for the only exception, fence device start/probe timeout.)
*/
for (xmlAttrPtr attr = action_config->properties;
attr != NULL; attr = attr->next) {
name = strdup((const char *) attr->name);
value = strdup(pcmk__xml_attr_value(attr));
CRM_ASSERT((name != NULL) && (value != NULL));
g_hash_table_insert(meta, name, value);
}
}
g_hash_table_remove(meta, XML_ATTR_ID);
// Normalize interval to milliseconds
if (interval_ms > 0) {
name = strdup(XML_LRM_ATTR_INTERVAL);
CRM_ASSERT(name != NULL);
value = crm_strdup_printf("%u", interval_ms);
g_hash_table_insert(meta, name, value);
} else {
g_hash_table_remove(meta, XML_LRM_ATTR_INTERVAL);
}
/* Timeout order of precedence (highest to lowest):
* 1. pcmk_monitor_timeout resource parameter (only for starts and probes
* when rsc has pcmk_ra_cap_fence_params; this gets used for recurring
* monitors via the executor instead)
* 2. timeout configured in <op> (with <op timeout> taking precedence over
* <op> <meta_attributes>)
* 3. timeout configured in <op_defaults> <meta_attributes>
* 4. PCMK_DEFAULT_ACTION_TIMEOUT_MS
*/
// Check for pcmk_monitor_timeout
if (pcmk_is_set(pcmk_get_ra_caps(rsc_rule_data.standard),
pcmk_ra_cap_fence_params)
&& (pcmk__str_eq(action_name, PCMK_ACTION_START, pcmk__str_none)
|| pcmk_is_probe(action_name, interval_ms))) {
GHashTable *params = pe_rsc_params(rsc, node, rsc->cluster);
timeout_spec = g_hash_table_lookup(params, "pcmk_monitor_timeout");
if (timeout_spec != NULL) {
pcmk__rsc_trace(rsc,
"Setting timeout for %s %s to "
"pcmk_monitor_timeout (%s)",
rsc->id, action_name, timeout_spec);
name = strdup(XML_ATTR_TIMEOUT);
value = strdup(timeout_spec);
CRM_ASSERT((name != NULL) && (value != NULL));
g_hash_table_insert(meta, name, value);
}
}
// Normalize timeout to positive milliseconds
name = strdup(XML_ATTR_TIMEOUT);
CRM_ASSERT(name != NULL);
timeout_spec = g_hash_table_lookup(meta, XML_ATTR_TIMEOUT);
g_hash_table_insert(meta, name, pcmk__itoa(unpack_timeout(timeout_spec)));
// Ensure on-fail has a valid value
validate_on_fail(rsc, action_name, action_config, meta);
// Normalize start-delay
str = g_hash_table_lookup(meta, XML_OP_ATTR_START_DELAY);
if (str != NULL) {
unpack_start_delay(str, meta);
} else {
long long start_delay = 0;
str = g_hash_table_lookup(meta, XML_OP_ATTR_ORIGIN);
if (unpack_interval_origin(str, action_config, interval_ms,
rsc->cluster->now, &start_delay)) {
name = strdup(XML_OP_ATTR_START_DELAY);
CRM_ASSERT(name != NULL);
g_hash_table_insert(meta, name,
crm_strdup_printf("%lld", start_delay));
}
}
return meta;
}
/*!
* \internal
* \brief Determine an action's quorum and fencing dependency
*
* \param[in] rsc Resource that action is for
* \param[in] action_name Name of action being unpacked
*
* \return Quorum and fencing dependency appropriate to action
*/
enum rsc_start_requirement
pcmk__action_requires(const pcmk_resource_t *rsc, const char *action_name)
{
const char *value = NULL;
enum rsc_start_requirement requires = pcmk_requires_nothing;
CRM_CHECK((rsc != NULL) && (action_name != NULL), return requires);
if (!pcmk__strcase_any_of(action_name, PCMK_ACTION_START,
PCMK_ACTION_PROMOTE, NULL)) {
value = "nothing (not start or promote)";
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)) {
requires = pcmk_requires_fencing;
value = "fencing";
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_needs_quorum)) {
requires = pcmk_requires_quorum;
value = "quorum";
} else {
value = "nothing";
}
pcmk__rsc_trace(rsc, "%s of %s requires %s", action_name, rsc->id, value);
return requires;
}
/*!
* \internal
* \brief Parse action failure response from a user-provided string
*
* \param[in] rsc Resource that action is for
* \param[in] action_name Name of action
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] value User-provided configuration value for on-fail
*
* \return Action failure response parsed from \p text
*/
enum action_fail_response
pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name,
guint interval_ms, const char *value)
{
const char *desc = NULL;
bool needs_remote_reset = false;
enum action_fail_response on_fail = pcmk_on_fail_ignore;
if (value == NULL) {
// Use default
} else if (pcmk__str_eq(value, "block", pcmk__str_casei)) {
on_fail = pcmk_on_fail_block;
desc = "block";
} else if (pcmk__str_eq(value, "fence", pcmk__str_casei)) {
if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
on_fail = pcmk_on_fail_fence_node;
desc = "node fencing";
} else {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for "
"%s of %s to 'stop' because 'fence' is not "
"valid when fencing is disabled",
action_name, rsc->id);
on_fail = pcmk_on_fail_stop;
desc = "stop resource";
}
} else if (pcmk__str_eq(value, "standby", pcmk__str_casei)) {
on_fail = pcmk_on_fail_standby_node;
desc = "node standby";
} else if (pcmk__strcase_any_of(value, "ignore", PCMK__VALUE_NOTHING,
NULL)) {
desc = "ignore";
} else if (pcmk__str_eq(value, "migrate", pcmk__str_casei)) {
on_fail = pcmk_on_fail_ban;
desc = "force migration";
} else if (pcmk__str_eq(value, "stop", pcmk__str_casei)) {
on_fail = pcmk_on_fail_stop;
desc = "stop resource";
} else if (pcmk__str_eq(value, "restart", pcmk__str_casei)) {
on_fail = pcmk_on_fail_restart;
desc = "restart (and possibly migrate)";
} else if (pcmk__str_eq(value, "restart-container", pcmk__str_casei)) {
if (rsc->container == NULL) {
pcmk__rsc_debug(rsc,
"Using default " XML_OP_ATTR_ON_FAIL " for %s "
"of %s because it does not have a container",
action_name, rsc->id);
} else {
on_fail = pcmk_on_fail_restart_container;
desc = "restart container (and possibly migrate)";
}
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
on_fail = pcmk_on_fail_demote;
desc = "demote instance";
} else {
pcmk__config_err("Using default '" XML_OP_ATTR_ON_FAIL "' for "
"%s of %s because '%s' is not valid",
action_name, rsc->id, value);
}
/* Remote node connections are handled specially. Failures that result
* in dropping an active connection must result in fencing. The only
* failures that don't are probes and starts. The user can explicitly set
* on-fail="fence" to fence after start failures.
*/
if (pe__resource_is_remote_conn(rsc)
&& !pcmk_is_probe(action_name, interval_ms)
&& !pcmk__str_eq(action_name, PCMK_ACTION_START, pcmk__str_none)) {
needs_remote_reset = true;
if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
desc = NULL; // Force default for unmanaged connections
}
}
if (desc != NULL) {
// Explicit value used, default not needed
} else if (rsc->container != NULL) {
on_fail = pcmk_on_fail_restart_container;
desc = "restart container (and possibly migrate) (default)";
} else if (needs_remote_reset) {
if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
if (pcmk_is_set(rsc->cluster->flags,
pcmk_sched_fencing_enabled)) {
desc = "fence remote node (default)";
} else {
desc = "recover remote node connection (default)";
}
on_fail = pcmk_on_fail_reset_remote;
} else {
on_fail = pcmk_on_fail_stop;
desc = "stop unmanaged remote node (enforcing default)";
}
} else if (pcmk__str_eq(action_name, PCMK_ACTION_STOP, pcmk__str_none)) {
if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
on_fail = pcmk_on_fail_fence_node;
desc = "resource fence (default)";
} else {
on_fail = pcmk_on_fail_block;
desc = "resource block (default)";
}
} else {
on_fail = pcmk_on_fail_restart;
desc = "restart (and possibly migrate) (default)";
}
pcmk__rsc_trace(rsc, "Failure handling for %s-interval %s of %s: %s",
pcmk__readable_interval(interval_ms), action_name,
rsc->id, desc);
return on_fail;
}
/*!
* \internal
* \brief Determine a resource's role after failure of an action
*
* \param[in] rsc Resource that action is for
* \param[in] action_name Action name
* \param[in] on_fail Failure handling for action
* \param[in] meta Unpacked action meta-attributes
*
* \return Resource role that results from failure of action
*/
enum rsc_role_e
pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name,
enum action_fail_response on_fail, GHashTable *meta)
{
const char *value = NULL;
enum rsc_role_e role = pcmk_role_unknown;
// Set default for role after failure specially in certain circumstances
switch (on_fail) {
case pcmk_on_fail_stop:
role = pcmk_role_stopped;
break;
case pcmk_on_fail_reset_remote:
if (rsc->remote_reconnect_ms != 0) {
role = pcmk_role_stopped;
}
break;
default:
break;
}
// @COMPAT Check for explicitly configured role (deprecated)
value = g_hash_table_lookup(meta, "role_after_failure");
if (value != NULL) {
pcmk__warn_once(pcmk__wo_role_after,
"Support for role_after_failure is deprecated "
"and will be removed in a future release");
if (role == pcmk_role_unknown) {
role = text2role(value);
if (role == pcmk_role_unknown) {
pcmk__config_err("Ignoring invalid value %s "
"for role_after_failure", value);
}
}
}
if (role == pcmk_role_unknown) {
// Use default
if (pcmk__str_eq(action_name, PCMK_ACTION_PROMOTE, pcmk__str_none)) {
role = pcmk_role_unpromoted;
} else {
role = pcmk_role_started;
}
}
pcmk__rsc_trace(rsc, "Role after %s %s failure is: %s",
rsc->id, action_name, role2text(role));
return role;
}
/*!
* \internal
* \brief Unpack action configuration
*
* Unpack a resource action's meta-attributes (normalizing the interval,
* timeout, and start delay values as integer milliseconds), requirements, and
* failure policy from its CIB XML configuration (including defaults).
*
* \param[in,out] action Resource action to unpack into
* \param[in] xml_obj Action configuration XML (NULL for defaults only)
* \param[in] interval_ms How frequently to perform the operation
*/
static void
unpack_operation(pcmk_action_t *action, const xmlNode *xml_obj,
guint interval_ms)
{
const char *value = NULL;
action->meta = pcmk__unpack_action_meta(action->rsc, action->node,
action->task, interval_ms, xml_obj);
action->needs = pcmk__action_requires(action->rsc, action->task);
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL);
action->on_fail = pcmk__parse_on_fail(action->rsc, action->task,
interval_ms, value);
action->fail_role = pcmk__role_after_failure(action->rsc, action->task,
action->on_fail, action->meta);
}
/*!
* \brief Create or update an action object
*
* \param[in,out] rsc Resource that action is for (if any)
* \param[in,out] key Action key (must be non-NULL)
* \param[in] task Action name (must be non-NULL)
* \param[in] on_node Node that action is on (if any)
* \param[in] optional Whether action should be considered optional
* \param[in,out] scheduler Scheduler data
*
* \return Action object corresponding to arguments (guaranteed not to be
* \c NULL)
* \note This function takes ownership of (and might free) \p key, and
* \p scheduler takes ownership of the returned action (the caller should
* not free it).
*/
pcmk_action_t *
custom_action(pcmk_resource_t *rsc, char *key, const char *task,
const pcmk_node_t *on_node, gboolean optional,
pcmk_scheduler_t *scheduler)
{
pcmk_action_t *action = NULL;
CRM_ASSERT((key != NULL) && (task != NULL) && (scheduler != NULL));
action = find_existing_action(key, rsc, on_node, scheduler);
if (action == NULL) {
action = new_action(key, task, rsc, on_node, optional, scheduler);
} else {
free(key);
}
update_action_optional(action, optional);
if (rsc != NULL) {
if ((action->node != NULL) && (action->op_entry != NULL)
&& !pcmk_is_set(action->flags, pcmk_action_attrs_evaluated)) {
GHashTable *attrs = action->node->details->attrs;
if (action->extra != NULL) {
g_hash_table_destroy(action->extra);
}
action->extra = pcmk__unpack_action_rsc_params(action->op_entry,
attrs, scheduler);
pe__set_action_flags(action, pcmk_action_attrs_evaluated);
}
update_resource_action_runnable(action, scheduler);
update_resource_flags_for_action(rsc, action);
}
if (action->extra == NULL) {
action->extra = pcmk__strkey_table(free, free);
}
return action;
}
pcmk_action_t *
get_pseudo_op(const char *name, pcmk_scheduler_t *scheduler)
{
pcmk_action_t *op = lookup_singleton(scheduler, name);
if (op == NULL) {
op = custom_action(NULL, strdup(name), name, NULL, TRUE, scheduler);
pe__set_action_flags(op, pcmk_action_pseudo|pcmk_action_runnable);
}
return op;
}
static GList *
find_unfencing_devices(GList *candidates, GList *matches)
{
for (GList *gIter = candidates; gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *candidate = gIter->data;
if (candidate->children != NULL) {
matches = find_unfencing_devices(candidate->children, matches);
} else if (!pcmk_is_set(candidate->flags, pcmk_rsc_fence_device)) {
continue;
} else if (pcmk_is_set(candidate->flags, pcmk_rsc_needs_unfencing)) {
matches = g_list_prepend(matches, candidate);
} else if (pcmk__str_eq(g_hash_table_lookup(candidate->meta,
PCMK_STONITH_PROVIDES),
PCMK__VALUE_UNFENCING,
pcmk__str_casei)) {
matches = g_list_prepend(matches, candidate);
}
}
return matches;
}
static int
node_priority_fencing_delay(const pcmk_node_t *node,
const pcmk_scheduler_t *scheduler)
{
int member_count = 0;
int online_count = 0;
int top_priority = 0;
int lowest_priority = 0;
GList *gIter = NULL;
// PCMK_OPT_PRIORITY_FENCING_DELAY is disabled
if (scheduler->priority_fencing_delay <= 0) {
return 0;
}
/* No need to request a delay if the fencing target is not a normal cluster
* member, for example if it's a remote node or a guest node. */
if (node->details->type != pcmk_node_variant_cluster) {
return 0;
}
// No need to request a delay if the fencing target is in our partition
if (node->details->online) {
return 0;
}
for (gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *n = gIter->data;
if (n->details->type != pcmk_node_variant_cluster) {
continue;
}
member_count ++;
if (n->details->online) {
online_count++;
}
if (member_count == 1
|| n->details->priority > top_priority) {
top_priority = n->details->priority;
}
if (member_count == 1
|| n->details->priority < lowest_priority) {
lowest_priority = n->details->priority;
}
}
// No need to delay if we have more than half of the cluster members
if (online_count > member_count / 2) {
return 0;
}
/* All the nodes have equal priority.
* Any configured corresponding `pcmk_delay_base/max` will be applied. */
if (lowest_priority == top_priority) {
return 0;
}
if (node->details->priority < top_priority) {
return 0;
}
return scheduler->priority_fencing_delay;
}
pcmk_action_t *
pe_fence_op(pcmk_node_t *node, const char *op, bool optional,
const char *reason, bool priority_delay,
pcmk_scheduler_t *scheduler)
{
char *op_key = NULL;
pcmk_action_t *stonith_op = NULL;
if(op == NULL) {
op = scheduler->stonith_action;
}
op_key = crm_strdup_printf("%s-%s-%s",
PCMK_ACTION_STONITH, node->details->uname, op);
stonith_op = lookup_singleton(scheduler, op_key);
if(stonith_op == NULL) {
stonith_op = custom_action(NULL, op_key, PCMK_ACTION_STONITH, node,
TRUE, scheduler);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
add_hash_param(stonith_op->meta, "stonith_action", op);
if (pcmk_is_set(scheduler->flags, pcmk_sched_enable_unfencing)) {
/* Extra work to detect device changes
*/
GString *digests_all = g_string_sized_new(1024);
GString *digests_secure = g_string_sized_new(1024);
GList *matches = find_unfencing_devices(scheduler->resources, NULL);
char *key = NULL;
char *value = NULL;
for (GList *gIter = matches; gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *match = gIter->data;
const char *agent = g_hash_table_lookup(match->meta,
XML_ATTR_TYPE);
pcmk__op_digest_t *data = NULL;
data = pe__compare_fencing_digest(match, agent, node,
scheduler);
if (data->rc == pcmk__digest_mismatch) {
optional = FALSE;
crm_notice("Unfencing node %s because the definition of "
"%s changed", pe__node_name(node), match->id);
if (!pcmk__is_daemon && scheduler->priv != NULL) {
pcmk__output_t *out = scheduler->priv;
out->info(out,
"notice: Unfencing node %s because the "
"definition of %s changed",
pe__node_name(node), match->id);
}
}
pcmk__g_strcat(digests_all,
match->id, ":", agent, ":",
data->digest_all_calc, ",", NULL);
pcmk__g_strcat(digests_secure,
match->id, ":", agent, ":",
data->digest_secure_calc, ",", NULL);
}
key = strdup(XML_OP_ATTR_DIGESTS_ALL);
value = strdup((const char *) digests_all->str);
CRM_ASSERT((key != NULL) && (value != NULL));
g_hash_table_insert(stonith_op->meta, key, value);
g_string_free(digests_all, TRUE);
key = strdup(XML_OP_ATTR_DIGESTS_SECURE);
value = strdup((const char *) digests_secure->str);
CRM_ASSERT((key != NULL) && (value != NULL));
g_hash_table_insert(stonith_op->meta, key, value);
g_string_free(digests_secure, TRUE);
}
} else {
free(op_key);
}
if (scheduler->priority_fencing_delay > 0
/* It's a suitable case where PCMK_OPT_PRIORITY_FENCING_DELAY
* applies. At least add PCMK_OPT_PRIORITY_FENCING_DELAY field as
* an indicator.
*/
&& (priority_delay
/* The priority delay needs to be recalculated if this function has
* been called by schedule_fencing_and_shutdowns() after node
* priority has already been calculated by native_add_running().
*/
|| g_hash_table_lookup(stonith_op->meta,
PCMK_OPT_PRIORITY_FENCING_DELAY) != NULL)) {
/* Add PCMK_OPT_PRIORITY_FENCING_DELAY to the fencing op even if
* it's 0 for the targeting node. So that it takes precedence over
* any possible `pcmk_delay_base/max`.
*/
char *delay_s = pcmk__itoa(node_priority_fencing_delay(node,
scheduler));
g_hash_table_insert(stonith_op->meta,
strdup(PCMK_OPT_PRIORITY_FENCING_DELAY),
delay_s);
}
if(optional == FALSE && pe_can_fence(scheduler, node)) {
pe__clear_action_flags(stonith_op, pcmk_action_optional);
pe_action_set_reason(stonith_op, reason, false);
} else if(reason && stonith_op->reason == NULL) {
stonith_op->reason = strdup(reason);
}
return stonith_op;
}
void
pe_free_action(pcmk_action_t *action)
{
if (action == NULL) {
return;
}
g_list_free_full(action->actions_before, free);
g_list_free_full(action->actions_after, free);
if (action->extra) {
g_hash_table_destroy(action->extra);
}
if (action->meta) {
g_hash_table_destroy(action->meta);
}
free(action->cancel_task);
free(action->reason);
free(action->task);
free(action->uuid);
free(action->node);
free(action);
}
int
pe_get_configured_timeout(pcmk_resource_t *rsc, const char *action,
pcmk_scheduler_t *scheduler)
{
xmlNode *child = NULL;
GHashTable *action_meta = NULL;
const char *timeout_spec = NULL;
int timeout_ms = 0;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = pcmk_role_unknown,
.now = scheduler->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP);
child != NULL; child = crm_next_same_xml(child)) {
if (pcmk__str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME),
pcmk__str_casei)) {
timeout_spec = crm_element_value(child, XML_ATTR_TIMEOUT);
break;
}
}
if (timeout_spec == NULL && scheduler->op_defaults) {
action_meta = pcmk__strkey_table(free, free);
pe__unpack_dataset_nvpairs(scheduler->op_defaults, XML_TAG_META_SETS,
&rule_data, action_meta, NULL, FALSE,
scheduler);
timeout_spec = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT);
}
// @TODO check meta-attributes
// @TODO maybe use min-interval monitor timeout as default for monitors
timeout_ms = crm_get_msec(timeout_spec);
if (timeout_ms < 0) {
timeout_ms = PCMK_DEFAULT_ACTION_TIMEOUT_MS;
}
if (action_meta != NULL) {
g_hash_table_destroy(action_meta);
}
return timeout_ms;
}
enum action_tasks
get_complex_task(const pcmk_resource_t *rsc, const char *name)
{
enum action_tasks task = text2task(name);
if ((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_primitive)) {
switch (task) {
case pcmk_action_stopped:
case pcmk_action_started:
case pcmk_action_demoted:
case pcmk_action_promoted:
crm_trace("Folding %s back into its atomic counterpart for %s",
name, rsc->id);
--task;
break;
default:
break;
}
}
return task;
}
/*!
* \internal
* \brief Find first matching action in a list
*
* \param[in] input List of actions to search
* \param[in] uuid If not NULL, action must have this UUID
* \param[in] task If not NULL, action must have this action name
* \param[in] on_node If not NULL, action must be on this node
*
* \return First action in list that matches criteria, or NULL if none
*/
pcmk_action_t *
find_first_action(const GList *input, const char *uuid, const char *task,
const pcmk_node_t *on_node)
{
CRM_CHECK(uuid || task, return NULL);
for (const GList *gIter = input; gIter != NULL; gIter = gIter->next) {
pcmk_action_t *action = (pcmk_action_t *) gIter->data;
if (uuid != NULL && !pcmk__str_eq(uuid, action->uuid, pcmk__str_casei)) {
continue;
} else if (task != NULL && !pcmk__str_eq(task, action->task, pcmk__str_casei)) {
continue;
} else if (on_node == NULL) {
return action;
} else if (action->node == NULL) {
continue;
} else if (on_node->details == action->node->details) {
return action;
}
}
return NULL;
}
GList *
find_actions(GList *input, const char *key, const pcmk_node_t *on_node)
{
GList *gIter = input;
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
for (; gIter != NULL; gIter = gIter->next) {
pcmk_action_t *action = (pcmk_action_t *) gIter->data;
if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) {
continue;
} else if (on_node == NULL) {
crm_trace("Action %s matches (ignoring node)", key);
result = g_list_prepend(result, action);
} else if (action->node == NULL) {
crm_trace("Action %s matches (unallocated, assigning to %s)",
key, pe__node_name(on_node));
action->node = pe__copy_node(on_node);
result = g_list_prepend(result, action);
} else if (on_node->details == action->node->details) {
crm_trace("Action %s on %s matches", key, pe__node_name(on_node));
result = g_list_prepend(result, action);
}
}
return result;
}
GList *
find_actions_exact(GList *input, const char *key, const pcmk_node_t *on_node)
{
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
if (on_node == NULL) {
return NULL;
}
for (GList *gIter = input; gIter != NULL; gIter = gIter->next) {
pcmk_action_t *action = (pcmk_action_t *) gIter->data;
if ((action->node != NULL)
&& pcmk__str_eq(key, action->uuid, pcmk__str_casei)
&& pcmk__str_eq(on_node->details->id, action->node->details->id,
pcmk__str_casei)) {
crm_trace("Action %s on %s matches", key, pe__node_name(on_node));
result = g_list_prepend(result, action);
}
}
return result;
}
/*!
* \brief Find all actions of given type for a resource
*
* \param[in] rsc Resource to search
* \param[in] node Find only actions scheduled on this node
* \param[in] task Action name to search for
* \param[in] require_node If TRUE, NULL node or action node will not match
*
* \return List of actions found (or NULL if none)
* \note If node is not NULL and require_node is FALSE, matching actions
* without a node will be assigned to node.
*/
GList *
pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node,
const char *task, bool require_node)
{
GList *result = NULL;
char *key = pcmk__op_key(rsc->id, task, 0);
if (require_node) {
result = find_actions_exact(rsc->actions, key, node);
} else {
result = find_actions(rsc->actions, key, node);
}
free(key);
return result;
}
/*!
* \internal
* \brief Create an action reason string based on the action itself
*
* \param[in] action Action to create reason string for
* \param[in] flag Action flag that was cleared
*
* \return Newly allocated string suitable for use as action reason
* \note It is the caller's responsibility to free() the result.
*/
char *
pe__action2reason(const pcmk_action_t *action, enum pe_action_flags flag)
{
const char *change = NULL;
switch (flag) {
case pcmk_action_runnable:
change = "unrunnable";
break;
case pcmk_action_migratable:
change = "unmigrateable";
break;
case pcmk_action_optional:
change = "required";
break;
default:
// Bug: caller passed unsupported flag
CRM_CHECK(change != NULL, change = "");
break;
}
return crm_strdup_printf("%s%s%s %s", change,
(action->rsc == NULL)? "" : " ",
(action->rsc == NULL)? "" : action->rsc->id,
action->task);
}
void pe_action_set_reason(pcmk_action_t *action, const char *reason,
bool overwrite)
{
if (action->reason != NULL && overwrite) {
pcmk__rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'",
action->uuid, action->reason,
pcmk__s(reason, "(none)"));
} else if (action->reason == NULL) {
pcmk__rsc_trace(action->rsc, "Set %s reason to '%s'",
action->uuid, pcmk__s(reason, "(none)"));
} else {
// crm_assert(action->reason != NULL && !overwrite);
return;
}
pcmk__str_update(&action->reason, reason);
}
/*!
* \internal
* \brief Create an action to clear a resource's history from CIB
*
* \param[in,out] rsc Resource to clear
* \param[in] node Node to clear history on
*/
void
pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
{
CRM_ASSERT((rsc != NULL) && (node != NULL));
custom_action(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0),
PCMK_ACTION_LRM_DELETE, node, FALSE, rsc->cluster);
}
#define sort_return(an_int, why) do { \
free(a_uuid); \
free(b_uuid); \
crm_trace("%s (%d) %c %s (%d) : %s", \
a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \
b_xml_id, b_call_id, why); \
return an_int; \
} while(0)
int
pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b,
bool same_node_default)
{
int a_call_id = -1;
int b_call_id = -1;
char *a_uuid = NULL;
char *b_uuid = NULL;
const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID);
const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID);
const char *a_node = crm_element_value(xml_a, XML_LRM_ATTR_TARGET);
const char *b_node = crm_element_value(xml_b, XML_LRM_ATTR_TARGET);
bool same_node = true;
/* @COMPAT The on_node attribute was added to last_failure as of 1.1.13 (via
* 8b3ca1c) and the other entries as of 1.1.12 (via 0b07b5c).
*
* In case that any of the lrm_rsc_op entries doesn't have on_node
* attribute, we need to explicitly tell whether the two operations are on
* the same node.
*/
if (a_node == NULL || b_node == NULL) {
same_node = same_node_default;
} else {
same_node = pcmk__str_eq(a_node, b_node, pcmk__str_casei);
}
if (same_node && pcmk__str_eq(a_xml_id, b_xml_id, pcmk__str_none)) {
/* We have duplicate lrm_rsc_op entries in the status
* section which is unlikely to be a good thing
* - we can handle it easily enough, but we need to get
* to the bottom of why it's happening.
*/
pcmk__config_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
sort_return(0, "duplicate");
}
crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);
if (a_call_id == -1 && b_call_id == -1) {
/* both are pending ops so it doesn't matter since
* stops are never pending
*/
sort_return(0, "pending");
} else if (same_node && a_call_id >= 0 && a_call_id < b_call_id) {
sort_return(-1, "call id");
} else if (same_node && b_call_id >= 0 && a_call_id > b_call_id) {
sort_return(1, "call id");
} else if (a_call_id >= 0 && b_call_id >= 0
&& (!same_node || a_call_id == b_call_id)) {
/*
* The op and last_failed_op are the same
* Order on last-rc-change
*/
time_t last_a = -1;
time_t last_b = -1;
crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
crm_trace("rc-change: %lld vs %lld",
(long long) last_a, (long long) last_b);
if (last_a >= 0 && last_a < last_b) {
sort_return(-1, "rc-change");
} else if (last_b >= 0 && last_a > last_b) {
sort_return(1, "rc-change");
}
sort_return(0, "rc-change");
} else {
/* One of the inputs is a pending operation
* Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other
*/
int a_id = -1;
int b_id = -1;
const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC);
const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC);
CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic a");
}
if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic b");
}
/* try to determine the relative age of the operation...
* some pending operations (e.g. a start) may have been superseded
* by a subsequent stop
*
* [a|b]_id == -1 means it's a shutdown operation and _always_ comes last
*/
if (!pcmk__str_eq(a_uuid, b_uuid, pcmk__str_casei) || a_id == b_id) {
/*
* some of the logic in here may be redundant...
*
* if the UUID from the TE doesn't match then one better
* be a pending operation.
* pending operations don't survive between elections and joins
* because we query the LRM directly
*/
if (b_call_id == -1) {
sort_return(-1, "transition + call");
} else if (a_call_id == -1) {
sort_return(1, "transition + call");
}
} else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
sort_return(-1, "transition");
} else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
sort_return(1, "transition");
}
}
/* we should never end up here */
CRM_CHECK(FALSE, sort_return(0, "default"));
}
gint
sort_op_by_callid(gconstpointer a, gconstpointer b)
{
const xmlNode *xml_a = a;
const xmlNode *xml_b = b;
return pe__is_newer_op(xml_a, xml_b, true);
}
/*!
* \internal
* \brief Create a new pseudo-action for a resource
*
* \param[in,out] rsc Resource to create action for
* \param[in] task Action name
* \param[in] optional Whether action should be considered optional
* \param[in] runnable Whethe action should be considered runnable
*
* \return New action object corresponding to arguments
*/
pcmk_action_t *
pe__new_rsc_pseudo_action(pcmk_resource_t *rsc, const char *task, bool optional,
bool runnable)
{
pcmk_action_t *action = NULL;
CRM_ASSERT((rsc != NULL) && (task != NULL));
action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL,
optional, rsc->cluster);
pe__set_action_flags(action, pcmk_action_pseudo);
if (runnable) {
pe__set_action_flags(action, pcmk_action_runnable);
}
return action;
}
/*!
* \internal
* \brief Add the expected result to an action
*
* \param[in,out] action Action to add expected result to
* \param[in] expected_result Expected result to add
*
* \note This is more efficient than calling add_hash_param().
*/
void
pe__add_action_expected_result(pcmk_action_t *action, int expected_result)
{
char *name = NULL;
CRM_ASSERT((action != NULL) && (action->meta != NULL));
name = strdup(XML_ATTR_TE_TARGET_RC);
CRM_ASSERT (name != NULL);
g_hash_table_insert(action->meta, name, pcmk__itoa(expected_result));
}
diff --git a/lib/pengine/rules.c b/lib/pengine/rules.c
index e73302789d..1cfbb285cc 100644
--- a/lib/pengine/rules.c
+++ b/lib/pengine/rules.c
@@ -1,1345 +1,1345 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <glib.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/rules_internal.h>
#include <crm/pengine/internal.h>
#include <sys/types.h>
#include <regex.h>
#include <ctype.h>
CRM_TRACE_INIT_DATA(pe_rules);
/*!
* \brief Evaluate any rules contained by given XML element
*
* \param[in,out] xml XML element to check for rules
* \param[in] node_hash Node attributes to use to evaluate expressions
* \param[in] now Time to use when evaluating expressions
* \param[out] next_change If not NULL, set to when evaluation will change
*
* \return TRUE if no rules, or any of rules present is in effect, else FALSE
*/
gboolean
pe_evaluate_rules(xmlNode *ruleset, GHashTable *node_hash, crm_time_t *now,
crm_time_t *next_change)
{
pe_rule_eval_data_t rule_data = {
.node_hash = node_hash,
.role = pcmk_role_unknown,
.now = now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
return pe_eval_rules(ruleset, &rule_data, next_change);
}
gboolean
pe_test_rule(xmlNode *rule, GHashTable *node_hash, enum rsc_role_e role,
crm_time_t *now, crm_time_t *next_change,
pe_match_data_t *match_data)
{
pe_rule_eval_data_t rule_data = {
.node_hash = node_hash,
.role = role,
.now = now,
.match_data = match_data,
.rsc_data = NULL,
.op_data = NULL
};
return pe_eval_expr(rule, &rule_data, next_change);
}
/*!
* \brief Evaluate one rule subelement (pass/fail)
*
* A rule element may contain another rule, a node attribute expression, or a
* date expression. Given any one of those, evaluate it and return whether it
* passed.
*
* \param[in,out] expr Rule subelement XML
* \param[in] node_hash Node attributes to use when evaluating expression
* \param[in] role Resource role to use when evaluating expression
* \param[in] now Time to use when evaluating expression
* \param[out] next_change If not NULL, set to when evaluation will change
* \param[in] match_data If not NULL, resource back-references and params
*
* \return TRUE if expression is in effect under given conditions, else FALSE
*/
gboolean
pe_test_expression(xmlNode *expr, GHashTable *node_hash, enum rsc_role_e role,
crm_time_t *now, crm_time_t *next_change,
pe_match_data_t *match_data)
{
pe_rule_eval_data_t rule_data = {
.node_hash = node_hash,
.role = role,
.now = now,
.match_data = match_data,
.rsc_data = NULL,
.op_data = NULL
};
return pe_eval_subexpr(expr, &rule_data, next_change);
}
enum expression_type
find_expression_type(xmlNode * expr)
{
const char *attr = NULL;
attr = crm_element_value(expr, XML_EXPR_ATTR_ATTRIBUTE);
if (pcmk__xe_is(expr, PCMK_XE_DATE_EXPRESSION)) {
return pcmk__subexpr_datetime;
} else if (pcmk__xe_is(expr, PCMK_XE_RSC_EXPRESSION)) {
return pcmk__subexpr_resource;
} else if (pcmk__xe_is(expr, PCMK_XE_OP_EXPRESSION)) {
return pcmk__subexpr_operation;
} else if (pcmk__xe_is(expr, XML_TAG_RULE)) {
return pcmk__subexpr_rule;
} else if (!pcmk__xe_is(expr, XML_TAG_EXPRESSION)) {
return pcmk__subexpr_unknown;
} else if (pcmk__str_any_of(attr, CRM_ATTR_UNAME, CRM_ATTR_KIND, CRM_ATTR_ID, NULL)) {
return pcmk__subexpr_location;
} else if (pcmk__str_eq(attr, CRM_ATTR_ROLE, pcmk__str_none)) {
return pcmk__subexpr_role;
}
return pcmk__subexpr_attribute;
}
/* As per the nethack rules:
*
* moon period = 29.53058 days ~= 30, year = 365.2422 days
* days moon phase advances on first day of year compared to preceding year
* = 365.2422 - 12*29.53058 ~= 11
* years in Metonic cycle (time until same phases fall on the same days of
* the month) = 18.6 ~= 19
* moon phase on first day of year (epact) ~= (11*(year%19) + 29) % 30
* (29 as initial condition)
* current phase in days = first day phase + days elapsed in year
* 6 moons ~= 177 days
* 177 ~= 8 reported phases * 22
* + 11/22 for rounding
*
* 0-7, with 0: new, 4: full
*/
static int
phase_of_the_moon(const crm_time_t *now)
{
uint32_t epact, diy, goldn;
uint32_t y;
crm_time_get_ordinal(now, &y, &diy);
goldn = (y % 19) + 1;
epact = (11 * goldn + 18) % 30;
if ((epact == 25 && goldn > 11) || epact == 24)
epact++;
return ((((((diy + epact) * 6) + 11) % 177) / 22) & 7);
}
static int
check_one(const xmlNode *cron_spec, const char *xml_field, uint32_t time_field)
{
int rc = pcmk_rc_undetermined;
const char *value = crm_element_value(cron_spec, xml_field);
long long low, high;
if (value == NULL) {
/* Return pe_date_result_undetermined if the field is missing. */
goto bail;
}
if (pcmk__parse_ll_range(value, &low, &high) != pcmk_rc_ok) {
goto bail;
} else if (low == high) {
/* A single number was given, not a range. */
if (time_field < low) {
rc = pcmk_rc_before_range;
} else if (time_field > high) {
rc = pcmk_rc_after_range;
} else {
rc = pcmk_rc_within_range;
}
} else if (low != -1 && high != -1) {
/* This is a range with both bounds. */
if (time_field < low) {
rc = pcmk_rc_before_range;
} else if (time_field > high) {
rc = pcmk_rc_after_range;
} else {
rc = pcmk_rc_within_range;
}
} else if (low == -1) {
/* This is a range with no starting value. */
rc = time_field <= high ? pcmk_rc_within_range : pcmk_rc_after_range;
} else if (high == -1) {
/* This is a range with no ending value. */
rc = time_field >= low ? pcmk_rc_within_range : pcmk_rc_before_range;
}
bail:
if (rc == pcmk_rc_within_range) {
crm_debug("Condition '%s' in %s: passed", value, xml_field);
} else {
crm_debug("Condition '%s' in %s: failed", value, xml_field);
}
return rc;
}
static gboolean
check_passes(int rc) {
/* _within_range is obvious. _undetermined is a pass because
* this is the return value if a field is not given. In this
* case, we just want to ignore it and check other fields to
* see if they place some restriction on what can pass.
*/
return rc == pcmk_rc_within_range || rc == pcmk_rc_undetermined;
}
#define CHECK_ONE(spec, name, var) do { \
int subpart_rc = check_one(spec, name, var); \
if (check_passes(subpart_rc) == FALSE) { \
return subpart_rc; \
} \
} while (0)
int
pe_cron_range_satisfied(const crm_time_t *now, const xmlNode *cron_spec)
{
uint32_t h, m, s, y, d, w;
CRM_CHECK(now != NULL, return pcmk_rc_op_unsatisfied);
crm_time_get_gregorian(now, &y, &m, &d);
CHECK_ONE(cron_spec, "years", y);
CHECK_ONE(cron_spec, "months", m);
CHECK_ONE(cron_spec, "monthdays", d);
crm_time_get_timeofday(now, &h, &m, &s);
CHECK_ONE(cron_spec, "hours", h);
CHECK_ONE(cron_spec, "minutes", m);
CHECK_ONE(cron_spec, "seconds", s);
crm_time_get_ordinal(now, &y, &d);
CHECK_ONE(cron_spec, "yeardays", d);
crm_time_get_isoweek(now, &y, &w, &d);
CHECK_ONE(cron_spec, "weekyears", y);
CHECK_ONE(cron_spec, "weeks", w);
CHECK_ONE(cron_spec, "weekdays", d);
CHECK_ONE(cron_spec, "moon", phase_of_the_moon(now));
if (crm_element_value(cron_spec, "moon") != NULL) {
pcmk__config_warn("Support for 'moon' in date_spec elements "
"(such as %s) is deprecated and will be removed "
"in a future release of Pacemaker", ID(cron_spec));
}
/* If we get here, either no fields were specified (which is success), or all
* the fields that were specified had their conditions met (which is also a
* success). Thus, the result is success.
*/
return pcmk_rc_ok;
}
static void
update_field(crm_time_t *t, const xmlNode *xml, const char *attr,
void (*time_fn)(crm_time_t *, int))
{
long long value;
if ((pcmk__scan_ll(crm_element_value(xml, attr), &value, 0LL) == pcmk_rc_ok)
&& (value != 0LL) && (value >= INT_MIN) && (value <= INT_MAX)) {
time_fn(t, (int) value);
}
}
static crm_time_t *
parse_xml_duration(const crm_time_t *start, const xmlNode *duration_spec)
{
crm_time_t *end = pcmk_copy_time(start);
update_field(end, duration_spec, "years", crm_time_add_years);
update_field(end, duration_spec, "months", crm_time_add_months);
update_field(end, duration_spec, "weeks", crm_time_add_weeks);
update_field(end, duration_spec, "days", crm_time_add_days);
update_field(end, duration_spec, "hours", crm_time_add_hours);
update_field(end, duration_spec, "minutes", crm_time_add_minutes);
update_field(end, duration_spec, "seconds", crm_time_add_seconds);
return end;
}
// Set next_change to t if t is earlier
static void
crm_time_set_if_earlier(crm_time_t *next_change, crm_time_t *t)
{
if ((next_change != NULL) && (t != NULL)) {
if (!crm_time_is_defined(next_change)
|| (crm_time_compare(t, next_change) < 0)) {
crm_time_set(next_change, t);
}
}
}
// Information about a block of nvpair elements
typedef struct sorted_set_s {
int score; // This block's score for sorting
const char *name; // This block's ID
const char *special_name; // ID that should sort first
xmlNode *attr_set; // This block
gboolean overwrite; // Whether existing values will be overwritten
} sorted_set_t;
static gint
sort_pairs(gconstpointer a, gconstpointer b)
{
const sorted_set_t *pair_a = a;
const sorted_set_t *pair_b = b;
if (a == NULL && b == NULL) {
return 0;
} else if (a == NULL) {
return 1;
} else if (b == NULL) {
return -1;
}
if (pcmk__str_eq(pair_a->name, pair_a->special_name, pcmk__str_casei)) {
return -1;
} else if (pcmk__str_eq(pair_b->name, pair_a->special_name, pcmk__str_casei)) {
return 1;
}
/* If we're overwriting values, we want lowest score first, so the highest
* score is processed last; if we're not overwriting values, we want highest
* score first, so nothing else overwrites it.
*/
if (pair_a->score < pair_b->score) {
return pair_a->overwrite? -1 : 1;
} else if (pair_a->score > pair_b->score) {
return pair_a->overwrite? 1 : -1;
}
return 0;
}
static void
populate_hash(xmlNode * nvpair_list, GHashTable * hash, gboolean overwrite, xmlNode * top)
{
const char *name = NULL;
const char *value = NULL;
const char *old_value = NULL;
xmlNode *list = nvpair_list;
xmlNode *an_attr = NULL;
if (pcmk__xe_is(list->children, XML_TAG_ATTRS)) {
list = list->children;
}
for (an_attr = pcmk__xe_first_child(list); an_attr != NULL;
an_attr = pcmk__xe_next(an_attr)) {
if (pcmk__str_eq((const char *)an_attr->name, XML_CIB_TAG_NVPAIR, pcmk__str_none)) {
xmlNode *ref_nvpair = expand_idref(an_attr, top);
name = crm_element_value(an_attr, XML_NVPAIR_ATTR_NAME);
if ((name == NULL) && (ref_nvpair != NULL)) {
name = crm_element_value(ref_nvpair, XML_NVPAIR_ATTR_NAME);
}
value = crm_element_value(an_attr, XML_NVPAIR_ATTR_VALUE);
if ((value == NULL) && (ref_nvpair != NULL)) {
value = crm_element_value(ref_nvpair, XML_NVPAIR_ATTR_VALUE);
}
if (name == NULL || value == NULL) {
continue;
}
old_value = g_hash_table_lookup(hash, name);
if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
if (old_value) {
crm_trace("Letting %s default (removing explicit value \"%s\")",
name, value);
g_hash_table_remove(hash, name);
}
continue;
} else if (old_value == NULL) {
crm_trace("Setting %s=\"%s\"", name, value);
g_hash_table_insert(hash, strdup(name), strdup(value));
} else if (overwrite) {
crm_trace("Setting %s=\"%s\" (overwriting old value \"%s\")",
name, value, old_value);
g_hash_table_replace(hash, strdup(name), strdup(value));
}
}
}
}
typedef struct unpack_data_s {
gboolean overwrite;
void *hash;
crm_time_t *next_change;
const pe_rule_eval_data_t *rule_data;
xmlNode *top;
} unpack_data_t;
static void
unpack_attr_set(gpointer data, gpointer user_data)
{
sorted_set_t *pair = data;
unpack_data_t *unpack_data = user_data;
if (!pe_eval_rules(pair->attr_set, unpack_data->rule_data,
unpack_data->next_change)) {
return;
}
crm_trace("Adding attributes from %s (score %d) %s overwrite",
pair->name, pair->score,
(unpack_data->overwrite? "with" : "without"));
populate_hash(pair->attr_set, unpack_data->hash, unpack_data->overwrite, unpack_data->top);
}
/*!
* \internal
* \brief Create a sorted list of nvpair blocks
*
* \param[in,out] top XML document root (used to expand id-ref's)
* \param[in] xml_obj XML element containing blocks of nvpair elements
* \param[in] set_name If not NULL, only get blocks of this element
* \param[in] always_first If not NULL, sort block with this ID as first
*
* \return List of sorted_set_t entries for nvpair blocks
*/
static GList *
make_pairs(xmlNode *top, const xmlNode *xml_obj, const char *set_name,
const char *always_first, gboolean overwrite)
{
GList *unsorted = NULL;
if (xml_obj == NULL) {
return NULL;
}
for (xmlNode *attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL;
attr_set = pcmk__xe_next(attr_set)) {
if (pcmk__str_eq(set_name, (const char *) attr_set->name,
pcmk__str_null_matches)) {
const char *score = NULL;
sorted_set_t *pair = NULL;
xmlNode *expanded_attr_set = expand_idref(attr_set, top);
if (expanded_attr_set == NULL) {
continue; // Not possible with schema validation enabled
}
pair = calloc(1, sizeof(sorted_set_t));
pair->name = ID(expanded_attr_set);
pair->special_name = always_first;
pair->attr_set = expanded_attr_set;
pair->overwrite = overwrite;
score = crm_element_value(expanded_attr_set, XML_RULE_ATTR_SCORE);
pair->score = char2score(score);
unsorted = g_list_prepend(unsorted, pair);
}
}
return g_list_sort(unsorted, sort_pairs);
}
/*!
* \brief Extract nvpair blocks contained by an XML element into a hash table
*
* \param[in,out] top XML document root (used to expand id-ref's)
* \param[in] xml_obj XML element containing blocks of nvpair elements
* \param[in] set_name If not NULL, only use blocks of this element
* \param[in] rule_data Matching parameters to use when unpacking
* \param[out] hash Where to store extracted name/value pairs
* \param[in] always_first If not NULL, process block with this ID first
* \param[in] overwrite Whether to replace existing values with same name
* \param[out] next_change If not NULL, set to when evaluation will change
*/
void
pe_eval_nvpairs(xmlNode *top, const xmlNode *xml_obj, const char *set_name,
const pe_rule_eval_data_t *rule_data, GHashTable *hash,
const char *always_first, gboolean overwrite,
crm_time_t *next_change)
{
GList *pairs = make_pairs(top, xml_obj, set_name, always_first, overwrite);
if (pairs) {
unpack_data_t data = {
.hash = hash,
.overwrite = overwrite,
.next_change = next_change,
.top = top,
.rule_data = rule_data
};
g_list_foreach(pairs, unpack_attr_set, &data);
g_list_free_full(pairs, free);
}
}
/*!
* \brief Extract nvpair blocks contained by an XML element into a hash table
*
* \param[in,out] top XML document root (used to expand id-ref's)
* \param[in] xml_obj XML element containing blocks of nvpair elements
* \param[in] set_name Element name to identify nvpair blocks
* \param[in] node_hash Node attributes to use when evaluating rules
* \param[out] hash Where to store extracted name/value pairs
* \param[in] always_first If not NULL, process block with this ID first
* \param[in] overwrite Whether to replace existing values with same name
* \param[in] now Time to use when evaluating rules
* \param[out] next_change If not NULL, set to when evaluation will change
*/
void
pe_unpack_nvpairs(xmlNode *top, const xmlNode *xml_obj, const char *set_name,
GHashTable *node_hash, GHashTable *hash,
const char *always_first, gboolean overwrite,
crm_time_t *now, crm_time_t *next_change)
{
pe_rule_eval_data_t rule_data = {
.node_hash = node_hash,
.role = pcmk_role_unknown,
.now = now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
pe_eval_nvpairs(top, xml_obj, set_name, &rule_data, hash,
always_first, overwrite, next_change);
}
/*!
* \brief Expand any regular expression submatches (%0-%9) in a string
*
* \param[in] string String possibly containing submatch variables
* \param[in] match_data If not NULL, regular expression matches
*
* \return Newly allocated string identical to \p string with submatches
* expanded, or NULL if there were no matches
*/
char *
pe_expand_re_matches(const char *string, const pe_re_match_data_t *match_data)
{
size_t len = 0;
int i;
const char *p, *last_match_index;
char *p_dst, *result = NULL;
if (pcmk__str_empty(string) || !match_data) {
return NULL;
}
p = last_match_index = string;
while (*p) {
if (*p == '%' && *(p + 1) && isdigit(*(p + 1))) {
i = *(p + 1) - '0';
if (match_data->nregs >= i && match_data->pmatch[i].rm_so != -1 &&
match_data->pmatch[i].rm_eo > match_data->pmatch[i].rm_so) {
len += p - last_match_index + (match_data->pmatch[i].rm_eo - match_data->pmatch[i].rm_so);
last_match_index = p + 2;
}
p++;
}
p++;
}
len += p - last_match_index + 1;
/* FIXME: Excessive? */
if (len - 1 <= 0) {
return NULL;
}
p_dst = result = calloc(1, len);
p = string;
while (*p) {
if (*p == '%' && *(p + 1) && isdigit(*(p + 1))) {
i = *(p + 1) - '0';
if (match_data->nregs >= i && match_data->pmatch[i].rm_so != -1 &&
match_data->pmatch[i].rm_eo > match_data->pmatch[i].rm_so) {
/* rm_eo can be equal to rm_so, but then there is nothing to do */
int match_len = match_data->pmatch[i].rm_eo - match_data->pmatch[i].rm_so;
memcpy(p_dst, match_data->string + match_data->pmatch[i].rm_so, match_len);
p_dst += match_len;
}
p++;
} else {
*(p_dst) = *(p);
p_dst++;
}
p++;
}
return result;
}
/*!
* \brief Evaluate rules
*
* \param[in,out] ruleset XML possibly containing rule sub-elements
* \param[in] rule_data
* \param[out] next_change If not NULL, set to when evaluation will change
*
* \return TRUE if there are no rules or
*/
gboolean
pe_eval_rules(xmlNode *ruleset, const pe_rule_eval_data_t *rule_data,
crm_time_t *next_change)
{
// If there are no rules, pass by default
gboolean ruleset_default = TRUE;
for (xmlNode *rule = first_named_child(ruleset, XML_TAG_RULE);
rule != NULL; rule = crm_next_same_xml(rule)) {
ruleset_default = FALSE;
if (pe_eval_expr(rule, rule_data, next_change)) {
/* Only the deprecated "lifetime" element of location constraints
* may contain more than one rule at the top level -- the schema
* limits a block of nvpairs to a single top-level rule. So, this
* effectively means that a lifetime is active if any rule it
* contains is active.
*/
return TRUE;
}
}
return ruleset_default;
}
/*!
* \brief Evaluate all of a rule's expressions
*
* \param[in,out] rule XML containing a rule definition or its id-ref
* \param[in] rule_data Matching parameters to check against rule
* \param[out] next_change If not NULL, set to when evaluation will change
*
* \return TRUE if \p rule_data passes \p rule, otherwise FALSE
*/
gboolean
pe_eval_expr(xmlNode *rule, const pe_rule_eval_data_t *rule_data,
crm_time_t *next_change)
{
xmlNode *expr = NULL;
gboolean test = TRUE;
gboolean empty = TRUE;
gboolean passed = TRUE;
gboolean do_and = TRUE;
const char *value = NULL;
rule = expand_idref(rule, NULL);
if (rule == NULL) {
return FALSE; // Not possible with schema validation enabled
}
value = crm_element_value(rule, XML_RULE_ATTR_BOOLEAN_OP);
if (pcmk__str_eq(value, "or", pcmk__str_casei)) {
do_and = FALSE;
passed = FALSE;
}
crm_trace("Testing rule %s", ID(rule));
for (expr = pcmk__xe_first_child(rule); expr != NULL;
expr = pcmk__xe_next(expr)) {
test = pe_eval_subexpr(expr, rule_data, next_change);
empty = FALSE;
if (test && do_and == FALSE) {
crm_trace("Expression %s/%s passed", ID(rule), ID(expr));
return TRUE;
} else if (test == FALSE && do_and) {
crm_trace("Expression %s/%s failed", ID(rule), ID(expr));
return FALSE;
}
}
if (empty) {
pcmk__config_err("Ignoring rule %s because it contains no expressions",
ID(rule));
}
crm_trace("Rule %s %s", ID(rule), passed ? "passed" : "failed");
return passed;
}
/*!
* \brief Evaluate a single rule expression, including any subexpressions
*
* \param[in,out] expr XML containing a rule expression
* \param[in] rule_data Matching parameters to check against expression
* \param[out] next_change If not NULL, set to when evaluation will change
*
* \return TRUE if \p rule_data passes \p expr, otherwise FALSE
*/
gboolean
pe_eval_subexpr(xmlNode *expr, const pe_rule_eval_data_t *rule_data,
crm_time_t *next_change)
{
gboolean accept = FALSE;
const char *uname = NULL;
switch (find_expression_type(expr)) {
case pcmk__subexpr_rule:
accept = pe_eval_expr(expr, rule_data, next_change);
break;
case pcmk__subexpr_attribute:
case pcmk__subexpr_location:
/* these expressions can never succeed if there is
* no node to compare with
*/
if (rule_data->node_hash != NULL) {
accept = pe__eval_attr_expr(expr, rule_data);
}
break;
case pcmk__subexpr_datetime:
switch (pe__eval_date_expr(expr, rule_data, next_change)) {
case pcmk_rc_within_range:
case pcmk_rc_ok:
accept = TRUE;
break;
default:
accept = FALSE;
break;
}
break;
case pcmk__subexpr_role:
accept = pe__eval_role_expr(expr, rule_data);
break;
case pcmk__subexpr_resource:
accept = pe__eval_rsc_expr(expr, rule_data);
break;
case pcmk__subexpr_operation:
accept = pe__eval_op_expr(expr, rule_data);
break;
default:
CRM_CHECK(FALSE /* bad type */ , return FALSE);
accept = FALSE;
}
if (rule_data->node_hash) {
uname = g_hash_table_lookup(rule_data->node_hash, CRM_ATTR_UNAME);
}
crm_trace("Expression %s %s on %s",
ID(expr), accept ? "passed" : "failed", uname ? uname : "all nodes");
return accept;
}
/*!
* \internal
* \brief Compare two values in a rule's node attribute expression
*
* \param[in] l_val Value on left-hand side of comparison
* \param[in] r_val Value on right-hand side of comparison
* \param[in] type How to interpret the values (allowed values:
* \c "string", \c "integer", \c "number",
* \c "version", \c NULL)
* \param[in] op Type of comparison
*
* \return -1 if <tt>(l_val < r_val)</tt>,
* 0 if <tt>(l_val == r_val)</tt>,
* 1 if <tt>(l_val > r_val)</tt>
*/
static int
compare_attr_expr_vals(const char *l_val, const char *r_val, const char *type,
const char *op)
{
int cmp = 0;
if (l_val != NULL && r_val != NULL) {
if (type == NULL) {
if (pcmk__strcase_any_of(op, "lt", "lte", "gt", "gte", NULL)) {
if (pcmk__char_in_any_str('.', l_val, r_val, NULL)) {
type = "number";
} else {
type = "integer";
}
} else {
type = "string";
}
crm_trace("Defaulting to %s based comparison for '%s' op", type, op);
}
if (pcmk__str_eq(type, "string", pcmk__str_casei)) {
cmp = strcasecmp(l_val, r_val);
} else if (pcmk__str_eq(type, "integer", pcmk__str_casei)) {
long long l_val_num;
int rc1 = pcmk__scan_ll(l_val, &l_val_num, 0LL);
long long r_val_num;
int rc2 = pcmk__scan_ll(r_val, &r_val_num, 0LL);
if ((rc1 == pcmk_rc_ok) && (rc2 == pcmk_rc_ok)) {
if (l_val_num < r_val_num) {
cmp = -1;
} else if (l_val_num > r_val_num) {
cmp = 1;
} else {
cmp = 0;
}
} else {
crm_debug("Integer parse error. Comparing %s and %s as strings",
l_val, r_val);
cmp = compare_attr_expr_vals(l_val, r_val, "string", op);
}
} else if (pcmk__str_eq(type, "number", pcmk__str_casei)) {
double l_val_num;
double r_val_num;
int rc1 = pcmk__scan_double(l_val, &l_val_num, NULL, NULL);
int rc2 = pcmk__scan_double(r_val, &r_val_num, NULL, NULL);
if (rc1 == pcmk_rc_ok && rc2 == pcmk_rc_ok) {
if (l_val_num < r_val_num) {
cmp = -1;
} else if (l_val_num > r_val_num) {
cmp = 1;
} else {
cmp = 0;
}
} else {
crm_debug("Floating-point parse error. Comparing %s and %s as "
"strings", l_val, r_val);
cmp = compare_attr_expr_vals(l_val, r_val, "string", op);
}
} else if (pcmk__str_eq(type, "version", pcmk__str_casei)) {
cmp = compare_version(l_val, r_val);
}
} else if (l_val == NULL && r_val == NULL) {
cmp = 0;
} else if (r_val == NULL) {
cmp = 1;
} else { // l_val == NULL && r_val != NULL
cmp = -1;
}
return cmp;
}
/*!
* \internal
* \brief Check whether an attribute expression evaluates to \c true
*
* \param[in] l_val Value on left-hand side of comparison
* \param[in] r_val Value on right-hand side of comparison
* \param[in] type How to interpret the values (allowed values:
* \c "string", \c "integer", \c "number",
* \c "version", \c NULL)
* \param[in] op Type of comparison.
*
* \return \c true if expression evaluates to \c true, \c false
* otherwise
*/
static bool
accept_attr_expr(const char *l_val, const char *r_val, const char *type,
const char *op)
{
int cmp;
if (pcmk__str_eq(op, "defined", pcmk__str_casei)) {
return (l_val != NULL);
} else if (pcmk__str_eq(op, "not_defined", pcmk__str_casei)) {
return (l_val == NULL);
}
cmp = compare_attr_expr_vals(l_val, r_val, type, op);
if (pcmk__str_eq(op, "eq", pcmk__str_casei)) {
return (cmp == 0);
} else if (pcmk__str_eq(op, "ne", pcmk__str_casei)) {
return (cmp != 0);
} else if (l_val == NULL || r_val == NULL) {
// The comparison is meaningless from this point on
return false;
} else if (pcmk__str_eq(op, "lt", pcmk__str_casei)) {
return (cmp < 0);
} else if (pcmk__str_eq(op, "lte", pcmk__str_casei)) {
return (cmp <= 0);
} else if (pcmk__str_eq(op, "gt", pcmk__str_casei)) {
return (cmp > 0);
} else if (pcmk__str_eq(op, "gte", pcmk__str_casei)) {
return (cmp >= 0);
}
return false; // Should never reach this point
}
/*!
* \internal
* \brief Get correct value according to value-source
*
* \param[in] value value given in rule expression
* \param[in] value_source value-source given in rule expressions
* \param[in] match_data If not NULL, resource back-references and params
*/
static const char *
expand_value_source(const char *value, const char *value_source,
const pe_match_data_t *match_data)
{
GHashTable *table = NULL;
if (pcmk__str_empty(value)) {
return NULL; // value_source is irrelevant
} else if (pcmk__str_eq(value_source, "param", pcmk__str_casei)) {
table = match_data->params;
} else if (pcmk__str_eq(value_source, "meta", pcmk__str_casei)) {
table = match_data->meta;
} else { // literal
return value;
}
if (table == NULL) {
return NULL;
}
return (const char *) g_hash_table_lookup(table, value);
}
/*!
* \internal
* \brief Evaluate a node attribute expression based on #uname, #id, #kind,
* or a generic node attribute
*
* \param[in] expr XML of rule expression
* \param[in] rule_data The match_data and node_hash members are used
*
* \return TRUE if rule_data satisfies the expression, FALSE otherwise
*/
gboolean
pe__eval_attr_expr(const xmlNode *expr, const pe_rule_eval_data_t *rule_data)
{
gboolean attr_allocated = FALSE;
const char *h_val = NULL;
const char *op = NULL;
const char *type = NULL;
const char *attr = NULL;
const char *value = NULL;
const char *value_source = NULL;
attr = crm_element_value(expr, XML_EXPR_ATTR_ATTRIBUTE);
op = crm_element_value(expr, XML_EXPR_ATTR_OPERATION);
value = crm_element_value(expr, XML_EXPR_ATTR_VALUE);
type = crm_element_value(expr, XML_EXPR_ATTR_TYPE);
value_source = crm_element_value(expr, XML_EXPR_ATTR_VALUE_SOURCE);
if (attr == NULL) {
pcmk__config_err("Expression %s invalid: " XML_EXPR_ATTR_ATTRIBUTE
" not specified", pcmk__s(ID(expr), "without ID"));
return FALSE;
} else if (op == NULL) {
pcmk__config_err("Expression %s invalid: " XML_EXPR_ATTR_OPERATION
" not specified", pcmk__s(ID(expr), "without ID"));
return FALSE;
}
if (rule_data->match_data != NULL) {
// Expand any regular expression submatches (%0-%9) in attribute name
if (rule_data->match_data->re != NULL) {
char *resolved_attr = pe_expand_re_matches(attr, rule_data->match_data->re);
if (resolved_attr != NULL) {
attr = (const char *) resolved_attr;
attr_allocated = TRUE;
}
}
// Get value appropriate to value-source
value = expand_value_source(value, value_source, rule_data->match_data);
}
if (rule_data->node_hash != NULL) {
h_val = (const char *)g_hash_table_lookup(rule_data->node_hash, attr);
}
if (attr_allocated) {
free((char *)attr);
attr = NULL;
}
return accept_attr_expr(h_val, value, type, op);
}
/*!
* \internal
* \brief Evaluate a date_expression
*
* \param[in] expr XML of rule expression
* \param[in] rule_data Only the now member is used
* \param[out] next_change If not NULL, set to when evaluation will change
*
* \return Standard Pacemaker return code
*/
int
pe__eval_date_expr(const xmlNode *expr, const pe_rule_eval_data_t *rule_data,
crm_time_t *next_change)
{
crm_time_t *start = NULL;
crm_time_t *end = NULL;
const char *value = NULL;
const char *op = crm_element_value(expr, "operation");
xmlNode *duration_spec = NULL;
xmlNode *date_spec = NULL;
// "undetermined" will also be returned for parsing errors
int rc = pcmk_rc_undetermined;
crm_trace("Testing expression: %s", ID(expr));
duration_spec = first_named_child(expr, "duration");
date_spec = first_named_child(expr, "date_spec");
value = crm_element_value(expr, "start");
if (value != NULL) {
start = crm_time_new(value);
}
value = crm_element_value(expr, "end");
if (value != NULL) {
end = crm_time_new(value);
}
if (start != NULL && end == NULL && duration_spec != NULL) {
end = parse_xml_duration(start, duration_spec);
}
if (pcmk__str_eq(op, "in_range", pcmk__str_null_matches | pcmk__str_casei)) {
if ((start == NULL) && (end == NULL)) {
// in_range requires at least one of start or end
} else if ((start != NULL) && (crm_time_compare(rule_data->now, start) < 0)) {
rc = pcmk_rc_before_range;
crm_time_set_if_earlier(next_change, start);
} else if ((end != NULL) && (crm_time_compare(rule_data->now, end) > 0)) {
rc = pcmk_rc_after_range;
} else {
rc = pcmk_rc_within_range;
if (end && next_change) {
// Evaluation doesn't change until second after end
crm_time_add_seconds(end, 1);
crm_time_set_if_earlier(next_change, end);
}
}
} else if (pcmk__str_eq(op, "date_spec", pcmk__str_casei)) {
rc = pe_cron_range_satisfied(rule_data->now, date_spec);
// @TODO set next_change appropriately
} else if (pcmk__str_eq(op, "gt", pcmk__str_casei)) {
if (start == NULL) {
// gt requires start
} else if (crm_time_compare(rule_data->now, start) > 0) {
rc = pcmk_rc_within_range;
} else {
rc = pcmk_rc_before_range;
// Evaluation doesn't change until second after start
crm_time_add_seconds(start, 1);
crm_time_set_if_earlier(next_change, start);
}
} else if (pcmk__str_eq(op, "lt", pcmk__str_casei)) {
if (end == NULL) {
// lt requires end
} else if (crm_time_compare(rule_data->now, end) < 0) {
rc = pcmk_rc_within_range;
crm_time_set_if_earlier(next_change, end);
} else {
rc = pcmk_rc_after_range;
}
}
crm_time_free(start);
crm_time_free(end);
return rc;
}
gboolean
pe__eval_op_expr(const xmlNode *expr, const pe_rule_eval_data_t *rule_data)
{
const char *name = crm_element_value(expr, XML_NVPAIR_ATTR_NAME);
const char *interval_s = crm_element_value(expr, XML_LRM_ATTR_INTERVAL);
guint interval_ms = 0U;
crm_trace("Testing op_defaults expression: %s", ID(expr));
if (rule_data->op_data == NULL) {
crm_trace("No operations data provided");
return FALSE;
}
- if (pcmk__parse_interval_spec(interval_s, &interval_ms) != pcmk_rc_ok) {
+ if (pcmk_parse_interval_spec(interval_s, &interval_ms) != pcmk_rc_ok) {
crm_trace("Could not parse interval: %s", interval_s);
return FALSE;
}
if ((interval_s != NULL) && (interval_ms != rule_data->op_data->interval)) {
crm_trace("Interval doesn't match: %d != %d",
interval_ms, rule_data->op_data->interval);
return FALSE;
}
if (!pcmk__str_eq(name, rule_data->op_data->op_name, pcmk__str_none)) {
crm_trace("Name doesn't match: %s != %s", name, rule_data->op_data->op_name);
return FALSE;
}
return TRUE;
}
/*!
* \internal
* \brief Check whether a resource role matches a rule role
*
* \param[in] expr XML of rule expression
* \param[in] rule_data Only the role member is used
*
* \return true if role matches, otherwise false
*/
static bool
role_matches(const xmlNode *expr, const pe_rule_eval_data_t *rule_data)
{
const char *value = crm_element_value(expr, XML_EXPR_ATTR_VALUE);
enum rsc_role_e role = text2role(value);
if (role == pcmk_role_unknown) {
pcmk__config_err("Invalid role %s in rule expression", value);
return false;
}
return role == rule_data->role;
}
/*!
* \internal
* \brief Evaluate a node attribute expression based on #role
*
* \param[in] expr XML of rule expression
* \param[in] rule_data Only the role member is used
*
* \return TRUE if rule_data->role satisfies the expression, FALSE otherwise
* \todo Drop this whole code. The #role attribute was never implemented
* (rule_data->role is always pcmk_role_unknown), and it would be a poor
* design anyway, since a unique promotable clone could have multiple
* instances with different roles on a given node.
*/
gboolean
pe__eval_role_expr(const xmlNode *expr, const pe_rule_eval_data_t *rule_data)
{
const char *op = NULL;
// A known role must be given to compare against
if (rule_data->role == pcmk_role_unknown) {
return FALSE;
}
op = crm_element_value(expr, XML_EXPR_ATTR_OPERATION);
if (pcmk__str_eq(op, "defined", pcmk__str_casei)) {
if (rule_data->role > pcmk_role_started) {
return TRUE;
}
} else if (pcmk__str_eq(op, "not_defined", pcmk__str_casei)) {
if ((rule_data->role > pcmk_role_unknown)
&& (rule_data->role < pcmk_role_unpromoted)) {
return TRUE;
}
} else if (pcmk__str_eq(op, "eq", pcmk__str_casei)) {
return role_matches(expr, rule_data)? TRUE : FALSE;
} else if (pcmk__str_eq(op, "ne", pcmk__str_casei)
// Test "ne" only with promotable clone roles
&& (rule_data->role >= pcmk_role_unpromoted)) {
return role_matches(expr, rule_data)? FALSE : TRUE;
} else {
pcmk__config_err("Operation '%s' is not valid with " CRM_ATTR_ROLE
" comparisons", op);
}
return FALSE;
}
gboolean
pe__eval_rsc_expr(const xmlNode *expr, const pe_rule_eval_data_t *rule_data)
{
const char *class = crm_element_value(expr, XML_AGENT_ATTR_CLASS);
const char *provider = crm_element_value(expr, XML_AGENT_ATTR_PROVIDER);
const char *type = crm_element_value(expr, XML_EXPR_ATTR_TYPE);
crm_trace("Testing rsc_defaults expression: %s", ID(expr));
if (rule_data->rsc_data == NULL) {
crm_trace("No resource data provided");
return FALSE;
}
if (class != NULL &&
!pcmk__str_eq(class, rule_data->rsc_data->standard, pcmk__str_none)) {
crm_trace("Class doesn't match: %s != %s", class, rule_data->rsc_data->standard);
return FALSE;
}
if ((provider == NULL && rule_data->rsc_data->provider != NULL) ||
(provider != NULL && rule_data->rsc_data->provider == NULL) ||
!pcmk__str_eq(provider, rule_data->rsc_data->provider, pcmk__str_none)) {
crm_trace("Provider doesn't match: %s != %s", provider, rule_data->rsc_data->provider);
return FALSE;
}
if (type != NULL &&
!pcmk__str_eq(type, rule_data->rsc_data->agent, pcmk__str_none)) {
crm_trace("Agent doesn't match: %s != %s", type, rule_data->rsc_data->agent);
return FALSE;
}
return TRUE;
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/pengine/rules_compat.h>
gboolean
test_ruleset(xmlNode *ruleset, GHashTable *node_hash, crm_time_t *now)
{
return pe_evaluate_rules(ruleset, node_hash, now, NULL);
}
gboolean
test_rule(xmlNode * rule, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now)
{
return pe_test_rule(rule, node_hash, role, now, NULL, NULL);
}
gboolean
pe_test_rule_re(xmlNode * rule, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_re_match_data_t * re_match_data)
{
pe_match_data_t match_data = {
.re = re_match_data,
.params = NULL,
.meta = NULL,
};
return pe_test_rule(rule, node_hash, role, now, NULL, &match_data);
}
gboolean
pe_test_rule_full(xmlNode *rule, GHashTable *node_hash, enum rsc_role_e role,
crm_time_t *now, pe_match_data_t *match_data)
{
return pe_test_rule(rule, node_hash, role, now, NULL, match_data);
}
gboolean
test_expression(xmlNode * expr, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now)
{
return pe_test_expression(expr, node_hash, role, now, NULL, NULL);
}
gboolean
pe_test_expression_re(xmlNode * expr, GHashTable * node_hash, enum rsc_role_e role, crm_time_t * now, pe_re_match_data_t * re_match_data)
{
pe_match_data_t match_data = {
.re = re_match_data,
.params = NULL,
.meta = NULL,
};
return pe_test_expression(expr, node_hash, role, now, NULL, &match_data);
}
gboolean
pe_test_expression_full(xmlNode *expr, GHashTable *node_hash,
enum rsc_role_e role, crm_time_t *now,
pe_match_data_t *match_data)
{
return pe_test_expression(expr, node_hash, role, now, NULL, match_data);
}
void
unpack_instance_attributes(xmlNode *top, xmlNode *xml_obj, const char *set_name,
GHashTable *node_hash, GHashTable *hash,
const char *always_first, gboolean overwrite,
crm_time_t *now)
{
pe_rule_eval_data_t rule_data = {
.node_hash = node_hash,
.role = pcmk_role_unknown,
.now = now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
pe_eval_nvpairs(top, xml_obj, set_name, &rule_data, hash, always_first,
overwrite, NULL);
}
// LCOV_EXCL_STOP
// End deprecated API
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 21f7c60e53..78d3344946 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,5119 +1,5119 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <string.h>
#include <glib.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/util.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include <pe_status_private.h>
CRM_TRACE_INIT_DATA(pe_status);
// A (parsed) resource action history entry
struct action_history {
pcmk_resource_t *rsc; // Resource that history is for
pcmk_node_t *node; // Node that history is for
xmlNode *xml; // History entry XML
// Parsed from entry XML
const char *id; // XML ID of history entry
const char *key; // Operation key of action
const char *task; // Action name
const char *exit_reason; // Exit reason given for result
guint interval_ms; // Action interval
int call_id; // Call ID of action
int expected_exit_status; // Expected exit status of action
int exit_status; // Actual exit status of action
int execution_status; // Execution status of action
};
/* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
* use pe__set_working_set_flags()/pe__clear_working_set_flags() so that the
* flag is stringified more readably in log messages.
*/
#define set_config_flag(scheduler, option, flag) do { \
const char *scf_value = pe_pref((scheduler)->config_hash, (option)); \
if (scf_value != NULL) { \
if (crm_is_true(scf_value)) { \
(scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Scheduler", \
crm_system_name, (scheduler)->flags, \
(flag), #flag); \
} else { \
(scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Scheduler", \
crm_system_name, (scheduler)->flags, \
(flag), #flag); \
} \
} \
} while(0)
static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
xmlNode *xml_op, xmlNode **last_failure,
enum action_fail_response *failed);
static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
pcmk_node_t *this_node);
static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
bool overwrite, pcmk_scheduler_t *scheduler);
static void determine_online_status(const xmlNode *node_state,
pcmk_node_t *this_node,
pcmk_scheduler_t *scheduler);
static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
pcmk_scheduler_t *scheduler);
static gboolean
is_dangling_guest_node(pcmk_node_t *node)
{
/* we are looking for a remote-node that was supposed to be mapped to a
* container resource, but all traces of that container have disappeared
* from both the config and the status section. */
if (pe__is_guest_or_remote_node(node) &&
node->details->remote_rsc &&
node->details->remote_rsc->container == NULL &&
pcmk_is_set(node->details->remote_rsc->flags,
pcmk_rsc_removed_filler)) {
return TRUE;
}
return FALSE;
}
/*!
* \brief Schedule a fence action for a node
*
* \param[in,out] scheduler Scheduler data
* \param[in,out] node Node to fence
* \param[in] reason Text description of why fencing is needed
* \param[in] priority_delay Whether to consider
* \c PCMK_OPT_PRIORITY_FENCING_DELAY
*/
void
pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
const char *reason, bool priority_delay)
{
CRM_CHECK(node, return);
/* A guest node is fenced by marking its container as failed */
if (pe__is_guest_node(node)) {
pcmk_resource_t *rsc = node->details->remote_rsc->container;
if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
crm_notice("Not fencing guest node %s "
"(otherwise would because %s): "
"its guest resource %s is unmanaged",
pe__node_name(node), reason, rsc->id);
} else {
pcmk__sched_warn("Guest node %s will be fenced "
"(by recovering its guest resource %s): %s",
pe__node_name(node), rsc->id, reason);
/* We don't mark the node as unclean because that would prevent the
* node from running resources. We want to allow it to run resources
* in this transition if the recovery succeeds.
*/
node->details->remote_requires_reset = TRUE;
pe__set_resource_flags(rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
}
}
} else if (is_dangling_guest_node(node)) {
crm_info("Cleaning up dangling connection for guest node %s: "
"fencing was already done because %s, "
"and guest resource no longer exists",
pe__node_name(node), reason);
pe__set_resource_flags(node->details->remote_rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
} else if (pe__is_remote_node(node)) {
pcmk_resource_t *rsc = node->details->remote_rsc;
if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
crm_notice("Not fencing remote node %s "
"(otherwise would because %s): connection is unmanaged",
pe__node_name(node), reason);
} else if(node->details->remote_requires_reset == FALSE) {
node->details->remote_requires_reset = TRUE;
pcmk__sched_warn("Remote node %s %s: %s",
pe__node_name(node),
pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
reason);
}
node->details->unclean = TRUE;
// No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
} else if (node->details->unclean) {
crm_trace("Cluster node %s %s because %s",
pe__node_name(node),
pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
reason);
} else {
pcmk__sched_warn("Cluster node %s %s: %s",
pe__node_name(node),
pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
reason);
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
}
}
// @TODO xpaths can't handle templates, rules, or id-refs
// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
"[(@" XML_NVPAIR_ATTR_NAME "='" PCMK_STONITH_PROVIDES "'" \
"or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
"and @" XML_NVPAIR_ATTR_VALUE "='" PCMK__VALUE_UNFENCING "']"
// unfencing in rsc_defaults or any resource
#define XPATH_ENABLE_UNFENCING \
"/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
"//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
"|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
"/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
static void
set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
{
xmlXPathObjectPtr result = NULL;
if (!pcmk_is_set(scheduler->flags, flag)) {
result = xpath_search(scheduler->input, xpath);
if (result && (numXpathResults(result) > 0)) {
pe__set_working_set_flags(scheduler, flag);
}
freeXpathObject(result);
}
}
gboolean
unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
{
const char *value = NULL;
guint interval_ms = 0U;
GHashTable *config_hash = pcmk__strkey_table(free, free);
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = pcmk_role_unknown,
.now = scheduler->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
scheduler->config_hash = config_hash;
pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash,
CIB_OPTIONS_FIRST, FALSE, scheduler);
verify_pe_options(scheduler->config_hash);
set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
pcmk_sched_probe_resources);
if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
crm_info("Startup probes: disabled (dangerous)");
}
value = pe_pref(scheduler->config_hash, PCMK_OPT_HAVE_WATCHDOG);
if (value && crm_is_true(value)) {
crm_info("Watchdog-based self-fencing will be performed via SBD if "
"fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
" is nonzero");
pe__set_working_set_flags(scheduler, pcmk_sched_have_fencing);
}
/* Set certain flags via xpath here, so they can be used before the relevant
* configuration sections are unpacked.
*/
set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
scheduler);
value = pe_pref(scheduler->config_hash, PCMK_OPT_STONITH_TIMEOUT);
- pcmk__parse_interval_spec(value, &interval_ms);
+ pcmk_parse_interval_spec(value, &interval_ms);
if (interval_ms >= INT_MAX) {
scheduler->stonith_timeout = INT_MAX;
} else {
scheduler->stonith_timeout = (int) interval_ms;
}
crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
pcmk_sched_fencing_enabled);
if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
crm_debug("STONITH of failed nodes is enabled");
} else {
crm_debug("STONITH of failed nodes is disabled");
}
scheduler->stonith_action = pe_pref(scheduler->config_hash,
PCMK_OPT_STONITH_ACTION);
if (!strcmp(scheduler->stonith_action, "poweroff")) {
pcmk__warn_once(pcmk__wo_poweroff,
"Support for " PCMK_OPT_STONITH_ACTION " of "
"'poweroff' is deprecated and will be removed in a "
"future release (use 'off' instead)");
scheduler->stonith_action = PCMK_ACTION_OFF;
}
crm_trace("STONITH will %s nodes", scheduler->stonith_action);
set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING,
pcmk_sched_concurrent_fencing);
if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
crm_debug("Concurrent fencing is enabled");
} else {
crm_debug("Concurrent fencing is disabled");
}
value = pe_pref(scheduler->config_hash,
PCMK_OPT_PRIORITY_FENCING_DELAY);
if (value) {
- pcmk__parse_interval_spec(value, &interval_ms);
+ pcmk_parse_interval_spec(value, &interval_ms);
scheduler->priority_fencing_delay = (int) (interval_ms / 1000);
crm_trace("Priority fencing delay is %ds",
scheduler->priority_fencing_delay);
}
set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
pcmk_sched_stop_all);
crm_debug("Stop all active resources: %s",
pcmk__btoa(pcmk_is_set(scheduler->flags, pcmk_sched_stop_all)));
set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
pcmk_sched_symmetric_cluster);
if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) {
crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
}
value = pe_pref(scheduler->config_hash, PCMK_OPT_NO_QUORUM_POLICY);
if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) {
scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
} else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) {
scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
scheduler->no_quorum_policy = pcmk_no_quorum_demote;
} else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) {
if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
int do_panic = 0;
crm_element_value_int(scheduler->input, XML_ATTR_QUORUM_PANIC,
&do_panic);
if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
scheduler->no_quorum_policy = pcmk_no_quorum_fence;
} else {
crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY
" to 'stop': cluster has never had quorum");
scheduler->no_quorum_policy = pcmk_no_quorum_stop;
}
} else {
pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
" to 'stop' because fencing is disabled");
scheduler->no_quorum_policy = pcmk_no_quorum_stop;
}
} else {
scheduler->no_quorum_policy = pcmk_no_quorum_stop;
}
switch (scheduler->no_quorum_policy) {
case pcmk_no_quorum_freeze:
crm_debug("On loss of quorum: Freeze resources");
break;
case pcmk_no_quorum_stop:
crm_debug("On loss of quorum: Stop ALL resources");
break;
case pcmk_no_quorum_demote:
crm_debug("On loss of quorum: "
"Demote promotable resources and stop other resources");
break;
case pcmk_no_quorum_fence:
crm_notice("On loss of quorum: Fence all remaining nodes");
break;
case pcmk_no_quorum_ignore:
crm_notice("On loss of quorum: Ignore");
break;
}
set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES,
pcmk_sched_stop_removed_resources);
if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
crm_trace("Orphan resources are stopped");
} else {
crm_trace("Orphan resources are ignored");
}
set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS,
pcmk_sched_cancel_removed_actions);
if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) {
crm_trace("Orphan resource actions are stopped");
} else {
crm_trace("Orphan resource actions are ignored");
}
value = pe_pref(scheduler->config_hash, PCMK__OPT_REMOVE_AFTER_STOP);
if (value != NULL) {
if (crm_is_true(value)) {
pe__set_working_set_flags(scheduler, pcmk_sched_remove_after_stop);
#ifndef PCMK__COMPAT_2_0
pcmk__warn_once(pcmk__wo_remove_after,
"Support for the " PCMK__OPT_REMOVE_AFTER_STOP
" cluster property is deprecated and will be "
"removed in a future release");
#endif
} else {
pe__clear_working_set_flags(scheduler,
pcmk_sched_remove_after_stop);
}
}
set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
pcmk_sched_in_maintenance);
crm_trace("Maintenance mode: %s",
pcmk__btoa(pcmk_is_set(scheduler->flags,
pcmk_sched_in_maintenance)));
set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
pcmk_sched_start_failure_fatal);
if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
crm_trace("Start failures are always fatal");
} else {
crm_trace("Start failures are handled by failcount");
}
if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
pcmk_sched_startup_fencing);
}
if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
crm_trace("Unseen nodes will be fenced");
} else {
pcmk__warn_once(pcmk__wo_blind,
"Blind faith: not fencing unseen nodes");
}
pe__unpack_node_health_scores(scheduler);
scheduler->placement_strategy = pe_pref(scheduler->config_hash,
PCMK_OPT_PLACEMENT_STRATEGY);
crm_trace("Placement strategy: %s", scheduler->placement_strategy);
set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
pcmk_sched_shutdown_lock);
if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
value = pe_pref(scheduler->config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
- pcmk__parse_interval_spec(value, &(scheduler->shutdown_lock));
+ pcmk_parse_interval_spec(value, &(scheduler->shutdown_lock));
scheduler->shutdown_lock /= 1000;
crm_trace("Resources will be locked to nodes that were cleanly "
"shut down (locks expire after %s)",
pcmk__readable_interval(scheduler->shutdown_lock));
} else {
crm_trace("Resources will not be locked to nodes that were cleanly "
"shut down");
}
value = pe_pref(scheduler->config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
- pcmk__parse_interval_spec(value, &(scheduler->node_pending_timeout));
+ pcmk_parse_interval_spec(value, &(scheduler->node_pending_timeout));
scheduler->node_pending_timeout /= 1000;
if (scheduler->node_pending_timeout == 0) {
crm_trace("Do not fence pending nodes");
} else {
crm_trace("Fence pending nodes after %s",
pcmk__readable_interval(scheduler->node_pending_timeout
* 1000));
}
return TRUE;
}
pcmk_node_t *
pe_create_node(const char *id, const char *uname, const char *type,
const char *score, pcmk_scheduler_t *scheduler)
{
pcmk_node_t *new_node = NULL;
if (pe_find_node(scheduler->nodes, uname) != NULL) {
pcmk__config_warn("More than one node entry has name '%s'", uname);
}
new_node = calloc(1, sizeof(pcmk_node_t));
if (new_node == NULL) {
pcmk__sched_err("Could not allocate memory for node %s", uname);
return NULL;
}
new_node->weight = char2score(score);
new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
if (new_node->details == NULL) {
free(new_node);
pcmk__sched_err("Could not allocate memory for node %s", uname);
return NULL;
}
crm_trace("Creating node for entry %s/%s", uname, id);
new_node->details->id = id;
new_node->details->uname = uname;
new_node->details->online = FALSE;
new_node->details->shutdown = FALSE;
new_node->details->rsc_discovery_enabled = TRUE;
new_node->details->running_rsc = NULL;
new_node->details->data_set = scheduler;
if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) {
new_node->details->type = pcmk_node_variant_cluster;
} else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) {
new_node->details->type = pcmk_node_variant_remote;
pe__set_working_set_flags(scheduler, pcmk_sched_have_remote_nodes);
} else {
/* @COMPAT 'ping' is the default for backward compatibility, but it
* should be changed to 'member' at a compatibility break
*/
if (!pcmk__str_eq(type, "ping", pcmk__str_casei)) {
pcmk__config_warn("Node %s has unrecognized type '%s', "
"assuming 'ping'", pcmk__s(uname, "without name"),
type);
}
pcmk__warn_once(pcmk__wo_ping_node,
"Support for nodes of type 'ping' (such as %s) is "
"deprecated and will be removed in a future release",
pcmk__s(uname, "unnamed node"));
new_node->details->type = node_ping;
}
new_node->details->attrs = pcmk__strkey_table(free, free);
if (pe__is_guest_or_remote_node(new_node)) {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("remote"));
} else {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("cluster"));
}
new_node->details->utilization = pcmk__strkey_table(free, free);
new_node->details->digest_cache = pcmk__strkey_table(free,
pe__free_digests);
scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
pe__cmp_node_name);
return new_node;
}
static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
{
xmlNode *attr_set = NULL;
xmlNode *attr = NULL;
const char *container_id = ID(xml_obj);
const char *remote_name = NULL;
const char *remote_server = NULL;
const char *remote_port = NULL;
const char *connect_timeout = "60s";
const char *remote_allow_migrate=NULL;
const char *is_managed = NULL;
for (attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL;
attr_set = pcmk__xe_next(attr_set)) {
if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS,
pcmk__str_casei)) {
continue;
}
for (attr = pcmk__xe_first_child(attr_set); attr != NULL;
attr = pcmk__xe_next(attr)) {
const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) {
remote_name = value;
} else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) {
remote_server = value;
} else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) {
remote_port = value;
} else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) {
connect_timeout = value;
} else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) {
remote_allow_migrate=value;
} else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) {
is_managed = value;
}
}
}
if (remote_name == NULL) {
return NULL;
}
if (pe_find_resource(data->resources, remote_name) != NULL) {
return NULL;
}
pe_create_remote_xml(parent, remote_name, container_id,
remote_allow_migrate, is_managed,
connect_timeout, remote_server, remote_port);
return remote_name;
}
static void
handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
{
if ((new_node->details->type == pcmk_node_variant_remote)
&& (new_node->details->remote_rsc == NULL)) {
/* Ignore fencing for remote nodes that don't have a connection resource
* associated with them. This happens when remote node entries get left
* in the nodes section after the connection resource is removed.
*/
return;
}
if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
// All nodes are unclean until we've seen their status entry
new_node->details->unclean = TRUE;
} else {
// Blind faith ...
new_node->details->unclean = FALSE;
}
/* We need to be able to determine if a node's status section
* exists or not separate from whether the node is unclean. */
new_node->details->unseen = TRUE;
}
gboolean
unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_obj = NULL;
pcmk_node_t *new_node = NULL;
const char *id = NULL;
const char *uname = NULL;
const char *type = NULL;
const char *score = NULL;
for (xml_obj = pcmk__xe_first_child(xml_nodes); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) {
new_node = NULL;
id = crm_element_value(xml_obj, XML_ATTR_ID);
uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
type = crm_element_value(xml_obj, XML_ATTR_TYPE);
score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
crm_trace("Processing node %s/%s", uname, id);
if (id == NULL) {
pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE
"> entry in configuration without id");
continue;
}
new_node = pe_create_node(id, uname, type, score, scheduler);
if (new_node == NULL) {
return FALSE;
}
handle_startup_fencing(scheduler, new_node);
add_node_attrs(xml_obj, new_node, FALSE, scheduler);
crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
}
}
if (scheduler->localhost
&& (pe_find_node(scheduler->nodes, scheduler->localhost) == NULL)) {
crm_info("Creating a fake local node");
pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0,
scheduler);
}
return TRUE;
}
static void
setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
const char *container_id = NULL;
if (rsc->children) {
g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
return;
}
container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
pcmk_resource_t *container = pe_find_resource(scheduler->resources,
container_id);
if (container) {
rsc->container = container;
pe__set_resource_flags(container, pcmk_rsc_has_filler);
container->fillers = g_list_append(container->fillers, rsc);
pcmk__rsc_trace(rsc, "Resource %s's container is %s",
rsc->id, container_id);
} else {
pcmk__config_err("Resource %s: Unknown resource container (%s)",
rsc->id, container_id);
}
}
}
gboolean
unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_obj = NULL;
/* Create remote nodes and guest nodes from the resource configuration
* before unpacking resources.
*/
for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
const char *new_node_id = NULL;
/* Check for remote nodes, which are defined by ocf:pacemaker:remote
* primitives.
*/
if (xml_contains_remote_node(xml_obj)) {
new_node_id = ID(xml_obj);
/* The "pe_find_node" check is here to make sure we don't iterate over
* an expanded node that has already been added to the node list. */
if (new_node_id
&& (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
crm_trace("Found remote node %s defined by resource %s",
new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
scheduler);
}
continue;
}
/* Check for guest nodes, which are defined by special meta-attributes
* of a primitive of any type (for example, VirtualDomain or Xen).
*/
if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) {
/* This will add an ocf:pacemaker:remote primitive to the
* configuration for the guest node's connection, to be unpacked
* later.
*/
new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
scheduler);
if (new_node_id
&& (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
crm_trace("Found guest node %s in resource %s",
new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
scheduler);
}
continue;
}
/* Check for guest nodes inside a group. Clones are currently not
* supported as guest nodes.
*/
if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) {
xmlNode *xml_obj2 = NULL;
for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL;
xml_obj2 = pcmk__xe_next(xml_obj2)) {
new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
scheduler);
if (new_node_id
&& (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
crm_trace("Found guest node %s in resource %s inside group %s",
new_node_id, ID(xml_obj2), ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
scheduler);
}
}
}
}
return TRUE;
}
/* Call this after all the nodes and resources have been
* unpacked, but before the status section is read.
*
* A remote node's online status is reflected by the state
* of the remote node's connection resource. We need to link
* the remote node to this connection resource so we can have
* easy access to the connection resource during the scheduler calculations.
*/
static void
link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
{
pcmk_node_t *remote_node = NULL;
if (new_rsc->is_remote_node == FALSE) {
return;
}
if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
/* remote_nodes and remote_resources are not linked in quick location calculations */
return;
}
remote_node = pe_find_node(scheduler->nodes, new_rsc->id);
CRM_CHECK(remote_node != NULL, return);
pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
new_rsc->id, pe__node_name(remote_node));
remote_node->details->remote_rsc = new_rsc;
if (new_rsc->container == NULL) {
/* Handle start-up fencing for remote nodes (as opposed to guest nodes)
* the same as is done for cluster nodes.
*/
handle_startup_fencing(scheduler, remote_node);
} else {
/* pe_create_node() marks the new node as "remote" or "cluster"; now
* that we know the node is a guest node, update it correctly.
*/
g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("container"));
}
}
static void
destroy_tag(gpointer data)
{
pcmk_tag_t *tag = data;
if (tag) {
free(tag->id);
g_list_free_full(tag->refs, free);
free(tag);
}
}
/*!
* \internal
* \brief Parse configuration XML for resource information
*
* \param[in] xml_resources Top of resource configuration XML
* \param[in,out] scheduler Scheduler data
*
* \return TRUE
*
* \note unpack_remote_nodes() MUST be called before this, so that the nodes can
* be used when pe__unpack_resource() calls resource_location()
*/
gboolean
unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_obj = NULL;
GList *gIter = NULL;
scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
pcmk_resource_t *new_rsc = NULL;
const char *id = ID(xml_obj);
if (pcmk__str_empty(id)) {
pcmk__config_err("Ignoring <%s> resource without ID",
xml_obj->name);
continue;
}
if (pcmk__str_eq((const char *) xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE,
pcmk__str_none)) {
if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
NULL, NULL) == FALSE) {
/* Record the template's ID for the knowledge of its existence anyway. */
g_hash_table_insert(scheduler->template_rsc_sets, strdup(id),
NULL);
}
continue;
}
crm_trace("Unpacking <%s " XML_ATTR_ID "='%s'>",
xml_obj->name, id);
if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
scheduler) == pcmk_rc_ok) {
scheduler->resources = g_list_append(scheduler->resources, new_rsc);
pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
} else {
pcmk__config_err("Ignoring <%s> resource '%s' "
"because configuration is invalid",
xml_obj->name, id);
}
}
for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
setup_container(rsc, scheduler);
link_rsc2remotenode(scheduler, rsc);
}
scheduler->resources = g_list_sort(scheduler->resources,
pe__cmp_rsc_priority);
if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
/* Ignore */
} else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)
&& !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) {
pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
pcmk__config_err("Either configure some or disable STONITH with the "
PCMK_OPT_STONITH_ENABLED " option");
pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
}
return TRUE;
}
gboolean
unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_tag = NULL;
scheduler->tags = pcmk__strkey_table(free, destroy_tag);
for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL;
xml_tag = pcmk__xe_next(xml_tag)) {
xmlNode *xml_obj_ref = NULL;
const char *tag_id = ID(xml_tag);
if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) {
continue;
}
if (tag_id == NULL) {
pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID,
(const char *) xml_tag->name);
continue;
}
for (xml_obj_ref = pcmk__xe_first_child(xml_tag); xml_obj_ref != NULL;
xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
const char *obj_ref = ID(xml_obj_ref);
if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) {
continue;
}
if (obj_ref == NULL) {
pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID,
xml_obj_ref->name, tag_id);
continue;
}
if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
return FALSE;
}
}
}
return TRUE;
}
/* The ticket state section:
* "/cib/status/tickets/ticket_state" */
static gboolean
unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
{
const char *ticket_id = NULL;
const char *granted = NULL;
const char *last_granted = NULL;
const char *standby = NULL;
xmlAttrPtr xIter = NULL;
pcmk_ticket_t *ticket = NULL;
ticket_id = ID(xml_ticket);
if (pcmk__str_empty(ticket_id)) {
return FALSE;
}
crm_trace("Processing ticket state for %s", ticket_id);
ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
if (ticket == NULL) {
ticket = ticket_new(ticket_id, scheduler);
if (ticket == NULL) {
return FALSE;
}
}
for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = pcmk__xml_attr_value(xIter);
if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) {
continue;
}
g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
}
granted = g_hash_table_lookup(ticket->state, "granted");
if (granted && crm_is_true(granted)) {
ticket->granted = TRUE;
crm_info("We have ticket '%s'", ticket->id);
} else {
ticket->granted = FALSE;
crm_info("We do not have ticket '%s'", ticket->id);
}
last_granted = g_hash_table_lookup(ticket->state, "last-granted");
if (last_granted) {
long long last_granted_ll;
pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
ticket->last_granted = (time_t) last_granted_ll;
}
standby = g_hash_table_lookup(ticket->state, "standby");
if (standby && crm_is_true(standby)) {
ticket->standby = TRUE;
if (ticket->granted) {
crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
}
} else {
ticket->standby = FALSE;
}
crm_trace("Done with ticket state for %s", ticket_id);
return TRUE;
}
static gboolean
unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
{
xmlNode *xml_obj = NULL;
for (xml_obj = pcmk__xe_first_child(xml_tickets); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) {
continue;
}
unpack_ticket_state(xml_obj, scheduler);
}
return TRUE;
}
static void
unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
pcmk_scheduler_t *scheduler)
{
const char *resource_discovery_enabled = NULL;
const xmlNode *attrs = NULL;
pcmk_resource_t *rsc = NULL;
if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
return;
}
if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
return;
}
crm_trace("Processing Pacemaker Remote node %s", pe__node_name(this_node));
pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_MAINTENANCE),
&(this_node->details->remote_maintenance), 0);
rsc = this_node->details->remote_rsc;
if (this_node->details->remote_requires_reset == FALSE) {
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
}
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, scheduler);
if (pe__shutdown_requested(this_node)) {
crm_info("%s is shutting down", pe__node_name(this_node));
this_node->details->shutdown = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
crm_info("%s is in standby mode", pe__node_name(this_node));
this_node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
crm_info("%s is in maintenance mode", pe__node_name(this_node));
this_node->details->maintenance = TRUE;
}
resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
if (pe__is_remote_node(this_node)
&& !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
pcmk__config_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY
" attribute on Pacemaker Remote node %s"
" because fencing is disabled",
pe__node_name(this_node));
} else {
/* This is either a remote node with fencing enabled, or a guest
* node. We don't care whether fencing is enabled when fencing guest
* nodes, because they are "fenced" by recovering their containing
* resource.
*/
crm_info("%s has resource discovery disabled",
pe__node_name(this_node));
this_node->details->rsc_discovery_enabled = FALSE;
}
}
}
/*!
* \internal
* \brief Unpack a cluster node's transient attributes
*
* \param[in] state CIB node state XML
* \param[in,out] node Cluster node whose attributes are being unpacked
* \param[in,out] scheduler Scheduler data
*/
static void
unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
pcmk_scheduler_t *scheduler)
{
const char *discovery = NULL;
const xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS,
FALSE);
add_node_attrs(attrs, node, TRUE, scheduler);
if (crm_is_true(pe_node_attribute_raw(node, "standby"))) {
crm_info("%s is in standby mode", pe__node_name(node));
node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(node, "maintenance"))) {
crm_info("%s is in maintenance mode", pe__node_name(node));
node->details->maintenance = TRUE;
}
discovery = pe_node_attribute_raw(node, XML_NODE_ATTR_RSC_DISCOVERY);
if ((discovery != NULL) && !crm_is_true(discovery)) {
pcmk__config_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY " attribute "
"for %s because disabling resource discovery is "
"not allowed for cluster nodes", pe__node_name(node));
}
}
/*!
* \internal
* \brief Unpack a node state entry (first pass)
*
* Unpack one node state entry from status. This unpacks information from the
* node_state element itself and node attributes inside it, but not the
* resource history inside it. Multiple passes through the status are needed to
* fully unpack everything.
*
* \param[in] state CIB node state XML
* \param[in,out] scheduler Scheduler data
*/
static void
unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
{
const char *id = NULL;
const char *uname = NULL;
pcmk_node_t *this_node = NULL;
id = crm_element_value(state, XML_ATTR_ID);
if (id == NULL) {
pcmk__config_err("Ignoring invalid " XML_CIB_TAG_STATE " entry without "
XML_ATTR_ID);
crm_log_xml_info(state, "missing-id");
return;
}
uname = crm_element_value(state, XML_ATTR_UNAME);
if (uname == NULL) {
/* If a joining peer makes the cluster acquire the quorum from corosync
* meanwhile it has not joined CPG membership of pacemaker-controld yet,
* it's possible that the created node_state entry doesn't have an uname
* yet. We should recognize the node as `pending` and wait for it to
* join CPG.
*/
crm_trace("Handling " XML_CIB_TAG_STATE " entry with id=\"%s\" without "
XML_ATTR_UNAME, id);
}
this_node = pe_find_node_any(scheduler->nodes, id, uname);
if (this_node == NULL) {
pcmk__config_warn("Ignoring recorded node state for id=\"%s\" (%s) "
"because it is no longer in the configuration",
id, pcmk__s(uname, "uname unknown"));
return;
}
if (pe__is_guest_or_remote_node(this_node)) {
/* We can't determine the online status of Pacemaker Remote nodes until
* after all resource history has been unpacked. In this first pass, we
* do need to mark whether the node has been fenced, as this plays a
* role during unpacking cluster node resource state.
*/
pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_FENCED),
&(this_node->details->remote_was_fenced), 0);
return;
}
unpack_transient_attributes(state, this_node, scheduler);
/* Provisionally mark this cluster node as clean. We have at least seen it
* in the current cluster's lifetime.
*/
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
crm_trace("Determining online status of cluster node %s (id %s)",
pe__node_name(this_node), id);
determine_online_status(state, this_node, scheduler);
if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
&& this_node->details->online
&& (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
/* Everything else should flow from this automatically
* (at least until the scheduler becomes able to migrate off
* healthy resources)
*/
pe_fence_node(scheduler, this_node, "cluster does not have quorum",
FALSE);
}
}
/*!
* \internal
* \brief Unpack nodes' resource history as much as possible
*
* Unpack as many nodes' resource history as possible in one pass through the
* status. We need to process Pacemaker Remote nodes' connections/containers
* before unpacking their history; the connection/container history will be
* in another node's history, so it might take multiple passes to unpack
* everything.
*
* \param[in] status CIB XML status section
* \param[in] fence If true, treat any not-yet-unpacked nodes as unseen
* \param[in,out] scheduler Scheduler data
*
* \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
* or EAGAIN if more unpacking remains to be done)
*/
static int
unpack_node_history(const xmlNode *status, bool fence,
pcmk_scheduler_t *scheduler)
{
int rc = pcmk_rc_ok;
// Loop through all node_state entries in CIB status
for (const xmlNode *state = first_named_child(status, XML_CIB_TAG_STATE);
state != NULL; state = crm_next_same_xml(state)) {
const char *id = ID(state);
const char *uname = crm_element_value(state, XML_ATTR_UNAME);
pcmk_node_t *this_node = NULL;
if ((id == NULL) || (uname == NULL)) {
// Warning already logged in first pass through status section
crm_trace("Not unpacking resource history from malformed "
XML_CIB_TAG_STATE " without id and/or uname");
continue;
}
this_node = pe_find_node_any(scheduler->nodes, id, uname);
if (this_node == NULL) {
// Warning already logged in first pass through status section
crm_trace("Not unpacking resource history for node %s because "
"no longer in configuration", id);
continue;
}
if (this_node->details->unpacked) {
crm_trace("Not unpacking resource history for node %s because "
"already unpacked", id);
continue;
}
if (fence) {
// We're processing all remaining nodes
} else if (pe__is_guest_node(this_node)) {
/* We can unpack a guest node's history only after we've unpacked
* other resource history to the point that we know that the node's
* connection and containing resource are both up.
*/
pcmk_resource_t *rsc = this_node->details->remote_rsc;
if ((rsc == NULL) || (rsc->role != pcmk_role_started)
|| (rsc->container->role != pcmk_role_started)) {
crm_trace("Not unpacking resource history for guest node %s "
"because container and connection are not known to "
"be up", id);
continue;
}
} else if (pe__is_remote_node(this_node)) {
/* We can unpack a remote node's history only after we've unpacked
* other resource history to the point that we know that the node's
* connection is up, with the exception of when shutdown locks are
* in use.
*/
pcmk_resource_t *rsc = this_node->details->remote_rsc;
if ((rsc == NULL)
|| (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)
&& (rsc->role != pcmk_role_started))) {
crm_trace("Not unpacking resource history for remote node %s "
"because connection is not known to be up", id);
continue;
}
/* If fencing and shutdown locks are disabled and we're not processing
* unseen nodes, then we don't want to unpack offline nodes until online
* nodes have been unpacked. This allows us to number active clone
* instances first.
*/
} else if (!pcmk_any_flags_set(scheduler->flags,
pcmk_sched_fencing_enabled
|pcmk_sched_shutdown_lock)
&& !this_node->details->online) {
crm_trace("Not unpacking resource history for offline "
"cluster node %s", id);
continue;
}
if (pe__is_guest_or_remote_node(this_node)) {
determine_remote_online_status(scheduler, this_node);
unpack_handle_remote_attrs(this_node, state, scheduler);
}
crm_trace("Unpacking resource history for %snode %s",
(fence? "unseen " : ""), id);
this_node->details->unpacked = TRUE;
unpack_node_lrm(this_node, state, scheduler);
rc = EAGAIN; // Other node histories might depend on this one
}
return rc;
}
/* remove nodes that are down, stopping */
/* create positive rsc_to_node constraints between resources and the nodes they are running on */
/* anything else? */
gboolean
unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
{
xmlNode *state = NULL;
crm_trace("Beginning unpack");
if (scheduler->tickets == NULL) {
scheduler->tickets = pcmk__strkey_table(free, destroy_ticket);
}
for (state = pcmk__xe_first_child(status); state != NULL;
state = pcmk__xe_next(state)) {
if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) {
unpack_tickets_state((xmlNode *) state, scheduler);
} else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
unpack_node_state(state, scheduler);
}
}
while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
crm_trace("Another pass through node resource histories is needed");
}
// Now catch any nodes we didn't see
unpack_node_history(status,
pcmk_is_set(scheduler->flags,
pcmk_sched_fencing_enabled),
scheduler);
/* Now that we know where resources are, we can schedule stops of containers
* with failed bundle connections
*/
if (scheduler->stop_needed != NULL) {
for (GList *item = scheduler->stop_needed; item; item = item->next) {
pcmk_resource_t *container = item->data;
pcmk_node_t *node = pe__current_node(container);
if (node) {
stop_action(container, node, FALSE);
}
}
g_list_free(scheduler->stop_needed);
scheduler->stop_needed = NULL;
}
/* Now that we know status of all Pacemaker Remote connections and nodes,
* we can stop connections for node shutdowns, and check the online status
* of remote/guest nodes that didn't have any node history to unpack.
*/
for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
pcmk_node_t *this_node = gIter->data;
if (!pe__is_guest_or_remote_node(this_node)) {
continue;
}
if (this_node->details->shutdown
&& (this_node->details->remote_rsc != NULL)) {
pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped,
"remote shutdown");
}
if (!this_node->details->unpacked) {
determine_remote_online_status(scheduler, this_node);
}
}
return TRUE;
}
/*!
* \internal
* \brief Unpack node's time when it became a member at the cluster layer
*
* \param[in] node_state Node's node_state entry
* \param[in,out] scheduler Scheduler data
*
* \return Epoch time when node became a cluster member
* (or scheduler effective time for legacy entries) if a member,
* 0 if not a member, or -1 if no valid information available
*/
static long long
unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
{
const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
int member = 0;
if (member_time == NULL) {
return -1LL;
} else if (crm_str_to_boolean(member_time, &member) == 1) {
/* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
* recorded as a boolean for a DC < 2.1.7, or the node is pending
* shutdown and has left the CPG, in which case it was set to 1 to avoid
* fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
*
* We return the effective time for in_ccm=1 because what's important to
* avoid fencing is that effective time minus this value is less than
* the pending node timeout.
*/
return member? (long long) get_effective_time(scheduler) : 0LL;
} else {
long long when_member = 0LL;
if ((pcmk__scan_ll(member_time, &when_member,
0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
" in " XML_CIB_TAG_STATE " entry", member_time);
return -1LL;
}
return when_member;
}
}
/*!
* \internal
* \brief Unpack node's time when it became online in process group
*
* \param[in] node_state Node's node_state entry
*
* \return Epoch time when node became online in process group (or 0 if not
* online, or 1 for legacy online entries)
*/
static long long
unpack_node_online(const xmlNode *node_state)
{
const char *peer_time = crm_element_value(node_state, PCMK__XA_CRMD);
// @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
if (pcmk__str_eq(peer_time, OFFLINESTATUS,
pcmk__str_casei|pcmk__str_null_matches)) {
return 0LL;
} else if (pcmk__str_eq(peer_time, ONLINESTATUS, pcmk__str_casei)) {
return 1LL;
} else {
long long when_online = 0LL;
if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
|| (when_online < 0)) {
crm_warn("Unrecognized value '%s' for " PCMK__XA_CRMD " in "
XML_CIB_TAG_STATE " entry, assuming offline", peer_time);
return 0LL;
}
return when_online;
}
}
/*!
* \internal
* \brief Unpack node attribute for user-requested fencing
*
* \param[in] node Node to check
* \param[in] node_state Node's node_state entry in CIB status
*
* \return \c true if fencing has been requested for \p node, otherwise \c false
*/
static bool
unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
{
long long value = 0LL;
int value_i = 0;
const char *value_s = pe_node_attribute_raw(node, PCMK_NODE_ATTR_TERMINATE);
// Value may be boolean or an epoch time
if (crm_str_to_boolean(value_s, &value_i) == 1) {
return (value_i != 0);
}
if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) {
return (value > 0);
}
crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
"node attribute for %s", value_s, pe__node_name(node));
return false;
}
static gboolean
determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
const xmlNode *node_state,
pcmk_node_t *this_node)
{
gboolean online = FALSE;
const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
long long when_member = unpack_node_member(node_state, scheduler);
long long when_online = unpack_node_online(node_state);
if (when_member <= 0) {
crm_trace("Node %s is %sdown", pe__node_name(this_node),
((when_member < 0)? "presumed " : ""));
} else if (when_online > 0) {
if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
online = TRUE;
} else {
crm_debug("Node %s is not ready to run resources: %s",
pe__node_name(this_node), join);
}
} else if (this_node->details->expected_up == FALSE) {
crm_trace("Node %s controller is down: "
"member@%lld online@%lld join=%s expected=%s",
pe__node_name(this_node), when_member, when_online,
pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
} else {
/* mark it unclean */
pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
pe__node_name(this_node), when_member, when_online,
pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
}
return online;
}
/*!
* \internal
* \brief Check whether a node has taken too long to join controller group
*
* \param[in,out] scheduler Scheduler data
* \param[in] node Node to check
* \param[in] when_member Epoch time when node became a cluster member
* \param[in] when_online Epoch time when node joined controller group
*
* \return true if node has been pending (on the way up) longer than
* \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
* \note This will also update the cluster's recheck time if appropriate.
*/
static inline bool
pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
long long when_member, long long when_online)
{
if ((scheduler->node_pending_timeout > 0)
&& (when_member > 0) && (when_online <= 0)) {
// There is a timeout on pending nodes, and node is pending
time_t timeout = when_member + scheduler->node_pending_timeout;
if (get_effective_time(node->details->data_set) >= timeout) {
return true; // Node has timed out
}
// Node is pending, but still has time
pe__update_recheck_time(timeout, scheduler, "pending node timeout");
}
return false;
}
static bool
determine_online_status_fencing(pcmk_scheduler_t *scheduler,
const xmlNode *node_state,
pcmk_node_t *this_node)
{
bool termination_requested = unpack_node_terminate(this_node, node_state);
const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
long long when_member = unpack_node_member(node_state, scheduler);
long long when_online = unpack_node_online(node_state);
/*
- PCMK__XA_JOIN ::= member|down|pending|banned
- PCMK__XA_EXPECTED ::= member|down
@COMPAT with entries recorded for DCs < 2.1.7
- PCMK__XA_IN_CCM ::= true|false
- PCMK__XA_CRMD ::= online|offline
Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
- PCMK__XA_IN_CCM ::= <timestamp>|0
Since when node has been a cluster member. A value 0 of means the node is not
a cluster member.
- PCMK__XA_CRMD ::= <timestamp>|0
Since when peer has been online in CPG. A value 0 means the peer is offline
in CPG.
*/
crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
pe__node_name(this_node), when_member, when_online,
pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
(termination_requested? " (termination requested)" : ""));
if (this_node->details->shutdown) {
crm_debug("%s is shutting down", pe__node_name(this_node));
/* Slightly different criteria since we can't shut down a dead peer */
return (when_online > 0);
}
if (when_member < 0) {
pe_fence_node(scheduler, this_node,
"peer has not been seen by the cluster", FALSE);
return false;
}
if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
pe_fence_node(scheduler, this_node,
"peer failed Pacemaker membership criteria", FALSE);
} else if (termination_requested) {
if ((when_member <= 0) && (when_online <= 0)
&& pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
crm_info("%s was fenced as requested", pe__node_name(this_node));
return false;
}
pe_fence_node(scheduler, this_node, "fencing was requested", false);
} else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
pcmk__str_null_matches)) {
if (pending_too_long(scheduler, this_node, when_member, when_online)) {
pe_fence_node(scheduler, this_node,
"peer pending timed out on joining the process group",
FALSE);
} else if ((when_member > 0) || (when_online > 0)) {
crm_info("- %s is not ready to run resources",
pe__node_name(this_node));
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
crm_trace("%s is down or still coming up",
pe__node_name(this_node));
}
} else if (when_member <= 0) {
// Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
pe_fence_node(scheduler, this_node,
"peer is no longer part of the cluster", TRUE);
} else if (when_online <= 0) {
pe_fence_node(scheduler, this_node,
"peer process is no longer available", FALSE);
/* Everything is running at this point, now check join state */
} else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
crm_info("%s is active", pe__node_name(this_node));
} else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
CRMD_JOINSTATE_DOWN, NULL)) {
crm_info("%s is not ready to run resources", pe__node_name(this_node));
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
pe_fence_node(scheduler, this_node, "peer was in an unknown state",
FALSE);
}
return (when_member > 0);
}
static void
determine_remote_online_status(pcmk_scheduler_t *scheduler,
pcmk_node_t *this_node)
{
pcmk_resource_t *rsc = this_node->details->remote_rsc;
pcmk_resource_t *container = NULL;
pcmk_node_t *host = NULL;
/* If there is a node state entry for a (former) Pacemaker Remote node
* but no resource creating that node, the node's connection resource will
* be NULL. Consider it an offline remote node in that case.
*/
if (rsc == NULL) {
this_node->details->online = FALSE;
goto remote_online_done;
}
container = rsc->container;
if (container && pcmk__list_of_1(rsc->running_on)) {
host = rsc->running_on->data;
}
/* If the resource is currently started, mark it online. */
if (rsc->role == pcmk_role_started) {
crm_trace("%s node %s presumed ONLINE because connection resource is started",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = TRUE;
}
/* consider this node shutting down if transitioning start->stop */
if ((rsc->role == pcmk_role_started)
&& (rsc->next_role == pcmk_role_stopped)) {
crm_trace("%s node %s shutting down because connection resource is stopping",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->shutdown = TRUE;
}
/* Now check all the failure conditions. */
if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
crm_trace("Guest node %s UNCLEAN because guest resource failed",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
} else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
crm_trace("%s node %s OFFLINE because connection resource failed",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
} else if ((rsc->role == pcmk_role_stopped)
|| ((container != NULL)
&& (container->role == pcmk_role_stopped))) {
crm_trace("%s node %s OFFLINE because its resource is stopped",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = FALSE;
} else if (host && (host->details->online == FALSE)
&& host->details->unclean) {
crm_trace("Guest node %s UNCLEAN because host is unclean",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
}
remote_online_done:
crm_trace("Remote node %s online=%s",
this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
}
static void
determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
pcmk_scheduler_t *scheduler)
{
gboolean online = FALSE;
const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
CRM_CHECK(this_node != NULL, return);
this_node->details->shutdown = FALSE;
this_node->details->expected_up = FALSE;
if (pe__shutdown_requested(this_node)) {
this_node->details->shutdown = TRUE;
} else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
this_node->details->expected_up = TRUE;
}
if (this_node->details->type == node_ping) {
this_node->details->unclean = FALSE;
online = FALSE; /* As far as resource management is concerned,
* the node is safely offline.
* Anyone caught abusing this logic will be shot
*/
} else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
online = determine_online_status_no_fencing(scheduler, node_state,
this_node);
} else {
online = determine_online_status_fencing(scheduler, node_state,
this_node);
}
if (online) {
this_node->details->online = TRUE;
} else {
/* remove node from contention */
this_node->fixed = TRUE; // @COMPAT deprecated and unused
this_node->weight = -INFINITY;
}
if (online && this_node->details->shutdown) {
/* don't run resources here */
this_node->fixed = TRUE; // @COMPAT deprecated and unused
this_node->weight = -INFINITY;
}
if (this_node->details->type == node_ping) {
crm_info("%s is not a Pacemaker node", pe__node_name(this_node));
} else if (this_node->details->unclean) {
pcmk__sched_warn("%s is unclean", pe__node_name(this_node));
} else if (this_node->details->online) {
crm_info("%s is %s", pe__node_name(this_node),
this_node->details->shutdown ? "shutting down" :
this_node->details->pending ? "pending" :
this_node->details->standby ? "standby" :
this_node->details->maintenance ? "maintenance" : "online");
} else {
crm_trace("%s is offline", pe__node_name(this_node));
}
}
/*!
* \internal
* \brief Find the end of a resource's name, excluding any clone suffix
*
* \param[in] id Resource ID to check
*
* \return Pointer to last character of resource's base name
*/
const char *
pe_base_name_end(const char *id)
{
if (!pcmk__str_empty(id)) {
const char *end = id + strlen(id) - 1;
for (const char *s = end; s > id; --s) {
switch (*s) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case ':':
return (s == end)? s : (s - 1);
default:
return end;
}
}
return end;
}
return NULL;
}
/*!
* \internal
* \brief Get a resource name excluding any clone suffix
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_strip(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
char *basename = NULL;
CRM_ASSERT(end);
basename = strndup(last_rsc_id, end - last_rsc_id + 1);
CRM_ASSERT(basename);
return basename;
}
/*!
* \internal
* \brief Get the name of the first instance of a cloned resource
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name plus :0
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_zero(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
size_t base_name_len = end - last_rsc_id + 1;
char *zero = NULL;
CRM_ASSERT(end);
zero = calloc(base_name_len + 3, sizeof(char));
CRM_ASSERT(zero);
memcpy(zero, last_rsc_id, base_name_len);
zero[base_name_len] = ':';
zero[base_name_len + 1] = '0';
return zero;
}
static pcmk_resource_t *
create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
pcmk_scheduler_t *scheduler)
{
pcmk_resource_t *rsc = NULL;
xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
copy_in_properties(xml_rsc, rsc_entry);
crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
crm_log_xml_debug(xml_rsc, "Orphan resource");
if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
return NULL;
}
if (xml_contains_remote_node(xml_rsc)) {
pcmk_node_t *node;
crm_debug("Detected orphaned remote node %s", rsc_id);
node = pe_find_node(scheduler->nodes, rsc_id);
if (node == NULL) {
node = pe_create_node(rsc_id, rsc_id, "remote", NULL, scheduler);
}
link_rsc2remotenode(scheduler, rsc);
if (node) {
crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
node->details->shutdown = TRUE;
}
}
if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
/* This orphaned rsc needs to be mapped to a container. */
crm_trace("Detected orphaned container filler %s", rsc_id);
pe__set_resource_flags(rsc, pcmk_rsc_removed_filler);
}
pe__set_resource_flags(rsc, pcmk_rsc_removed);
scheduler->resources = g_list_append(scheduler->resources, rsc);
return rsc;
}
/*!
* \internal
* \brief Create orphan instance for anonymous clone resource history
*
* \param[in,out] parent Clone resource that orphan will be added to
* \param[in] rsc_id Orphan's resource ID
* \param[in] node Where orphan is active (for logging only)
* \param[in,out] scheduler Scheduler data
*
* \return Newly added orphaned instance of \p parent
*/
static pcmk_resource_t *
create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
// find_rsc() because we might be a cloned group
pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
pcmk_rsc_match_clone_only);
pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
top->id, parent->id, rsc_id, pe__node_name(node));
return orphan;
}
/*!
* \internal
* \brief Check a node for an instance of an anonymous clone
*
* Return a child instance of the specified anonymous clone, in order of
* preference: (1) the instance running on the specified node, if any;
* (2) an inactive instance (i.e. within the total of clone-max instances);
* (3) a newly created orphan (i.e. clone-max instances are already active).
*
* \param[in,out] scheduler Scheduler data
* \param[in] node Node on which to check for instance
* \param[in,out] parent Clone to check
* \param[in] rsc_id Name of cloned resource in history (no instance)
*/
static pcmk_resource_t *
find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
pcmk_resource_t *parent, const char *rsc_id)
{
GList *rIter = NULL;
pcmk_resource_t *rsc = NULL;
pcmk_resource_t *inactive_instance = NULL;
gboolean skip_inactive = FALSE;
CRM_ASSERT(parent != NULL);
CRM_ASSERT(pe_rsc_is_clone(parent));
CRM_ASSERT(!pcmk_is_set(parent->flags, pcmk_rsc_unique));
// Check for active (or partially active, for cloned groups) instance
pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
rsc_id, pe__node_name(node), parent->id);
for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
GList *locations = NULL;
pcmk_resource_t *child = rIter->data;
/* Check whether this instance is already known to be active or pending
* anywhere, at this stage of unpacking. Because this function is called
* for a resource before the resource's individual operation history
* entries are unpacked, locations will generally not contain the
* desired node.
*
* However, there are three exceptions:
* (1) when child is a cloned group and we have already unpacked the
* history of another member of the group on the same node;
* (2) when we've already unpacked the history of another numbered
* instance on the same node (which can happen if globally-unique
* was flipped from true to false); and
* (3) when we re-run calculations on the same scheduler data as part of
* a simulation.
*/
child->fns->location(child, &locations, 2);
if (locations) {
/* We should never associate the same numbered anonymous clone
* instance with multiple nodes, and clone instances can't migrate,
* so there must be only one location, regardless of history.
*/
CRM_LOG_ASSERT(locations->next == NULL);
if (((pcmk_node_t *) locations->data)->details == node->details) {
/* This child instance is active on the requested node, so check
* for a corresponding configured resource. We use find_rsc()
* instead of child because child may be a cloned group, and we
* need the particular member corresponding to rsc_id.
*
* If the history entry is orphaned, rsc will be NULL.
*/
rsc = parent->fns->find_rsc(child, rsc_id, NULL,
pcmk_rsc_match_clone_only);
if (rsc) {
/* If there are multiple instance history entries for an
* anonymous clone in a single node's history (which can
* happen if globally-unique is switched from true to
* false), we want to consider the instances beyond the
* first as orphans, even if there are inactive instance
* numbers available.
*/
if (rsc->running_on) {
crm_notice("Active (now-)anonymous clone %s has "
"multiple (orphan) instance histories on %s",
parent->id, pe__node_name(node));
skip_inactive = TRUE;
rsc = NULL;
} else {
pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
}
}
}
g_list_free(locations);
} else {
pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
if (!skip_inactive && !inactive_instance
&& !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
// Remember one inactive instance in case we don't find active
inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
pcmk_rsc_match_clone_only);
/* ... but don't use it if it was already associated with a
* pending action on another node
*/
if (inactive_instance && inactive_instance->pending_node
&& (inactive_instance->pending_node->details != node->details)) {
inactive_instance = NULL;
}
}
}
}
if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
pcmk__rsc_trace(parent, "Resource %s, empty slot",
inactive_instance->id);
rsc = inactive_instance;
}
/* If the resource has "requires" set to "quorum" or "nothing", and we don't
* have a clone instance for every node, we don't want to consume a valid
* instance number for unclean nodes. Such instances may appear to be active
* according to the history, but should be considered inactive, so we can
* start an instance elsewhere. Treat such instances as orphans.
*
* An exception is instances running on guest nodes -- since guest node
* "fencing" is actually just a resource stop, requires shouldn't apply.
*
* @TODO Ideally, we'd use an inactive instance number if it is not needed
* for any clean instances. However, we don't know that at this point.
*/
if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
&& (!node->details->online || node->details->unclean)
&& !pe__is_guest_node(node)
&& !pe__is_universal_clone(parent, scheduler)) {
rsc = NULL;
}
if (rsc == NULL) {
rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
}
return rsc;
}
static pcmk_resource_t *
unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
const char *rsc_id)
{
pcmk_resource_t *rsc = NULL;
pcmk_resource_t *parent = NULL;
crm_trace("looking for %s", rsc_id);
rsc = pe_find_resource(scheduler->resources, rsc_id);
if (rsc == NULL) {
/* If we didn't find the resource by its name in the operation history,
* check it again as a clone instance. Even when clone-max=0, we create
* a single :0 orphan to match against here.
*/
char *clone0_id = clone_zero(rsc_id);
pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources,
clone0_id);
if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
rsc = clone0;
parent = uber_parent(clone0);
crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
} else {
crm_trace("%s is not known as %s either (orphan)",
rsc_id, clone0_id);
}
free(clone0_id);
} else if (rsc->variant > pcmk_rsc_variant_primitive) {
crm_trace("Resource history for %s is orphaned because it is no longer primitive",
rsc_id);
return NULL;
} else {
parent = uber_parent(rsc);
}
if (pe_rsc_is_anon_clone(parent)) {
if (pe_rsc_is_bundled(parent)) {
rsc = pe__find_bundle_replica(parent->parent, node);
} else {
char *base = clone_strip(rsc_id);
rsc = find_anonymous_clone(scheduler, node, parent, base);
free(base);
CRM_ASSERT(rsc != NULL);
}
}
if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei)
&& !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) {
pcmk__str_update(&rsc->clone_name, rsc_id);
pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
rsc_id, pe__node_name(node), rsc->id,
pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : "");
}
return rsc;
}
static pcmk_resource_t *
process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
pcmk_scheduler_t *scheduler)
{
pcmk_resource_t *rsc = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
crm_debug("Detected orphan resource %s on %s", rsc_id, pe__node_name(node));
rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
if (rsc == NULL) {
return NULL;
}
if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
pe__clear_resource_flags(rsc, pcmk_rsc_managed);
} else {
CRM_CHECK(rsc != NULL, return NULL);
pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__",
scheduler);
}
return rsc;
}
static void
process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
enum action_fail_response on_fail)
{
pcmk_node_t *tmpnode = NULL;
char *reason = NULL;
enum action_fail_response save_on_fail = pcmk_on_fail_ignore;
CRM_ASSERT(rsc);
pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
rsc->id, role2text(rsc->role), pe__node_name(node),
fail2text(on_fail));
/* process current state */
if (rsc->role != pcmk_role_unknown) {
pcmk_resource_t *iter = rsc;
while (iter) {
if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
pcmk_node_t *n = pe__copy_node(node);
pcmk__rsc_trace(rsc, "%s%s%s known on %s",
rsc->id,
((rsc->clone_name == NULL)? "" : " also known as "),
((rsc->clone_name == NULL)? "" : rsc->clone_name),
pe__node_name(n));
g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
}
if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
break;
}
iter = iter->parent;
}
}
/* If a managed resource is believed to be running, but node is down ... */
if ((rsc->role > pcmk_role_stopped)
&& node->details->online == FALSE
&& node->details->maintenance == FALSE
&& pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
gboolean should_fence = FALSE;
/* If this is a guest node, fence it (regardless of whether fencing is
* enabled, because guest node fencing is done by recovery of the
* container resource rather than by the fencer). Mark the resource
* we're processing as failed. When the guest comes back up, its
* operation history in the CIB will be cleared, freeing the affected
* resource to run again once we are sure we know its state.
*/
if (pe__is_guest_node(node)) {
pe__set_resource_flags(rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
should_fence = TRUE;
} else if (pcmk_is_set(rsc->cluster->flags,
pcmk_sched_fencing_enabled)) {
if (pe__is_remote_node(node) && node->details->remote_rsc
&& !pcmk_is_set(node->details->remote_rsc->flags,
pcmk_rsc_failed)) {
/* Setting unseen means that fencing of the remote node will
* occur only if the connection resource is not going to start
* somewhere. This allows connection resources on a failed
* cluster node to move to another node without requiring the
* remote nodes to be fenced as well.
*/
node->details->unseen = TRUE;
reason = crm_strdup_printf("%s is active there (fencing will be"
" revoked if remote connection can "
"be re-established elsewhere)",
rsc->id);
}
should_fence = TRUE;
}
if (should_fence) {
if (reason == NULL) {
reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
}
pe_fence_node(rsc->cluster, node, reason, FALSE);
}
free(reason);
}
/* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
save_on_fail = on_fail;
if (node->details->unclean) {
/* No extra processing needed
* Also allows resources to be started again after a node is shot
*/
on_fail = pcmk_on_fail_ignore;
}
switch (on_fail) {
case pcmk_on_fail_ignore:
/* nothing to do */
break;
case pcmk_on_fail_demote:
pe__set_resource_flags(rsc, pcmk_rsc_failed);
demote_action(rsc, node, FALSE);
break;
case pcmk_on_fail_fence_node:
/* treat it as if it is still running
* but also mark the node as unclean
*/
reason = crm_strdup_printf("%s failed there", rsc->id);
pe_fence_node(rsc->cluster, node, reason, FALSE);
free(reason);
break;
case pcmk_on_fail_standby_node:
node->details->standby = TRUE;
node->details->standby_onfail = TRUE;
break;
case pcmk_on_fail_block:
/* is_managed == FALSE will prevent any
* actions being sent for the resource
*/
pe__clear_resource_flags(rsc, pcmk_rsc_managed);
pe__set_resource_flags(rsc, pcmk_rsc_blocked);
break;
case pcmk_on_fail_ban:
/* make sure it comes up somewhere else
* or not at all
*/
resource_location(rsc, node, -INFINITY, "__action_migration_auto__",
rsc->cluster);
break;
case pcmk_on_fail_stop:
pe__set_next_role(rsc, pcmk_role_stopped, "on-fail=stop");
break;
case pcmk_on_fail_restart:
if ((rsc->role != pcmk_role_stopped)
&& (rsc->role != pcmk_role_unknown)) {
pe__set_resource_flags(rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
stop_action(rsc, node, FALSE);
}
break;
case pcmk_on_fail_restart_container:
pe__set_resource_flags(rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
if (rsc->container && pe_rsc_is_bundled(rsc)) {
/* A bundle's remote connection can run on a different node than
* the bundle's container. We don't necessarily know where the
* container is running yet, so remember it and add a stop
* action for it later.
*/
rsc->cluster->stop_needed =
g_list_prepend(rsc->cluster->stop_needed, rsc->container);
} else if (rsc->container) {
stop_action(rsc->container, node, FALSE);
} else if ((rsc->role != pcmk_role_stopped)
&& (rsc->role != pcmk_role_unknown)) {
stop_action(rsc, node, FALSE);
}
break;
case pcmk_on_fail_reset_remote:
pe__set_resource_flags(rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
tmpnode = NULL;
if (rsc->is_remote_node) {
tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
}
if (tmpnode &&
pe__is_remote_node(tmpnode) &&
tmpnode->details->remote_was_fenced == 0) {
/* The remote connection resource failed in a way that
* should result in fencing the remote node.
*/
pe_fence_node(rsc->cluster, tmpnode,
"remote connection is unrecoverable", FALSE);
}
}
/* require the stop action regardless if fencing is occurring or not. */
if (rsc->role > pcmk_role_stopped) {
stop_action(rsc, node, FALSE);
}
/* if reconnect delay is in use, prevent the connection from exiting the
* "STOPPED" role until the failure is cleared by the delay timeout. */
if (rsc->remote_reconnect_ms) {
pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
}
break;
}
/* ensure a remote-node connection failure forces an unclean remote-node
* to be fenced. By setting unseen = FALSE, the remote-node failure will
* result in a fencing operation regardless if we're going to attempt to
* reconnect to the remote-node in this transition or not. */
if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
if (tmpnode && tmpnode->details->unclean) {
tmpnode->details->unseen = FALSE;
}
}
if ((rsc->role != pcmk_role_stopped)
&& (rsc->role != pcmk_role_unknown)) {
if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
pcmk__config_warn("Detected active orphan %s running on %s",
rsc->id, pe__node_name(node));
} else {
pcmk__config_warn("Resource '%s' must be stopped manually on "
"%s because cluster is configured not to "
"stop active orphans",
rsc->id, pe__node_name(node));
}
}
native_add_running(rsc, node, rsc->cluster,
(save_on_fail != pcmk_on_fail_ignore));
switch (on_fail) {
case pcmk_on_fail_ignore:
break;
case pcmk_on_fail_demote:
case pcmk_on_fail_block:
pe__set_resource_flags(rsc, pcmk_rsc_failed);
break;
default:
pe__set_resource_flags(rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
break;
}
} else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
/* Only do this for older status sections that included instance numbers
* Otherwise stopped instances will appear as orphans
*/
pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)",
rsc->clone_name, rsc->id);
free(rsc->clone_name);
rsc->clone_name = NULL;
} else {
GList *possible_matches = pe__resource_actions(rsc, node,
PCMK_ACTION_STOP, FALSE);
GList *gIter = possible_matches;
for (; gIter != NULL; gIter = gIter->next) {
pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
pe__set_action_flags(stop, pcmk_action_optional);
}
g_list_free(possible_matches);
}
/* A successful stop after migrate_to on the migration source doesn't make
* the partially migrated resource stopped on the migration target.
*/
if ((rsc->role == pcmk_role_stopped)
&& rsc->partial_migration_source
&& rsc->partial_migration_source->details == node->details
&& rsc->partial_migration_target
&& rsc->running_on) {
rsc->role = pcmk_role_started;
}
}
/* create active recurring operations as optional */
static void
process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
int start_index, int stop_index,
GList *sorted_op_list, pcmk_scheduler_t *scheduler)
{
int counter = -1;
const char *task = NULL;
const char *status = NULL;
GList *gIter = sorted_op_list;
CRM_ASSERT(rsc);
pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
rsc->id, start_index, stop_index);
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
guint interval_ms = 0;
char *key = NULL;
const char *id = ID(rsc_op);
counter++;
if (node->details->online == FALSE) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
rsc->id, pe__node_name(node));
break;
/* Need to check if there's a monitor for role="Stopped" */
} else if (start_index < stop_index && counter <= stop_index) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
id, pe__node_name(node));
continue;
} else if (counter < start_index) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
id, pe__node_name(node), counter);
continue;
}
crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if (interval_ms == 0) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
id, pe__node_name(node));
continue;
}
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
id, pe__node_name(node));
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
/* create the action */
key = pcmk__op_key(rsc->id, task, interval_ms);
pcmk__rsc_trace(rsc, "Creating %s on %s", key, pe__node_name(node));
custom_action(rsc, key, task, node, TRUE, scheduler);
}
}
void
calculate_active_ops(const GList *sorted_op_list, int *start_index,
int *stop_index)
{
int counter = -1;
int implied_monitor_start = -1;
int implied_clone_start = -1;
const char *task = NULL;
const char *status = NULL;
*stop_index = -1;
*start_index = -1;
for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
const xmlNode *rsc_op = (const xmlNode *) iter->data;
counter++;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
&& pcmk__str_eq(status, "0", pcmk__str_casei)) {
*stop_index = counter;
} else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
*start_index = counter;
} else if ((implied_monitor_start <= *stop_index)
&& pcmk__str_eq(task, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
implied_monitor_start = counter;
}
} else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
PCMK_ACTION_DEMOTE, NULL)) {
implied_clone_start = counter;
}
}
if (*start_index == -1) {
if (implied_clone_start != -1) {
*start_index = implied_clone_start;
} else if (implied_monitor_start != -1) {
*start_index = implied_monitor_start;
}
}
}
// If resource history entry has shutdown lock, remember lock node and time
static void
unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
time_t lock_time = 0; // When lock started (i.e. node shutdown time)
if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK,
&lock_time) == pcmk_ok) && (lock_time != 0)) {
if ((scheduler->shutdown_lock > 0)
&& (get_effective_time(scheduler)
> (lock_time + scheduler->shutdown_lock))) {
pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
rsc->id, pe__node_name(node));
pe__clear_resource_history(rsc, node);
} else {
/* @COMPAT I don't like breaking const signatures, but
* rsc->lock_node should really be const -- we just can't change it
* until the next API compatibility break.
*/
rsc->lock_node = (pcmk_node_t *) node;
rsc->lock_time = lock_time;
}
}
}
/*!
* \internal
* \brief Unpack one lrm_resource entry from a node's CIB status
*
* \param[in,out] node Node whose status is being unpacked
* \param[in] rsc_entry lrm_resource XML being unpacked
* \param[in,out] scheduler Scheduler data
*
* \return Resource corresponding to the entry, or NULL if no operation history
*/
static pcmk_resource_t *
unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
pcmk_scheduler_t *scheduler)
{
GList *gIter = NULL;
int stop_index = -1;
int start_index = -1;
enum rsc_role_e req_role = pcmk_role_unknown;
const char *rsc_id = ID(lrm_resource);
pcmk_resource_t *rsc = NULL;
GList *op_list = NULL;
GList *sorted_op_list = NULL;
xmlNode *rsc_op = NULL;
xmlNode *last_failure = NULL;
enum action_fail_response on_fail = pcmk_on_fail_ignore;
enum rsc_role_e saved_role = pcmk_role_unknown;
if (rsc_id == NULL) {
pcmk__config_err("Ignoring invalid " XML_LRM_TAG_RESOURCE
" entry: No " XML_ATTR_ID);
crm_log_xml_info(lrm_resource, "missing-id");
return NULL;
}
crm_trace("Unpacking " XML_LRM_TAG_RESOURCE " for %s on %s",
rsc_id, pe__node_name(node));
// Build a list of individual lrm_rsc_op entries, so we can sort them
for (rsc_op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
op_list = g_list_prepend(op_list, rsc_op);
}
if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
if (op_list == NULL) {
// If there are no operations, there is nothing to do
return NULL;
}
}
/* find the resource */
rsc = unpack_find_resource(scheduler, node, rsc_id);
if (rsc == NULL) {
if (op_list == NULL) {
// If there are no operations, there is nothing to do
return NULL;
} else {
rsc = process_orphan_resource(lrm_resource, node, scheduler);
}
}
CRM_ASSERT(rsc != NULL);
// Check whether the resource is "shutdown-locked" to this node
if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
}
/* process operations */
saved_role = rsc->role;
rsc->role = pcmk_role_unknown;
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
}
/* create active recurring operations as optional */
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
scheduler);
/* no need to free the contents */
g_list_free(sorted_op_list);
process_rsc_state(rsc, node, on_fail);
if (get_target_role(rsc, &req_role)) {
if ((rsc->next_role == pcmk_role_unknown)
|| (req_role < rsc->next_role)) {
pe__set_next_role(rsc, req_role, XML_RSC_ATTR_TARGET_ROLE);
} else if (req_role > rsc->next_role) {
pcmk__rsc_info(rsc,
"%s: Not overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role),
role2text(req_role));
}
}
if (saved_role > rsc->role) {
rsc->role = saved_role;
}
return rsc;
}
static void
handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
pcmk_scheduler_t *scheduler)
{
for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list);
rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
pcmk_resource_t *rsc;
pcmk_resource_t *container;
const char *rsc_id;
const char *container_id;
if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) {
continue;
}
container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
if (container_id == NULL || rsc_id == NULL) {
continue;
}
container = pe_find_resource(scheduler->resources, container_id);
if (container == NULL) {
continue;
}
rsc = pe_find_resource(scheduler->resources, rsc_id);
if ((rsc == NULL) || (rsc->container != NULL)
|| !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
continue;
}
pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
rsc->id, container_id);
rsc->container = container;
container->fillers = g_list_append(container->fillers, rsc);
}
}
/*!
* \internal
* \brief Unpack one node's lrm status section
*
* \param[in,out] node Node whose status is being unpacked
* \param[in] xml CIB node state XML
* \param[in,out] scheduler Scheduler data
*/
static void
unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
pcmk_scheduler_t *scheduler)
{
bool found_orphaned_container_filler = false;
// Drill down to lrm_resources section
xml = find_xml_node(xml, XML_CIB_TAG_LRM, FALSE);
if (xml == NULL) {
return;
}
xml = find_xml_node(xml, XML_LRM_TAG_RESOURCES, FALSE);
if (xml == NULL) {
return;
}
// Unpack each lrm_resource entry
for (const xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE);
rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
if ((rsc != NULL)
&& pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
found_orphaned_container_filler = true;
}
}
/* Now that all resource state has been unpacked for this node, map any
* orphaned container fillers to their container resource.
*/
if (found_orphaned_container_filler) {
handle_orphaned_container_fillers(xml, scheduler);
}
}
static void
set_active(pcmk_resource_t *rsc)
{
const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
rsc->role = pcmk_role_unpromoted;
} else {
rsc->role = pcmk_role_started;
}
}
static void
set_node_score(gpointer key, gpointer value, gpointer user_data)
{
pcmk_node_t *node = value;
int *score = user_data;
node->weight = *score;
}
#define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE
#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \
"/" XML_LRM_TAG_RESOURCES \
"/" XML_LRM_TAG_RESOURCE
#define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP
static xmlNode *
find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
int target_rc, pcmk_scheduler_t *scheduler)
{
GString *xpath = NULL;
xmlNode *xml = NULL;
CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
return NULL);
xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']"
SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']"
SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'",
NULL);
/* Need to check against transition_magic too? */
if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
pcmk__g_strcat(xpath,
" and @" XML_LRM_ATTR_MIGRATE_TARGET "='", source, "']",
NULL);
} else if ((source != NULL)
&& (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
pcmk__g_strcat(xpath,
" and @" XML_LRM_ATTR_MIGRATE_SOURCE "='", source, "']",
NULL);
} else {
g_string_append_c(xpath, ']');
}
xml = get_xpath_object((const char *) xpath->str, scheduler->input,
LOG_DEBUG);
g_string_free(xpath, TRUE);
if (xml && target_rc >= 0) {
int rc = PCMK_OCF_UNKNOWN_ERROR;
int status = PCMK_EXEC_ERROR;
crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
return NULL;
}
}
return xml;
}
static xmlNode *
find_lrm_resource(const char *rsc_id, const char *node_name,
pcmk_scheduler_t *scheduler)
{
GString *xpath = NULL;
xmlNode *xml = NULL;
CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']",
NULL);
xml = get_xpath_object((const char *) xpath->str, scheduler->input,
LOG_DEBUG);
g_string_free(xpath, TRUE);
return xml;
}
/*!
* \internal
* \brief Check whether a resource has no completed action history on a node
*
* \param[in,out] rsc Resource to check
* \param[in] node_name Node to check
*
* \return true if \p rsc_id is unknown on \p node_name, otherwise false
*/
static bool
unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
{
bool result = false;
xmlXPathObjectPtr search;
GString *xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']"
SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']",
NULL);
search = xpath_search(rsc->cluster->input, (const char *) xpath->str);
result = (numXpathResults(search) == 0);
freeXpathObject(search);
g_string_free(xpath, TRUE);
return result;
}
/*!
* \brief Check whether a probe/monitor indicating the resource was not running
* on a node happened after some event
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] xml_op Event that monitor is being compared to
* \param[in] same_node Whether the operations are on the same node
* \param[in,out] scheduler Scheduler data
*
* \return true if such a monitor happened after event, false otherwise
*/
static bool
monitor_not_running_after(const char *rsc_id, const char *node_name,
const xmlNode *xml_op, bool same_node,
pcmk_scheduler_t *scheduler)
{
/* Any probe/monitor operation on the node indicating it was not running
* there
*/
xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
NULL, PCMK_OCF_NOT_RUNNING, scheduler);
return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
}
/*!
* \brief Check whether any non-monitor operation on a node happened after some
* event
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] xml_op Event that non-monitor is being compared to
* \param[in] same_node Whether the operations are on the same node
* \param[in,out] scheduler Scheduler data
*
* \return true if such a operation happened after event, false otherwise
*/
static bool
non_monitor_after(const char *rsc_id, const char *node_name,
const xmlNode *xml_op, bool same_node,
pcmk_scheduler_t *scheduler)
{
xmlNode *lrm_resource = NULL;
lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
if (lrm_resource == NULL) {
return false;
}
for (xmlNode *op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
op != NULL; op = crm_next_same_xml(op)) {
const char * task = NULL;
if (op == xml_op) {
continue;
}
task = crm_element_value(op, XML_LRM_ATTR_TASK);
if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
NULL)
&& pe__is_newer_op(op, xml_op, same_node) > 0) {
return true;
}
}
return false;
}
/*!
* \brief Check whether the resource has newer state on a node after a migration
* attempt
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] migrate_to Any migrate_to event that is being compared to
* \param[in] migrate_from Any migrate_from event that is being compared to
* \param[in,out] scheduler Scheduler data
*
* \return true if such a operation happened after event, false otherwise
*/
static bool
newer_state_after_migrate(const char *rsc_id, const char *node_name,
const xmlNode *migrate_to,
const xmlNode *migrate_from,
pcmk_scheduler_t *scheduler)
{
const xmlNode *xml_op = migrate_to;
const char *source = NULL;
const char *target = NULL;
bool same_node = false;
if (migrate_from) {
xml_op = migrate_from;
}
source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
/* It's preferred to compare to the migrate event on the same node if
* existing, since call ids are more reliable.
*/
if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
if (migrate_from) {
xml_op = migrate_from;
same_node = true;
} else {
xml_op = migrate_to;
}
} else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
if (migrate_to) {
xml_op = migrate_to;
same_node = true;
} else {
xml_op = migrate_from;
}
}
/* If there's any newer non-monitor operation on the node, or any newer
* probe/monitor operation on the node indicating it was not running there,
* the migration events potentially no longer matter for the node.
*/
return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
|| monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
scheduler);
}
/*!
* \internal
* \brief Parse migration source and target node names from history entry
*
* \param[in] entry Resource history entry for a migration action
* \param[in] source_node If not NULL, source must match this node
* \param[in] target_node If not NULL, target must match this node
* \param[out] source_name Where to store migration source node name
* \param[out] target_name Where to store migration target node name
*
* \return Standard Pacemaker return code
*/
static int
get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
const pcmk_node_t *target_node,
const char **source_name, const char **target_name)
{
*source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE);
*target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET);
if ((*source_name == NULL) || (*target_name == NULL)) {
pcmk__config_err("Ignoring resource history entry %s without "
XML_LRM_ATTR_MIGRATE_SOURCE " and "
XML_LRM_ATTR_MIGRATE_TARGET, ID(entry));
return pcmk_rc_unpack_error;
}
if ((source_node != NULL)
&& !pcmk__str_eq(*source_name, source_node->details->uname,
pcmk__str_casei|pcmk__str_null_matches)) {
pcmk__config_err("Ignoring resource history entry %s because "
XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s",
ID(entry), *source_name, pe__node_name(source_node));
return pcmk_rc_unpack_error;
}
if ((target_node != NULL)
&& !pcmk__str_eq(*target_name, target_node->details->uname,
pcmk__str_casei|pcmk__str_null_matches)) {
pcmk__config_err("Ignoring resource history entry %s because "
XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s",
ID(entry), *target_name, pe__node_name(target_node));
return pcmk_rc_unpack_error;
}
return pcmk_rc_ok;
}
/*
* \internal
* \brief Add a migration source to a resource's list of dangling migrations
*
* If the migrate_to and migrate_from actions in a live migration both
* succeeded, but there is no stop on the source, the migration is considered
* "dangling." Add the source to the resource's dangling migration list, which
* will be used to schedule a stop on the source without affecting the target.
*
* \param[in,out] rsc Resource involved in migration
* \param[in] node Migration source
*/
static void
add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
{
pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
rsc->id, pe__node_name(node));
rsc->role = pcmk_role_stopped;
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
(gpointer) node);
}
/*!
* \internal
* \brief Update resource role etc. after a successful migrate_to action
*
* \param[in,out] history Parsed action result history
*/
static void
unpack_migrate_to_success(struct action_history *history)
{
/* A complete migration sequence is:
* 1. migrate_to on source node (which succeeded if we get to this function)
* 2. migrate_from on target node
* 3. stop on source node
*
* If no migrate_from has happened, the migration is considered to be
* "partial". If the migrate_from succeeded but no stop has happened, the
* migration is considered to be "dangling".
*
* If a successful migrate_to and stop have happened on the source node, we
* still need to check for a partial migration, due to scenarios (easier to
* produce with batch-limit=1) like:
*
* - A resource is migrating from node1 to node2, and a migrate_to is
* initiated for it on node1.
*
* - node2 goes into standby mode while the migrate_to is pending, which
* aborts the transition.
*
* - Upon completion of the migrate_to, a new transition schedules a stop
* on both nodes and a start on node1.
*
* - If the new transition is aborted for any reason while the resource is
* stopping on node1, the transition after that stop completes will see
* the migrate_to and stop on the source, but it's still a partial
* migration, and the resource must be stopped on node2 because it is
* potentially active there due to the migrate_to.
*
* We also need to take into account that either node's history may be
* cleared at any point in the migration process.
*/
int from_rc = PCMK_OCF_OK;
int from_status = PCMK_EXEC_PENDING;
pcmk_node_t *target_node = NULL;
xmlNode *migrate_from = NULL;
const char *source = NULL;
const char *target = NULL;
bool source_newer_op = false;
bool target_newer_state = false;
bool active_on_target = false;
// Get source and target node names from XML
if (get_migration_node_names(history->xml, history->node, NULL, &source,
&target) != pcmk_rc_ok) {
return;
}
// Check for newer state on the source
source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
true, history->rsc->cluster);
// Check for a migrate_from action from this source on the target
migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
target, source, -1, history->rsc->cluster);
if (migrate_from != NULL) {
if (source_newer_op) {
/* There's a newer non-monitor operation on the source and a
* migrate_from on the target, so this migrate_to is irrelevant to
* the resource's state.
*/
return;
}
crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS,
&from_status);
}
/* If the resource has newer state on both the source and target after the
* migration events, this migrate_to is irrelevant to the resource's state.
*/
target_newer_state = newer_state_after_migrate(history->rsc->id, target,
history->xml, migrate_from,
history->rsc->cluster);
if (source_newer_op && target_newer_state) {
return;
}
/* Check for dangling migration (migrate_from succeeded but stop not done).
* We know there's no stop because we already returned if the target has a
* migrate_from and the source has any newer non-monitor operation.
*/
if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
add_dangling_migration(history->rsc, history->node);
return;
}
/* Without newer state, this migrate_to implies the resource is active.
* (Clones are not allowed to migrate, so role can't be promoted.)
*/
history->rsc->role = pcmk_role_started;
target_node = pe_find_node(history->rsc->cluster->nodes, target);
active_on_target = !target_newer_state && (target_node != NULL)
&& target_node->details->online;
if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
if (active_on_target) {
native_add_running(history->rsc, target_node, history->rsc->cluster,
TRUE);
} else {
// Mark resource as failed, require recovery, and prevent migration
pe__set_resource_flags(history->rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable);
}
return;
}
// The migrate_from is pending, complete but erased, or to be scheduled
/* If there is no history at all for the resource on an online target, then
* it was likely cleaned. Just return, and we'll schedule a probe. Once we
* have the probe result, it will be reflected in target_newer_state.
*/
if ((target_node != NULL) && target_node->details->online
&& unknown_on_node(history->rsc, target)) {
return;
}
if (active_on_target) {
pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
source);
native_add_running(history->rsc, target_node, history->rsc->cluster,
FALSE);
if ((source_node != NULL) && source_node->details->online) {
/* This is a partial migration: the migrate_to completed
* successfully on the source, but the migrate_from has not
* completed. Remember the source and target; if the newly
* chosen target remains the same when we schedule actions
* later, we may continue with the migration.
*/
history->rsc->partial_migration_target = target_node;
history->rsc->partial_migration_source = source_node;
}
} else if (!source_newer_op) {
// Mark resource as failed, require recovery, and prevent migration
pe__set_resource_flags(history->rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable);
}
}
/*!
* \internal
* \brief Update resource role etc. after a failed migrate_to action
*
* \param[in,out] history Parsed action result history
*/
static void
unpack_migrate_to_failure(struct action_history *history)
{
xmlNode *target_migrate_from = NULL;
const char *source = NULL;
const char *target = NULL;
// Get source and target node names from XML
if (get_migration_node_names(history->xml, history->node, NULL, &source,
&target) != pcmk_rc_ok) {
return;
}
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be promoted.
*/
history->rsc->role = pcmk_role_started;
// Check for migrate_from on the target
target_migrate_from = find_lrm_op(history->rsc->id,
PCMK_ACTION_MIGRATE_FROM, target, source,
PCMK_OCF_OK, history->rsc->cluster);
if (/* If the resource state is unknown on the target, it will likely be
* probed there.
* Don't just consider it running there. We will get back here anyway in
* case the probe detects it's running there.
*/
!unknown_on_node(history->rsc, target)
/* If the resource has newer state on the target after the migration
* events, this migrate_to no longer matters for the target.
*/
&& !newer_state_after_migrate(history->rsc->id, target, history->xml,
target_migrate_from,
history->rsc->cluster)) {
/* The resource has no newer state on the target, so assume it's still
* active there.
* (if it is up).
*/
pcmk_node_t *target_node = pe_find_node(history->rsc->cluster->nodes,
target);
if (target_node && target_node->details->online) {
native_add_running(history->rsc, target_node, history->rsc->cluster,
FALSE);
}
} else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
history->rsc->cluster)) {
/* We know the resource has newer state on the target, but this
* migrate_to still matters for the source as long as there's no newer
* non-monitor operation there.
*/
// Mark node as having dangling migration so we can force a stop later
history->rsc->dangling_migrations =
g_list_prepend(history->rsc->dangling_migrations,
(gpointer) history->node);
}
}
/*!
* \internal
* \brief Update resource role etc. after a failed migrate_from action
*
* \param[in,out] history Parsed action result history
*/
static void
unpack_migrate_from_failure(struct action_history *history)
{
xmlNode *source_migrate_to = NULL;
const char *source = NULL;
const char *target = NULL;
// Get source and target node names from XML
if (get_migration_node_names(history->xml, NULL, history->node, &source,
&target) != pcmk_rc_ok) {
return;
}
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be promoted.
*/
history->rsc->role = pcmk_role_started;
// Check for a migrate_to on the source
source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
source, target, PCMK_OCF_OK,
history->rsc->cluster);
if (/* If the resource state is unknown on the source, it will likely be
* probed there.
* Don't just consider it running there. We will get back here anyway in
* case the probe detects it's running there.
*/
!unknown_on_node(history->rsc, source)
/* If the resource has newer state on the source after the migration
* events, this migrate_from no longer matters for the source.
*/
&& !newer_state_after_migrate(history->rsc->id, source,
source_migrate_to, history->xml,
history->rsc->cluster)) {
/* The resource has no newer state on the source, so assume it's still
* active there (if it is up).
*/
pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
source);
if (source_node && source_node->details->online) {
native_add_running(history->rsc, source_node, history->rsc->cluster,
TRUE);
}
}
}
/*!
* \internal
* \brief Add an action to cluster's list of failed actions
*
* \param[in,out] history Parsed action result history
*/
static void
record_failed_op(struct action_history *history)
{
if (!(history->node->details->online)) {
return;
}
for (const xmlNode *xIter = history->rsc->cluster->failed->children;
xIter != NULL; xIter = xIter->next) {
const char *key = pe__xe_history_key(xIter);
const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
if (pcmk__str_eq(history->key, key, pcmk__str_none)
&& pcmk__str_eq(uname, history->node->details->uname,
pcmk__str_casei)) {
crm_trace("Skipping duplicate entry %s on %s",
history->key, pe__node_name(history->node));
return;
}
}
crm_trace("Adding entry for %s on %s to failed action list",
history->key, pe__node_name(history->node));
crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
crm_xml_add(history->xml, XML_LRM_ATTR_RSCID, history->rsc->id);
add_node_copy(history->rsc->cluster->failed, history->xml);
}
static char *
last_change_str(const xmlNode *xml_op)
{
time_t when;
char *result = NULL;
if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
&when) == pcmk_ok) {
char *when_s = pcmk__epoch2str(&when, 0);
const char *p = strchr(when_s, ' ');
// Skip day of week to make message shorter
if ((p != NULL) && (*(++p) != '\0')) {
result = strdup(p);
CRM_ASSERT(result != NULL);
}
free(when_s);
}
if (result == NULL) {
result = strdup("unknown time");
CRM_ASSERT(result != NULL);
}
return result;
}
/*!
* \internal
* \brief Compare two on-fail values
*
* \param[in] first One on-fail value to compare
* \param[in] second The other on-fail value to compare
*
* \return A negative number if second is more severe than first, zero if they
* are equal, or a positive number if first is more severe than second.
* \note This is only needed until the action_fail_response values can be
* renumbered at the next API compatibility break.
*/
static int
cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
{
switch (first) {
case pcmk_on_fail_demote:
switch (second) {
case pcmk_on_fail_ignore:
return 1;
case pcmk_on_fail_demote:
return 0;
default:
return -1;
}
break;
case pcmk_on_fail_reset_remote:
switch (second) {
case pcmk_on_fail_ignore:
case pcmk_on_fail_demote:
case pcmk_on_fail_restart:
return 1;
case pcmk_on_fail_reset_remote:
return 0;
default:
return -1;
}
break;
case pcmk_on_fail_restart_container:
switch (second) {
case pcmk_on_fail_ignore:
case pcmk_on_fail_demote:
case pcmk_on_fail_restart:
case pcmk_on_fail_reset_remote:
return 1;
case pcmk_on_fail_restart_container:
return 0;
default:
return -1;
}
break;
default:
break;
}
switch (second) {
case pcmk_on_fail_demote:
return (first == pcmk_on_fail_ignore)? -1 : 1;
case pcmk_on_fail_reset_remote:
switch (first) {
case pcmk_on_fail_ignore:
case pcmk_on_fail_demote:
case pcmk_on_fail_restart:
return -1;
default:
return 1;
}
break;
case pcmk_on_fail_restart_container:
switch (first) {
case pcmk_on_fail_ignore:
case pcmk_on_fail_demote:
case pcmk_on_fail_restart:
case pcmk_on_fail_reset_remote:
return -1;
default:
return 1;
}
break;
default:
break;
}
return first - second;
}
/*!
* \internal
* \brief Ban a resource (or its clone if an anonymous instance) from all nodes
*
* \param[in,out] rsc Resource to ban
*/
static void
ban_from_all_nodes(pcmk_resource_t *rsc)
{
int score = -INFINITY;
pcmk_resource_t *fail_rsc = rsc;
if (fail_rsc->parent != NULL) {
pcmk_resource_t *parent = uber_parent(fail_rsc);
if (pe_rsc_is_anon_clone(parent)) {
/* For anonymous clones, if an operation with on-fail=stop fails for
* any instance, the entire clone must stop.
*/
fail_rsc = parent;
}
}
// Ban the resource from all nodes
crm_notice("%s will not be started under current conditions", fail_rsc->id);
if (fail_rsc->allowed_nodes != NULL) {
g_hash_table_destroy(fail_rsc->allowed_nodes);
}
fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
}
/*!
* \internal
* \brief Get configured failure handling and role after failure for an action
*
* \param[in,out] history Unpacked action history entry
* \param[out] on_fail Where to set configured failure handling
* \param[out] fail_role Where to set to role after failure
*/
static void
unpack_failure_handling(struct action_history *history,
enum action_fail_response *on_fail,
enum rsc_role_e *fail_role)
{
xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
history->interval_ms, true);
GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
history->task,
history->interval_ms, config);
const char *on_fail_str = g_hash_table_lookup(meta, XML_OP_ATTR_ON_FAIL);
*on_fail = pcmk__parse_on_fail(history->rsc, history->task,
history->interval_ms, on_fail_str);
*fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
meta);
g_hash_table_destroy(meta);
}
/*!
* \internal
* \brief Update resource role, failure handling, etc., after a failed action
*
* \param[in,out] history Parsed action result history
* \param[in] config_on_fail Action failure handling from configuration
* \param[in] fail_role Resource's role after failure of this action
* \param[out] last_failure This will be set to the history XML
* \param[in,out] on_fail Actual handling of action result
*/
static void
unpack_rsc_op_failure(struct action_history *history,
enum action_fail_response config_on_fail,
enum rsc_role_e fail_role, xmlNode **last_failure,
enum action_fail_response *on_fail)
{
bool is_probe = false;
char *last_change_s = NULL;
*last_failure = history->xml;
is_probe = pcmk_xe_is_probe(history->xml);
last_change_s = last_change_str(history->xml);
if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
&& (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
crm_trace("Unexpected result (%s%s%s) was recorded for "
"%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
services_ocf_exitcode_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""),
(is_probe? "probe" : history->task), history->rsc->id,
pe__node_name(history->node), last_change_s,
history->exit_status, history->id);
} else {
pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of "
"%s on %s at %s " CRM_XS " exit-status=%d id=%s",
services_ocf_exitcode_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""),
(is_probe? "probe" : history->task), history->rsc->id,
pe__node_name(history->node), last_change_s,
history->exit_status, history->id);
if (is_probe && (history->exit_status != PCMK_OCF_OK)
&& (history->exit_status != PCMK_OCF_NOT_RUNNING)
&& (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
/* A failed (not just unexpected) probe result could mean the user
* didn't know resources will be probed even where they can't run.
*/
crm_notice("If it is not possible for %s to run on %s, see "
"the resource-discovery option for location constraints",
history->rsc->id, pe__node_name(history->node));
}
record_failed_op(history);
}
free(last_change_s);
if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
fail2text(*on_fail), fail2text(config_on_fail),
history->key);
*on_fail = config_on_fail;
}
if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
resource_location(history->rsc, history->node, -INFINITY,
"__stop_fail__", history->rsc->cluster);
} else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
unpack_migrate_to_failure(history);
} else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
unpack_migrate_from_failure(history);
} else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
history->rsc->role = pcmk_role_promoted;
} else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
if (config_on_fail == pcmk_on_fail_block) {
history->rsc->role = pcmk_role_promoted;
pe__set_next_role(history->rsc, pcmk_role_stopped,
"demote with on-fail=block");
} else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
history->rsc->role = pcmk_role_stopped;
} else {
/* Staying in the promoted role would put the scheduler and
* controller into a loop. Setting the role to unpromoted is not
* dangerous because the resource will be stopped as part of
* recovery, and any promotion will be ordered after that stop.
*/
history->rsc->role = pcmk_role_unpromoted;
}
}
if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
/* leave stopped */
pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
history->rsc->role = pcmk_role_stopped;
} else if (history->rsc->role < pcmk_role_started) {
pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
set_active(history->rsc);
}
pcmk__rsc_trace(history->rsc,
"Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
history->rsc->id, role2text(history->rsc->role),
pcmk__btoa(history->node->details->unclean),
fail2text(config_on_fail), role2text(fail_role));
if ((fail_role != pcmk_role_started)
&& (history->rsc->next_role < fail_role)) {
pe__set_next_role(history->rsc, fail_role, "failure");
}
if (fail_role == pcmk_role_stopped) {
ban_from_all_nodes(history->rsc);
}
}
/*!
* \internal
* \brief Block a resource with a failed action if it cannot be recovered
*
* If resource action is a failed stop and fencing is not possible, mark the
* resource as unmanaged and blocked, since recovery cannot be done.
*
* \param[in,out] history Parsed action history entry
*/
static void
block_if_unrecoverable(struct action_history *history)
{
char *last_change_s = NULL;
if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
return; // All actions besides stop are always recoverable
}
if (pe_can_fence(history->node->details->data_set, history->node)) {
return; // Failed stops are recoverable via fencing
}
last_change_s = last_change_str(history->xml);
pcmk__sched_err("No further recovery can be attempted for %s "
"because %s on %s failed (%s%s%s) at %s "
CRM_XS " rc=%d id=%s",
history->rsc->id, history->task,
pe__node_name(history->node),
services_ocf_exitcode_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""),
last_change_s, history->exit_status, history->id);
free(last_change_s);
pe__clear_resource_flags(history->rsc, pcmk_rsc_managed);
pe__set_resource_flags(history->rsc, pcmk_rsc_blocked);
}
/*!
* \internal
* \brief Update action history's execution status and why
*
* \param[in,out] history Parsed action history entry
* \param[out] why Where to store reason for update
* \param[in] value New value
* \param[in] reason Description of why value was changed
*/
static inline void
remap_because(struct action_history *history, const char **why, int value,
const char *reason)
{
if (history->execution_status != value) {
history->execution_status = value;
*why = reason;
}
}
/*!
* \internal
* \brief Remap informational monitor results and operation status
*
* For the monitor results, certain OCF codes are for providing extended information
* to the user about services that aren't yet failed but not entirely healthy either.
* These must be treated as the "normal" result by Pacemaker.
*
* For operation status, the action result can be used to determine an appropriate
* status for the purposes of responding to the action. The status provided by the
* executor is not directly usable since the executor does not know what was expected.
*
* \param[in,out] history Parsed action history entry
* \param[in,out] on_fail What should be done about the result
* \param[in] expired Whether result is expired
*
* \note If the result is remapped and the node is not shutting down or failed,
* the operation will be recorded in the scheduler data's list of failed
* operations to highlight it for the user.
*
* \note This may update the resource's current and next role.
*/
static void
remap_operation(struct action_history *history,
enum action_fail_response *on_fail, bool expired)
{
bool is_probe = false;
int orig_exit_status = history->exit_status;
int orig_exec_status = history->execution_status;
const char *why = NULL;
const char *task = history->task;
// Remap degraded results to their successful counterparts
history->exit_status = pcmk__effective_rc(history->exit_status);
if (history->exit_status != orig_exit_status) {
why = "degraded result";
if (!expired && (!history->node->details->shutdown
|| history->node->details->online)) {
record_failed_op(history);
}
}
if (!pe_rsc_is_bundled(history->rsc)
&& pcmk_xe_mask_probe_failure(history->xml)
&& ((history->execution_status != PCMK_EXEC_DONE)
|| (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
history->execution_status = PCMK_EXEC_DONE;
history->exit_status = PCMK_OCF_NOT_RUNNING;
why = "equivalent probe result";
}
/* If the executor reported an execution status of anything but done or
* error, consider that final. But for done or error, we know better whether
* it should be treated as a failure or not, because we know the expected
* result.
*/
switch (history->execution_status) {
case PCMK_EXEC_DONE:
case PCMK_EXEC_ERROR:
break;
// These should be treated as node-fatal
case PCMK_EXEC_NO_FENCE_DEVICE:
case PCMK_EXEC_NO_SECRETS:
remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
"node-fatal error");
goto remap_done;
default:
goto remap_done;
}
is_probe = pcmk_xe_is_probe(history->xml);
if (is_probe) {
task = "probe";
}
if (history->expected_exit_status < 0) {
/* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
* Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
* expected exit status in the transition key, which (along with the
* similar case of a corrupted transition key in the CIB) will be
* reported to this function as -1. Pacemaker 2.0+ does not support
* rolling upgrades from those versions or processing of saved CIB files
* from those versions, so we do not need to care much about this case.
*/
remap_because(history, &why, PCMK_EXEC_ERROR,
"obsolete history format");
pcmk__config_warn("Expected result not found for %s on %s "
"(corrupt or obsolete CIB?)",
history->key, pe__node_name(history->node));
} else if (history->exit_status == history->expected_exit_status) {
remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
} else {
remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
pcmk__rsc_debug(history->rsc,
"%s on %s: expected %d (%s), got %d (%s%s%s)",
history->key, pe__node_name(history->node),
history->expected_exit_status,
services_ocf_exitcode_str(history->expected_exit_status),
history->exit_status,
services_ocf_exitcode_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""));
}
switch (history->exit_status) {
case PCMK_OCF_OK:
if (is_probe
&& (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
char *last_change_s = last_change_str(history->xml);
remap_because(history, &why, PCMK_EXEC_DONE, "probe");
pcmk__rsc_info(history->rsc,
"Probe found %s active on %s at %s",
history->rsc->id, pe__node_name(history->node),
last_change_s);
free(last_change_s);
}
break;
case PCMK_OCF_NOT_RUNNING:
if (is_probe
|| (history->expected_exit_status == history->exit_status)
|| !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
/* For probes, recurring monitors for the Stopped role, and
* unmanaged resources, "not running" is not considered a
* failure.
*/
remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
history->rsc->role = pcmk_role_stopped;
*on_fail = pcmk_on_fail_ignore;
pe__set_next_role(history->rsc, pcmk_role_unknown,
"not running");
}
break;
case PCMK_OCF_RUNNING_PROMOTED:
if (is_probe
&& (history->exit_status != history->expected_exit_status)) {
char *last_change_s = last_change_str(history->xml);
remap_because(history, &why, PCMK_EXEC_DONE, "probe");
pcmk__rsc_info(history->rsc,
"Probe found %s active and promoted on %s at %s",
history->rsc->id, pe__node_name(history->node),
last_change_s);
free(last_change_s);
}
if (!expired
|| (history->exit_status == history->expected_exit_status)) {
history->rsc->role = pcmk_role_promoted;
}
break;
case PCMK_OCF_FAILED_PROMOTED:
if (!expired) {
history->rsc->role = pcmk_role_promoted;
}
remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
break;
case PCMK_OCF_NOT_CONFIGURED:
remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
break;
case PCMK_OCF_UNIMPLEMENT_FEATURE:
{
guint interval_ms = 0;
crm_element_value_ms(history->xml, XML_LRM_ATTR_INTERVAL_MS,
&interval_ms);
if (interval_ms == 0) {
if (!expired) {
block_if_unrecoverable(history);
}
remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
"exit status");
} else {
remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
"exit status");
}
}
break;
case PCMK_OCF_NOT_INSTALLED:
case PCMK_OCF_INVALID_PARAM:
case PCMK_OCF_INSUFFICIENT_PRIV:
if (!expired) {
block_if_unrecoverable(history);
}
remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
break;
default:
if (history->execution_status == PCMK_EXEC_DONE) {
char *last_change_s = last_change_str(history->xml);
crm_info("Treating unknown exit status %d from %s of %s "
"on %s at %s as failure",
history->exit_status, task, history->rsc->id,
pe__node_name(history->node), last_change_s);
remap_because(history, &why, PCMK_EXEC_ERROR,
"unknown exit status");
free(last_change_s);
}
break;
}
remap_done:
if (why != NULL) {
pcmk__rsc_trace(history->rsc,
"Remapped %s result from [%s: %s] to [%s: %s] "
"because of %s",
history->key, pcmk_exec_status_str(orig_exec_status),
crm_exit_str(orig_exit_status),
pcmk_exec_status_str(history->execution_status),
crm_exit_str(history->exit_status), why);
}
}
// return TRUE if start or monitor last failure but parameters changed
static bool
should_clear_for_param_change(const xmlNode *xml_op, const char *task,
pcmk_resource_t *rsc, pcmk_node_t *node)
{
if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
if (pe__bundle_needs_remote_name(rsc)) {
/* We haven't allocated resources yet, so we can't reliably
* substitute addr parameters for the REMOTE_CONTAINER_HACK.
* When that's needed, defer the check until later.
*/
pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
rsc->cluster);
} else {
pcmk__op_digest_t *digest_data = NULL;
digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
rsc->cluster);
switch (digest_data->rc) {
case pcmk__digest_unknown:
crm_trace("Resource %s history entry %s on %s"
" has no digest to compare",
rsc->id, pe__xe_history_key(xml_op),
node->details->id);
break;
case pcmk__digest_match:
break;
default:
return TRUE;
}
}
}
return FALSE;
}
// Order action after fencing of remote node, given connection rsc
static void
order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
pcmk_scheduler_t *scheduler)
{
pcmk_node_t *remote_node = pe_find_node(scheduler->nodes, remote_conn->id);
if (remote_node) {
pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
FALSE, scheduler);
order_actions(fence, action, pcmk__ar_first_implies_then);
}
}
static bool
should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
guint interval_ms, bool is_last_failure)
{
/* Clearing failures of recurring monitors has special concerns. The
* executor reports only changes in the monitor result, so if the
* monitor is still active and still getting the same failure result,
* that will go undetected after the failure is cleared.
*
* Also, the operation history will have the time when the recurring
* monitor result changed to the given code, not the time when the
* result last happened.
*
* @TODO We probably should clear such failures only when the failure
* timeout has passed since the last occurrence of the failed result.
* However we don't record that information. We could maybe approximate
* that by clearing only if there is a more recent successful monitor or
* stop result, but we don't even have that information at this point
* since we are still unpacking the resource's operation history.
*
* This is especially important for remote connection resources with a
* reconnect interval, so in that case, we skip clearing failures
* if the remote node hasn't been fenced.
*/
if (rsc->remote_reconnect_ms
&& pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
&& (interval_ms != 0)
&& pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
pcmk_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id);
if (remote_node && !remote_node->details->remote_was_fenced) {
if (is_last_failure) {
crm_info("Waiting to clear monitor failure for remote node %s"
" until fencing has occurred", rsc->id);
}
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Check operation age and schedule failure clearing when appropriate
*
* This function has two distinct purposes. The first is to check whether an
* operation history entry is expired (i.e. the resource has a failure timeout,
* the entry is older than the timeout, and the resource either has no fail
* count or its fail count is entirely older than the timeout). The second is to
* schedule fail count clearing when appropriate (i.e. the operation is expired
* and either the resource has an expired fail count or the operation is a
* last_failure for a remote connection resource with a reconnect interval,
* or the operation is a last_failure for a start or monitor operation and the
* resource's parameters have changed since the operation).
*
* \param[in,out] history Parsed action result history
*
* \return true if operation history entry is expired, otherwise false
*/
static bool
check_operation_expiry(struct action_history *history)
{
bool expired = false;
bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
time_t last_run = 0;
int unexpired_fail_count = 0;
const char *clear_reason = NULL;
if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
pcmk__rsc_trace(history->rsc,
"Resource history entry %s on %s is not expired: "
"Not Installed does not expire",
history->id, pe__node_name(history->node));
return false; // "Not installed" must always be cleared manually
}
if ((history->rsc->failure_timeout > 0)
&& (crm_element_value_epoch(history->xml, XML_RSC_OP_LAST_CHANGE,
&last_run) == 0)) {
// Resource has a failure-timeout, and history entry has a timestamp
time_t now = get_effective_time(history->rsc->cluster);
time_t last_failure = 0;
// Is this particular operation history older than the failure timeout?
if ((now >= (last_run + history->rsc->failure_timeout))
&& !should_ignore_failure_timeout(history->rsc, history->task,
history->interval_ms,
is_last_failure)) {
expired = true;
}
// Does the resource as a whole have an unexpired fail count?
unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
&last_failure,
pcmk__fc_effective,
history->xml);
// Update scheduler recheck time according to *last* failure
crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
" last-failure@%lld",
history->id, (long long) last_run, (expired? "" : "not "),
(long long) now, unexpired_fail_count,
history->rsc->failure_timeout, (long long) last_failure);
last_failure += history->rsc->failure_timeout + 1;
if (unexpired_fail_count && (now < last_failure)) {
pe__update_recheck_time(last_failure, history->rsc->cluster,
"fail count expiration");
}
}
if (expired) {
if (pe_get_failcount(history->node, history->rsc, NULL,
pcmk__fc_default, history->xml)) {
// There is a fail count ignoring timeout
if (unexpired_fail_count == 0) {
// There is no fail count considering timeout
clear_reason = "it expired";
} else {
/* This operation is old, but there is an unexpired fail count.
* In a properly functioning cluster, this should only be
* possible if this operation is not a failure (otherwise the
* fail count should be expired too), so this is really just a
* failsafe.
*/
pcmk__rsc_trace(history->rsc,
"Resource history entry %s on %s is not "
"expired: Unexpired fail count",
history->id, pe__node_name(history->node));
expired = false;
}
} else if (is_last_failure
&& (history->rsc->remote_reconnect_ms != 0)) {
/* Clear any expired last failure when reconnect interval is set,
* even if there is no fail count.
*/
clear_reason = "reconnect interval is set";
}
}
if (!expired && is_last_failure
&& should_clear_for_param_change(history->xml, history->task,
history->rsc, history->node)) {
clear_reason = "resource parameters have changed";
}
if (clear_reason != NULL) {
pcmk_action_t *clear_op = NULL;
// Schedule clearing of the fail count
clear_op = pe__clear_failcount(history->rsc, history->node,
clear_reason, history->rsc->cluster);
if (pcmk_is_set(history->rsc->cluster->flags,
pcmk_sched_fencing_enabled)
&& (history->rsc->remote_reconnect_ms != 0)) {
/* If we're clearing a remote connection due to a reconnect
* interval, we want to wait until any scheduled fencing
* completes.
*
* We could limit this to remote_node->details->unclean, but at
* this point, that's always true (it won't be reliable until
* after unpack_node_history() is done).
*/
crm_info("Clearing %s failure will wait until any scheduled "
"fencing of %s completes",
history->task, history->rsc->id);
order_after_remote_fencing(clear_op, history->rsc,
history->rsc->cluster);
}
}
if (expired && (history->interval_ms == 0)
&& pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
switch (history->exit_status) {
case PCMK_OCF_OK:
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_PROMOTED:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_PROMOTED:
// Don't expire probes that return these values
pcmk__rsc_trace(history->rsc,
"Resource history entry %s on %s is not "
"expired: Probe result",
history->id, pe__node_name(history->node));
expired = false;
break;
}
}
return expired;
}
int
pe__target_rc_from_xml(const xmlNode *xml_op)
{
int target_rc = 0;
const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
if (key == NULL) {
return -1;
}
decode_transition_key(key, NULL, NULL, NULL, &target_rc);
return target_rc;
}
/*!
* \internal
* \brief Update a resource's state for an action result
*
* \param[in,out] history Parsed action history entry
* \param[in] exit_status Exit status to base new state on
* \param[in] last_failure Resource's last_failure entry, if known
* \param[in,out] on_fail Resource's current failure handling
*/
static void
update_resource_state(struct action_history *history, int exit_status,
const xmlNode *last_failure,
enum action_fail_response *on_fail)
{
bool clear_past_failure = false;
if ((exit_status == PCMK_OCF_NOT_INSTALLED)
|| (!pe_rsc_is_bundled(history->rsc)
&& pcmk_xe_mask_probe_failure(history->xml))) {
history->rsc->role = pcmk_role_stopped;
} else if (exit_status == PCMK_OCF_NOT_RUNNING) {
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
pcmk__str_none)) {
if ((last_failure != NULL)
&& pcmk__str_eq(history->key, pe__xe_history_key(last_failure),
pcmk__str_none)) {
clear_past_failure = true;
}
if (history->rsc->role < pcmk_role_started) {
set_active(history->rsc);
}
} else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
history->rsc->role = pcmk_role_started;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
history->rsc->role = pcmk_role_stopped;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
pcmk__str_none)) {
history->rsc->role = pcmk_role_promoted;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
pcmk__str_none)) {
if (*on_fail == pcmk_on_fail_demote) {
// Demote clears an error only if on-fail=demote
clear_past_failure = true;
}
history->rsc->role = pcmk_role_unpromoted;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_none)) {
history->rsc->role = pcmk_role_started;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
pcmk__str_none)) {
unpack_migrate_to_success(history);
} else if (history->rsc->role < pcmk_role_started) {
pcmk__rsc_trace(history->rsc, "%s active on %s",
history->rsc->id, pe__node_name(history->node));
set_active(history->rsc);
}
if (!clear_past_failure) {
return;
}
switch (*on_fail) {
case pcmk_on_fail_stop:
case pcmk_on_fail_ban:
case pcmk_on_fail_standby_node:
case pcmk_on_fail_fence_node:
pcmk__rsc_trace(history->rsc,
"%s (%s) is not cleared by a completed %s",
history->rsc->id, fail2text(*on_fail),
history->task);
break;
case pcmk_on_fail_block:
case pcmk_on_fail_ignore:
case pcmk_on_fail_demote:
case pcmk_on_fail_restart:
case pcmk_on_fail_restart_container:
*on_fail = pcmk_on_fail_ignore;
pe__set_next_role(history->rsc, pcmk_role_unknown,
"clear past failures");
break;
case pcmk_on_fail_reset_remote:
if (history->rsc->remote_reconnect_ms == 0) {
/* With no reconnect interval, the connection is allowed to
* start again after the remote node is fenced and
* completely stopped. (With a reconnect interval, we wait
* for the failure to be cleared entirely before attempting
* to reconnect.)
*/
*on_fail = pcmk_on_fail_ignore;
pe__set_next_role(history->rsc, pcmk_role_unknown,
"clear past failures and reset remote");
}
break;
}
}
/*!
* \internal
* \brief Check whether a given history entry matters for resource state
*
* \param[in] history Parsed action history entry
*
* \return true if action can affect resource state, otherwise false
*/
static inline bool
can_affect_state(struct action_history *history)
{
#if 0
/* @COMPAT It might be better to parse only actions we know we're interested
* in, rather than exclude a couple we don't. However that would be a
* behavioral change that should be done at a major or minor series release.
* Currently, unknown operations can affect whether a resource is considered
* active and/or failed.
*/
return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
PCMK_ACTION_START, PCMK_ACTION_STOP,
PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
"asyncmon", NULL);
#else
return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
PCMK_ACTION_META_DATA, NULL);
#endif
}
/*!
* \internal
* \brief Unpack execution/exit status and exit reason from a history entry
*
* \param[in,out] history Action history entry to unpack
*
* \return Standard Pacemaker return code
*/
static int
unpack_action_result(struct action_history *history)
{
if ((crm_element_value_int(history->xml, XML_LRM_ATTR_OPSTATUS,
&(history->execution_status)) < 0)
|| (history->execution_status < PCMK_EXEC_PENDING)
|| (history->execution_status > PCMK_EXEC_MAX)
|| (history->execution_status == PCMK_EXEC_CANCELLED)) {
pcmk__config_err("Ignoring resource history entry %s for %s on %s "
"with invalid " XML_LRM_ATTR_OPSTATUS " '%s'",
history->id, history->rsc->id,
pe__node_name(history->node),
pcmk__s(crm_element_value(history->xml,
XML_LRM_ATTR_OPSTATUS),
""));
return pcmk_rc_unpack_error;
}
if ((crm_element_value_int(history->xml, XML_LRM_ATTR_RC,
&(history->exit_status)) < 0)
|| (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
#if 0
/* @COMPAT We should ignore malformed entries, but since that would
* change behavior, it should be done at a major or minor series
* release.
*/
pcmk__config_err("Ignoring resource history entry %s for %s on %s "
"with invalid " XML_LRM_ATTR_RC " '%s'",
history->id, history->rsc->id,
pe__node_name(history->node),
pcmk__s(crm_element_value(history->xml,
XML_LRM_ATTR_RC),
""));
return pcmk_rc_unpack_error;
#else
history->exit_status = CRM_EX_ERROR;
#endif
}
history->exit_reason = crm_element_value(history->xml,
XML_LRM_ATTR_EXIT_REASON);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Process an action history entry whose result expired
*
* \param[in,out] history Parsed action history entry
* \param[in] orig_exit_status Action exit status before remapping
*
* \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
* entry needs no further processing)
*/
static int
process_expired_result(struct action_history *history, int orig_exit_status)
{
if (!pe_rsc_is_bundled(history->rsc)
&& pcmk_xe_mask_probe_failure(history->xml)
&& (orig_exit_status != history->expected_exit_status)) {
if (history->rsc->role <= pcmk_role_stopped) {
history->rsc->role = pcmk_role_unknown;
}
crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
"Masked failure expired",
history->id, history->rsc->id,
pe__node_name(history->node));
return pcmk_rc_ok;
}
if (history->exit_status == history->expected_exit_status) {
return pcmk_rc_undetermined; // Only failures expire
}
if (history->interval_ms == 0) {
crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
"Expired failure",
history->id, history->task, history->rsc->id,
pe__node_name(history->node));
return pcmk_rc_ok;
}
if (history->node->details->online && !history->node->details->unclean) {
/* Reschedule the recurring action. schedule_cancel() won't work at
* this stage, so as a hacky workaround, forcibly change the restart
* digest so pcmk__check_action_config() does what we want later.
*
* @TODO We should skip this if there is a newer successful monitor.
* Also, this causes rescheduling only if the history entry
* has an op-digest (which the expire-non-blocked-failure
* scheduler regression test doesn't, but that may not be a
* realistic scenario in production).
*/
crm_notice("Rescheduling %s-interval %s of %s on %s "
"after failure expired",
pcmk__readable_interval(history->interval_ms), history->task,
history->rsc->id, pe__node_name(history->node));
crm_xml_add(history->xml, XML_LRM_ATTR_RESTART_DIGEST,
"calculated-failure-timeout");
return pcmk_rc_ok;
}
return pcmk_rc_undetermined;
}
/*!
* \internal
* \brief Process a masked probe failure
*
* \param[in,out] history Parsed action history entry
* \param[in] orig_exit_status Action exit status before remapping
* \param[in] last_failure Resource's last_failure entry, if known
* \param[in,out] on_fail Resource's current failure handling
*/
static void
mask_probe_failure(struct action_history *history, int orig_exit_status,
const xmlNode *last_failure,
enum action_fail_response *on_fail)
{
pcmk_resource_t *ban_rsc = history->rsc;
if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
ban_rsc = uber_parent(history->rsc);
}
crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
pe__node_name(history->node));
update_resource_state(history, history->expected_exit_status, last_failure,
on_fail);
crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
record_failed_op(history);
resource_location(ban_rsc, history->node, -INFINITY, "masked-probe-failure",
history->rsc->cluster);
}
/*!
* \internal Check whether a given failure is for a given pending action
*
* \param[in] history Parsed history entry for pending action
* \param[in] last_failure Resource's last_failure entry, if known
*
* \return true if \p last_failure is failure of pending action in \p history,
* otherwise false
* \note Both \p history and \p last_failure must come from the same
* lrm_resource block, as node and resource are assumed to be the same.
*/
static bool
failure_is_newer(const struct action_history *history,
const xmlNode *last_failure)
{
guint failure_interval_ms = 0U;
long long failure_change = 0LL;
long long this_change = 0LL;
if (last_failure == NULL) {
return false; // Resource has no last_failure entry
}
if (!pcmk__str_eq(history->task,
crm_element_value(last_failure, XML_LRM_ATTR_TASK),
pcmk__str_none)) {
return false; // last_failure is for different action
}
if ((crm_element_value_ms(last_failure, XML_LRM_ATTR_INTERVAL_MS,
&failure_interval_ms) != pcmk_ok)
|| (history->interval_ms != failure_interval_ms)) {
return false; // last_failure is for action with different interval
}
if ((pcmk__scan_ll(crm_element_value(history->xml, XML_RSC_OP_LAST_CHANGE),
&this_change, 0LL) != pcmk_rc_ok)
|| (pcmk__scan_ll(crm_element_value(last_failure,
XML_RSC_OP_LAST_CHANGE),
&failure_change, 0LL) != pcmk_rc_ok)
|| (failure_change < this_change)) {
return false; // Failure is not known to be newer
}
return true;
}
/*!
* \internal
* \brief Update a resource's role etc. for a pending action
*
* \param[in,out] history Parsed history entry for pending action
* \param[in] last_failure Resource's last_failure entry, if known
*/
static void
process_pending_action(struct action_history *history,
const xmlNode *last_failure)
{
/* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
* and there might be a RSC_monitor_INTERVAL entry with the last successful
* or pending result.
*
* If last_failure contains the failure of the pending recurring monitor
* we're processing here, and is newer, the action is no longer pending.
* (Pending results have call ID -1, which sorts last, so the last failure
* if any should be known.)
*/
if (failure_is_newer(history, last_failure)) {
return;
}
if (strcmp(history->task, PCMK_ACTION_START) == 0) {
pe__set_resource_flags(history->rsc, pcmk_rsc_start_pending);
set_active(history->rsc);
} else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
history->rsc->role = pcmk_role_promoted;
} else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
&& history->node->details->unclean) {
/* A migrate_to action is pending on a unclean source, so force a stop
* on the target.
*/
const char *migrate_target = NULL;
pcmk_node_t *target = NULL;
migrate_target = crm_element_value(history->xml,
XML_LRM_ATTR_MIGRATE_TARGET);
target = pe_find_node(history->rsc->cluster->nodes, migrate_target);
if (target != NULL) {
stop_action(history->rsc, target, FALSE);
}
}
if (history->rsc->pending_task != NULL) {
/* There should never be multiple pending actions, but as a failsafe,
* just remember the first one processed for display purposes.
*/
return;
}
if (pcmk_is_probe(history->task, history->interval_ms)) {
/* Pending probes are currently never displayed, even if pending
* operations are requested. If we ever want to change that,
* enable the below and the corresponding part of
* native.c:native_pending_task().
*/
#if 0
history->rsc->pending_task = strdup("probe");
history->rsc->pending_node = history->node;
#endif
} else {
history->rsc->pending_task = strdup(history->task);
history->rsc->pending_node = history->node;
}
}
static void
unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
xmlNode **last_failure, enum action_fail_response *on_fail)
{
int old_rc = 0;
bool expired = false;
pcmk_resource_t *parent = rsc;
enum rsc_role_e fail_role = pcmk_role_unknown;
enum action_fail_response failure_strategy = pcmk_on_fail_restart;
struct action_history history = {
.rsc = rsc,
.node = node,
.xml = xml_op,
.execution_status = PCMK_EXEC_UNKNOWN,
};
CRM_CHECK(rsc && node && xml_op, return);
history.id = ID(xml_op);
if (history.id == NULL) {
pcmk__config_err("Ignoring resource history entry for %s on %s "
"without ID", rsc->id, pe__node_name(node));
return;
}
// Task and interval
history.task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
if (history.task == NULL) {
pcmk__config_err("Ignoring resource history entry %s for %s on %s "
"without " XML_LRM_ATTR_TASK,
history.id, rsc->id, pe__node_name(node));
return;
}
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS,
&(history.interval_ms));
if (!can_affect_state(&history)) {
pcmk__rsc_trace(rsc,
"Ignoring resource history entry %s for %s on %s "
"with irrelevant action '%s'",
history.id, rsc->id, pe__node_name(node), history.task);
return;
}
if (unpack_action_result(&history) != pcmk_rc_ok) {
return; // Error already logged
}
history.expected_exit_status = pe__target_rc_from_xml(xml_op);
history.key = pe__xe_history_key(xml_op);
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &(history.call_id));
pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
history.id, history.task, history.call_id,
pe__node_name(node),
pcmk_exec_status_str(history.execution_status),
crm_exit_str(history.exit_status));
if (node->details->unclean) {
pcmk__rsc_trace(rsc,
"%s is running on %s, which is unclean (further action "
"depends on value of stop's on-fail attribute)",
rsc->id, pe__node_name(node));
}
expired = check_operation_expiry(&history);
old_rc = history.exit_status;
remap_operation(&history, on_fail, expired);
if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
goto done;
}
if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
mask_probe_failure(&history, old_rc, *last_failure, on_fail);
goto done;
}
if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
parent = uber_parent(rsc);
}
switch (history.execution_status) {
case PCMK_EXEC_PENDING:
process_pending_action(&history, *last_failure);
goto done;
case PCMK_EXEC_DONE:
update_resource_state(&history, history.exit_status, *last_failure,
on_fail);
goto done;
case PCMK_EXEC_NOT_INSTALLED:
unpack_failure_handling(&history, &failure_strategy, &fail_role);
if (failure_strategy == pcmk_on_fail_ignore) {
crm_warn("Cannot ignore failed %s of %s on %s: "
"Resource agent doesn't exist "
CRM_XS " status=%d rc=%d id=%s",
history.task, rsc->id, pe__node_name(node),
history.execution_status, history.exit_status,
history.id);
/* Also for printing it as "FAILED" by marking it as
* pcmk_rsc_failed later
*/
*on_fail = pcmk_on_fail_ban;
}
resource_location(parent, node, -INFINITY, "hard-error",
rsc->cluster);
unpack_rsc_op_failure(&history, failure_strategy, fail_role,
last_failure, on_fail);
goto done;
case PCMK_EXEC_NOT_CONNECTED:
if (pe__is_guest_or_remote_node(node)
&& pcmk_is_set(node->details->remote_rsc->flags,
pcmk_rsc_managed)) {
/* We should never get into a situation where a managed remote
* connection resource is considered OK but a resource action
* behind the connection gets a "not connected" status. But as a
* fail-safe in case a bug or unusual circumstances do lead to
* that, ensure the remote connection is considered failed.
*/
pe__set_resource_flags(node->details->remote_rsc,
pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
}
break; // Not done, do error handling
case PCMK_EXEC_ERROR:
case PCMK_EXEC_ERROR_HARD:
case PCMK_EXEC_ERROR_FATAL:
case PCMK_EXEC_TIMEOUT:
case PCMK_EXEC_NOT_SUPPORTED:
case PCMK_EXEC_INVALID:
break; // Not done, do error handling
default: // No other value should be possible at this point
break;
}
unpack_failure_handling(&history, &failure_strategy, &fail_role);
if ((failure_strategy == pcmk_on_fail_ignore)
|| ((failure_strategy == pcmk_on_fail_restart_container)
&& (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
char *last_change_s = last_change_str(xml_op);
crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
CRM_XS " %s",
history.task, services_ocf_exitcode_str(history.exit_status),
(pcmk__str_empty(history.exit_reason)? "" : ": "),
pcmk__s(history.exit_reason, ""), rsc->id, pe__node_name(node),
last_change_s, history.id);
free(last_change_s);
update_resource_state(&history, history.expected_exit_status,
*last_failure, on_fail);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
pe__set_resource_flags(rsc, pcmk_rsc_ignore_failure);
record_failed_op(&history);
if ((failure_strategy == pcmk_on_fail_restart_container)
&& cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
*on_fail = failure_strategy;
}
} else {
unpack_rsc_op_failure(&history, failure_strategy, fail_role,
last_failure, on_fail);
if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
uint8_t log_level = LOG_ERR;
if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
log_level = LOG_NOTICE;
}
do_crm_log(log_level,
"Preventing %s from restarting on %s because "
"of hard failure (%s%s%s) " CRM_XS " %s",
parent->id, pe__node_name(node),
services_ocf_exitcode_str(history.exit_status),
(pcmk__str_empty(history.exit_reason)? "" : ": "),
pcmk__s(history.exit_reason, ""), history.id);
resource_location(parent, node, -INFINITY, "hard-error",
rsc->cluster);
} else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
pcmk__sched_err("Preventing %s from restarting anywhere because "
"of fatal failure (%s%s%s) " CRM_XS " %s",
parent->id,
services_ocf_exitcode_str(history.exit_status),
(pcmk__str_empty(history.exit_reason)? "" : ": "),
pcmk__s(history.exit_reason, ""), history.id);
resource_location(parent, NULL, -INFINITY, "fatal-error",
rsc->cluster);
}
}
done:
pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
rsc->id, pe__node_name(node), history.id,
role2text(rsc->role), role2text(rsc->next_role));
}
static void
add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
pcmk_scheduler_t *scheduler)
{
const char *cluster_name = NULL;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = pcmk_role_unknown,
.now = scheduler->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
strdup(node->details->id));
if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
scheduler->dc_node = node;
node->details->is_dc = TRUE;
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
} else {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
}
cluster_name = g_hash_table_lookup(scheduler->config_hash,
PCMK_OPT_CLUSTER_NAME);
if (cluster_name) {
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
strdup(cluster_name));
}
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data,
node->details->attrs, NULL, overwrite,
scheduler);
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data,
node->details->utilization, NULL,
FALSE, scheduler);
if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
const char *site_name = pe_node_attribute_raw(node, "site-name");
if (site_name) {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(site_name));
} else if (cluster_name) {
/* Default to cluster-name if unset */
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(cluster_name));
}
}
}
static GList *
extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
{
int counter = -1;
int stop_index = -1;
int start_index = -1;
xmlNode *rsc_op = NULL;
GList *gIter = NULL;
GList *op_list = NULL;
GList *sorted_op_list = NULL;
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for (rsc_op = pcmk__xe_first_child(rsc_entry);
rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP,
pcmk__str_none)) {
crm_xml_add(rsc_op, "resource", rsc);
crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
/* create active recurring operations as optional */
if (active_filter == FALSE) {
return sorted_op_list;
}
op_list = NULL;
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
if (start_index < stop_index) {
crm_trace("Skipping %s: not active", ID(rsc_entry));
break;
} else if (counter < start_index) {
crm_trace("Skipping %s: old", ID(rsc_op));
continue;
}
op_list = g_list_append(op_list, rsc_op);
}
g_list_free(sorted_op_list);
return op_list;
}
GList *
find_operations(const char *rsc, const char *node, gboolean active_filter,
pcmk_scheduler_t *scheduler)
{
GList *output = NULL;
GList *intermediate = NULL;
xmlNode *tmp = NULL;
xmlNode *status = find_xml_node(scheduler->input, XML_CIB_TAG_STATUS, TRUE);
pcmk_node_t *this_node = NULL;
xmlNode *node_state = NULL;
for (node_state = pcmk__xe_first_child(status); node_state != NULL;
node_state = pcmk__xe_next(node_state)) {
if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
continue;
}
this_node = pe_find_node(scheduler->nodes, uname);
if(this_node == NULL) {
CRM_LOG_ASSERT(this_node != NULL);
continue;
} else if (pe__is_guest_or_remote_node(this_node)) {
determine_remote_online_status(scheduler, this_node);
} else {
determine_online_status(node_state, this_node, scheduler);
}
if (this_node->details->online
|| pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
/* offline nodes run no resources...
* unless stonith is enabled in which case we need to
* make sure rsc start events happen after the stonith
*/
xmlNode *lrm_rsc = NULL;
tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
for (lrm_rsc = pcmk__xe_first_child(tmp); lrm_rsc != NULL;
lrm_rsc = pcmk__xe_next(lrm_rsc)) {
if (pcmk__str_eq((const char *)lrm_rsc->name,
XML_LRM_TAG_RESOURCE, pcmk__str_none)) {
const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
continue;
}
intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
output = g_list_concat(output, intermediate);
}
}
}
}
}
return output;
}
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
index f12923e3be..39791f60e0 100644
--- a/tools/crm_mon.c
+++ b/tools/crm_mon.c
@@ -1,2287 +1,2287 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <signal.h>
#include <sys/utsname.h>
#include <crm/msg_xml.h>
#include <crm/services.h>
#include <crm/lrmd.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/internal.h> // pcmk__ends_with_ext()
#include <crm/common/ipc.h>
#include <crm/common/mainloop.h>
#include <crm/common/output.h>
#include <crm/common/output_internal.h>
#include <crm/common/results.h>
#include <crm/common/util.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/cib/internal.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <pacemaker-internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include "crm_mon.h"
#define SUMMARY "Provides a summary of cluster's current state.\n\n" \
"Outputs varying levels of detail in a number of different formats."
/*
* Definitions indicating which items to print
*/
static uint32_t show;
static uint32_t show_opts = pcmk_show_pending;
/*
* Definitions indicating how to output
*/
static mon_output_format_t output_format = mon_output_unset;
/* other globals */
static GIOChannel *io_channel = NULL;
static GMainLoop *mainloop = NULL;
static guint reconnect_timer = 0;
static mainloop_timer_t *refresh_timer = NULL;
static enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid;
static cib_t *cib = NULL;
static stonith_t *st = NULL;
static xmlNode *current_cib = NULL;
static GError *error = NULL;
static pcmk__common_args_t *args = NULL;
static pcmk__output_t *out = NULL;
static GOptionContext *context = NULL;
static gchar **processed_args = NULL;
static time_t last_refresh = 0;
volatile crm_trigger_t *refresh_trigger = NULL;
static enum pcmk__fence_history fence_history = pcmk__fence_history_none;
int interactive_fence_level = 0;
static pcmk__supported_format_t formats[] = {
#if CURSES_ENABLED
CRM_MON_SUPPORTED_FORMAT_CURSES,
#endif
PCMK__SUPPORTED_FORMAT_HTML,
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
"enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_default(pcmk__output_t *out, va_list args)
{
return pcmk_rc_no_output;
}
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
"enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_html(pcmk__output_t *out, va_list args)
{
const char *desc = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
if (out->dest != stdout) {
out->reset(out);
}
pcmk__output_create_xml_text_node(out, "span", "Not connected to CIB");
if (desc != NULL) {
pcmk__output_create_xml_text_node(out, "span", ": ");
pcmk__output_create_xml_text_node(out, "span", desc);
}
if (state != pcmk_pacemakerd_state_invalid) {
const char *state_s = pcmk__pcmkd_state_enum2friendly(state);
pcmk__output_create_xml_text_node(out, "span", " (");
pcmk__output_create_xml_text_node(out, "span", state_s);
pcmk__output_create_xml_text_node(out, "span", ")");
}
out->finish(out, CRM_EX_DISCONNECT, true, NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
"enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_text(pcmk__output_t *out, va_list args)
{
const char *desc = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
int rc = pcmk_rc_ok;
if (out->dest != stdout) {
out->reset(out);
}
if (state != pcmk_pacemakerd_state_invalid) {
rc = out->info(out, "Not connected to CIB%s%s (%s)",
(desc != NULL)? ": " : "", pcmk__s(desc, ""),
pcmk__pcmkd_state_enum2friendly(state));
} else {
rc = out->info(out, "Not connected to CIB%s%s",
(desc != NULL)? ": " : "", pcmk__s(desc, ""));
}
out->finish(out, CRM_EX_DISCONNECT, true, NULL);
return rc;
}
PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *",
"enum pcmk_pacemakerd_state")
static int
crm_mon_disconnected_xml(pcmk__output_t *out, va_list args)
{
const char *desc = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
const char *state_s = NULL;
if (out->dest != stdout) {
out->reset(out);
}
if (state != pcmk_pacemakerd_state_invalid) {
state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state);
}
pcmk__output_create_xml_node(out, "crm-mon-disconnected",
XML_ATTR_DESC, desc,
"pacemakerd-state", state_s,
NULL);
out->finish(out, CRM_EX_DISCONNECT, true, NULL);
return pcmk_rc_ok;
}
static pcmk__message_entry_t fmt_functions[] = {
{ "crm-mon-disconnected", "default", crm_mon_disconnected_default },
{ "crm-mon-disconnected", "html", crm_mon_disconnected_html },
{ "crm-mon-disconnected", "text", crm_mon_disconnected_text },
{ "crm-mon-disconnected", "xml", crm_mon_disconnected_xml },
{ NULL, NULL, NULL },
};
/* Define exit codes for monitoring-compatible output
* For nagios plugins, the possibilities are
* OK=0, WARN=1, CRIT=2, and UNKNOWN=3
*/
#define MON_STATUS_WARN CRM_EX_ERROR
#define MON_STATUS_CRIT CRM_EX_INVALID_PARAM
#define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE
#define RECONNECT_MSECS 5000
struct {
guint reconnect_ms;
enum mon_exec_mode exec_mode;
gboolean fence_connect;
gboolean print_pending;
gboolean show_bans;
gboolean watch_fencing;
char *pid_file;
char *external_agent;
char *external_recipient;
char *neg_location_prefix;
char *only_node;
char *only_rsc;
GSList *user_includes_excludes;
GSList *includes_excludes;
} options = {
.reconnect_ms = RECONNECT_MSECS,
.exec_mode = mon_exec_unset,
.fence_connect = TRUE,
};
static crm_exit_t clean_up(crm_exit_t exit_code);
static void crm_diff_update(const char *event, xmlNode * msg);
static void clean_up_on_connection_failure(int rc);
static int mon_refresh_display(gpointer user_data);
static int setup_cib_connection(void);
static int setup_fencer_connection(void);
static int setup_api_connections(void);
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
static void refresh_after_event(gboolean data_updated, gboolean enforce);
static uint32_t
all_includes(mon_output_format_t fmt) {
if (fmt == mon_output_monitor || fmt == mon_output_plain || fmt == mon_output_console) {
return ~pcmk_section_options;
} else {
return pcmk_section_all;
}
}
static uint32_t
default_includes(mon_output_format_t fmt) {
switch (fmt) {
case mon_output_monitor:
case mon_output_plain:
case mon_output_console:
case mon_output_html:
case mon_output_cgi:
return pcmk_section_summary
|pcmk_section_nodes
|pcmk_section_resources
|pcmk_section_failures;
case mon_output_xml:
return all_includes(fmt);
default:
return 0;
}
}
struct {
const char *name;
uint32_t bit;
} sections[] = {
{ "attributes", pcmk_section_attributes },
{ "bans", pcmk_section_bans },
{ "counts", pcmk_section_counts },
{ "dc", pcmk_section_dc },
{ "failcounts", pcmk_section_failcounts },
{ "failures", pcmk_section_failures },
{ PCMK__VALUE_FENCING, pcmk_section_fencing_all },
{ "fencing-failed", pcmk_section_fence_failed },
{ "fencing-pending", pcmk_section_fence_pending },
{ "fencing-succeeded", pcmk_section_fence_worked },
{ "maint-mode", pcmk_section_maint_mode },
{ "nodes", pcmk_section_nodes },
{ "operations", pcmk_section_operations },
{ "options", pcmk_section_options },
{ "resources", pcmk_section_resources },
{ "stack", pcmk_section_stack },
{ "summary", pcmk_section_summary },
{ "tickets", pcmk_section_tickets },
{ "times", pcmk_section_times },
{ NULL }
};
static uint32_t
find_section_bit(const char *name) {
for (int i = 0; sections[i].name != NULL; i++) {
if (pcmk__str_eq(sections[i].name, name, pcmk__str_casei)) {
return sections[i].bit;
}
}
return 0;
}
static gboolean
apply_exclude(const gchar *excludes, GError **error) {
char **parts = NULL;
gboolean result = TRUE;
parts = g_strsplit(excludes, ",", 0);
for (char **s = parts; *s != NULL; s++) {
uint32_t bit = find_section_bit(*s);
if (pcmk__str_eq(*s, "all", pcmk__str_none)) {
show = 0;
} else if (pcmk__str_eq(*s, PCMK__VALUE_NONE, pcmk__str_none)) {
show = all_includes(output_format);
} else if (bit != 0) {
show &= ~bit;
} else {
g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--exclude options: all, attributes, bans, counts, dc, "
"failcounts, failures, fencing, fencing-failed, "
"fencing-pending, fencing-succeeded, maint-mode, nodes, "
PCMK__VALUE_NONE ", operations, options, resources, "
"stack, summary, tickets, times");
result = FALSE;
break;
}
}
g_strfreev(parts);
return result;
}
static gboolean
apply_include(const gchar *includes, GError **error) {
char **parts = NULL;
gboolean result = TRUE;
parts = g_strsplit(includes, ",", 0);
for (char **s = parts; *s != NULL; s++) {
uint32_t bit = find_section_bit(*s);
if (pcmk__str_eq(*s, "all", pcmk__str_none)) {
show = all_includes(output_format);
} else if (pcmk__starts_with(*s, "bans")) {
show |= pcmk_section_bans;
if (options.neg_location_prefix != NULL) {
free(options.neg_location_prefix);
options.neg_location_prefix = NULL;
}
if (strlen(*s) > 4 && (*s)[4] == ':') {
options.neg_location_prefix = strdup(*s+5);
}
} else if (pcmk__str_any_of(*s, "default", "defaults", NULL)) {
show |= default_includes(output_format);
} else if (pcmk__str_eq(*s, PCMK__VALUE_NONE, pcmk__str_none)) {
show = 0;
} else if (bit != 0) {
show |= bit;
} else {
g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--include options: all, attributes, bans[:PREFIX], counts, dc, "
"default, failcounts, failures, fencing, fencing-failed, "
"fencing-pending, fencing-succeeded, maint-mode, nodes, "
PCMK__VALUE_NONE ", operations, options, resources, "
"stack, summary, tickets, times");
result = FALSE;
break;
}
}
g_strfreev(parts);
return result;
}
static gboolean
apply_include_exclude(GSList *lst, GError **error) {
gboolean rc = TRUE;
GSList *node = lst;
while (node != NULL) {
char *s = node->data;
if (pcmk__starts_with(s, "--include=")) {
rc = apply_include(s+10, error);
} else if (pcmk__starts_with(s, "-I=")) {
rc = apply_include(s+3, error);
} else if (pcmk__starts_with(s, "--exclude=")) {
rc = apply_exclude(s+10, error);
} else if (pcmk__starts_with(s, "-U=")) {
rc = apply_exclude(s+3, error);
}
if (rc != TRUE) {
break;
}
node = node->next;
}
return rc;
}
static gboolean
user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
char *s = crm_strdup_printf("%s=%s", option_name, optarg);
options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s);
return TRUE;
}
static gboolean
include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
char *s = crm_strdup_printf("%s=%s", option_name, optarg);
options.includes_excludes = g_slist_append(options.includes_excludes, s);
return TRUE;
}
static gboolean
as_cgi_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
pcmk__str_update(&args->output_ty, "html");
output_format = mon_output_cgi;
options.exec_mode = mon_exec_one_shot;
return TRUE;
}
static gboolean
as_html_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
pcmk__str_update(&args->output_dest, optarg);
pcmk__str_update(&args->output_ty, "html");
output_format = mon_output_html;
umask(S_IWGRP | S_IWOTH); // World-readable HTML
return TRUE;
}
static gboolean
as_simple_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
pcmk__str_update(&args->output_ty, "text");
output_format = mon_output_monitor;
options.exec_mode = mon_exec_one_shot;
return TRUE;
}
static gboolean
as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
pcmk__str_update(&args->output_ty, "xml");
output_format = mon_output_legacy_xml;
return TRUE;
}
static gboolean
fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
if (optarg == NULL) {
interactive_fence_level = 2;
} else {
pcmk__scan_min_int(optarg, &interactive_fence_level, 0);
}
switch (interactive_fence_level) {
case 3:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
return include_exclude_cb("--include", PCMK__VALUE_FENCING, data,
err);
case 2:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
return include_exclude_cb("--include", PCMK__VALUE_FENCING, data,
err);
case 1:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err);
case 0:
options.fence_connect = FALSE;
fence_history = pcmk__fence_history_none;
return include_exclude_cb("--exclude", PCMK__VALUE_FENCING, data,
err);
default:
g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3");
return FALSE;
}
}
static gboolean
group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_rscs_by_node;
return TRUE;
}
static gboolean
hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--exclude", "summary", data, err);
}
static gboolean
inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_inactive_rscs;
return TRUE;
}
static gboolean
no_curses_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
pcmk__str_update(&args->output_ty, "text");
output_format = mon_output_plain;
return TRUE;
}
static gboolean
print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_brief;
return TRUE;
}
static gboolean
print_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_details;
return TRUE;
}
static gboolean
print_description_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_description;
return TRUE;
}
static gboolean
print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
show_opts |= pcmk_show_timing;
return user_include_exclude_cb("--include", "operations", data, err);
}
static gboolean
reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
int rc = crm_get_msec(optarg);
if (rc == -1) {
g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg);
return FALSE;
} else {
- pcmk__parse_interval_spec(optarg, &options.reconnect_ms);
+ pcmk_parse_interval_spec(optarg, &options.reconnect_ms);
if (options.exec_mode != mon_exec_daemonized) {
// Reconnect interval applies to daemonized too, so don't override
options.exec_mode = mon_exec_update;
}
}
return TRUE;
}
/*!
* \internal
* \brief Enable one-shot mode
*
* \param[in] option_name Name of option being parsed (ignored)
* \param[in] optarg Value to be parsed (ignored)
* \param[in] data User data (ignored)
* \param[out] err Where to store error (ignored)
*/
static gboolean
one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **err)
{
options.exec_mode = mon_exec_one_shot;
return TRUE;
}
/*!
* \internal
* \brief Enable daemonized mode
*
* \param[in] option_name Name of option being parsed (ignored)
* \param[in] optarg Value to be parsed (ignored)
* \param[in] data User data (ignored)
* \param[out] err Where to store error (ignored)
*/
static gboolean
daemonize_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **err)
{
options.exec_mode = mon_exec_daemonized;
return TRUE;
}
static gboolean
show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "attributes", data, err);
}
static gboolean
show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
if (optarg != NULL) {
char *s = crm_strdup_printf("bans:%s", optarg);
gboolean rc = user_include_exclude_cb("--include", s, data, err);
free(s);
return rc;
} else {
return user_include_exclude_cb("--include", "bans", data, err);
}
}
static gboolean
show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "failcounts", data, err);
}
static gboolean
show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "failcounts,operations", data, err);
}
static gboolean
show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
return user_include_exclude_cb("--include", "tickets", data, err);
}
static gboolean
use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) {
setenv("CIB_file", optarg, 1);
options.exec_mode = mon_exec_one_shot;
return TRUE;
}
#define INDENT " "
/* *INDENT-OFF* */
static GOptionEntry addl_entries[] = {
{ "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb,
"Update frequency (default is 5 seconds)",
"TIMESPEC" },
{ "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
one_shot_cb,
"Display the cluster status once and exit",
NULL },
{ "daemonize", 'd', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
daemonize_cb,
"Run in the background as a daemon.\n"
INDENT "Requires at least one of --output-to and --external-agent.",
NULL },
{ "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file,
"(Advanced) Daemon pid file location",
"FILE" },
{ "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent,
"A program to run when resource operations take place",
"FILE" },
{ "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient,
"A recipient for your program (assuming you want the program to send something to someone).",
"RCPT" },
{ "watch-fencing", 'W', 0, G_OPTION_ARG_NONE, &options.watch_fencing,
"Listen for fencing events. For use with --external-agent.",
NULL },
{ "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb,
NULL,
NULL },
{ NULL }
};
static GOptionEntry display_entries[] = {
{ "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
"A list of sections to include in the output.\n"
INDENT "See `Output Control` help for more information.",
"SECTION(s)" },
{ "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb,
"A list of sections to exclude from the output.\n"
INDENT "See `Output Control` help for more information.",
"SECTION(s)" },
{ "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node,
"When displaying information about nodes, show only what's related to the given\n"
INDENT "node, or to all nodes tagged with the given tag",
"NODE" },
{ "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc,
"When displaying information about resources, show only what's related to the given\n"
INDENT "resource, or to all resources tagged with the given tag",
"RSC" },
{ "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb,
"Group resources by node",
NULL },
{ "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb,
"Display inactive resources",
NULL },
{ "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb,
"Display resource fail counts",
NULL },
{ "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb,
"Display resource operation history",
NULL },
{ "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb,
"Display resource operation history with timing details",
NULL },
{ "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb,
"Display cluster tickets",
NULL },
{ "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb,
"Show fence history:\n"
INDENT "0=off, 1=failures and pending (default without option),\n"
INDENT "2=add successes (default without value for option),\n"
INDENT "3=show full history without reduction to most recent of each flavor",
"LEVEL" },
{ "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb,
"Display negative location constraints [optionally filtered by id prefix]",
NULL },
{ "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb,
"Display node attributes",
NULL },
{ "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb,
"Hide all headers",
NULL },
{ "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_detail_cb,
"Show more details (node IDs, individual clone instances)",
NULL },
{ "show-description", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_description_cb,
"Show resource descriptions",
NULL },
{ "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb,
"Brief output",
NULL },
{ "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending,
"Display pending state if 'record-pending' is enabled",
NULL },
{ NULL }
};
static GOptionEntry deprecated_entries[] = {
{ "as-html", 'h', G_OPTION_FLAG_FILENAME, G_OPTION_ARG_CALLBACK, as_html_cb,
"Write cluster status to the named HTML file.\n"
INDENT "Use --output-as=html --output-to=FILE instead.",
"FILE" },
{ "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb,
"Write cluster status as XML to stdout. This will enable one-shot mode.\n"
INDENT "Use --output-as=xml instead.",
NULL },
{ "simple-status", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
as_simple_cb,
"Display the cluster status once as a simple one line output\n"
INDENT "(suitable for nagios)",
NULL },
{ "disable-ncurses", 'N', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, no_curses_cb,
"Disable the use of ncurses.\n"
INDENT "Use --output-as=text instead.",
NULL },
{ "web-cgi", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_cgi_cb,
"Web mode with output suitable for CGI (preselected when run as *.cgi).\n"
INDENT "Use --output-as=html --html-cgi instead.",
NULL },
{ NULL }
};
/* *INDENT-ON* */
/* Reconnect to the CIB and fencing agent after reconnect_ms has passed. This sounds
* like it would be more broadly useful, but only ever happens after a disconnect via
* mon_cib_connection_destroy.
*/
static gboolean
reconnect_after_timeout(gpointer data)
{
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
out->transient(out, "Reconnecting...");
if (setup_api_connections() == pcmk_rc_ok) {
// Trigger redrawing the screen (needs reconnect_timer == 0)
reconnect_timer = 0;
refresh_after_event(FALSE, TRUE);
return G_SOURCE_REMOVE;
}
out->message(out, "crm-mon-disconnected",
"Latest connection attempt failed", pcmkd_state);
reconnect_timer = g_timeout_add(options.reconnect_ms,
reconnect_after_timeout, NULL);
return G_SOURCE_REMOVE;
}
/* Called from various places when we are disconnected from the CIB or from the
* fencing agent. If the CIB connection is still valid, this function will also
* attempt to sign off and reconnect.
*/
static void
mon_cib_connection_destroy(gpointer user_data)
{
const char *msg = "Connection to the cluster lost";
pcmkd_state = pcmk_pacemakerd_state_invalid;
/* No crm-mon-disconnected message for console; a working implementation
* is not currently worth the effort
*/
out->transient(out, "%s", msg);
out->message(out, "crm-mon-disconnected", msg, pcmkd_state);
if (refresh_timer != NULL) {
/* we'll trigger a refresh after reconnect */
mainloop_timer_stop(refresh_timer);
}
if (reconnect_timer) {
/* we'll trigger a new reconnect-timeout at the end */
g_source_remove(reconnect_timer);
reconnect_timer = 0;
}
/* the client API won't properly reconnect notifications if they are still
* in the table - so remove them
*/
stonith_api_delete(st);
st = NULL;
if (cib) {
cib->cmds->signoff(cib);
reconnect_timer = g_timeout_add(options.reconnect_ms,
reconnect_after_timeout, NULL);
}
}
/* Signal handler installed into the mainloop for normal program shutdown */
static void
mon_shutdown(int nsig)
{
clean_up(CRM_EX_OK);
}
#if CURSES_ENABLED
static volatile sighandler_t ncurses_winch_handler;
/* Signal handler installed the regular way (not into the main loop) for when
* the screen is resized. Commonly, this happens when running in an xterm and
* the user changes its size.
*/
static void
mon_winresize(int nsig)
{
static int not_done;
int lines = 0, cols = 0;
if (!not_done++) {
if (ncurses_winch_handler)
/* the original ncurses WINCH signal handler does the
* magic of retrieving the new window size;
* otherwise, we'd have to use ioctl or tgetent */
(*ncurses_winch_handler) (SIGWINCH);
getmaxyx(stdscr, lines, cols);
resizeterm(lines, cols);
/* Alert the mainloop code we'd like the refresh_trigger to run next
* time the mainloop gets around to checking.
*/
mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
}
not_done--;
}
#endif
static int
setup_fencer_connection(void)
{
int rc = pcmk_ok;
if (options.fence_connect && st == NULL) {
st = stonith_api_new();
}
if (!options.fence_connect || st == NULL || st->state != stonith_disconnected) {
return rc;
}
rc = st->cmds->connect(st, crm_system_name, NULL);
if (rc == pcmk_ok) {
crm_trace("Setting up stonith callbacks");
if (options.watch_fencing) {
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
mon_st_callback_event);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
} else {
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
mon_st_callback_display);
st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
}
} else {
stonith_api_delete(st);
st = NULL;
}
return rc;
}
static int
setup_cib_connection(void)
{
int rc = pcmk_rc_ok;
CRM_CHECK(cib != NULL, return EINVAL);
if (cib->state != cib_disconnected) {
// Already connected with notifications registered for CIB updates
return rc;
}
rc = cib__signon_query(out, &cib, &current_cib);
if (rc == pcmk_rc_ok) {
rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib,
mon_cib_connection_destroy));
if (rc == EPROTONOSUPPORT) {
out->err(out,
"CIB client does not support connection loss "
"notifications; crm_mon will be unable to reconnect after "
"connection loss");
rc = pcmk_rc_ok;
}
if (rc == pcmk_rc_ok) {
cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY,
crm_diff_update);
rc = pcmk_legacy2rc(cib->cmds->add_notify_callback(cib,
T_CIB_DIFF_NOTIFY, crm_diff_update));
}
if (rc != pcmk_rc_ok) {
if (rc == EPROTONOSUPPORT) {
out->err(out,
"CIB client does not support CIB diff "
"notifications");
} else {
out->err(out, "CIB diff notification setup failed");
}
out->err(out, "Cannot monitor CIB changes; exiting");
cib__clean_up_connection(&cib);
stonith_api_delete(st);
st = NULL;
}
}
return rc;
}
/* This is used to set up the fencing options after the interactive UI has been stared.
* fence_history_cb can't be used because it builds up a list of includes/excludes that
* then have to be processed with apply_include_exclude and that could affect other
* things.
*/
static void
set_fencing_options(int level)
{
switch (level) {
case 3:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
show |= pcmk_section_fencing_all;
break;
case 2:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
show |= pcmk_section_fencing_all;
break;
case 1:
options.fence_connect = TRUE;
fence_history = pcmk__fence_history_full;
show |= pcmk_section_fence_failed | pcmk_section_fence_pending;
break;
default:
interactive_fence_level = 0;
options.fence_connect = FALSE;
fence_history = pcmk__fence_history_none;
show &= ~pcmk_section_fencing_all;
break;
}
}
static int
setup_api_connections(void)
{
int rc = pcmk_rc_ok;
CRM_CHECK(cib != NULL, return EINVAL);
if (cib->state != cib_disconnected) {
return rc;
}
if (cib->variant == cib_native) {
rc = pcmk__pacemakerd_status(out, crm_system_name,
options.reconnect_ms / 2, false,
&pcmkd_state);
if (rc != pcmk_rc_ok) {
return rc;
}
switch (pcmkd_state) {
case pcmk_pacemakerd_state_running:
case pcmk_pacemakerd_state_remote:
case pcmk_pacemakerd_state_shutting_down:
/* Fencer and CIB may still be available while shutting down or
* running on a Pacemaker Remote node
*/
break;
default:
// Fencer and CIB are definitely unavailable
return ENOTCONN;
}
setup_fencer_connection();
}
rc = setup_cib_connection();
return rc;
}
#if CURSES_ENABLED
static const char *
get_option_desc(char c)
{
const char *desc = "No help available";
for (GOptionEntry *entry = display_entries; entry != NULL; entry++) {
if (entry->short_name == c) {
desc = entry->description;
break;
}
}
return desc;
}
#define print_option_help(out, option, condition) \
curses_formatted_printf(out, "%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option));
/* This function is called from the main loop when there is something to be read
* on stdin, like an interactive user's keystroke. All it does is read the keystroke,
* set flags (or show the page showing which keystrokes are valid), and redraw the
* screen. It does not do anything with connections to the CIB or fencing agent
* agent what would happen in mon_refresh_display.
*/
static gboolean
detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
{
int c;
gboolean config_mode = FALSE;
while (1) {
/* Get user input */
c = getchar();
switch (c) {
case 'm':
interactive_fence_level++;
if (interactive_fence_level > 3) {
interactive_fence_level = 0;
}
set_fencing_options(interactive_fence_level);
break;
case 'c':
show ^= pcmk_section_tickets;
break;
case 'f':
show ^= pcmk_section_failcounts;
break;
case 'n':
show_opts ^= pcmk_show_rscs_by_node;
break;
case 'o':
show ^= pcmk_section_operations;
if (!pcmk_is_set(show, pcmk_section_operations)) {
show_opts &= ~pcmk_show_timing;
}
break;
case 'r':
show_opts ^= pcmk_show_inactive_rscs;
break;
case 'R':
show_opts ^= pcmk_show_details;
#ifdef PCMK__COMPAT_2_0
// Keep failed action output the same as 2.0.x
show_opts |= pcmk_show_failed_detail;
#endif
break;
case 't':
show_opts ^= pcmk_show_timing;
if (pcmk_is_set(show_opts, pcmk_show_timing)) {
show |= pcmk_section_operations;
}
break;
case 'A':
show ^= pcmk_section_attributes;
break;
case 'L':
show ^= pcmk_section_bans;
break;
case 'D':
/* If any header is shown, clear them all, otherwise set them all */
if (pcmk_any_flags_set(show, pcmk_section_summary)) {
show &= ~pcmk_section_summary;
} else {
show |= pcmk_section_summary;
}
/* Regardless, we don't show options in console mode. */
show &= ~pcmk_section_options;
break;
case 'b':
show_opts ^= pcmk_show_brief;
break;
case 'j':
show_opts ^= pcmk_show_pending;
break;
case '?':
config_mode = TRUE;
break;
default:
/* All other keys just redraw the screen. */
goto refresh;
}
if (!config_mode)
goto refresh;
clear();
refresh();
curses_formatted_printf(out, "%s", "Display option change mode\n");
print_option_help(out, 'c', pcmk_is_set(show, pcmk_section_tickets));
print_option_help(out, 'f', pcmk_is_set(show, pcmk_section_failcounts));
print_option_help(out, 'n', pcmk_is_set(show_opts, pcmk_show_rscs_by_node));
print_option_help(out, 'o', pcmk_is_set(show, pcmk_section_operations));
print_option_help(out, 'r', pcmk_is_set(show_opts, pcmk_show_inactive_rscs));
print_option_help(out, 't', pcmk_is_set(show_opts, pcmk_show_timing));
print_option_help(out, 'A', pcmk_is_set(show, pcmk_section_attributes));
print_option_help(out, 'L', pcmk_is_set(show, pcmk_section_bans));
print_option_help(out, 'D', !pcmk_is_set(show, pcmk_section_summary));
#ifdef PCMK__COMPAT_2_0
print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details & ~pcmk_show_failed_detail));
#else
print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details));
#endif
print_option_help(out, 'b', pcmk_is_set(show_opts, pcmk_show_brief));
print_option_help(out, 'j', pcmk_is_set(show_opts, pcmk_show_pending));
curses_formatted_printf(out, "%d m: \t%s\n", interactive_fence_level, get_option_desc('m'));
curses_formatted_printf(out, "%s", "\nToggle fields via field letter, type any other key to return\n");
}
refresh:
refresh_after_event(FALSE, TRUE);
return TRUE;
}
#endif // CURSES_ENABLED
// Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag
static void
avoid_zombies(void)
{
struct sigaction sa;
memset(&sa, 0, sizeof(struct sigaction));
if (sigemptyset(&sa.sa_mask) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
return;
}
sa.sa_handler = SIG_IGN;
sa.sa_flags = SA_RESTART|SA_NOCLDWAIT;
if (sigaction(SIGCHLD, &sa, NULL) < 0) {
crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno));
}
}
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
GOptionContext *context = NULL;
GOptionEntry extra_prog_entries[] = {
{ "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet),
"Be less descriptive in output.",
NULL },
{ NULL }
};
#if CURSES_ENABLED
const char *fmts = "console (default), html, text, xml, none";
#else
const char *fmts = "text (default), html, xml, none";
#endif // CURSES_ENABLED
const char *desc = NULL;
desc = "Notes:\n\n"
"If this program is called as crm_mon.cgi, --output-as=html and\n"
"--html-cgi are automatically added to the command line\n"
"arguments.\n\n"
"Time Specification:\n\n"
"The TIMESPEC in any command line option can be specified in many\n"
"different formats. It can be an integer number of seconds, a\n"
"number plus units (us/usec/ms/msec/s/sec/m/min/h/hr), or an ISO\n"
"8601 period specification.\n\n"
"Output Control:\n\n"
"By default, a particular set of sections are written to the\n"
"output destination. The default varies based on the output\n"
"format: XML includes all sections by default, while other output\n"
"formats include less. This set can be modified with the --include\n"
"and --exclude command line options. Each option may be passed\n"
"multiple times, and each can specify a comma-separated list of\n"
"sections. The options are applied to the default set, in order\n"
"from left to right as they are passed on the command line. For a\n"
"list of valid sections, pass --include=list or --exclude=list.\n\n"
"Interactive Use:\n\n"
#if CURSES_ENABLED
"When run interactively, crm_mon can be told to hide and show\n"
"various sections of output. To see a help screen explaining the\n"
"options, press '?'. Any key stroke aside from those listed will\n"
"cause the screen to refresh.\n\n"
#else
"The local installation of Pacemaker was built without support for\n"
"interactive (console) mode. A curses library must be available at\n"
"build time to support interactive mode.\n\n"
#endif // CURSES_ENABLED
"Examples:\n\n"
#if CURSES_ENABLED
"Display the cluster status on the console with updates as they\n"
"occur:\n\n"
"\tcrm_mon\n\n"
#endif // CURSES_ENABLED
"Display the cluster status once and exit:\n\n"
"\tcrm_mon -1\n\n"
"Display the cluster status, group resources by node, and include\n"
"inactive resources in the list:\n\n"
"\tcrm_mon --group-by-node --inactive\n\n"
"Start crm_mon as a background daemon and have it write the\n"
"cluster status to an HTML file:\n\n"
"\tcrm_mon --daemonize --output-as html "
"--output-to /path/to/docroot/filename.html\n\n"
"Display the cluster status as XML:\n\n"
"\tcrm_mon --output-as xml\n\n";
context = pcmk__build_arg_context(args, fmts, group, NULL);
pcmk__add_main_args(context, extra_prog_entries);
g_option_context_set_description(context, desc);
pcmk__add_arg_group(context, "display", "Display Options:",
"Show display options", display_entries);
pcmk__add_arg_group(context, "additional", "Additional Options:",
"Show additional options", addl_entries);
pcmk__add_arg_group(context, "deprecated", "Deprecated Options:",
"Show deprecated options", deprecated_entries);
return context;
}
/* If certain format options were specified, we want to set some extra
* options. We can just process these like they were given on the
* command line.
*/
static void
add_output_args(void) {
GError *err = NULL;
if (output_format == mon_output_plain) {
if (!pcmk__force_args(context, &err, "%s --text-fancy", g_get_prgname())) {
g_propagate_error(&error, err);
clean_up(CRM_EX_USAGE);
}
} else if (output_format == mon_output_cgi) {
if (!pcmk__force_args(context, &err, "%s --html-cgi", g_get_prgname())) {
g_propagate_error(&error, err);
clean_up(CRM_EX_USAGE);
}
} else if (output_format == mon_output_xml) {
if (!pcmk__force_args(context, &err, "%s --xml-simple-list --xml-substitute", g_get_prgname())) {
g_propagate_error(&error, err);
clean_up(CRM_EX_USAGE);
}
} else if (output_format == mon_output_legacy_xml) {
output_format = mon_output_xml;
if (!pcmk__force_args(context, &err, "%s --xml-legacy --xml-substitute", g_get_prgname())) {
g_propagate_error(&error, err);
clean_up(CRM_EX_USAGE);
}
}
}
/*!
* \internal
* \brief Set output format based on \p --output-as arguments and mode arguments
*
* When the deprecated output format arguments (\p --as-cgi, \p --as-html,
* \p --simple-status, \p --as-xml) are parsed, callback functions set
* \p output_format (and the umask if appropriate). If none of the deprecated
* arguments were specified, this function does the same based on the current
* \p --output-as arguments and the \p --one-shot and \p --daemonize arguments.
*
* \param[in,out] args Command line arguments
*/
static void
reconcile_output_format(pcmk__common_args_t *args)
{
if (output_format != mon_output_unset) {
/* One of the deprecated arguments was used, and we're finished. Note
* that this means the deprecated arguments take precedence.
*/
return;
}
if (pcmk__str_eq(args->output_ty, "none", pcmk__str_none)) {
output_format = mon_output_none;
} else if (pcmk__str_eq(args->output_ty, "html", pcmk__str_none)) {
output_format = mon_output_html;
umask(S_IWGRP | S_IWOTH); // World-readable HTML
} else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) {
output_format = mon_output_xml;
#if CURSES_ENABLED
} else if (pcmk__str_eq(args->output_ty, "console",
pcmk__str_null_matches)) {
/* Console is the default format if no conflicting options are given.
*
* Use text output instead if one of the following conditions is met:
* * We've requested daemonized or one-shot mode (console output is
* incompatible with modes other than mon_exec_update)
* * We requested the version, which is effectively one-shot
* * We specified a non-stdout output destination (console mode is
* compatible only with stdout)
*/
if ((options.exec_mode == mon_exec_daemonized)
|| (options.exec_mode == mon_exec_one_shot)
|| args->version
|| !pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) {
pcmk__str_update(&args->output_ty, "text");
output_format = mon_output_plain;
} else {
pcmk__str_update(&args->output_ty, "console");
output_format = mon_output_console;
crm_enable_stderr(FALSE);
}
#endif // CURSES_ENABLED
} else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches)) {
/* Text output was explicitly requested, or it's the default because
* curses is not enabled
*/
pcmk__str_update(&args->output_ty, "text");
output_format = mon_output_plain;
}
// Otherwise, invalid format. Let pcmk__output_new() throw an error.
}
/*!
* \internal
* \brief Set execution mode to the output format's default if appropriate
*
* \param[in,out] args Command line arguments
*/
static void
set_default_exec_mode(const pcmk__common_args_t *args)
{
if (output_format == mon_output_console) {
/* Update is the only valid mode for console, but set here instead of
* reconcile_output_format() for isolation and consistency
*/
options.exec_mode = mon_exec_update;
} else if (options.exec_mode == mon_exec_unset) {
// Default to one-shot mode for all other formats
options.exec_mode = mon_exec_one_shot;
} else if ((options.exec_mode == mon_exec_update)
&& pcmk__str_eq(args->output_dest, "-",
pcmk__str_null_matches)) {
// If not using console format, update mode cannot be used with stdout
options.exec_mode = mon_exec_one_shot;
}
}
static void
clean_up_on_connection_failure(int rc)
{
if (output_format == mon_output_monitor) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s",
pcmk_rc_str(rc));
clean_up(MON_STATUS_CRIT);
} else if (rc == ENOTCONN) {
if (pcmkd_state == pcmk_pacemakerd_state_remote) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster");
} else {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
}
} else {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc));
}
clean_up(pcmk_rc2exitc(rc));
}
static void
one_shot(void)
{
int rc = pcmk__status(out, cib, fence_history, show, show_opts,
options.only_node, options.only_rsc,
options.neg_location_prefix,
output_format == mon_output_monitor, 0);
if (rc == pcmk_rc_ok) {
clean_up(pcmk_rc2exitc(rc));
} else {
clean_up_on_connection_failure(rc);
}
}
static void
exit_on_invalid_cib(void)
{
if (cib != NULL) {
return;
}
// Shouldn't really be possible
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source");
clean_up(CRM_EX_ERROR);
}
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
GOptionGroup *output_group = NULL;
args = pcmk__new_common_args(SUMMARY);
context = build_arg_context(args, &output_group);
pcmk__register_formats(output_group, formats);
options.pid_file = strdup("/tmp/ClusterMon.pid");
pcmk__cli_init_logging("crm_mon", 0);
// Avoid needing to wait for subprocesses forked for -E/--external-agent
avoid_zombies();
if (pcmk__ends_with_ext(argv[0], ".cgi")) {
output_format = mon_output_cgi;
options.exec_mode = mon_exec_one_shot;
}
processed_args = pcmk__cmdline_preproc(argv, "ehimpxEILU");
fence_history_cb("--fence-history", "1", NULL, NULL);
/* Set an HTML title regardless of what format we will eventually use. This can't
* be done in add_output_args. That function is called after command line
* arguments are processed in the next block, which means it'll override whatever
* title the user provides. Doing this here means the user can give their own
* title on the command line.
*/
if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"",
g_get_prgname())) {
return clean_up(CRM_EX_USAGE);
}
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
return clean_up(CRM_EX_USAGE);
}
for (int i = 0; i < args->verbosity; i++) {
crm_bump_log_level(argc, argv);
}
if (!args->version) {
if (args->quiet) {
include_exclude_cb("--exclude", "times", NULL, NULL);
}
if (options.watch_fencing) {
fence_history_cb("--fence-history", "0", NULL, NULL);
options.fence_connect = TRUE;
}
/* create the cib-object early to be able to do further
* decisions based on the cib-source
*/
cib = cib_new();
exit_on_invalid_cib();
switch (cib->variant) {
case cib_native:
// Everything (fencer, CIB, pcmkd status) should be available
break;
case cib_file:
// Live fence history is not meaningful
fence_history_cb("--fence-history", "0", NULL, NULL);
/* Notifications are unsupported; nothing to monitor
* @COMPAT: Let setup_cib_connection() handle this by exiting?
*/
options.exec_mode = mon_exec_one_shot;
break;
case cib_remote:
// We won't receive any fencing updates
fence_history_cb("--fence-history", "0", NULL, NULL);
break;
default:
/* something is odd */
exit_on_invalid_cib();
break;
}
if ((options.exec_mode == mon_exec_daemonized)
&& !options.external_agent
&& pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--daemonize requires at least one of --output-to "
"(with value not set to '-') and --external-agent");
return clean_up(CRM_EX_USAGE);
}
}
reconcile_output_format(args);
set_default_exec_mode(args);
add_output_args();
/* output_format MUST NOT BE CHANGED AFTER THIS POINT. */
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
return clean_up(CRM_EX_ERROR);
}
/* If we had a valid format for pcmk__output_new(), output_format should be
* set by now.
*/
CRM_ASSERT(output_format != mon_output_unset);
if (options.exec_mode == mon_exec_daemonized) {
if (!options.external_agent && (output_format == mon_output_none)) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE,
"--daemonize requires --external-agent if used with "
"--output-as=none");
return clean_up(CRM_EX_USAGE);
}
crm_enable_stderr(FALSE);
cib_delete(cib);
cib = NULL;
pcmk__daemonize(crm_system_name, options.pid_file);
cib = cib_new();
exit_on_invalid_cib();
}
show = default_includes(output_format);
/* Apply --include/--exclude flags we used internally. There's no error reporting
* here because this would be a programming error.
*/
apply_include_exclude(options.includes_excludes, &error);
/* And now apply any --include/--exclude flags the user gave on the command line.
* These are done in a separate pass from the internal ones because we want to
* make sure whatever the user specifies overrides whatever we do.
*/
if (!apply_include_exclude(options.user_includes_excludes, &error)) {
return clean_up(CRM_EX_USAGE);
}
/* Sync up the initial value of interactive_fence_level with whatever was set with
* --include/--exclude= options.
*/
if (pcmk_all_flags_set(show, pcmk_section_fencing_all)) {
interactive_fence_level = 3;
} else if (pcmk_is_set(show, pcmk_section_fence_worked)) {
interactive_fence_level = 2;
} else if (pcmk_any_flags_set(show, pcmk_section_fence_failed | pcmk_section_fence_pending)) {
interactive_fence_level = 1;
} else {
interactive_fence_level = 0;
}
pcmk__register_lib_messages(out);
crm_mon_register_messages(out);
pe__register_messages(out);
stonith__register_messages(out);
// Messages internal to this file, nothing curses-specific
pcmk__register_messages(out, fmt_functions);
if (args->version) {
out->version(out, false);
return clean_up(CRM_EX_OK);
}
/* Extra sanity checks when in CGI mode */
if (output_format == mon_output_cgi) {
if (cib->variant == cib_file) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode used with CIB file");
return clean_up(CRM_EX_USAGE);
} else if (options.external_agent != NULL) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with --external-agent");
return clean_up(CRM_EX_USAGE);
} else if (options.exec_mode == mon_exec_daemonized) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with -d");
return clean_up(CRM_EX_USAGE);
}
}
if (output_format == mon_output_xml) {
show_opts |= pcmk_show_inactive_rscs | pcmk_show_timing;
}
if ((output_format == mon_output_html || output_format == mon_output_cgi) &&
out->dest != stdout) {
pcmk__html_add_header("meta", "http-equiv", "refresh", "content",
pcmk__itoa(options.reconnect_ms / 1000), NULL);
}
#ifdef PCMK__COMPAT_2_0
// Keep failed action output the same as 2.0.x
show_opts |= pcmk_show_failed_detail;
#endif
crm_info("Starting %s", crm_system_name);
cib__set_output(cib, out);
if (options.exec_mode == mon_exec_one_shot) {
one_shot();
}
out->message(out, "crm-mon-disconnected",
"Waiting for initial connection", pcmkd_state);
do {
out->transient(out, "Connecting to cluster...");
rc = setup_api_connections();
if (rc != pcmk_rc_ok) {
if ((rc == ENOTCONN) || (rc == ECONNREFUSED)) {
out->transient(out, "Connection failed. Retrying in %ums...",
options.reconnect_ms);
}
// Give some time to view all output even if we won't retry
pcmk__sleep_ms(options.reconnect_ms);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
clear();
refresh();
}
#endif
}
} while ((rc == ENOTCONN) || (rc == ECONNREFUSED));
if (rc != pcmk_rc_ok) {
clean_up_on_connection_failure(rc);
}
set_fencing_options(interactive_fence_level);
mon_refresh_display(NULL);
mainloop = g_main_loop_new(NULL, FALSE);
mainloop_add_signal(SIGTERM, mon_shutdown);
mainloop_add_signal(SIGINT, mon_shutdown);
#if CURSES_ENABLED
if (output_format == mon_output_console) {
ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize);
if (ncurses_winch_handler == SIG_DFL ||
ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR)
ncurses_winch_handler = NULL;
io_channel = g_io_channel_unix_new(STDIN_FILENO);
g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL);
}
#endif
/* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In
* this file, that is anywhere mainloop_set_trigger is called.
*/
refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
g_main_loop_run(mainloop);
g_main_loop_unref(mainloop);
if (io_channel != NULL) {
g_io_channel_shutdown(io_channel, TRUE, NULL);
}
crm_info("Exiting %s", crm_system_name);
return clean_up(CRM_EX_OK);
}
static int
send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc,
int status, const char *desc)
{
pid_t pid;
/*setenv needs chars, these are ints */
char *rc_s = pcmk__itoa(rc);
char *status_s = pcmk__itoa(status);
char *target_rc_s = pcmk__itoa(target_rc);
crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent);
if(rsc) {
setenv("CRM_notify_rsc", rsc, 1);
}
if (options.external_recipient) {
setenv("CRM_notify_recipient", options.external_recipient, 1);
}
setenv("CRM_notify_node", node, 1);
setenv("CRM_notify_task", task, 1);
setenv("CRM_notify_desc", desc, 1);
setenv("CRM_notify_rc", rc_s, 1);
setenv("CRM_notify_target_rc", target_rc_s, 1);
setenv("CRM_notify_status", status_s, 1);
pid = fork();
if (pid == -1) {
out->err(out, "notification fork() failed: %s", strerror(errno));
}
if (pid == 0) {
/* crm_debug("notification: I am the child. Executing the nofitication program."); */
execl(options.external_agent, options.external_agent, NULL);
exit(CRM_EX_ERROR);
}
crm_trace("Finished running custom notification program '%s'.", options.external_agent);
free(target_rc_s);
free(status_s);
free(rc_s);
return 0;
}
static int
handle_rsc_op(xmlNode *xml, void *userdata)
{
const char *node_id = (const char *) userdata;
int rc = -1;
int status = -1;
int target_rc = -1;
gboolean notify = TRUE;
char *rsc = NULL;
char *task = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *id = NULL;
const char *node = NULL;
xmlNode *n = xml;
xmlNode * rsc_op = xml;
if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) {
pcmk__xe_foreach_child(xml, NULL, handle_rsc_op, (void *) node_id);
return pcmk_rc_ok;
}
id = pe__xe_history_key(rsc_op);
magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC);
if (magic == NULL) {
/* non-change */
return pcmk_rc_ok;
}
if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc,
&target_rc)) {
crm_err("Invalid event %s detected for %s", magic, id);
return pcmk_rc_ok;
}
if (parse_op_key(id, &rsc, &task, NULL) == FALSE) {
crm_err("Invalid event detected for %s", id);
goto bail;
}
node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET);
while ((n != NULL) && !pcmk__xe_is(n, XML_CIB_TAG_STATE)) {
n = n->parent;
}
if(node == NULL && n) {
node = crm_element_value(n, XML_ATTR_UNAME);
}
if (node == NULL && n) {
node = ID(n);
}
if (node == NULL) {
node = node_id;
}
if (node == NULL) {
crm_err("No node detected for event %s (%s)", magic, id);
goto bail;
}
/* look up where we expected it to be? */
desc = pcmk_rc_str(pcmk_rc_ok);
if ((status == PCMK_EXEC_DONE) && (target_rc == rc)) {
crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc);
if (rc == PCMK_OCF_NOT_RUNNING) {
notify = FALSE;
}
} else if (status == PCMK_EXEC_DONE) {
desc = services_ocf_exitcode_str(rc);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
} else {
desc = pcmk_exec_status_str(status);
crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc);
}
if (notify && options.external_agent) {
send_custom_trap(node, rsc, task, target_rc, rc, status, desc);
}
bail:
free(rsc);
free(task);
return pcmk_rc_ok;
}
/* This function is just a wrapper around mainloop_set_trigger so that it can be
* called from a mainloop directly. It's simply another way of ensuring the screen
* gets redrawn.
*/
static gboolean
mon_trigger_refresh(gpointer user_data)
{
mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
return FALSE;
}
static int
handle_op_for_node(xmlNode *xml, void *userdata)
{
const char *node = crm_element_value(xml, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(xml);
}
handle_rsc_op(xml, (void *) node);
return pcmk_rc_ok;
}
static void
crm_diff_update_v2(const char *event, xmlNode * msg)
{
xmlNode *change = NULL;
xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
for (change = pcmk__xml_first_child(diff); change != NULL;
change = pcmk__xml_next(change)) {
const char *name = NULL;
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
xmlNode *match = NULL;
const char *node = NULL;
if(op == NULL) {
continue;
} else if(strcmp(op, "create") == 0) {
match = change->children;
} else if(strcmp(op, "move") == 0) {
continue;
} else if(strcmp(op, "delete") == 0) {
continue;
} else if(strcmp(op, "modify") == 0) {
match = first_named_child(change, XML_DIFF_RESULT);
if(match) {
match = match->children;
}
}
if(match) {
name = (const char *)match->name;
}
crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name);
if(xpath == NULL) {
/* Version field, ignore */
} else if(name == NULL) {
crm_debug("No result for %s operation to %s", op, xpath);
CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0);
} else if(strcmp(name, XML_TAG_CIB) == 0) {
pcmk__xe_foreach_child(first_named_child(match, XML_CIB_TAG_STATUS),
NULL, handle_op_for_node, NULL);
} else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) {
pcmk__xe_foreach_child(match, NULL, handle_op_for_node, NULL);
} else if(strcmp(name, XML_CIB_TAG_STATE) == 0) {
node = crm_element_value(match, XML_ATTR_UNAME);
if (node == NULL) {
node = ID(match);
}
handle_rsc_op(match, (void *) node);
} else if(strcmp(name, XML_CIB_TAG_LRM) == 0) {
node = ID(match);
handle_rsc_op(match, (void *) node);
} else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, "lrm");
handle_rsc_op(match, local_node);
free(local_node);
} else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, "lrm");
handle_rsc_op(match, local_node);
free(local_node);
} else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
char *local_node = pcmk__xpath_node_id(xpath, "lrm");
handle_rsc_op(match, local_node);
free(local_node);
} else {
crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
}
}
}
static void
crm_diff_update_v1(const char *event, xmlNode * msg)
{
/* Process operation updates */
xmlXPathObject *xpathObj = xpath_search(msg,
"//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED
"//" XML_LRM_TAG_RSC_OP);
int lpc = 0, max = numXpathResults(xpathObj);
for (lpc = 0; lpc < max; lpc++) {
xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
handle_rsc_op(rsc_op, NULL);
}
freeXpathObject(xpathObj);
}
static void
crm_diff_update(const char *event, xmlNode * msg)
{
int rc = -1;
static bool stale = FALSE;
gboolean cib_updated = FALSE;
xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
out->progress(out, false);
if (current_cib != NULL) {
rc = xml_apply_patchset(current_cib, diff, TRUE);
switch (rc) {
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(current_cib); current_cib = NULL;
break;
case pcmk_ok:
cib_updated = TRUE;
break;
default:
crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(current_cib); current_cib = NULL;
}
}
if (current_cib == NULL) {
crm_trace("Re-requesting the full cib");
cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
}
if (options.external_agent) {
int format = 0;
crm_element_value_int(diff, PCMK_XA_FORMAT, &format);
switch(format) {
case 1:
crm_diff_update_v1(event, msg);
break;
case 2:
crm_diff_update_v2(event, msg);
break;
default:
crm_err("Unknown patch format: %d", format);
}
}
if (current_cib == NULL) {
if(!stale) {
out->info(out, "--- Stale data ---");
}
stale = TRUE;
return;
}
stale = FALSE;
refresh_after_event(cib_updated, FALSE);
}
static int
mon_refresh_display(gpointer user_data)
{
int rc = pcmk_rc_ok;
last_refresh = time(NULL);
if (output_format == mon_output_none) {
return G_SOURCE_REMOVE;
}
if (fence_history == pcmk__fence_history_full &&
!pcmk_all_flags_set(show, pcmk_section_fencing_all) &&
output_format != mon_output_xml) {
fence_history = pcmk__fence_history_reduced;
}
// Get an up-to-date pacemakerd status for the cluster summary
if (cib->variant == cib_native) {
pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2,
false, &pcmkd_state);
}
if (out->dest != stdout) {
out->reset(out);
}
rc = pcmk__output_cluster_status(out, st, cib, current_cib, pcmkd_state,
fence_history, show, show_opts,
options.only_node,options.only_rsc,
options.neg_location_prefix,
output_format == mon_output_monitor);
if (output_format == mon_output_monitor && rc != pcmk_rc_ok) {
clean_up(MON_STATUS_WARN);
return G_SOURCE_REMOVE;
} else if (rc == pcmk_rc_schema_validation) {
clean_up(CRM_EX_CONFIG);
return G_SOURCE_REMOVE;
}
if (out->dest != stdout) {
out->finish(out, CRM_EX_OK, true, NULL);
}
return G_SOURCE_CONTINUE;
}
/* This function is called for fencing events (see setup_fencer_connection() for
* which ones) when --watch-fencing is used on the command line
*/
static void
mon_st_callback_event(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else if (options.external_agent) {
char *desc = stonith__event_description(e);
send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc);
free(desc);
}
}
/* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met:
*
* - If the last update occurred more than reconnect_ms ago (defaults to 5s, but
* can be changed via the -i command line option), or
* - After every 10 CIB updates, or
* - If it's been 2s since the last update
*
* This function sounds like it would be more broadly useful, but it is only called when a
* fencing event is received or a CIB diff occurrs.
*/
static void
refresh_after_event(gboolean data_updated, gboolean enforce)
{
static int updates = 0;
time_t now = time(NULL);
if (data_updated) {
updates++;
}
if(refresh_timer == NULL) {
refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
}
if (reconnect_timer > 0) {
/* we will receive a refresh request after successful reconnect */
mainloop_timer_stop(refresh_timer);
return;
}
/* as we're not handling initial failure of fencer-connection as
* fatal give it a retry here
* not getting here if cib-reconnection is already on the way
*/
setup_fencer_connection();
if (enforce ||
((now - last_refresh) > (options.reconnect_ms / 1000)) ||
updates >= 10) {
mainloop_set_trigger((crm_trigger_t *) refresh_trigger);
mainloop_timer_stop(refresh_timer);
updates = 0;
} else {
mainloop_timer_start(refresh_timer);
}
}
/* This function is called for fencing events (see setup_fencer_connection() for
* which ones) when --watch-fencing is NOT used on the command line
*/
static void
mon_st_callback_display(stonith_t * st, stonith_event_t * e)
{
if (st->state == stonith_disconnected) {
/* disconnect cib as well and have everything reconnect */
mon_cib_connection_destroy(NULL);
} else {
out->progress(out, false);
refresh_after_event(TRUE, FALSE);
}
}
/*
* De-init ncurses, disconnect from the CIB manager, disconnect fencing,
* deallocate memory and show usage-message if requested.
*
* We don't actually return, but nominally returning crm_exit_t allows a usage
* like "return clean_up(exit_code);" which helps static analysis understand the
* code flow.
*/
static crm_exit_t
clean_up(crm_exit_t exit_code)
{
/* Quitting crm_mon is much more complicated than it ought to be. */
/* (1) Close connections, free things, etc. */
cib__clean_up_connection(&cib);
stonith_api_delete(st);
free(options.neg_location_prefix);
free(options.only_node);
free(options.only_rsc);
free(options.pid_file);
g_slist_free_full(options.includes_excludes, free);
g_strfreev(processed_args);
/* (2) If this is abnormal termination and we're in curses mode, shut down
* curses first. Any messages displayed to the screen before curses is shut
* down will be lost because doing the shut down will also restore the
* screen to whatever it looked like before crm_mon was started.
*/
if (((error != NULL) || (exit_code == CRM_EX_USAGE))
&& (output_format == mon_output_console)
&& (out != NULL)) {
out->finish(out, exit_code, false, NULL);
pcmk__output_free(out);
out = NULL;
}
/* (3) If this is a command line usage related failure, print the usage
* message.
*/
if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) {
char *help = g_option_context_get_help(context, TRUE, NULL);
fprintf(stderr, "%s", help);
g_free(help);
}
pcmk__free_arg_context(context);
/* (4) If this is any kind of error, print the error out and exit. Make
* sure to handle situations both before and after formatted output is
* set up. We want errors to appear formatted if at all possible.
*/
if (error != NULL) {
if (out != NULL) {
out->err(out, "%s: %s", g_get_prgname(), error->message);
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
} else {
fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message);
}
g_clear_error(&error);
crm_exit(exit_code);
}
/* (5) Print formatted output to the screen if we made it far enough in
* crm_mon to be able to do so.
*/
if (out != NULL) {
if (options.exec_mode != mon_exec_daemonized) {
out->finish(out, exit_code, true, NULL);
}
pcmk__output_free(out);
pcmk__unregister_formats();
}
crm_exit(exit_code);
}
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index b7cca6a583..738fb2366c 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -1,2249 +1,2249 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm_resource.h>
#include <crm/common/ipc_attrd_internal.h>
#include <crm/common/ipc_controld.h>
#include <crm/common/lists_internal.h>
#include <crm/services_internal.h>
static GList *
build_node_info_list(const pcmk_resource_t *rsc)
{
GList *retval = NULL;
for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) {
const pcmk_resource_t *child = (const pcmk_resource_t *) iter->data;
for (const GList *iter2 = child->running_on;
iter2 != NULL; iter2 = iter2->next) {
const pcmk_node_t *node = (const pcmk_node_t *) iter2->data;
node_info_t *ni = calloc(1, sizeof(node_info_t));
ni->node_name = node->details->uname;
ni->promoted = pcmk_is_set(rsc->flags, pcmk_rsc_promotable) &&
child->fns->state(child, TRUE) == pcmk_role_promoted;
retval = g_list_prepend(retval, ni);
}
}
return retval;
}
GList *
cli_resource_search(pcmk_resource_t *rsc, const char *requested_name,
pcmk_scheduler_t *scheduler)
{
GList *retval = NULL;
const pcmk_resource_t *parent = pe__const_top_resource(rsc, false);
if (pe_rsc_is_clone(rsc)) {
retval = build_node_info_list(rsc);
/* The anonymous clone children's common ID is supplied */
} else if (pe_rsc_is_clone(parent)
&& !pcmk_is_set(rsc->flags, pcmk_rsc_unique)
&& rsc->clone_name
&& pcmk__str_eq(requested_name, rsc->clone_name, pcmk__str_casei)
&& !pcmk__str_eq(requested_name, rsc->id, pcmk__str_casei)) {
retval = build_node_info_list(parent);
} else if (rsc->running_on != NULL) {
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
node_info_t *ni = calloc(1, sizeof(node_info_t));
ni->node_name = node->details->uname;
ni->promoted = (rsc->fns->state(rsc, TRUE) == pcmk_role_promoted);
retval = g_list_prepend(retval, ni);
}
}
return retval;
}
// \return Standard Pacemaker return code
static int
find_resource_attr(pcmk__output_t *out, cib_t * the_cib, const char *attr,
const char *rsc, const char *attr_set_type, const char *set_name,
const char *attr_id, const char *attr_name, char **value)
{
int rc = pcmk_rc_ok;
xmlNode *xml_search = NULL;
GString *xpath = NULL;
const char *xpath_base = NULL;
if(value) {
*value = NULL;
}
if(the_cib == NULL) {
return ENOTCONN;
}
xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_RESOURCES);
if (xpath_base == NULL) {
crm_err(XML_CIB_TAG_RESOURCES " CIB element not known (bug?)");
return ENOMSG;
}
xpath = g_string_sized_new(1024);
pcmk__g_strcat(xpath,
xpath_base, "//*[@" XML_ATTR_ID "=\"", rsc, "\"]", NULL);
if (attr_set_type != NULL) {
pcmk__g_strcat(xpath, "/", attr_set_type, NULL);
if (set_name != NULL) {
pcmk__g_strcat(xpath, "[@" XML_ATTR_ID "=\"", set_name, "\"]",
NULL);
}
}
g_string_append(xpath, "//" XML_CIB_TAG_NVPAIR "[");
if (attr_id != NULL) {
pcmk__g_strcat(xpath, "@" XML_ATTR_ID "=\"", attr_id, "\"", NULL);
}
if (attr_name != NULL) {
if (attr_id != NULL) {
g_string_append(xpath, " and ");
}
pcmk__g_strcat(xpath, "@" XML_NVPAIR_ATTR_NAME "=\"", attr_name, "\"",
NULL);
}
g_string_append_c(xpath, ']');
rc = the_cib->cmds->query(the_cib, (const char *) xpath->str, &xml_search,
cib_sync_call | cib_scope_local | cib_xpath);
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
goto done;
}
crm_log_xml_debug(xml_search, "Match");
if (xml_search->children != NULL) {
xmlNode *child = NULL;
rc = ENOTUNIQ;
out->info(out, "Multiple attributes match name=%s", attr_name);
for (child = pcmk__xml_first_child(xml_search); child != NULL;
child = pcmk__xml_next(child)) {
out->info(out, " Value: %s \t(id=%s)",
crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child));
}
out->spacer(out);
} else if(value) {
pcmk__str_update(value, crm_element_value(xml_search, attr));
}
done:
g_string_free(xpath, TRUE);
free_xml(xml_search);
return rc;
}
/* PRIVATE. Use the find_matching_attr_resources instead. */
static void
find_matching_attr_resources_recursive(pcmk__output_t *out,
GList /* <pcmk_resource_t*> */ **result,
pcmk_resource_t *rsc, const char *rsc_id,
const char * attr_set, const char * attr_set_type,
const char * attr_id, const char * attr_name,
cib_t * cib, const char * cmd, int depth)
{
int rc = pcmk_rc_ok;
char *lookup_id = clone_strip(rsc->id);
char *local_attr_id = NULL;
/* visit the children */
for(GList *gIter = rsc->children; gIter; gIter = gIter->next) {
find_matching_attr_resources_recursive(out, result,
(pcmk_resource_t *) gIter->data,
rsc_id, attr_set, attr_set_type,
attr_id, attr_name, cib, cmd, depth+1);
/* do it only once for clones */
if (rsc->variant == pcmk_rsc_variant_clone) {
break;
}
}
rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type,
attr_set, attr_id, attr_name, &local_attr_id);
/* Post-order traversal.
* The root is always on the list and it is the last item. */
if((0 == depth) || (pcmk_rc_ok == rc)) {
/* push the head */
*result = g_list_append(*result, rsc);
}
free(local_attr_id);
free(lookup_id);
}
/* The result is a linearized pre-ordered tree of resources. */
static GList/*<pcmk_resource_t*>*/ *
find_matching_attr_resources(pcmk__output_t *out, pcmk_resource_t *rsc,
const char * rsc_id, const char * attr_set,
const char * attr_set_type, const char * attr_id,
const char * attr_name, cib_t * cib, const char * cmd,
gboolean force)
{
int rc = pcmk_rc_ok;
char *lookup_id = NULL;
char *local_attr_id = NULL;
GList * result = NULL;
/* If --force is used, update only the requested resource (clone or primitive).
* Otherwise, if the primitive has the attribute, use that.
* Otherwise use the clone. */
if(force == TRUE) {
return g_list_append(result, rsc);
}
if ((rsc->parent != NULL)
&& (rsc->parent->variant == pcmk_rsc_variant_clone)) {
int rc = pcmk_rc_ok;
char *local_attr_id = NULL;
rc = find_resource_attr(out, cib, XML_ATTR_ID, rsc_id, attr_set_type,
attr_set, attr_id, attr_name, &local_attr_id);
free(local_attr_id);
if(rc != pcmk_rc_ok) {
rsc = rsc->parent;
out->info(out, "Performing %s of '%s' on '%s', the parent of '%s'",
cmd, attr_name, rsc->id, rsc_id);
}
return g_list_append(result, rsc);
} else if ((rsc->parent == NULL) && (rsc->children != NULL)
&& (rsc->variant == pcmk_rsc_variant_clone)) {
pcmk_resource_t *child = rsc->children->data;
if (child->variant == pcmk_rsc_variant_primitive) {
lookup_id = clone_strip(child->id); /* Could be a cloned group! */
rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type,
attr_set, attr_id, attr_name, &local_attr_id);
if(rc == pcmk_rc_ok) {
rsc = child;
out->info(out, "A value for '%s' already exists in child '%s', performing %s on that instead of '%s'",
attr_name, lookup_id, cmd, rsc_id);
}
free(local_attr_id);
free(lookup_id);
}
return g_list_append(result, rsc);
}
/* If the resource is a group ==> children inherit the attribute if defined. */
find_matching_attr_resources_recursive(out, &result, rsc, rsc_id, attr_set,
attr_set_type, attr_id, attr_name,
cib, cmd, 0);
return result;
}
// \return Standard Pacemaker return code
int
cli_resource_update_attribute(pcmk_resource_t *rsc, const char *requested_name,
const char *attr_set, const char *attr_set_type,
const char *attr_id, const char *attr_name,
const char *attr_value, gboolean recursive,
cib_t *cib, int cib_options, gboolean force)
{
pcmk__output_t *out = rsc->cluster->priv;
int rc = pcmk_rc_ok;
char *found_attr_id = NULL;
GList/*<pcmk_resource_t*>*/ *resources = NULL;
const char *top_id = pe__const_top_resource(rsc, false)->id;
if ((attr_id == NULL) && !force) {
find_resource_attr(out, cib, XML_ATTR_ID, top_id, NULL, NULL, NULL,
attr_name, NULL);
}
if (pcmk__str_eq(attr_set_type, XML_TAG_ATTR_SETS, pcmk__str_casei)) {
if (!force) {
rc = find_resource_attr(out, cib, XML_ATTR_ID, top_id,
XML_TAG_META_SETS, attr_set, attr_id,
attr_name, &found_attr_id);
if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) {
out->err(out,
"WARNING: There is already a meta attribute "
"for '%s' called '%s' (id=%s)",
top_id, attr_name, found_attr_id);
out->err(out,
" Delete '%s' first or use the force option "
"to override", found_attr_id);
}
free(found_attr_id);
if (rc == pcmk_rc_ok) {
return ENOTUNIQ;
}
}
resources = g_list_append(resources, rsc);
} else if (pcmk__str_eq(attr_set_type, ATTR_SET_ELEMENT, pcmk__str_none)) {
crm_xml_add(rsc->xml, attr_name, attr_value);
CRM_ASSERT(cib != NULL);
rc = cib->cmds->replace(cib, XML_CIB_TAG_RESOURCES, rsc->xml,
cib_options);
rc = pcmk_legacy2rc(rc);
if (rc == pcmk_rc_ok) {
out->info(out, "Set attribute: name=%s value=%s",
attr_name, attr_value);
}
return rc;
} else {
resources = find_matching_attr_resources(out, rsc, requested_name,
attr_set, attr_set_type,
attr_id, attr_name, cib,
"update", force);
}
/* If the user specified attr_set or attr_id, the intent is to modify a
* single resource, which will be the last item in the list.
*/
if ((attr_set != NULL) || (attr_id != NULL)) {
GList *last = g_list_last(resources);
resources = g_list_remove_link(resources, last);
g_list_free(resources);
resources = last;
}
for (GList *iter = resources; iter != NULL; iter = iter->next) {
char *lookup_id = NULL;
char *local_attr_set = NULL;
const char *rsc_attr_id = attr_id;
const char *rsc_attr_set = attr_set;
xmlNode *xml_top = NULL;
xmlNode *xml_obj = NULL;
found_attr_id = NULL;
rsc = (pcmk_resource_t *) iter->data;
lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */
rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type,
attr_set, attr_id, attr_name, &found_attr_id);
switch (rc) {
case pcmk_rc_ok:
crm_debug("Found a match for name=%s: id=%s",
attr_name, found_attr_id);
rsc_attr_id = found_attr_id;
break;
case ENXIO:
if (rsc_attr_set == NULL) {
local_attr_set = crm_strdup_printf("%s-%s", lookup_id,
attr_set_type);
rsc_attr_set = local_attr_set;
}
if (rsc_attr_id == NULL) {
found_attr_id = crm_strdup_printf("%s-%s",
rsc_attr_set, attr_name);
rsc_attr_id = found_attr_id;
}
xml_top = create_xml_node(NULL, (const char *) rsc->xml->name);
crm_xml_add(xml_top, XML_ATTR_ID, lookup_id);
xml_obj = create_xml_node(xml_top, attr_set_type);
crm_xml_add(xml_obj, XML_ATTR_ID, rsc_attr_set);
break;
default:
free(lookup_id);
free(found_attr_id);
g_list_free(resources);
return rc;
}
xml_obj = crm_create_nvpair_xml(xml_obj, rsc_attr_id, attr_name,
attr_value);
if (xml_top == NULL) {
xml_top = xml_obj;
}
crm_log_xml_debug(xml_top, "Update");
rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top,
cib_options);
rc = pcmk_legacy2rc(rc);
if (rc == pcmk_rc_ok) {
out->info(out, "Set '%s' option: id=%s%s%s%s%s value=%s",
lookup_id, found_attr_id,
((rsc_attr_set == NULL)? "" : " set="),
pcmk__s(rsc_attr_set, ""),
((attr_name == NULL)? "" : " name="),
pcmk__s(attr_name, ""), attr_value);
}
free_xml(xml_top);
free(lookup_id);
free(found_attr_id);
free(local_attr_set);
if (recursive
&& pcmk__str_eq(attr_set_type, XML_TAG_META_SETS,
pcmk__str_casei)) {
GList *lpc = NULL;
static bool need_init = true;
if (need_init) {
need_init = false;
pcmk__unpack_constraints(rsc->cluster);
pe__clear_resource_flags_on_all(rsc->cluster,
pcmk_rsc_detect_loop);
}
/* We want to set the attribute only on resources explicitly
* colocated with this one, so we use rsc->rsc_cons_lhs directly
* rather than the with_this_colocations() method.
*/
pe__set_resource_flags(rsc, pcmk_rsc_detect_loop);
for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
crm_debug("Checking %s %d", cons->id, cons->score);
if (!pcmk_is_set(cons->dependent->flags, pcmk_rsc_detect_loop)
&& (cons->score > 0)) {
crm_debug("Setting %s=%s for dependent resource %s",
attr_name, attr_value, cons->dependent->id);
cli_resource_update_attribute(cons->dependent,
cons->dependent->id, NULL,
attr_set_type, NULL,
attr_name, attr_value,
recursive, cib, cib_options,
force);
}
}
}
}
g_list_free(resources);
return rc;
}
// \return Standard Pacemaker return code
int
cli_resource_delete_attribute(pcmk_resource_t *rsc, const char *requested_name,
const char *attr_set, const char *attr_set_type,
const char *attr_id, const char *attr_name,
cib_t *cib, int cib_options, gboolean force)
{
pcmk__output_t *out = rsc->cluster->priv;
int rc = pcmk_rc_ok;
GList/*<pcmk_resource_t*>*/ *resources = NULL;
if ((attr_id == NULL) && !force) {
find_resource_attr(out, cib, XML_ATTR_ID,
pe__const_top_resource(rsc, false)->id, NULL,
NULL, NULL, attr_name, NULL);
}
if (pcmk__str_eq(attr_set_type, XML_TAG_META_SETS, pcmk__str_casei)) {
resources = find_matching_attr_resources(out, rsc, requested_name,
attr_set, attr_set_type,
attr_id, attr_name, cib,
"delete", force);
} else if (pcmk__str_eq(attr_set_type, ATTR_SET_ELEMENT, pcmk__str_none)) {
xml_remove_prop(rsc->xml, attr_name);
CRM_ASSERT(cib != NULL);
rc = cib->cmds->replace(cib, XML_CIB_TAG_RESOURCES, rsc->xml,
cib_options);
rc = pcmk_legacy2rc(rc);
if (rc == pcmk_rc_ok) {
out->info(out, "Deleted attribute: %s", attr_name);
}
return rc;
} else {
resources = g_list_append(resources, rsc);
}
for (GList *iter = resources; iter != NULL; iter = iter->next) {
char *lookup_id = NULL;
xmlNode *xml_obj = NULL;
char *found_attr_id = NULL;
const char *rsc_attr_id = attr_id;
rsc = (pcmk_resource_t *) iter->data;
lookup_id = clone_strip(rsc->id);
rc = find_resource_attr(out, cib, XML_ATTR_ID, lookup_id, attr_set_type,
attr_set, attr_id, attr_name, &found_attr_id);
switch (rc) {
case pcmk_rc_ok:
break;
case ENXIO:
free(lookup_id);
rc = pcmk_rc_ok;
continue;
default:
free(lookup_id);
g_list_free(resources);
return rc;
}
if (rsc_attr_id == NULL) {
rsc_attr_id = found_attr_id;
}
xml_obj = crm_create_nvpair_xml(NULL, rsc_attr_id, attr_name, NULL);
crm_log_xml_debug(xml_obj, "Delete");
CRM_ASSERT(cib);
rc = cib->cmds->remove(cib, XML_CIB_TAG_RESOURCES, xml_obj,
cib_options);
rc = pcmk_legacy2rc(rc);
if (rc == pcmk_rc_ok) {
out->info(out, "Deleted '%s' option: id=%s%s%s%s%s",
lookup_id, found_attr_id,
((attr_set == NULL)? "" : " set="),
pcmk__s(attr_set, ""),
((attr_name == NULL)? "" : " name="),
pcmk__s(attr_name, ""));
}
free(lookup_id);
free_xml(xml_obj);
free(found_attr_id);
}
g_list_free(resources);
return rc;
}
// \return Standard Pacemaker return code
static int
send_lrm_rsc_op(pcmk_ipc_api_t *controld_api, bool do_fail_resource,
const char *host_uname, const char *rsc_id,
pcmk_scheduler_t *scheduler)
{
pcmk__output_t *out = scheduler->priv;
const char *router_node = host_uname;
const char *rsc_api_id = NULL;
const char *rsc_long_id = NULL;
const char *rsc_class = NULL;
const char *rsc_provider = NULL;
const char *rsc_type = NULL;
bool cib_only = false;
pcmk_resource_t *rsc = pe_find_resource(scheduler->resources, rsc_id);
if (rsc == NULL) {
out->err(out, "Resource %s not found", rsc_id);
return ENXIO;
} else if (rsc->variant != pcmk_rsc_variant_primitive) {
out->err(out, "We can only process primitive resources, not %s", rsc_id);
return EINVAL;
}
rsc_class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
rsc_provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER),
rsc_type = crm_element_value(rsc->xml, XML_ATTR_TYPE);
if ((rsc_class == NULL) || (rsc_type == NULL)) {
out->err(out, "Resource %s does not have a class and type", rsc_id);
return EINVAL;
}
{
pcmk_node_t *node = pe_find_node(scheduler->nodes, host_uname);
if (node == NULL) {
out->err(out, "Node %s not found", host_uname);
return pcmk_rc_node_unknown;
}
if (!(node->details->online)) {
if (do_fail_resource) {
out->err(out, "Node %s is not online", host_uname);
return ENOTCONN;
} else {
cib_only = true;
}
}
if (!cib_only && pe__is_guest_or_remote_node(node)) {
node = pe__current_node(node->details->remote_rsc);
if (node == NULL) {
out->err(out, "No cluster connection to Pacemaker Remote node %s detected",
host_uname);
return ENOTCONN;
}
router_node = node->details->uname;
}
}
if (rsc->clone_name) {
rsc_api_id = rsc->clone_name;
rsc_long_id = rsc->id;
} else {
rsc_api_id = rsc->id;
}
if (do_fail_resource) {
return pcmk_controld_api_fail(controld_api, host_uname, router_node,
rsc_api_id, rsc_long_id,
rsc_class, rsc_provider, rsc_type);
} else {
return pcmk_controld_api_refresh(controld_api, host_uname, router_node,
rsc_api_id, rsc_long_id, rsc_class,
rsc_provider, rsc_type, cib_only);
}
}
/*!
* \internal
* \brief Get resource name as used in failure-related node attributes
*
* \param[in] rsc Resource to check
*
* \return Newly allocated string containing resource's fail name
* \note The caller is responsible for freeing the result.
*/
static inline char *
rsc_fail_name(const pcmk_resource_t *rsc)
{
const char *name = (rsc->clone_name? rsc->clone_name : rsc->id);
if (pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
return strdup(name);
}
return clone_strip(name);
}
// \return Standard Pacemaker return code
static int
clear_rsc_history(pcmk_ipc_api_t *controld_api, const char *host_uname,
const char *rsc_id, pcmk_scheduler_t *scheduler)
{
int rc = pcmk_rc_ok;
/* Erase the resource's entire LRM history in the CIB, even if we're only
* clearing a single operation's fail count. If we erased only entries for a
* single operation, we might wind up with a wrong idea of the current
* resource state, and we might not re-probe the resource.
*/
rc = send_lrm_rsc_op(controld_api, false, host_uname, rsc_id, scheduler);
if (rc != pcmk_rc_ok) {
return rc;
}
crm_trace("Processing %d mainloop inputs",
pcmk_controld_api_replies_expected(controld_api));
while (g_main_context_iteration(NULL, FALSE)) {
crm_trace("Processed mainloop input, %d still remaining",
pcmk_controld_api_replies_expected(controld_api));
}
return rc;
}
// \return Standard Pacemaker return code
static int
clear_rsc_failures(pcmk__output_t *out, pcmk_ipc_api_t *controld_api,
const char *node_name, const char *rsc_id, const char *operation,
const char *interval_spec, pcmk_scheduler_t *scheduler)
{
int rc = pcmk_rc_ok;
const char *failed_value = NULL;
const char *failed_id = NULL;
char *interval_ms_s = NULL;
GHashTable *rscs = NULL;
GHashTableIter iter;
/* Create a hash table to use as a set of resources to clean. This lets us
* clean each resource only once (per node) regardless of how many failed
* operations it has.
*/
rscs = pcmk__strkey_table(NULL, NULL);
// Normalize interval to milliseconds for comparison to history entry
if (operation) {
guint interval_ms = 0U;
- pcmk__parse_interval_spec(interval_spec, &interval_ms);
+ pcmk_parse_interval_spec(interval_spec, &interval_ms);
interval_ms_s = crm_strdup_printf("%u", interval_ms);
}
for (xmlNode *xml_op = pcmk__xml_first_child(scheduler->failed);
xml_op != NULL;
xml_op = pcmk__xml_next(xml_op)) {
failed_id = crm_element_value(xml_op, XML_LRM_ATTR_RSCID);
if (failed_id == NULL) {
// Malformed history entry, should never happen
continue;
}
// No resource specified means all resources match
if (rsc_id) {
pcmk_resource_t *fail_rsc = NULL;
fail_rsc = pe_find_resource_with_flags(scheduler->resources,
failed_id,
pcmk_rsc_match_history
|pcmk_rsc_match_anon_basename);
if (!fail_rsc || !pcmk__str_eq(rsc_id, fail_rsc->id, pcmk__str_casei)) {
continue;
}
}
// Host name should always have been provided by this point
failed_value = crm_element_value(xml_op, XML_ATTR_UNAME);
if (!pcmk__str_eq(node_name, failed_value, pcmk__str_casei)) {
continue;
}
// No operation specified means all operations match
if (operation) {
failed_value = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
if (!pcmk__str_eq(operation, failed_value, pcmk__str_casei)) {
continue;
}
// Interval (if operation was specified) defaults to 0 (not all)
failed_value = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
if (!pcmk__str_eq(interval_ms_s, failed_value, pcmk__str_casei)) {
continue;
}
}
g_hash_table_add(rscs, (gpointer) failed_id);
}
free(interval_ms_s);
g_hash_table_iter_init(&iter, rscs);
while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) {
crm_debug("Erasing failures of %s on %s", failed_id, node_name);
rc = clear_rsc_history(controld_api, node_name, failed_id, scheduler);
if (rc != pcmk_rc_ok) {
return rc;
}
}
g_hash_table_destroy(rscs);
return rc;
}
// \return Standard Pacemaker return code
static int
clear_rsc_fail_attrs(const pcmk_resource_t *rsc, const char *operation,
const char *interval_spec, const pcmk_node_t *node)
{
int rc = pcmk_rc_ok;
int attr_options = pcmk__node_attr_none;
char *rsc_name = rsc_fail_name(rsc);
if (pe__is_guest_or_remote_node(node)) {
attr_options |= pcmk__node_attr_remote;
}
rc = pcmk__attrd_api_clear_failures(NULL, node->details->uname, rsc_name,
operation, interval_spec, NULL,
attr_options);
free(rsc_name);
return rc;
}
// \return Standard Pacemaker return code
int
cli_resource_delete(pcmk_ipc_api_t *controld_api, const char *host_uname,
const pcmk_resource_t *rsc, const char *operation,
const char *interval_spec, bool just_failures,
pcmk_scheduler_t *scheduler, gboolean force)
{
pcmk__output_t *out = scheduler->priv;
int rc = pcmk_rc_ok;
pcmk_node_t *node = NULL;
if (rsc == NULL) {
return ENXIO;
} else if (rsc->children) {
for (const GList *lpc = rsc->children; lpc != NULL; lpc = lpc->next) {
const pcmk_resource_t *child = (const pcmk_resource_t *) lpc->data;
rc = cli_resource_delete(controld_api, host_uname, child, operation,
interval_spec, just_failures, scheduler,
force);
if (rc != pcmk_rc_ok) {
return rc;
}
}
return pcmk_rc_ok;
} else if (host_uname == NULL) {
GList *lpc = NULL;
GList *nodes = g_hash_table_get_values(rsc->known_on);
if(nodes == NULL && force) {
nodes = pcmk__copy_node_list(scheduler->nodes, false);
} else if(nodes == NULL && rsc->exclusive_discover) {
GHashTableIter iter;
pcmk_node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void**)&node)) {
if(node->weight >= 0) {
nodes = g_list_prepend(nodes, node);
}
}
} else if(nodes == NULL) {
nodes = g_hash_table_get_values(rsc->allowed_nodes);
}
for (lpc = nodes; lpc != NULL; lpc = lpc->next) {
node = (pcmk_node_t *) lpc->data;
if (node->details->online) {
rc = cli_resource_delete(controld_api, node->details->uname, rsc,
operation, interval_spec, just_failures,
scheduler, force);
}
if (rc != pcmk_rc_ok) {
g_list_free(nodes);
return rc;
}
}
g_list_free(nodes);
return pcmk_rc_ok;
}
node = pe_find_node(scheduler->nodes, host_uname);
if (node == NULL) {
out->err(out, "Unable to clean up %s because node %s not found",
rsc->id, host_uname);
return ENODEV;
}
if (!node->details->rsc_discovery_enabled) {
out->err(out, "Unable to clean up %s because resource discovery disabled on %s",
rsc->id, host_uname);
return EOPNOTSUPP;
}
if (controld_api == NULL) {
out->err(out, "Dry run: skipping clean-up of %s on %s due to CIB_file",
rsc->id, host_uname);
return pcmk_rc_ok;
}
rc = clear_rsc_fail_attrs(rsc, operation, interval_spec, node);
if (rc != pcmk_rc_ok) {
out->err(out, "Unable to clean up %s failures on %s: %s",
rsc->id, host_uname, pcmk_rc_str(rc));
return rc;
}
if (just_failures) {
rc = clear_rsc_failures(out, controld_api, host_uname, rsc->id, operation,
interval_spec, scheduler);
} else {
rc = clear_rsc_history(controld_api, host_uname, rsc->id, scheduler);
}
if (rc != pcmk_rc_ok) {
out->err(out, "Cleaned %s failures on %s, but unable to clean history: %s",
rsc->id, host_uname, pcmk_rc_str(rc));
} else {
out->info(out, "Cleaned up %s on %s", rsc->id, host_uname);
}
return rc;
}
// \return Standard Pacemaker return code
int
cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name,
const char *operation, const char *interval_spec,
pcmk_scheduler_t *scheduler)
{
pcmk__output_t *out = scheduler->priv;
int rc = pcmk_rc_ok;
int attr_options = pcmk__node_attr_none;
const char *display_name = node_name? node_name : "all nodes";
if (controld_api == NULL) {
out->info(out, "Dry run: skipping clean-up of %s due to CIB_file",
display_name);
return rc;
}
if (node_name) {
pcmk_node_t *node = pe_find_node(scheduler->nodes, node_name);
if (node == NULL) {
out->err(out, "Unknown node: %s", node_name);
return ENXIO;
}
if (pe__is_guest_or_remote_node(node)) {
attr_options |= pcmk__node_attr_remote;
}
}
rc = pcmk__attrd_api_clear_failures(NULL, node_name, NULL, operation,
interval_spec, NULL, attr_options);
if (rc != pcmk_rc_ok) {
out->err(out, "Unable to clean up all failures on %s: %s",
display_name, pcmk_rc_str(rc));
return rc;
}
if (node_name) {
rc = clear_rsc_failures(out, controld_api, node_name, NULL,
operation, interval_spec, scheduler);
if (rc != pcmk_rc_ok) {
out->err(out, "Cleaned all resource failures on %s, but unable to clean history: %s",
node_name, pcmk_rc_str(rc));
return rc;
}
} else {
for (GList *iter = scheduler->nodes; iter; iter = iter->next) {
pcmk_node_t *node = (pcmk_node_t *) iter->data;
rc = clear_rsc_failures(out, controld_api, node->details->uname, NULL,
operation, interval_spec, scheduler);
if (rc != pcmk_rc_ok) {
out->err(out, "Cleaned all resource failures on all nodes, but unable to clean history: %s",
pcmk_rc_str(rc));
return rc;
}
}
}
out->info(out, "Cleaned up all resources on %s", display_name);
return rc;
}
static void
check_role(resource_checks_t *checks)
{
const char *role_s = g_hash_table_lookup(checks->rsc->meta,
XML_RSC_ATTR_TARGET_ROLE);
if (role_s == NULL) {
return;
}
switch (text2role(role_s)) {
case pcmk_role_stopped:
checks->flags |= rsc_remain_stopped;
break;
case pcmk_role_unpromoted:
if (pcmk_is_set(pe__const_top_resource(checks->rsc, false)->flags,
pcmk_rsc_promotable)) {
checks->flags |= rsc_unpromotable;
}
break;
default:
break;
}
}
static void
check_managed(resource_checks_t *checks)
{
const char *managed_s = g_hash_table_lookup(checks->rsc->meta,
XML_RSC_ATTR_MANAGED);
if ((managed_s != NULL) && !crm_is_true(managed_s)) {
checks->flags |= rsc_unmanaged;
}
}
static void
check_locked(resource_checks_t *checks)
{
if (checks->rsc->lock_node != NULL) {
checks->flags |= rsc_locked;
checks->lock_node = checks->rsc->lock_node->details->uname;
}
}
static bool
node_is_unhealthy(pcmk_node_t *node)
{
switch (pe__health_strategy(node->details->data_set)) {
case pcmk__health_strategy_none:
break;
case pcmk__health_strategy_no_red:
if (pe__node_health(node) < 0) {
return true;
}
break;
case pcmk__health_strategy_only_green:
if (pe__node_health(node) <= 0) {
return true;
}
break;
case pcmk__health_strategy_progressive:
case pcmk__health_strategy_custom:
/* @TODO These are finite scores, possibly with rules, and possibly
* combining with other scores, so attributing these as a cause is
* nontrivial.
*/
break;
}
return false;
}
static void
check_node_health(resource_checks_t *checks, pcmk_node_t *node)
{
if (node == NULL) {
GHashTableIter iter;
bool allowed = false;
bool all_nodes_unhealthy = true;
g_hash_table_iter_init(&iter, checks->rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
allowed = true;
if (!node_is_unhealthy(node)) {
all_nodes_unhealthy = false;
break;
}
}
if (allowed && all_nodes_unhealthy) {
checks->flags |= rsc_node_health;
}
} else if (node_is_unhealthy(node)) {
checks->flags |= rsc_node_health;
}
}
int
cli_resource_check(pcmk__output_t *out, pcmk_resource_t *rsc, pcmk_node_t *node)
{
resource_checks_t checks = { .rsc = rsc };
check_role(&checks);
check_managed(&checks);
check_locked(&checks);
check_node_health(&checks, node);
return out->message(out, "resource-check-list", &checks);
}
// \return Standard Pacemaker return code
int
cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname,
const char *rsc_id, pcmk_scheduler_t *scheduler)
{
crm_notice("Failing %s on %s", rsc_id, host_uname);
return send_lrm_rsc_op(controld_api, true, host_uname, rsc_id, scheduler);
}
static GHashTable *
generate_resource_params(pcmk_resource_t *rsc, pcmk_node_t *node,
pcmk_scheduler_t *scheduler)
{
GHashTable *params = NULL;
GHashTable *meta = NULL;
GHashTable *combined = NULL;
GHashTableIter iter;
char *key = NULL;
char *value = NULL;
combined = pcmk__strkey_table(free, free);
params = pe_rsc_params(rsc, node, scheduler);
if (params != NULL) {
g_hash_table_iter_init(&iter, params);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
g_hash_table_insert(combined, strdup(key), strdup(value));
}
}
meta = pcmk__strkey_table(free, free);
get_meta_attributes(meta, rsc, node, scheduler);
if (meta != NULL) {
g_hash_table_iter_init(&iter, meta);
while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
char *crm_name = crm_meta_name(key);
g_hash_table_insert(combined, crm_name, strdup(value));
}
g_hash_table_destroy(meta);
}
return combined;
}
bool resource_is_running_on(pcmk_resource_t *rsc, const char *host)
{
bool found = true;
GList *hIter = NULL;
GList *hosts = NULL;
if (rsc == NULL) {
return false;
}
rsc->fns->location(rsc, &hosts, TRUE);
for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) {
pcmk_node_t *node = (pcmk_node_t *) hIter->data;
if (pcmk__strcase_any_of(host, node->details->uname, node->details->id, NULL)) {
crm_trace("Resource %s is running on %s\n", rsc->id, host);
goto done;
}
}
if (host != NULL) {
crm_trace("Resource %s is not running on: %s\n", rsc->id, host);
found = false;
} else if(host == NULL && hosts == NULL) {
crm_trace("Resource %s is not running\n", rsc->id);
found = false;
}
done:
g_list_free(hosts);
return found;
}
/*!
* \internal
* \brief Create a list of all resources active on host from a given list
*
* \param[in] host Name of host to check whether resources are active
* \param[in] rsc_list List of resources to check
*
* \return New list of resources from list that are active on host
*/
static GList *
get_active_resources(const char *host, GList *rsc_list)
{
GList *rIter = NULL;
GList *active = NULL;
for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) rIter->data;
/* Expand groups to their members, because if we're restarting a member
* other than the first, we can't otherwise tell which resources are
* stopping and starting.
*/
if (rsc->variant == pcmk_rsc_variant_group) {
active = g_list_concat(active,
get_active_resources(host, rsc->children));
} else if (resource_is_running_on(rsc, host)) {
active = g_list_append(active, strdup(rsc->id));
}
}
return active;
}
static void dump_list(GList *items, const char *tag)
{
int lpc = 0;
GList *item = NULL;
for (item = items; item != NULL; item = item->next) {
crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data);
lpc++;
}
}
static void display_list(pcmk__output_t *out, GList *items, const char *tag)
{
GList *item = NULL;
for (item = items; item != NULL; item = item->next) {
out->info(out, "%s%s", tag, (const char *)item->data);
}
}
/*!
* \internal
* \brief Upgrade XML to latest schema version and use it as scheduler input
*
* This also updates the scheduler timestamp to the current time.
*
* \param[in,out] scheduler Scheduler data to update
* \param[in,out] xml XML to use as input
*
* \return Standard Pacemaker return code
* \note On success, caller is responsible for freeing memory allocated for
* scheduler->now.
* \todo This follows the example of other callers of cli_config_update()
* and returns ENOKEY ("Required key not available") if that fails,
* but perhaps pcmk_rc_schema_validation would be better in that case.
*/
int
update_scheduler_input(pcmk_scheduler_t *scheduler, xmlNode **xml)
{
if (cli_config_update(xml, NULL, FALSE) == FALSE) {
return ENOKEY;
}
scheduler->input = *xml;
scheduler->now = crm_time_new(NULL);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Update scheduler XML input based on a CIB query
*
* \param[in] scheduler Scheduler data to initialize
* \param[in] cib Connection to the CIB manager
*
* \return Standard Pacemaker return code
* \note On success, caller is responsible for freeing memory allocated for
* scheduler->input and scheduler->now.
*/
static int
update_scheduler_input_to_cib(pcmk__output_t *out, pcmk_scheduler_t *scheduler,
cib_t *cib)
{
xmlNode *cib_xml_copy = NULL;
int rc = pcmk_rc_ok;
rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call);
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
out->err(out, "Could not obtain the current CIB: %s (%d)", pcmk_rc_str(rc), rc);
return rc;
}
rc = update_scheduler_input(scheduler, &cib_xml_copy);
if (rc != pcmk_rc_ok) {
out->err(out, "Could not upgrade the current CIB XML");
free_xml(cib_xml_copy);
return rc;
}
return rc;
}
// \return Standard Pacemaker return code
static int
update_dataset(cib_t *cib, pcmk_scheduler_t *scheduler, bool simulate)
{
char *pid = NULL;
char *shadow_file = NULL;
cib_t *shadow_cib = NULL;
int rc = pcmk_rc_ok;
pcmk__output_t *out = scheduler->priv;
pe_reset_working_set(scheduler);
pe__set_working_set_flags(scheduler,
pcmk_sched_no_counts|pcmk_sched_no_compat);
rc = update_scheduler_input_to_cib(out, scheduler, cib);
if (rc != pcmk_rc_ok) {
return rc;
}
if(simulate) {
bool prev_quiet = false;
pid = pcmk__getpid_s();
shadow_cib = cib_shadow_new(pid);
shadow_file = get_shadow_file(pid);
if (shadow_cib == NULL) {
out->err(out, "Could not create shadow cib: '%s'", pid);
rc = ENXIO;
goto done;
}
rc = write_xml_file(scheduler->input, shadow_file, FALSE);
if (rc < 0) {
out->err(out, "Could not populate shadow cib: %s (%d)", pcmk_strerror(rc), rc);
goto done;
}
rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command);
rc = pcmk_legacy2rc(rc);
if (rc != pcmk_rc_ok) {
out->err(out, "Could not connect to shadow cib: %s (%d)", pcmk_rc_str(rc), rc);
goto done;
}
pcmk__schedule_actions(scheduler->input,
pcmk_sched_no_counts|pcmk_sched_no_compat,
scheduler);
prev_quiet = out->is_quiet(out);
out->quiet = true;
pcmk__simulate_transition(scheduler, shadow_cib, NULL);
out->quiet = prev_quiet;
rc = update_dataset(shadow_cib, scheduler, false);
} else {
cluster_status(scheduler);
}
done:
// Do not free scheduler->input here, we need rsc->xml to be valid later on
cib_delete(shadow_cib);
free(pid);
if(shadow_file) {
unlink(shadow_file);
free(shadow_file);
}
return rc;
}
/*!
* \internal
* \brief Find the maximum stop timeout of a resource and its children (if any)
*
* \param[in,out] rsc Resource to get timeout for
*
* \return Maximum stop timeout for \p rsc (in milliseconds)
*/
static int
max_rsc_stop_timeout(pcmk_resource_t *rsc)
{
long long result_ll;
int max_delay = 0;
xmlNode *config = NULL;
GHashTable *meta = NULL;
if (rsc == NULL) {
return 0;
}
// If resource is collective, use maximum of its children's stop timeouts
if (rsc->children != NULL) {
for (GList *iter = rsc->children; iter; iter = iter->next) {
pcmk_resource_t *child = iter->data;
int delay = max_rsc_stop_timeout(child);
if (delay > max_delay) {
pcmk__rsc_trace(rsc,
"Maximum stop timeout for %s is now %s "
"due to %s", rsc->id,
pcmk__readable_interval(delay), child->id);
max_delay = delay;
}
}
return max_delay;
}
// Get resource's stop action configuration from CIB
config = pcmk__find_action_config(rsc, PCMK_ACTION_STOP, 0, true);
/* Get configured timeout for stop action (fully evaluated for rules,
* defaults, etc.).
*
* @TODO This currently ignores node (which might matter for rules)
*/
meta = pcmk__unpack_action_meta(rsc, NULL, PCMK_ACTION_STOP, 0, config);
if ((pcmk__scan_ll(g_hash_table_lookup(meta, XML_ATTR_TIMEOUT),
&result_ll, -1LL) == pcmk_rc_ok)
&& (result_ll >= 0) && (result_ll <= INT_MAX)) {
max_delay = (int) result_ll;
}
g_hash_table_destroy(meta);
return max_delay;
}
/*!
* \internal
* \brief Find a reasonable waiting time for stopping any one resource in a list
*
* \param[in,out] scheduler Scheduler data
* \param[in] resources List of names of resources that will be stopped
*
* \return Rough estimate of a reasonable time to wait (in seconds) to stop any
* one resource in \p resources
* \note This estimate is very rough, simply the maximum stop timeout of all
* given resources and their children, plus a small fudge factor. It does
* not account for children that must be stopped in sequence, action
* throttling, or any demotions needed. It checks the stop timeout, even
* if the resources in question are actually being started.
*/
static int
wait_time_estimate(pcmk_scheduler_t *scheduler, const GList *resources)
{
int max_delay = 0;
// Find maximum stop timeout in milliseconds
for (const GList *item = resources; item != NULL; item = item->next) {
pcmk_resource_t *rsc = pe_find_resource(scheduler->resources,
(const char *) item->data);
int delay = max_rsc_stop_timeout(rsc);
if (delay > max_delay) {
pcmk__rsc_trace(rsc,
"Wait time is now %s due to %s",
pcmk__readable_interval(delay), rsc->id);
max_delay = delay;
}
}
return (max_delay / 1000) + 5;
}
#define waiting_for_starts(d, r, h) ((d != NULL) || \
(!resource_is_running_on((r), (h))))
/*!
* \internal
* \brief Restart a resource (on a particular host if requested).
*
* \param[in,out] out Output object
* \param[in,out] rsc The resource to restart
* \param[in] node Node to restart resource on (NULL for all)
* \param[in] move_lifetime If not NULL, how long constraint should
* remain in effect (as ISO 8601 string)
* \param[in] timeout_ms Consider failed if actions do not complete
* in this time (specified in milliseconds,
* but a two-second granularity is actually
* used; if 0, it will be calculated based on
* the resource timeout)
* \param[in,out] cib Connection to the CIB manager
* \param[in] cib_options Group of enum cib_call_options flags to
* use with CIB calls
* \param[in] promoted_role_only If true, limit to promoted instances
* \param[in] force If true, apply only to requested instance
* if part of a collective resource
*
* \return Standard Pacemaker return code (exits on certain failures)
*/
int
cli_resource_restart(pcmk__output_t *out, pcmk_resource_t *rsc,
const pcmk_node_t *node, const char *move_lifetime,
int timeout_ms, cib_t *cib, int cib_options,
gboolean promoted_role_only, gboolean force)
{
int rc = pcmk_rc_ok;
int lpc = 0;
int before = 0;
int step_timeout_s = 0;
int sleep_interval = 2;
int timeout = timeout_ms / 1000;
bool stop_via_ban = false;
char *rsc_id = NULL;
char *lookup_id = NULL;
char *orig_target_role = NULL;
GList *list_delta = NULL;
GList *target_active = NULL;
GList *current_active = NULL;
GList *restart_target_active = NULL;
pcmk_scheduler_t *scheduler = NULL;
pcmk_resource_t *parent = uber_parent(rsc);
bool running = false;
const char *id = rsc->clone_name ? rsc->clone_name : rsc->id;
const char *host = node ? node->details->uname : NULL;
/* If the implicit resource or primitive resource of a bundle is given, operate on the
* bundle itself instead.
*/
if (pe_rsc_is_bundled(rsc)) {
rsc = parent->parent;
}
running = resource_is_running_on(rsc, host);
if (pe_rsc_is_clone(parent) && !running) {
if (pe_rsc_is_unique_clone(parent)) {
lookup_id = strdup(rsc->id);
} else {
lookup_id = clone_strip(rsc->id);
}
rsc = parent->fns->find_rsc(parent, lookup_id, node,
pcmk_rsc_match_basename
|pcmk_rsc_match_current_node);
free(lookup_id);
running = resource_is_running_on(rsc, host);
}
if (!running) {
if (host) {
out->err(out, "%s is not running on %s and so cannot be restarted", id, host);
} else {
out->err(out, "%s is not running anywhere and so cannot be restarted", id);
}
return ENXIO;
}
if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
out->err(out, "Unmanaged resources cannot be restarted.");
return EAGAIN;
}
rsc_id = strdup(rsc->id);
if (pe_rsc_is_unique_clone(parent)) {
lookup_id = strdup(rsc->id);
} else {
lookup_id = clone_strip(rsc->id);
}
if (host) {
if (pe_rsc_is_clone(rsc) || pe_bundle_replicas(rsc)) {
stop_via_ban = true;
} else if (pe_rsc_is_clone(parent)) {
stop_via_ban = true;
free(lookup_id);
lookup_id = strdup(parent->id);
}
}
/*
grab full cib
determine originally active resources
disable or ban
poll cib and watch for affected resources to get stopped
without --timeout, calculate the stop timeout for each step and wait for that
if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down
if everything stopped, re-enable or un-ban
poll cib and watch for affected resources to get started
without --timeout, calculate the start timeout for each step and wait for that
if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up
report success
Optimizations:
- use constraints to determine ordered list of affected resources
- Allow a --no-deps option (aka. --force-restart)
*/
scheduler = pe_new_working_set();
if (scheduler == NULL) {
rc = errno;
out->err(out, "Could not allocate scheduler data: %s", pcmk_rc_str(rc));
goto done;
}
scheduler->priv = out;
rc = update_dataset(cib, scheduler, false);
if(rc != pcmk_rc_ok) {
out->err(out, "Could not get new resource list: %s (%d)", pcmk_rc_str(rc), rc);
goto done;
}
restart_target_active = get_active_resources(host, scheduler->resources);
current_active = get_active_resources(host, scheduler->resources);
dump_list(current_active, "Origin");
if (stop_via_ban) {
/* Stop the clone or bundle instance by banning it from the host */
out->quiet = true;
rc = cli_resource_ban(out, lookup_id, host, move_lifetime, cib,
cib_options, promoted_role_only,
PCMK__ROLE_PROMOTED);
} else {
/* Stop the resource by setting target-role to Stopped.
* Remember any existing target-role so we can restore it later
* (though it only makes any difference if it's Unpromoted).
*/
find_resource_attr(out, cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL,
NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role);
rc = cli_resource_update_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS,
NULL, XML_RSC_ATTR_TARGET_ROLE,
PCMK_ACTION_STOPPED, FALSE, cib,
cib_options, force);
}
if(rc != pcmk_rc_ok) {
out->err(out, "Could not set target-role for %s: %s (%d)", rsc_id, pcmk_rc_str(rc), rc);
if (current_active != NULL) {
g_list_free_full(current_active, free);
current_active = NULL;
}
if (restart_target_active != NULL) {
g_list_free_full(restart_target_active, free);
restart_target_active = NULL;
}
goto done;
}
rc = update_dataset(cib, scheduler, true);
if(rc != pcmk_rc_ok) {
out->err(out, "Could not determine which resources would be stopped");
goto failure;
}
target_active = get_active_resources(host, scheduler->resources);
dump_list(target_active, "Target");
list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp);
out->info(out, "Waiting for %d resources to stop:", g_list_length(list_delta));
display_list(out, list_delta, " * ");
step_timeout_s = timeout / sleep_interval;
while (list_delta != NULL) {
before = g_list_length(list_delta);
if(timeout_ms == 0) {
step_timeout_s = wait_time_estimate(scheduler, list_delta)
/ sleep_interval;
}
/* We probably don't need the entire step timeout */
for(lpc = 0; (lpc < step_timeout_s) && (list_delta != NULL); lpc++) {
sleep(sleep_interval);
if(timeout) {
timeout -= sleep_interval;
crm_trace("%ds remaining", timeout);
}
rc = update_dataset(cib, scheduler, FALSE);
if(rc != pcmk_rc_ok) {
out->err(out, "Could not determine which resources were stopped");
goto failure;
}
if (current_active != NULL) {
g_list_free_full(current_active, free);
}
current_active = get_active_resources(host, scheduler->resources);
g_list_free(list_delta);
list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp);
dump_list(current_active, "Current");
dump_list(list_delta, "Delta");
}
crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before);
if(before == g_list_length(list_delta)) {
/* aborted during stop phase, print the contents of list_delta */
out->err(out, "Could not complete shutdown of %s, %d resources remaining", rsc_id, g_list_length(list_delta));
display_list(out, list_delta, " * ");
rc = ETIME;
goto failure;
}
}
if (stop_via_ban) {
rc = cli_resource_clear(lookup_id, host, NULL, cib, cib_options, true, force);
} else if (orig_target_role) {
rc = cli_resource_update_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS,
NULL, XML_RSC_ATTR_TARGET_ROLE,
orig_target_role, FALSE, cib,
cib_options, force);
free(orig_target_role);
orig_target_role = NULL;
} else {
rc = cli_resource_delete_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS,
NULL, XML_RSC_ATTR_TARGET_ROLE, cib,
cib_options, force);
}
if(rc != pcmk_rc_ok) {
out->err(out, "Could not unset target-role for %s: %s (%d)", rsc_id, pcmk_rc_str(rc), rc);
goto done;
}
if (target_active != NULL) {
g_list_free_full(target_active, free);
}
target_active = restart_target_active;
list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp);
out->info(out, "Waiting for %d resources to start again:", g_list_length(list_delta));
display_list(out, list_delta, " * ");
step_timeout_s = timeout / sleep_interval;
while (waiting_for_starts(list_delta, rsc, host)) {
before = g_list_length(list_delta);
if(timeout_ms == 0) {
step_timeout_s = wait_time_estimate(scheduler, list_delta)
/ sleep_interval;
}
/* We probably don't need the entire step timeout */
for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) {
sleep(sleep_interval);
if(timeout) {
timeout -= sleep_interval;
crm_trace("%ds remaining", timeout);
}
rc = update_dataset(cib, scheduler, false);
if(rc != pcmk_rc_ok) {
out->err(out, "Could not determine which resources were started");
goto failure;
}
/* It's OK if dependent resources moved to a different node,
* so we check active resources on all nodes.
*/
if (current_active != NULL) {
g_list_free_full(current_active, free);
}
current_active = get_active_resources(NULL, scheduler->resources);
g_list_free(list_delta);
list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp);
dump_list(current_active, "Current");
dump_list(list_delta, "Delta");
}
if(before == g_list_length(list_delta)) {
/* aborted during start phase, print the contents of list_delta */
out->err(out, "Could not complete restart of %s, %d resources remaining", rsc_id, g_list_length(list_delta));
display_list(out, list_delta, " * ");
rc = ETIME;
goto failure;
}
}
rc = pcmk_rc_ok;
goto done;
failure:
if (stop_via_ban) {
cli_resource_clear(lookup_id, host, NULL, cib, cib_options, true, force);
} else if (orig_target_role) {
cli_resource_update_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS, NULL,
XML_RSC_ATTR_TARGET_ROLE, orig_target_role,
FALSE, cib, cib_options, force);
free(orig_target_role);
} else {
cli_resource_delete_attribute(rsc, rsc_id, NULL, XML_TAG_META_SETS,
NULL, XML_RSC_ATTR_TARGET_ROLE, cib,
cib_options, force);
}
done:
if (list_delta != NULL) {
g_list_free(list_delta);
}
if (current_active != NULL) {
g_list_free_full(current_active, free);
}
if (target_active != NULL && (target_active != restart_target_active)) {
g_list_free_full(target_active, free);
}
if (restart_target_active != NULL) {
g_list_free_full(restart_target_active, free);
}
free(rsc_id);
free(lookup_id);
pe_free_working_set(scheduler);
return rc;
}
static inline bool
action_is_pending(const pcmk_action_t *action)
{
if (pcmk_any_flags_set(action->flags,
pcmk_action_optional|pcmk_action_pseudo)
|| !pcmk_is_set(action->flags, pcmk_action_runnable)
|| pcmk__str_eq(PCMK_ACTION_NOTIFY, action->task, pcmk__str_casei)) {
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether any actions in a list are pending
*
* \param[in] actions List of actions to check
*
* \return true if any actions in the list are pending, otherwise false
*/
static bool
actions_are_pending(const GList *actions)
{
for (const GList *action = actions; action != NULL; action = action->next) {
const pcmk_action_t *a = (const pcmk_action_t *) action->data;
if (action_is_pending(a)) {
crm_notice("Waiting for %s (flags=%#.8x)", a->uuid, a->flags);
return true;
}
}
return false;
}
static void
print_pending_actions(pcmk__output_t *out, GList *actions)
{
GList *action;
out->info(out, "Pending actions:");
for (action = actions; action != NULL; action = action->next) {
pcmk_action_t *a = (pcmk_action_t *) action->data;
if (!action_is_pending(a)) {
continue;
}
if (a->node) {
out->info(out, "\tAction %d: %s\ton %s",
a->id, a->uuid, pe__node_name(a->node));
} else {
out->info(out, "\tAction %d: %s", a->id, a->uuid);
}
}
}
/* For --wait, timeout (in seconds) to use if caller doesn't specify one */
#define WAIT_DEFAULT_TIMEOUT_S (60 * 60)
/* For --wait, how long to sleep between cluster state checks */
#define WAIT_SLEEP_S (2)
/*!
* \internal
* \brief Wait until all pending cluster actions are complete
*
* This waits until either the CIB's transition graph is idle or a timeout is
* reached.
*
* \param[in,out] out Output object
* \param[in] timeout_ms Consider failed if actions do not complete in
* this time (specified in milliseconds, but
* one-second granularity is actually used; if 0, a
* default will be used)
* \param[in,out] cib Connection to the CIB manager
*
* \return Standard Pacemaker return code
*/
int
wait_till_stable(pcmk__output_t *out, int timeout_ms, cib_t * cib)
{
pcmk_scheduler_t *scheduler = NULL;
xmlXPathObjectPtr search;
int rc = pcmk_rc_ok;
bool pending_unknown_state_resources;
int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S;
time_t expire_time = time(NULL) + timeout_s;
time_t time_diff;
bool printed_version_warning = out->is_quiet(out); // i.e. don't print if quiet
scheduler = pe_new_working_set();
if (scheduler == NULL) {
return ENOMEM;
}
do {
/* Abort if timeout is reached */
time_diff = expire_time - time(NULL);
if (time_diff > 0) {
crm_info("Waiting up to %lld seconds for cluster actions to complete", (long long) time_diff);
} else {
print_pending_actions(out, scheduler->actions);
pe_free_working_set(scheduler);
return ETIME;
}
if (rc == pcmk_rc_ok) { /* this avoids sleep on first loop iteration */
sleep(WAIT_SLEEP_S);
}
/* Get latest transition graph */
pe_reset_working_set(scheduler);
rc = update_scheduler_input_to_cib(out, scheduler, cib);
if (rc != pcmk_rc_ok) {
pe_free_working_set(scheduler);
return rc;
}
pcmk__schedule_actions(scheduler->input,
pcmk_sched_no_counts|pcmk_sched_no_compat,
scheduler);
if (!printed_version_warning) {
/* If the DC has a different version than the local node, the two
* could come to different conclusions about what actions need to be
* done. Warn the user in this case.
*
* @TODO A possible long-term solution would be to reimplement the
* wait as a new controller operation that would be forwarded to the
* DC. However, that would have potential problems of its own.
*/
const char *dc_version = g_hash_table_lookup(scheduler->config_hash,
PCMK_OPT_DC_VERSION);
if (!pcmk__str_eq(dc_version, PACEMAKER_VERSION "-" BUILD_VERSION, pcmk__str_casei)) {
out->info(out, "warning: wait option may not work properly in "
"mixed-version cluster");
printed_version_warning = true;
}
}
search = xpath_search(scheduler->input, "/cib/status/node_state/lrm/lrm_resources/lrm_resource/"
XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_RC "='193']");
pending_unknown_state_resources = (numXpathResults(search) > 0);
freeXpathObject(search);
} while (actions_are_pending(scheduler->actions) || pending_unknown_state_resources);
pe_free_working_set(scheduler);
return rc;
}
static const char *
get_action(const char *rsc_action) {
const char *action = NULL;
if (pcmk__str_eq(rsc_action, "validate", pcmk__str_casei)) {
action = PCMK_ACTION_VALIDATE_ALL;
} else if (pcmk__str_eq(rsc_action, "force-check", pcmk__str_casei)) {
action = PCMK_ACTION_MONITOR;
} else if (pcmk__strcase_any_of(rsc_action, "force-start", "force-stop",
"force-demote", "force-promote", NULL)) {
action = rsc_action+6;
} else {
action = rsc_action;
}
return action;
}
/*!
* \brief Set up environment variables as expected by resource agents
*
* When the cluster executes resource agents, it adds certain environment
* variables (directly or via resource meta-attributes) expected by some
* resource agents. Add the essential ones that many resource agents expect, so
* the behavior is the same for command-line execution.
*
* \param[in,out] params Resource parameters that will be passed to agent
* \param[in] timeout_ms Action timeout (in milliseconds)
* \param[in] check_level OCF check level
* \param[in] verbosity Verbosity level
*/
static void
set_agent_environment(GHashTable *params, int timeout_ms, int check_level,
int verbosity)
{
g_hash_table_insert(params, strdup("CRM_meta_timeout"),
crm_strdup_printf("%d", timeout_ms));
g_hash_table_insert(params, strdup(XML_ATTR_CRM_VERSION),
strdup(CRM_FEATURE_SET));
if (check_level >= 0) {
char *level = crm_strdup_printf("%d", check_level);
setenv("OCF_CHECK_LEVEL", level, 1);
free(level);
}
pcmk__set_env_option(PCMK__ENV_DEBUG, ((verbosity > 0)? "1" : "0"), true);
if (verbosity > 1) {
setenv("OCF_TRACE_RA", "1", 1);
}
/* A resource agent using the standard ocf-shellfuncs library will not print
* messages to stderr if it doesn't have a controlling terminal (e.g. if
* crm_resource is called via script or ssh). This forces it to do so.
*/
setenv("OCF_TRACE_FILE", "/dev/stderr", 0);
}
/*!
* \internal
* \brief Apply command-line overrides to resource parameters
*
* \param[in,out] params Parameters to be passed to agent
* \param[in] overrides Parameters to override (or NULL if none)
*/
static void
apply_overrides(GHashTable *params, GHashTable *overrides)
{
if (overrides != NULL) {
GHashTableIter iter;
char *name = NULL;
char *value = NULL;
g_hash_table_iter_init(&iter, overrides);
while (g_hash_table_iter_next(&iter, (gpointer *) &name,
(gpointer *) &value)) {
g_hash_table_replace(params, strdup(name), strdup(value));
}
}
}
crm_exit_t
cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name,
const char *rsc_class, const char *rsc_prov,
const char *rsc_type, const char *rsc_action,
GHashTable *params, GHashTable *override_hash,
int timeout_ms, int resource_verbose, gboolean force,
int check_level)
{
const char *class = rsc_class;
const char *action = get_action(rsc_action);
crm_exit_t exit_code = CRM_EX_OK;
svc_action_t *op = NULL;
// If no timeout was provided, use the same default as the cluster
if (timeout_ms == 0) {
timeout_ms = PCMK_DEFAULT_ACTION_TIMEOUT_MS;
}
set_agent_environment(params, timeout_ms, check_level, resource_verbose);
apply_overrides(params, override_hash);
op = services__create_resource_action(rsc_name? rsc_name : "test",
rsc_class, rsc_prov, rsc_type, action,
0, timeout_ms, params, 0);
if (op == NULL) {
out->err(out, "Could not execute %s using %s%s%s:%s: %s",
action, rsc_class, (rsc_prov? ":" : ""),
(rsc_prov? rsc_prov : ""), rsc_type, strerror(ENOMEM));
g_hash_table_destroy(params);
return CRM_EX_OSERR;
}
if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
class = resources_find_service_class(rsc_type);
}
if (!pcmk__strcase_any_of(class, PCMK_RESOURCE_CLASS_OCF,
PCMK_RESOURCE_CLASS_LSB, NULL)) {
services__format_result(op, CRM_EX_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR,
"Manual execution of the %s standard is "
"unsupported", pcmk__s(class, "unspecified"));
}
if (op->rc != PCMK_OCF_UNKNOWN) {
exit_code = op->rc;
goto done;
}
services_action_sync(op);
// Map results to OCF codes for consistent reporting to user
{
enum ocf_exitcode ocf_code = services_result2ocf(class, action, op->rc);
// Cast variable instead of function return to keep compilers happy
exit_code = (crm_exit_t) ocf_code;
}
done:
out->message(out, "resource-agent-action", resource_verbose, rsc_class,
rsc_prov, rsc_type, rsc_name, rsc_action, override_hash,
exit_code, op->status, services__exit_reason(op),
op->stdout_data, op->stderr_data);
services_action_free(op);
return exit_code;
}
crm_exit_t
cli_resource_execute(pcmk_resource_t *rsc, const char *requested_name,
const char *rsc_action, GHashTable *override_hash,
int timeout_ms, cib_t *cib, pcmk_scheduler_t *scheduler,
int resource_verbose, gboolean force, int check_level)
{
pcmk__output_t *out = scheduler->priv;
crm_exit_t exit_code = CRM_EX_OK;
const char *rid = NULL;
const char *rtype = NULL;
const char *rprov = NULL;
const char *rclass = NULL;
GHashTable *params = NULL;
if (pcmk__strcase_any_of(rsc_action, "force-start", "force-demote",
"force-promote", NULL)) {
if(pe_rsc_is_clone(rsc)) {
GList *nodes = cli_resource_search(rsc, requested_name, scheduler);
if(nodes != NULL && force == FALSE) {
out->err(out, "It is not safe to %s %s here: the cluster claims it is already active",
rsc_action, rsc->id);
out->err(out, "Try setting target-role=Stopped first or specifying "
"the force option");
return CRM_EX_UNSAFE;
}
g_list_free_full(nodes, free);
}
}
if(pe_rsc_is_clone(rsc)) {
/* Grab the first child resource in the hope it's not a group */
rsc = rsc->children->data;
}
if (rsc->variant == pcmk_rsc_variant_group) {
out->err(out, "Sorry, the %s option doesn't support group resources", rsc_action);
return CRM_EX_UNIMPLEMENT_FEATURE;
} else if (pe_rsc_is_bundled(rsc)) {
out->err(out, "Sorry, the %s option doesn't support bundled resources", rsc_action);
return CRM_EX_UNIMPLEMENT_FEATURE;
}
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE);
params = generate_resource_params(rsc, NULL /* @TODO use local node */,
scheduler);
if (timeout_ms == 0) {
timeout_ms = pe_get_configured_timeout(rsc, get_action(rsc_action),
scheduler);
}
rid = pe_rsc_is_anon_clone(rsc->parent)? requested_name : rsc->id;
exit_code = cli_resource_execute_from_params(out, rid, rclass, rprov, rtype, rsc_action,
params, override_hash, timeout_ms,
resource_verbose, force, check_level);
return exit_code;
}
// \return Standard Pacemaker return code
int
cli_resource_move(const pcmk_resource_t *rsc, const char *rsc_id,
const char *host_name, const char *move_lifetime, cib_t *cib,
int cib_options, pcmk_scheduler_t *scheduler,
gboolean promoted_role_only, gboolean force)
{
pcmk__output_t *out = scheduler->priv;
int rc = pcmk_rc_ok;
unsigned int count = 0;
pcmk_node_t *current = NULL;
pcmk_node_t *dest = pe_find_node(scheduler->nodes, host_name);
bool cur_is_dest = false;
if (dest == NULL) {
return pcmk_rc_node_unknown;
}
if (promoted_role_only
&& !pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) {
const pcmk_resource_t *p = pe__const_top_resource(rsc, false);
if (pcmk_is_set(p->flags, pcmk_rsc_promotable)) {
out->info(out, "Using parent '%s' for move instead of '%s'.", rsc->id, rsc_id);
rsc_id = p->id;
rsc = p;
} else {
out->info(out, "Ignoring --promoted option: %s is not promotable",
rsc_id);
promoted_role_only = FALSE;
}
}
current = pe__find_active_requires(rsc, &count);
if (pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) {
unsigned int promoted_count = 0;
pcmk_node_t *promoted_node = NULL;
for (const GList *iter = rsc->children; iter; iter = iter->next) {
const pcmk_resource_t *child = (const pcmk_resource_t *) iter->data;
enum rsc_role_e child_role = child->fns->state(child, TRUE);
if (child_role == pcmk_role_promoted) {
rsc = child;
promoted_node = pe__current_node(child);
promoted_count++;
}
}
if (promoted_role_only || (promoted_count != 0)) {
count = promoted_count;
current = promoted_node;
}
}
if (count > 1) {
if (pe_rsc_is_clone(rsc)) {
current = NULL;
} else {
return pcmk_rc_multiple;
}
}
if (current && (current->details == dest->details)) {
cur_is_dest = true;
if (force) {
crm_info("%s is already %s on %s, reinforcing placement with location constraint.",
rsc_id, promoted_role_only?"promoted":"active",
pe__node_name(dest));
} else {
return pcmk_rc_already;
}
}
/* Clear any previous prefer constraints across all nodes. */
cli_resource_clear(rsc_id, NULL, scheduler->nodes, cib, cib_options, false,
force);
/* Clear any previous ban constraints on 'dest'. */
cli_resource_clear(rsc_id, dest->details->uname, scheduler->nodes, cib,
cib_options, TRUE, force);
/* Record an explicit preference for 'dest' */
rc = cli_resource_prefer(out, rsc_id, dest->details->uname, move_lifetime,
cib, cib_options, promoted_role_only,
PCMK__ROLE_PROMOTED);
crm_trace("%s%s now prefers %s%s",
rsc->id, (promoted_role_only? " (promoted)" : ""),
pe__node_name(dest), force?"(forced)":"");
/* only ban the previous location if current location != destination location.
* it is possible to use -M to enforce a location without regard of where the
* resource is currently located */
if (force && !cur_is_dest) {
/* Ban the original location if possible */
if(current) {
(void)cli_resource_ban(out, rsc_id, current->details->uname, move_lifetime,
cib, cib_options, promoted_role_only,
PCMK__ROLE_PROMOTED);
} else if(count > 1) {
out->info(out, "Resource '%s' is currently %s in %d locations. "
"One may now move to %s",
rsc_id, (promoted_role_only? "promoted" : "active"),
count, pe__node_name(dest));
out->info(out, "To prevent '%s' from being %s at a specific location, "
"specify a node.",
rsc_id, (promoted_role_only? "promoted" : "active"));
} else {
crm_trace("Not banning %s from its current location: not active", rsc_id);
}
}
return rc;
}
diff --git a/tools/crmadmin.c b/tools/crmadmin.c
index 0a31c5c7d7..01f96125c1 100644
--- a/tools/crmadmin.c
+++ b/tools/crmadmin.c
@@ -1,272 +1,271 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h> // atoi()
#include <glib.h> // gboolean, GMainLoop, etc.
#include <libxml/tree.h> // xmlNode
#include <pacemaker-internal.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/output_internal.h>
#define SUMMARY "query and manage the Pacemaker controller"
static enum {
cmd_none,
cmd_health,
cmd_whois_dc,
cmd_list_nodes,
cmd_pacemakerd_health,
} command = cmd_none;
struct {
gboolean health;
guint timeout;
char *optarg;
char *ipc_name;
gboolean bash_export;
} options = {
.optarg = NULL,
.ipc_name = NULL,
.bash_export = FALSE
};
gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
static GOptionEntry command_options[] = {
{ "status", 'S', 0, G_OPTION_ARG_CALLBACK, command_cb,
"Display the status of the specified node."
"\n Result is state of node's internal finite state"
"\n machine, which can be useful for debugging",
"NODE"
},
{ "pacemakerd", 'P', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Display the status of local pacemakerd."
"\n Result is the state of the sub-daemons watched"
"\n by pacemakerd.",
NULL
},
{ "dc_lookup", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Display the uname of the node co-ordinating the cluster."
"\n This is an internal detail rarely useful to"
"\n administrators except when deciding on which"
"\n node to examine the logs.",
NULL
},
{ "nodes", 'N', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, command_cb,
"Display the uname of all member nodes [optionally filtered by type (comma-separated)]"
"\n Types: all (default), cluster, guest, remote",
"TYPE"
},
{ "health", 'H', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.health,
NULL,
NULL
},
{ NULL }
};
static GOptionEntry additional_options[] = {
{ "timeout", 't', 0, G_OPTION_ARG_CALLBACK, command_cb,
"Time to wait before declaring the operation"
"\n failed",
"TIMESPEC"
},
{ "bash-export", 'B', 0, G_OPTION_ARG_NONE, &options.bash_export,
"Display nodes as shell commands of the form 'export uname=uuid'"
"\n (valid with -N/--nodes)",
},
{ "ipc-name", 'i', 0, G_OPTION_ARG_STRING, &options.ipc_name,
"Name to use for ipc instead of 'crmadmin' (with -P/--pacemakerd).",
"NAME"
},
{ NULL }
};
gboolean
command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error)
{
if (!strcmp(option_name, "--status") || !strcmp(option_name, "-S")) {
command = cmd_health;
crm_trace("Option %c => %s", 'S', optarg);
}
if (!strcmp(option_name, "--pacemakerd") || !strcmp(option_name, "-P")) {
command = cmd_pacemakerd_health;
}
if (!strcmp(option_name, "--dc_lookup") || !strcmp(option_name, "-D")) {
command = cmd_whois_dc;
}
if (!strcmp(option_name, "--nodes") || !strcmp(option_name, "-N")) {
command = cmd_list_nodes;
}
if (!strcmp(option_name, "--timeout") || !strcmp(option_name, "-t")) {
- return pcmk__parse_interval_spec(optarg,
- &options.timeout) == pcmk_rc_ok;
+ return pcmk_parse_interval_spec(optarg, &options.timeout) == pcmk_rc_ok;
}
pcmk__str_update(&options.optarg, optarg);
return TRUE;
}
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
GOptionContext *context = NULL;
const char *description = "Notes:\n\n"
"Time Specification:\n\n"
"The TIMESPEC in any command line option can be specified in many different\n"
"formats. It can be just an integer number of seconds, a number plus units\n"
"(ms/msec/us/usec/s/sec/m/min/h/hr), or an ISO 8601 period specification.\n\n"
"Report bugs to " PCMK__BUG_URL;
GOptionEntry extra_prog_entries[] = {
{ "quiet", 'q', 0, G_OPTION_ARG_NONE, &(args->quiet),
"Display only the essential query information",
NULL },
{ NULL }
};
context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
g_option_context_set_description(context, description);
/* Add the -q option, which cannot be part of the globally supported options
* because some tools use that flag for something else.
*/
pcmk__add_main_args(context, extra_prog_entries);
pcmk__add_arg_group(context, "command", "Commands:",
"Show command options", command_options);
pcmk__add_arg_group(context, "additional", "Additional Options:",
"Show additional options", additional_options);
return context;
}
int
main(int argc, char **argv)
{
crm_exit_t exit_code = CRM_EX_OK;
int rc;
int argerr = 0;
GError *error = NULL;
pcmk__output_t *out = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "itKNS");
GOptionContext *context = build_arg_context(args, &output_group);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
pcmk__cli_init_logging("crmadmin", args->verbosity);
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
pcmk__register_lib_messages(out);
out->quiet = args->quiet;
if (!pcmk__force_args(context, &error, "%s --xml-simple-list --xml-substitute", g_get_prgname())) {
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
if (options.health) {
out->err(out, "Cluster-wide health option not supported");
++argerr;
}
if (command == cmd_none) {
out->err(out, "error: Must specify a command option");
++argerr;
}
if (argerr) {
char *help = g_option_context_get_help(context, TRUE, NULL);
out->err(out, "%s", help);
g_free(help);
exit_code = CRM_EX_USAGE;
goto done;
}
switch (command) {
case cmd_health:
rc = pcmk__controller_status(out, options.optarg,
(unsigned int) options.timeout);
break;
case cmd_pacemakerd_health:
rc = pcmk__pacemakerd_status(out, options.ipc_name,
(unsigned int) options.timeout, true,
NULL);
break;
case cmd_list_nodes:
rc = pcmk__list_nodes(out, options.optarg, options.bash_export);
break;
case cmd_whois_dc:
rc = pcmk__designated_controller(out,
(unsigned int) options.timeout);
break;
case cmd_none:
rc = pcmk_rc_error;
break;
}
if (rc != pcmk_rc_ok) {
out->err(out, "error: Command failed: %s", pcmk_rc_str(rc));
exit_code = pcmk_rc2exitc(rc);
}
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
return crm_exit(exit_code);
}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Apr 21, 6:53 PM (22 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665250
Default Alt Text
(909 KB)

Event Timeline