diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index a9565a5dd3..e5b651d3ff 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -1,608 +1,608 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static xmlNode *
attrd_confirmation(int callid)
{
xmlNode *node = pcmk__xe_create(NULL, __func__);
crm_xml_add(node, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(node, PCMK__XA_SRC, get_local_node_name());
crm_xml_add(node, PCMK_XA_TASK, PCMK__ATTRD_CMD_CONFIRM);
crm_xml_add_int(node, PCMK__XA_CALL_ID, callid);
return node;
}
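/* Illustrative sketch: the confirmation reply built above is a standalone
 * element carrying exactly four attributes, conceptually:
 *
 *   <attrd_confirmation
 *       PCMK__XA_T       = PCMK__VALUE_ATTRD
 *       PCMK__XA_SRC     = <local node name>
 *       PCMK_XA_TASK     = PCMK__ATTRD_CMD_CONFIRM
 *       PCMK__XA_CALL_ID = <callid being confirmed>
 *   />
 *
 * (Attribute names are shown as the macros; the literal strings are
 * whatever those constants expand to.)
 */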
static void
attrd_peer_message(crm_node_t *peer, xmlNode *xml)
{
const char *election_op = crm_element_value(xml, PCMK__XA_CRM_TASK);
if (election_op) {
attrd_handle_election_op(peer, xml);
return;
}
if (attrd_shutting_down(false)) {
/* If we're shutting down, we want to continue responding to election
* ops as long as we're a cluster member (because our vote may be
* needed). Ignore all other messages.
*/
return;
} else {
pcmk__request_t request = {
.ipc_client = NULL,
.ipc_id = 0,
.ipc_flags = 0,
.peer = peer->uname,
.xml = xml,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);
CRM_CHECK(request.op != NULL, return);
attrd_handle_request(&request);
/* Having finished handling the request, check to see if the originating
* peer requested confirmation. If so, send that confirmation back now.
*/
if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) &&
!pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
int callid = 0;
xmlNode *reply = NULL;
/* Add the call ID of the message being confirmed to the response, so
* the originating peer knows which request this confirmation is for.
*/
crm_element_value_int(xml, PCMK__XA_CALL_ID, &callid);
reply = attrd_confirmation(callid);
/* Then send the confirmation back to the originating peer. It arrives
* right back in this same function (attrd_peer_message) on that peer,
* which dispatches it as a PCMK__ATTRD_CMD_CONFIRM message.
*/
crm_debug("Sending %s a confirmation", peer->uname);
attrd_send_message(peer, reply, false);
free_xml(reply);
}
pcmk__reset_request(&request);
}
}
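/* Illustrative confirmation round trip (assuming two peers, with node1
 * the originator):
 *
 *   node1: broadcasts a request with PCMK__XA_CONFIRM set to true
 *   node2: attrd_peer_message() handles it via attrd_handle_request(),
 *          sees PCMK__XA_CONFIRM, and replies with
 *          attrd_confirmation(callid)
 *   node1: attrd_peer_message() receives that reply and dispatches it
 *          as a PCMK__ATTRD_CMD_CONFIRM request
 */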
static void
attrd_cpg_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = pcmk__xml_parse(data);
}
if (xml == NULL) {
crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data);
} else {
attrd_peer_message(pcmk__get_node(nodeid, from, NULL,
- pcmk__node_search_cluster),
+ pcmk__node_search_cluster_member),
xml);
}
free_xml(xml);
free(data);
}
static void
attrd_cpg_destroy(gpointer unused)
{
if (attrd_shutting_down(false)) {
crm_info("Disconnected from Corosync process group");
} else {
crm_crit("Lost connection to Corosync process group, shutting down");
attrd_exit_status = CRM_EX_DISCONNECT;
attrd_shutdown(0);
}
}
/*!
* \internal
* \brief Broadcast an update for a single attribute value
*
* \param[in] a Attribute to broadcast
* \param[in] v Attribute value to broadcast
*/
void
attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v)
{
xmlNode *op = pcmk__xe_create(NULL, PCMK_XE_OP);
crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
attrd_add_value_xml(op, a, v, false);
attrd_send_message(NULL, op, false);
free_xml(op);
}
#define state_text(state) pcmk__s((state), "in unknown state")
static void
attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
{
bool gone = false;
bool is_remote = pcmk_is_set(peer->flags, crm_remote_node);
switch (kind) {
case crm_status_uname:
crm_debug("%s node %s is now %s",
(is_remote? "Remote" : "Cluster"),
peer->uname, state_text(peer->state));
break;
case crm_status_processes:
if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
gone = true;
}
crm_debug("Node %s is %s a peer",
peer->uname, (gone? "no longer" : "now"));
break;
case crm_status_nstate:
crm_debug("%s node %s is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
peer->uname, state_text(peer->state), state_text(data));
if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
/* If we're the writer, send new peers a list of all attributes
* (unless it's a remote node, which doesn't run its own attrd)
*/
if (attrd_election_won()
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
attrd_peer_sync(peer);
}
} else {
// Remove all attribute values associated with lost nodes
attrd_peer_remove(peer->uname, false, "loss");
gone = true;
}
break;
}
// Remove votes from cluster nodes that leave, in case election in progress
if (gone && !is_remote) {
attrd_remove_voter(peer);
attrd_remove_peer_protocol_ver(peer->uname);
attrd_do_not_expect_from_peer(peer->uname);
}
}
static void
record_peer_nodeid(attribute_value_t *v, const char *host)
{
crm_node_t *known_peer = pcmk__get_node(v->nodeid, host, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid);
if (attrd_election_won()) {
attrd_write_attributes(attrd_write_changed);
}
}
#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")
#define readable_peer(p) \
(((p) == NULL)? "all peers" : pcmk__s((p)->uname, "unknown peer"))
static void
update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
const char *attr, const char *value, const char *host,
bool filter)
{
int is_remote = 0;
bool changed = false;
attribute_value_t *v = NULL;
// Create entry for value if not already existing
v = g_hash_table_lookup(a->values, host);
if (v == NULL) {
v = pcmk__assert_alloc(1, sizeof(attribute_value_t));
v->nodename = pcmk__str_copy(host);
g_hash_table_replace(a->values, v->nodename, v);
}
// If value is for a Pacemaker Remote node, remember that
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
if (is_remote) {
attrd_set_value_flags(v, attrd_value_remote);
CRM_ASSERT(pcmk__cluster_lookup_remote_node(host) != NULL);
}
// Check whether the value changed
changed = !pcmk__str_eq(v->current, value, pcmk__str_casei);
if (changed && filter && pcmk__str_eq(host, attrd_cluster->uname,
pcmk__str_casei)) {
/* Broadcast the local value for an attribute that differs from the
* value provided in a peer's attribute synchronization response. This
* ensures a node's values for itself take precedence and all peers are
* kept in sync.
*/
v = g_hash_table_lookup(a->values, attrd_cluster->uname);
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
attr, host, readable_value(v), value, peer->uname);
attrd_broadcast_value(a, v);
} else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
CRM_XS " from %s with %s write delay",
attr, host, a->set_type ? " in " : "",
pcmk__s(a->set_type, ""), readable_value(v),
pcmk__s(value, "(unset)"), peer->uname,
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
pcmk__str_update(&v->current, value);
attrd_set_attr_flags(a, attrd_attr_changed);
if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)
&& pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) {
if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) {
attrd_set_requesting_shutdown();
} else {
attrd_clear_requesting_shutdown();
}
}
// Write out new value or start dampening timer
if (a->timeout_ms && a->timer) {
crm_trace("Delaying write of %s %s for dampening",
attr, pcmk__readable_interval(a->timeout_ms));
mainloop_timer_start(a->timer);
} else {
attrd_write_or_elect_attribute(a);
}
} else {
int is_force_write = 0;
crm_element_value_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE,
&is_force_write);
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Remember the forced write and set the change flag; the attribute
* itself is written out by the writer after the election.
*/
crm_trace("%s[%s] from %s is unchanged (%s), forcing write",
attr, host, peer->uname, pcmk__s(value, "unset"));
attrd_set_attr_flags(a, attrd_attr_force_write);
} else {
crm_trace("%s[%s] from %s is unchanged (%s)",
attr, host, peer->uname, pcmk__s(value, "unset"));
}
}
// This allows us to later detect local values that peer doesn't know about
attrd_set_value_flags(v, attrd_value_from_peer);
/* If this is a cluster node whose node ID we are learning, remember it */
if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote)
&& (crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID,
(int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
record_peer_nodeid(v, host);
}
}
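/* Illustrative example of the "filter" branch above: if node1 holds
 * "foo"="3" for itself and a sync response from node2 claims node1's
 * "foo" is "2", node1 keeps "3", logs that the local value takes
 * priority, and rebroadcasts it so all peers converge on node1's own
 * value for node1.
 */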
static void
attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter)
{
attribute_t *a = NULL;
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
if (attr == NULL) {
crm_warn("Could not update attribute: peer did not specify name");
return;
}
a = attrd_populate_attribute(xml, attr);
if (a == NULL) {
return;
}
if (host == NULL) {
// If no host was specified, update all hosts
GHashTableIter vIter;
crm_debug("Setting %s for all hosts to %s", attr, value);
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_HOST_ID);
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
} else {
// Update attribute value for the given host
update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
/* If this is a message from some attrd instance broadcasting its protocol
* version, check to see if it's a new minimum version.
*/
if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) {
attrd_update_minimum_protocol_ver(peer->uname, value);
}
}
static void
broadcast_unseen_local_values(void)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
if (!pcmk_is_set(v->flags, attrd_value_from_peer)
&& pcmk__str_eq(v->nodename, attrd_cluster->uname,
pcmk__str_casei)) {
crm_trace("* %s[%s]='%s' is local-only",
a->id, v->nodename, readable_value(v));
if (sync == NULL) {
sync = pcmk__xe_create(NULL, __func__);
crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
}
attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer);
}
}
}
if (sync != NULL) {
crm_debug("Broadcasting local-only values");
attrd_send_message(NULL, sync, false);
free_xml(sync);
}
}
int
attrd_cluster_connect(void)
{
int rc = pcmk_rc_ok;
attrd_cluster = pcmk_cluster_new();
pcmk_cluster_set_destroy_fn(attrd_cluster, attrd_cpg_destroy);
pcmk_cpg_set_deliver_fn(attrd_cluster, attrd_cpg_dispatch);
pcmk_cpg_set_confchg_fn(attrd_cluster, pcmk_cpg_membership);
crm_set_status_callback(&attrd_peer_change_cb);
rc = pcmk_cluster_connect(attrd_cluster);
rc = pcmk_rc2legacy(rc);
if (rc != pcmk_ok) {
crm_err("Cluster connection failed");
return rc;
}
return pcmk_ok;
}
void
attrd_peer_clear_failure(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
const char *op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
const char *interval_spec = crm_element_value(xml,
PCMK__XA_ATTR_CLEAR_INTERVAL);
guint interval_ms = 0U;
char *attr = NULL;
GHashTableIter iter;
regex_t regex;
crm_node_t *peer = pcmk__get_node(0, request->peer, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
pcmk_parse_interval_spec(interval_spec, &interval_ms);
if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) {
crm_info("Ignoring invalid request to clear failures for %s",
pcmk__s(rsc, "all resources"));
return;
}
crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
/* Make sure value is not set, so we delete */
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) {
if (regexec(&regex, attr, 0, NULL, 0) == 0) {
crm_trace("Matched %s when clearing %s",
attr, pcmk__s(rsc, "all resources"));
crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr);
attrd_peer_update(peer, xml, host, false);
}
}
regfree(&regex);
}
/*!
* \internal
* \brief Load attributes from a peer sync response
*
* \param[in] peer Peer that sent sync response
* \param[in] peer_won Whether peer is the attribute writer
* \param[in,out] xml Request XML
*/
void
attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml)
{
crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
peer->uname);
if (peer_won) {
/* Initialize the "seen" flag for all attributes to cleared, so we can
* detect attributes that the local node has but the writer doesn't.
*/
attrd_clear_value_seen();
}
// Process each attribute update in the sync response
for (xmlNode *child = pcmk__xe_first_child(xml, NULL, NULL, NULL);
child != NULL; child = pcmk__xe_next(child)) {
attrd_peer_update(peer, child,
crm_element_value(child, PCMK__XA_ATTR_HOST), true);
}
if (peer_won) {
/* If any attributes are still not marked as seen, the writer doesn't
* know about them, so send all peers an update with them.
*/
broadcast_unseen_local_values();
}
}
/*!
* \internal
* \brief Remove all attributes and optionally peer cache entries for a node
*
* \param[in] host Name of node to purge
* \param[in] uncache If true, remove node from peer caches
* \param[in] source Who requested removal (only used for logging)
*/
void
attrd_peer_remove(const char *host, bool uncache, const char *source)
{
attribute_t *a = NULL;
GHashTableIter aIter;
CRM_CHECK(host != NULL, return);
crm_notice("Removing all %s attributes for node %s "
CRM_XS " %s reaping node from cache",
host, source, (uncache? "and" : "without"));
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
if(g_hash_table_remove(a->values, host)) {
crm_debug("Removed %s[%s] for peer %s", a->id, host, source);
}
}
if (uncache) {
pcmk__purge_node_from_cache(host, 0);
}
}
/*!
* \internal
* \brief Send all known attributes and values to a peer
*
* \param[in] peer Peer to send sync to (if NULL, broadcast to all peers)
*/
void
attrd_peer_sync(crm_node_t *peer)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = pcmk__xe_create(NULL, __func__);
crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
crm_debug("Syncing %s[%s]='%s' to %s",
a->id, v->nodename, readable_value(v),
readable_peer(peer));
attrd_add_value_xml(sync, a, v, false);
}
}
crm_debug("Syncing values to %s", readable_peer(peer));
attrd_send_message(peer, sync, false);
free_xml(sync);
}
void
attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host,
bool filter)
{
bool handle_sync_point = false;
CRM_CHECK((peer != NULL) && (xml != NULL), return);
if (xml->children != NULL) {
for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL);
child != NULL; child = pcmk__xe_next_same(child)) {
pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
attrd_peer_update_one(peer, child, filter);
if (attrd_request_has_sync_point(child)) {
handle_sync_point = true;
}
}
} else {
attrd_peer_update_one(peer, xml, filter);
if (attrd_request_has_sync_point(xml)) {
handle_sync_point = true;
}
}
/* If the update XML specified that the client wanted to wait for a sync
* point, process that now.
*/
if (handle_sync_point) {
crm_trace("Hit local sync point for attribute update");
attrd_ack_waitlist_clients(attrd_sync_point_local, xml);
}
}
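/* Illustrative: for a multi-op message, attrd_peer_update_one() runs once
 * per PCMK_XE_OP child (with the parent's attributes copied down), but the
 * local sync point is acknowledged only once, after the whole batch, which
 * is why handle_sync_point is latched across the loop rather than acted on
 * per child.
 */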
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
index b4e9f28fa1..7909e12ff8 100644
--- a/daemons/attrd/attrd_ipc.c
+++ b/daemons/attrd/attrd_ipc.c
@@ -1,624 +1,624 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/acl_internal.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/util.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static qb_ipcs_service_t *ipcs = NULL;
/*!
* \internal
* \brief Build the XML reply to a client query
*
* \param[in] attr Name of requested attribute
* \param[in] host Name of requested host (or NULL for all hosts)
*
* \return New XML reply
* \note Caller is responsible for freeing the resulting XML
*/
static xmlNode *build_query_reply(const char *attr, const char *host)
{
xmlNode *reply = pcmk__xe_create(NULL, __func__);
attribute_t *a;
crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(reply, PCMK__XA_SUBT, PCMK__ATTRD_CMD_QUERY);
crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);
/* If desired attribute exists, add its value(s) to the reply */
a = g_hash_table_lookup(attributes, attr);
if (a) {
attribute_value_t *v;
xmlNode *host_value;
crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr);
/* Allow caller to use "localhost" to refer to local node */
if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) {
host = attrd_cluster->uname;
crm_trace("Mapped localhost to %s", host);
}
/* If a specific node was requested, add its value */
if (host) {
v = g_hash_table_lookup(a->values, host);
host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
pcmk__xe_add_node(host_value, host, 0);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE,
(v? v->current : NULL));
/* Otherwise, add all nodes' values */
} else {
GHashTableIter iter;
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
pcmk__xe_add_node(host_value, v->nodename, 0);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current);
}
}
}
return reply;
}
xmlNode *
attrd_client_clear_failure(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
const char *rsc, *op, *interval_spec;
if (minimum_protocol_version >= 2) {
/* Propagate to all peers (including ourselves).
* This ends up at attrd_peer_message().
*/
attrd_send_message(NULL, xml, false);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
interval_spec = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_INTERVAL);
/* Map this to an update */
crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
/* Add regular expression matching desired attributes */
if (rsc) {
char *pattern;
if (op == NULL) {
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
} else {
guint interval_ms = 0U;
pcmk_parse_interval_spec(interval_spec, &interval_ms);
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
rsc, op, interval_ms);
}
crm_xml_add(xml, PCMK__XA_ATTR_REGEX, pattern);
free(pattern);
} else {
crm_xml_add(xml, PCMK__XA_ATTR_REGEX, ATTRD_RE_CLEAR_ALL);
}
/* Make sure attribute and value are not set, so we delete via regex */
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_NAME);
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);
return attrd_client_update(request);
}
xmlNode *
attrd_client_peer_remove(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
// Host and ID are not used in combination; rather, host takes precedence
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
char *host_alloc = NULL;
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
if (host == NULL) {
int nodeid = 0;
crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID, &nodeid);
if (nodeid > 0) {
crm_node_t *node = NULL;
node = pcmk__search_node_caches(nodeid, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
if (node && node->uname) {
// Use cached name if available
host = node->uname;
} else {
// Otherwise ask cluster layer
host_alloc = get_node_name(nodeid);
host = host_alloc;
}
pcmk__xe_add_node(xml, host, 0);
}
}
if (host) {
crm_info("Client %s is requesting all values for %s be removed",
pcmk__client_name(request->ipc_client), host);
attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
free(host_alloc);
} else {
crm_info("Ignoring request by client %s to remove all peer values without specifying peer",
pcmk__client_name(request->ipc_client));
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
xmlNode *
attrd_client_query(pcmk__request_t *request)
{
xmlNode *query = request->xml;
xmlNode *reply = NULL;
const char *attr = NULL;
crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client));
/* Request must specify attribute name to query */
attr = crm_element_value(query, PCMK__XA_ATTR_NAME);
if (attr == NULL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Ignoring malformed query from %s (no attribute name given)",
pcmk__client_name(request->ipc_client));
return NULL;
}
/* Build the XML reply */
reply = build_query_reply(attr,
crm_element_value(query, PCMK__XA_ATTR_HOST));
if (reply == NULL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Could not respond to query from %s: could not create XML reply",
pcmk__client_name(request->ipc_client));
return NULL;
} else {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
request->ipc_client->request_id = 0;
return reply;
}
xmlNode *
attrd_client_refresh(pcmk__request_t *request)
{
crm_info("Updating all attributes");
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
attrd_write_attributes(attrd_write_all|attrd_write_no_delay);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
static void
handle_missing_host(xmlNode *xml)
{
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
if (host == NULL) {
crm_trace("Inferring host");
pcmk__xe_add_node(xml, attrd_cluster->uname, attrd_cluster->nodeid);
}
}
/* Convert a single IPC message with a regex into one with multiple children, one
* for each regex match.
*/
static int
expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex)
{
if (attr == NULL && regex) {
bool matched = false;
GHashTableIter aIter;
regex_t r_patt;
crm_debug("Setting %s to %s", regex, value);
if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) {
return EINVAL;
}
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
int status = regexec(&r_patt, attr, 0, NULL, 0);
if (status == 0) {
xmlNode *child = pcmk__xe_create(xml, PCMK_XE_OP);
crm_trace("Matched %s with %s", attr, regex);
matched = true;
/* Copy all the non-conflicting attributes from the parent over,
* but remove the regex and replace it with the name.
*/
pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
pcmk__xe_remove_attr(child, PCMK__XA_ATTR_REGEX);
crm_xml_add(child, PCMK__XA_ATTR_NAME, attr);
}
}
regfree(&r_patt);
/* Return a distinct code if we never matched anything. This should not
* be treated as an error: there was a regex and it was valid, but it
* simply did not match anything, so the caller should not continue
* with any regex-related processing.
*/
if (!matched) {
return pcmk_rc_op_unsatisfied;
}
} else if (attr == NULL) {
return pcmk_rc_bad_nvpair;
}
return pcmk_rc_ok;
}
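/* Illustrative example: an update carrying PCMK__XA_ATTR_REGEX
 * "^fail-count-" and no PCMK__XA_ATTR_NAME is expanded in place into one
 * PCMK_XE_OP child per matching known attribute (say, fail-count-rsc1 and
 * fail-count-rsc2), each copying the parent's attributes minus the regex
 * plus the concrete PCMK__XA_ATTR_NAME. EINVAL means the regex would not
 * compile; pcmk_rc_op_unsatisfied means it compiled but matched nothing.
 */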
static int
handle_regexes(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
int rc = pcmk_rc_ok;
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX);
rc = expand_regexes(xml, attr, value, regex);
if (rc == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Bad regex '%s' for update from client %s", regex,
pcmk__client_name(request->ipc_client));
} else if (rc == pcmk_rc_bad_nvpair) {
crm_err("Update request did not specify attribute or regular expression");
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Client %s update request did not specify attribute or regular expression",
pcmk__client_name(request->ipc_client));
}
return rc;
}
static int
handle_value_expansion(const char **value, xmlNode *xml, const char *op,
const char *attr)
{
attribute_t *a = g_hash_table_lookup(attributes, attr);
if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
return EINVAL;
}
if (*value && attrd_value_needs_expansion(*value)) {
int int_value;
attribute_value_t *v = NULL;
if (a) {
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
v = g_hash_table_lookup(a->values, host);
}
int_value = attrd_expand_value(*value, (v? v->current : NULL));
crm_info("Expanded %s=%s to %d", attr, *value, int_value);
crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value);
/* Replacing the value frees the previous memory, so re-query it */
*value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
}
return pcmk_rc_ok;
}
static void
send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml)
{
if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) {
/* The client is waiting on the cluster-wide sync point. In this case,
* the response ACK is not sent until this attrd broadcasts the update
* and receives its own confirmation back from all peers.
*/
attrd_expect_confirmations(request, attrd_cluster_sync_point_update);
attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */
} else {
/* The client is either waiting on the local sync point or was not
* waiting on any sync point at all. For the local sync point, the
* response ACK is sent in attrd_peer_update. For clients not
* waiting on any sync point, the response ACK is sent in
* handle_update_request immediately before this function was called.
*/
attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
}
}
static int
send_child_update(xmlNode *child, void *data)
{
pcmk__request_t *request = (pcmk__request_t *) data;
/* Calling pcmk__set_result is handled by one of these calls to
* attrd_client_update, so no need to do it again here.
*/
request->xml = child;
attrd_client_update(request);
return pcmk_rc_ok;
}
xmlNode *
attrd_client_update(pcmk__request_t *request)
{
xmlNode *xml = NULL;
const char *attr, *value, *regex;
CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL);
xml = request->xml;
/* If the message has children, that means it is a message from a newer
* client that supports sending multiple operations at a time. There are
* two ways we can handle that.
*/
if (xml->children != NULL) {
if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
/* First, if all peers support a certain protocol version, we can
* just broadcast the big message and they'll handle it. However,
* we also need to apply all the transformations in this function
* to the children since they don't happen anywhere else.
*/
for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL,
NULL);
child != NULL; child = pcmk__xe_next_same(child)) {
attr = crm_element_value(child, PCMK__XA_ATTR_NAME);
value = crm_element_value(child, PCMK__XA_ATTR_VALUE);
handle_missing_host(child);
if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
}
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
/* Save the original xml node pointer so it can be restored after iterating
* over all the children.
*/
xmlNode *orig_xml = request->xml;
/* Second, if they do not support that protocol version, split it
* up into individual messages and call attrd_client_update on
* each one.
*/
pcmk__xe_foreach_child(xml, PCMK_XE_OP, send_child_update, request);
request->xml = orig_xml;
}
return NULL;
}
attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX);
if (handle_regexes(request) != pcmk_rc_ok) {
/* Error handling was already dealt with in handle_regexes, so just return. */
return NULL;
} else if (regex) {
/* Recursively call attrd_client_update on the new message with regexes
* expanded. If supported by the attribute daemon, this means that all
* matches can also be handled atomically.
*/
return attrd_client_update(request);
}
handle_missing_host(xml);
if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
crm_debug("Broadcasting %s[%s]=%s%s",
attr, crm_element_value(xml, PCMK__XA_ATTR_HOST),
value, (attrd_election_won()? " (writer)" : ""));
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
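/* Decision sketch for attrd_client_update() (illustrative):
 *
 *   message has PCMK_XE_OP children?
 *     yes: all peers support multi-message -> transform children in
 *          place, broadcast the whole message once
 *          otherwise -> send_child_update() re-enters this function
 *          once per child
 *     no:  regex present -> expand_regexes() rewrites the message,
 *          then recurse once on the expanded form
 *          plain update -> expand the value, broadcast to the cluster
 */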
/*!
* \internal
* \brief Accept a new client IPC connection
*
* \param[in,out] c New connection
* \param[in] uid Client user id
* \param[in] gid Client group id
*
* \return pcmk_ok on success, -errno otherwise
*/
static int32_t
attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("New client connection %p", c);
if (attrd_shutting_down(false)) {
crm_info("Ignoring new connection from pid %d during shutdown",
pcmk__client_pid(c));
return -ECONNREFUSED;
}
if (pcmk__new_client(c, uid, gid) == NULL) {
return -ENOMEM;
}
return pcmk_ok;
}
/*!
* \internal
* \brief Clean up after a client IPC connection is closed
*
* \param[in] c Closed connection
*
* \return FALSE (i.e. do not re-run this callback)
*/
static int32_t
attrd_ipc_closed(qb_ipcs_connection_t *c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
crm_trace("Ignoring request to clean up unknown connection %p", c);
} else {
crm_trace("Cleaning up closed client connection %p", c);
/* Remove the client from the sync point waitlist if it's present. */
attrd_remove_client_from_waitlist(client);
/* And no longer wait for confirmations from any peers. */
attrd_do_not_wait_for_client(client);
pcmk__free_client(client);
}
return FALSE;
}
/*!
* \internal
* \brief Destroy a client IPC connection
*
* \param[in,out] c Connection to destroy
*
* \note We handle a destroyed connection the same as a closed one,
* but we need a separate handler because the return type is different.
*/
static void
attrd_ipc_destroy(qb_ipcs_connection_t *c)
{
crm_trace("Destroying client connection %p", c);
attrd_ipc_closed(c);
}
static int32_t
attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *xml = NULL;
// Sanity-check, and parse XML from IPC data
CRM_CHECK((c != NULL) && (client != NULL), return 0);
if (data == NULL) {
crm_debug("No IPC data from PID %d", pcmk__client_pid(c));
return 0;
}
xml = pcmk__client_data2xml(client, data, &id, &flags);
if (xml == NULL) {
crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c));
pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
CRM_EX_PROTOCOL);
return 0;
} else {
pcmk__request_t request = {
.ipc_client = client,
.ipc_id = id,
.ipc_flags = flags,
.peer = NULL,
.xml = xml,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
CRM_ASSERT(client->user != NULL);
pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user);
request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);
CRM_CHECK(request.op != NULL, return 0);
attrd_handle_request(&request);
pcmk__reset_request(&request);
}
free_xml(xml);
return 0;
}
static struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = attrd_ipc_accept,
.connection_created = NULL,
.msg_process = attrd_ipc_dispatch,
.connection_closed = attrd_ipc_closed,
.connection_destroyed = attrd_ipc_destroy
};
void
attrd_ipc_fini(void)
{
if (ipcs != NULL) {
pcmk__drop_all_clients(ipcs);
qb_ipcs_destroy(ipcs);
ipcs = NULL;
}
}
/*!
* \internal
* \brief Set up attrd IPC communication
*/
void
attrd_init_ipc(void)
{
pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks);
}
diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c
index 1dad9b33a7..9e4a28cf89 100644
--- a/daemons/attrd/attrd_messages.c
+++ b/daemons/attrd/attrd_messages.c
@@ -1,357 +1,357 @@
/*
* Copyright 2022-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <crm/common/messages_internal.h>
#include <crm/cluster/internal.h> // pcmk__get_node()
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
int minimum_protocol_version = -1;
static GHashTable *attrd_handlers = NULL;
static bool
is_sync_point_attr(xmlAttrPtr attr, void *data)
{
return pcmk__str_eq((const char *) attr->name, PCMK__XA_ATTR_SYNC_POINT, pcmk__str_none);
}
static int
remove_sync_point_attribute(xmlNode *xml, void *data)
{
pcmk__xe_remove_matching_attrs(xml, is_sync_point_attr, NULL);
pcmk__xe_foreach_child(xml, PCMK_XE_OP, remove_sync_point_attribute, NULL);
return pcmk_rc_ok;
}
/* Sync points on a multi-update IPC message do not work when some attrd
* in the cluster is too old to support multi-update messages. Strip the
* sync point attribute off here so we don't pretend to support that
* situation, and instead ACK the client immediately.
*/
static void
remove_unsupported_sync_points(pcmk__request_t *request)
{
if (request->xml->children != NULL && !ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version) &&
attrd_request_has_sync_point(request->xml)) {
crm_warn("Ignoring sync point in request from %s because not all nodes support it",
pcmk__request_origin(request));
remove_sync_point_attribute(request->xml, NULL);
}
}
static xmlNode *
handle_unknown_request(pcmk__request_t *request)
{
crm_err("Unknown IPC request %s from %s %s",
request->op, pcmk__request_origin_type(request),
pcmk__request_origin(request));
pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
"Unknown request type '%s' (bug?)", request->op);
return NULL;
}
static xmlNode *
handle_clear_failure_request(pcmk__request_t *request)
{
if (request->peer != NULL) {
/* It is not currently possible to receive this as a peer command,
* but it will be if we one day enable propagating this operation.
*/
attrd_peer_clear_failure(request);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
} else {
remove_unsupported_sync_points(request);
if (attrd_request_has_sync_point(request->xml)) {
/* If this client supplied a sync point it wants to wait for, add it to
* the wait list. Clients on this list will not receive an ACK until
* their sync point is hit, which means the client stalls there until
* it receives a response.
*
* All other clients will receive the expected response as normal.
*/
attrd_add_client_to_waitlist(request);
} else {
/* If the client doesn't want to wait for a sync point, go ahead and send
* the ACK immediately. Otherwise, we'll send the ACK when the appropriate
* sync point is reached.
*/
attrd_send_ack(request->ipc_client, request->ipc_id,
request->ipc_flags);
}
return attrd_client_clear_failure(request);
}
}
static xmlNode *
handle_confirm_request(pcmk__request_t *request)
{
if (request->peer != NULL) {
int callid;
crm_debug("Received confirmation from %s", request->peer);
if (crm_element_value_int(request->xml, PCMK__XA_CALL_ID,
&callid) == -1) {
pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
"Could not get callid from XML");
} else {
attrd_handle_confirmation(callid, request->peer);
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
} else {
return handle_unknown_request(request);
}
}
static xmlNode *
handle_flush_request(pcmk__request_t *request)
{
if (request->peer != NULL) {
/* Ignore. The flush command was removed in 2.0.0 but may be
* received from peers running older versions.
*/
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
} else {
return handle_unknown_request(request);
}
}
static xmlNode *
handle_query_request(pcmk__request_t *request)
{
if (request->peer != NULL) {
return handle_unknown_request(request);
} else {
return attrd_client_query(request);
}
}
static xmlNode *
handle_remove_request(pcmk__request_t *request)
{
if (request->peer != NULL) {
const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_HOST);
bool reap = false;
if (pcmk__xe_get_bool_attr(request->xml, PCMK__XA_REAP,
&reap) != pcmk_rc_ok) {
reap = true; // Default to true for backward compatibility
}
attrd_peer_remove(host, reap, request->peer);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
} else {
return attrd_client_peer_remove(request);
}
}
static xmlNode *
handle_refresh_request(pcmk__request_t *request)
{
if (request->peer != NULL) {
return handle_unknown_request(request);
} else {
return attrd_client_refresh(request);
}
}
static xmlNode *
handle_sync_response_request(pcmk__request_t *request)
{
if (request->ipc_client != NULL) {
return handle_unknown_request(request);
} else {
if (request->peer != NULL) {
crm_node_t *peer = pcmk__get_node(0, request->peer, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
bool peer_won = attrd_check_for_new_writer(peer, request->xml);
if (!pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) {
attrd_peer_sync_response(peer, peer_won, request->xml);
}
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
}
static xmlNode *
handle_update_request(pcmk__request_t *request)
{
if (request->peer != NULL) {
const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_HOST);
crm_node_t *peer = pcmk__get_node(0, request->peer, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
attrd_peer_update(peer, request->xml, host, false);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
} else {
remove_unsupported_sync_points(request);
if (attrd_request_has_sync_point(request->xml)) {
/* If this client supplied a sync point it wants to wait for, add it to
* the wait list. Clients on this list will not receive an ACK until
* their sync point is hit, which means the client stalls there until
* it receives a response.
*
* All other clients will receive the expected response as normal.
*/
attrd_add_client_to_waitlist(request);
} else {
/* If the client doesn't want to wait for a sync point, go ahead and send
* the ACK immediately. Otherwise, we'll send the ACK when the appropriate
* sync point is reached.
*
* In the normal case, attrd_client_update can be called recursively which
* makes where to send the ACK tricky. Doing it here ensures the client
* only ever receives one.
*/
attrd_send_ack(request->ipc_client, request->ipc_id,
request->ipc_flags|crm_ipc_client_response);
}
return attrd_client_update(request);
}
}
static void
attrd_register_handlers(void)
{
pcmk__server_command_t handlers[] = {
{ PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request },
{ PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request },
{ PCMK__ATTRD_CMD_FLUSH, handle_flush_request },
{ PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request },
{ PCMK__ATTRD_CMD_QUERY, handle_query_request },
{ PCMK__ATTRD_CMD_REFRESH, handle_refresh_request },
{ PCMK__ATTRD_CMD_SYNC_RESPONSE, handle_sync_response_request },
{ PCMK__ATTRD_CMD_UPDATE, handle_update_request },
{ PCMK__ATTRD_CMD_UPDATE_DELAY, handle_update_request },
{ PCMK__ATTRD_CMD_UPDATE_BOTH, handle_update_request },
{ NULL, handle_unknown_request },
};
attrd_handlers = pcmk__register_handlers(handlers);
}
void
attrd_unregister_handlers(void)
{
if (attrd_handlers != NULL) {
g_hash_table_destroy(attrd_handlers);
attrd_handlers = NULL;
}
}
void
attrd_handle_request(pcmk__request_t *request)
{
xmlNode *reply = NULL;
char *log_msg = NULL;
const char *reason = NULL;
if (attrd_handlers == NULL) {
attrd_register_handlers();
}
reply = pcmk__process_request(request, attrd_handlers);
if (reply != NULL) {
crm_log_xml_trace(reply, "Reply");
if (request->ipc_client != NULL) {
pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
request->ipc_flags);
} else {
crm_err("Not sending CPG reply to client");
}
free_xml(reply);
}
reason = request->result.exit_reason;
log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s",
request->op, pcmk__request_origin_type(request),
pcmk__request_origin(request),
pcmk_exec_status_str(request->result.execution_status),
(reason == NULL)? "" : " (",
pcmk__s(reason, ""),
(reason == NULL)? "" : ")");
if (!pcmk__result_ok(&request->result)) {
crm_warn("%s", log_msg);
} else {
crm_debug("%s", log_msg);
}
free(log_msg);
pcmk__reset_request(request);
}
/*!
* \internal
* \brief Broadcast private attribute for local node with protocol version
*/
void
attrd_broadcast_protocol(void)
{
xmlNode *attrd_op = pcmk__xe_create(NULL, __func__);
crm_xml_add(attrd_op, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(attrd_op, PCMK__XA_SRC, crm_system_name);
crm_xml_add(attrd_op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL);
crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION);
crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1);
pcmk__xe_add_node(attrd_op, attrd_cluster->uname, attrd_cluster->nodeid);
crm_debug("Broadcasting attrd protocol version %s for node %s",
ATTRD_PROTOCOL_VERSION, attrd_cluster->uname);
attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */
free_xml(attrd_op);
}
gboolean
attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm)
{
const char *op = crm_element_value(data, PCMK_XA_TASK);
crm_xml_add(data, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);
/* Request a confirmation from the destination peer node (which could
* be all nodes, if node is NULL) that the message has been received
* and acted upon.
*/
if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm);
}
attrd_xml_add_writer(data);
return send_cluster_message(node, crm_msg_attrd, data, TRUE);
}
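/* Illustrative: confirm=true on an outgoing request asks the receiving
 * peer(s) to answer with a PCMK__ATTRD_CMD_CONFIRM message. Confirmations
 * themselves are excluded from carrying PCMK__XA_CONFIRM above, which is
 * what keeps peers from confirming each other's confirmations forever.
 */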
diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c
index c4fb9344f9..269f9b0a88 100644
--- a/daemons/based/based_callbacks.c
+++ b/daemons/based/based_callbacks.c
@@ -1,1761 +1,1763 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h> // uint32_t, uint64_t, UINT64_C()
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> // PRIu64
#include <glib.h>
#include <libxml/tree.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/common/remote_internal.h>
#include <pacemaker-based.h>
#define EXIT_ESCALATION_MS 10000
static unsigned long cib_local_bcast_num = 0;
typedef struct cib_local_notify_s {
xmlNode *notify_src;
char *client_id;
gboolean from_peer;
gboolean sync_reply;
} cib_local_notify_t;
int next_client_id = 0;
gboolean legacy_mode = FALSE;
qb_ipcs_service_t *ipcs_ro = NULL;
qb_ipcs_service_t *ipcs_rw = NULL;
qb_ipcs_service_t *ipcs_shm = NULL;
static int cib_process_command(xmlNode *request,
const cib__operation_t *operation,
cib__op_fn_t op_function, xmlNode **reply,
xmlNode **cib_diff, bool privileged);
static gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data,
size_t size, gboolean privileged);
gboolean
cib_legacy_mode(void)
{
return legacy_mode;
}
static int32_t
cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
if (cib_shutdown_flag) {
crm_info("Ignoring new IPC client [%d] during shutdown",
pcmk__client_pid(c));
return -ECONNREFUSED;
}
if (pcmk__new_client(c, uid, gid) == NULL) {
return -ENOMEM;
}
return 0;
}
static int32_t
cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size)
{
pcmk__client_t *client = pcmk__find_client(c);
crm_trace("%p message from %s", c, client->id);
return cib_common_callback(c, data, size, TRUE);
}
static int32_t
cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size)
{
pcmk__client_t *client = pcmk__find_client(c);
crm_trace("%p message from %s", c, client->id);
return cib_common_callback(c, data, size, FALSE);
}
/* Error code means? */
static int32_t
cib_ipc_closed(qb_ipcs_connection_t * c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p", c);
pcmk__free_client(client);
return 0;
}
static void
cib_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p", c);
cib_ipc_closed(c);
if (cib_shutdown_flag) {
cib_shutdown(0);
}
}
struct qb_ipcs_service_handlers ipc_ro_callbacks = {
.connection_accept = cib_ipc_accept,
.connection_created = NULL,
.msg_process = cib_ipc_dispatch_ro,
.connection_closed = cib_ipc_closed,
.connection_destroyed = cib_ipc_destroy
};
struct qb_ipcs_service_handlers ipc_rw_callbacks = {
.connection_accept = cib_ipc_accept,
.connection_created = NULL,
.msg_process = cib_ipc_dispatch_rw,
.connection_closed = cib_ipc_closed,
.connection_destroyed = cib_ipc_destroy
};
/*!
* \internal
* \brief Create reply XML for a CIB request
*
* \param[in] op CIB operation type
* \param[in] call_id CIB call ID
* \param[in] client_id CIB client ID
* \param[in] call_options Group of <tt>enum cib_call_options</tt> flags
* \param[in] rc Request return code
* \param[in] call_data Request output data
*
* \return Reply XML
*
* \note The caller is responsible for freeing the return value using
* \p free_xml().
*/
static xmlNode *
create_cib_reply(const char *op, const char *call_id, const char *client_id,
int call_options, int rc, xmlNode *call_data)
{
xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_CIB_REPLY);
crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_CIB);
crm_xml_add(reply, PCMK__XA_CIB_OP, op);
crm_xml_add(reply, PCMK__XA_CIB_CALLID, call_id);
crm_xml_add(reply, PCMK__XA_CIB_CLIENTID, client_id);
crm_xml_add_int(reply, PCMK__XA_CIB_CALLOPT, call_options);
crm_xml_add_int(reply, PCMK__XA_CIB_RC, rc);
if (call_data != NULL) {
xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_CIB_CALLDATA);
crm_trace("Attaching reply output");
pcmk__xml_copy(wrapper, call_data);
}
crm_log_xml_explicit(reply, "cib:reply");
return reply;
}
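/* Illustrative shape of the reply (macro names shown in place of the
 * literal strings they expand to):
 *
 *   <PCMK__XE_CIB_REPLY PCMK__XA_T=PCMK__VALUE_CIB
 *                       PCMK__XA_CIB_OP=<op>
 *                       PCMK__XA_CIB_CALLID=<call_id>
 *                       PCMK__XA_CIB_CLIENTID=<client_id>
 *                       PCMK__XA_CIB_CALLOPT=<call_options>
 *                       PCMK__XA_CIB_RC=<rc>>
 *     <PCMK__XE_CIB_CALLDATA> copy of call_data </PCMK__XE_CIB_CALLDATA>
 *   </PCMK__XE_CIB_REPLY>
 */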
static void
do_local_notify(const xmlNode *notify_src, const char *client_id,
bool sync_reply, bool from_peer)
{
int rid = 0;
int call_id = 0;
pcmk__client_t *client_obj = NULL;
CRM_ASSERT(notify_src && client_id);
crm_element_value_int(notify_src, PCMK__XA_CIB_CALLID, &call_id);
client_obj = pcmk__find_client_by_id(client_id);
if (client_obj == NULL) {
crm_debug("Could not send response %d: client %s not found",
call_id, client_id);
return;
}
if (sync_reply) {
if (client_obj->ipcs) {
CRM_LOG_ASSERT(client_obj->request_id);
rid = client_obj->request_id;
client_obj->request_id = 0;
crm_trace("Sending response %d to client %s%s",
rid, pcmk__client_name(client_obj),
(from_peer? " (originator of delegated request)" : ""));
} else {
crm_trace("Sending response (call %d) to client %s%s",
call_id, pcmk__client_name(client_obj),
(from_peer? " (originator of delegated request)" : ""));
}
} else {
crm_trace("Sending event %d to client %s%s",
call_id, pcmk__client_name(client_obj),
(from_peer? " (originator of delegated request)" : ""));
}
switch (PCMK__CLIENT_TYPE(client_obj)) {
case pcmk__client_ipc:
{
int rc = pcmk__ipc_send_xml(client_obj, rid, notify_src,
(sync_reply? crm_ipc_flags_none
: crm_ipc_server_event));
if (rc != pcmk_rc_ok) {
crm_warn("%s reply to client %s failed: %s " CRM_XS " rc=%d",
(sync_reply? "Synchronous" : "Asynchronous"),
pcmk__client_name(client_obj), pcmk_rc_str(rc),
rc);
}
}
break;
#ifdef HAVE_GNUTLS_GNUTLS_H
case pcmk__client_tls:
#endif
case pcmk__client_tcp:
pcmk__remote_send_xml(client_obj->remote, notify_src);
break;
default:
crm_err("Unknown transport for client %s "
CRM_XS " flags=%#016" PRIx64,
pcmk__client_name(client_obj), client_obj->flags);
}
}
void
cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request,
pcmk__client_t *cib_client, gboolean privileged)
{
const char *op = crm_element_value(op_request, PCMK__XA_CIB_OP);
int call_options = cib_none;
crm_element_value_int(op_request, PCMK__XA_CIB_CALLOPT, &call_options);
/* Requests with cib_transaction set should not be sent to based directly
* (outside of a commit-transaction request)
*/
if (pcmk_is_set(call_options, cib_transaction)) {
return;
}
if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
if (flags & crm_ipc_client_response) {
xmlNode *ack = pcmk__xe_create(NULL, __func__);
crm_xml_add(ack, PCMK__XA_CIB_OP, CRM_OP_REGISTER);
crm_xml_add(ack, PCMK__XA_CIB_CLIENTID, cib_client->id);
pcmk__ipc_send_xml(cib_client, id, ack, flags);
cib_client->request_id = 0;
free_xml(ack);
}
return;
} else if (pcmk__str_eq(op, PCMK__VALUE_CIB_NOTIFY, pcmk__str_none)) {
/* Update the notify filters for this client */
int on_off = 0;
crm_exit_t status = CRM_EX_OK;
uint64_t bit = UINT64_C(0);
const char *type = crm_element_value(op_request,
PCMK__XA_CIB_NOTIFY_TYPE);
crm_element_value_int(op_request, PCMK__XA_CIB_NOTIFY_ACTIVATE,
&on_off);
crm_debug("Setting %s callbacks %s for client %s",
type, (on_off? "on" : "off"), pcmk__client_name(cib_client));
if (pcmk__str_eq(type, PCMK__VALUE_CIB_POST_NOTIFY, pcmk__str_none)) {
bit = cib_notify_post;
} else if (pcmk__str_eq(type, PCMK__VALUE_CIB_PRE_NOTIFY,
pcmk__str_none)) {
bit = cib_notify_pre;
} else if (pcmk__str_eq(type, PCMK__VALUE_CIB_UPDATE_CONFIRMATION,
pcmk__str_none)) {
bit = cib_notify_confirm;
} else if (pcmk__str_eq(type, PCMK__VALUE_CIB_DIFF_NOTIFY,
pcmk__str_none)) {
bit = cib_notify_diff;
} else {
status = CRM_EX_INVALID_PARAM;
}
if (bit != 0) {
if (on_off) {
pcmk__set_client_flags(cib_client, bit);
} else {
pcmk__clear_client_flags(cib_client, bit);
}
}
pcmk__ipc_send_ack(cib_client, id, flags, PCMK__XE_ACK, NULL, status);
return;
}
cib_process_request(op_request, privileged, cib_client);
}
int32_t
cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged)
{
uint32_t id = 0;
uint32_t flags = 0;
int call_options = 0;
pcmk__client_t *cib_client = pcmk__find_client(c);
xmlNode *op_request = pcmk__client_data2xml(cib_client, data, &id, &flags);
if (op_request) {
crm_element_value_int(op_request, PCMK__XA_CIB_CALLOPT, &call_options);
}
if (op_request == NULL) {
crm_trace("Invalid message from %p", c);
pcmk__ipc_send_ack(cib_client, id, flags, PCMK__XE_NACK, NULL,
CRM_EX_PROTOCOL);
return 0;
} else if(cib_client == NULL) {
crm_trace("Invalid client %p", c);
return 0;
}
if (pcmk_is_set(call_options, cib_sync_call)) {
CRM_LOG_ASSERT(flags & crm_ipc_client_response);
CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */
cib_client->request_id = id; /* Reply only to the last one */
}
if (cib_client->name == NULL) {
const char *value = crm_element_value(op_request,
PCMK__XA_CIB_CLIENTNAME);
if (value == NULL) {
cib_client->name = pcmk__itoa(cib_client->pid);
} else {
cib_client->name = pcmk__str_copy(value);
if (crm_is_daemon_name(value)) {
pcmk__set_client_flags(cib_client, cib_is_daemon);
}
}
}
/* Allow cluster daemons more leeway before being evicted */
if (pcmk_is_set(cib_client->flags, cib_is_daemon)) {
const char *qmax = cib_config_lookup(PCMK_OPT_CLUSTER_IPC_LIMIT);
if (pcmk__set_client_queue_max(cib_client, qmax)) {
crm_trace("IPC threshold for client %s[%u] is now %u",
pcmk__client_name(cib_client), cib_client->pid,
cib_client->queue_max);
}
}
crm_xml_add(op_request, PCMK__XA_CIB_CLIENTID, cib_client->id);
crm_xml_add(op_request, PCMK__XA_CIB_CLIENTNAME, cib_client->name);
CRM_LOG_ASSERT(cib_client->user != NULL);
pcmk__update_acl_user(op_request, PCMK__XA_CIB_USER, cib_client->user);
cib_common_callback_worker(id, flags, op_request, cib_client, privileged);
free_xml(op_request);
return 0;
}
static uint64_t ping_seq = 0;
static char *ping_digest = NULL;
static bool ping_modified_since = FALSE;
static gboolean
cib_digester_cb(gpointer data)
{
if (based_is_primary) {
char buffer[32];
xmlNode *ping = pcmk__xe_create(NULL, PCMK__XE_PING);
ping_seq++;
free(ping_digest);
ping_digest = NULL;
ping_modified_since = FALSE;
snprintf(buffer, 32, "%" PRIu64, ping_seq);
crm_trace("Requesting peer digests (%s)", buffer);
crm_xml_add(ping, PCMK__XA_T, PCMK__VALUE_CIB);
crm_xml_add(ping, PCMK__XA_CIB_OP, CRM_OP_PING);
crm_xml_add(ping, PCMK__XA_CIB_PING_ID, buffer);
crm_xml_add(ping, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
send_cluster_message(NULL, crm_msg_cib, ping, TRUE);
free_xml(ping);
}
return FALSE;
}
static void
process_ping_reply(xmlNode *reply)
{
uint64_t seq = 0;
const char *host = crm_element_value(reply, PCMK__XA_SRC);
xmlNode *wrapper = pcmk__xe_first_child(reply, PCMK__XE_CIB_CALLDATA, NULL,
NULL);
xmlNode *pong = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
const char *seq_s = crm_element_value(pong, PCMK__XA_CIB_PING_ID);
const char *digest = crm_element_value(pong, PCMK__XA_DIGEST);
if (seq_s == NULL) {
crm_debug("Ignoring ping reply with no " PCMK__XA_CIB_PING_ID);
return;
} else {
long long seq_ll;
if (pcmk__scan_ll(seq_s, &seq_ll, 0LL) != pcmk_rc_ok) {
return;
}
seq = (uint64_t) seq_ll;
}
if(digest == NULL) {
crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host);
} else if(seq != ping_seq) {
crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host);
} else if(ping_modified_since) {
crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host);
} else {
const char *version = crm_element_value(pong, PCMK_XA_CRM_FEATURE_SET);
if(ping_digest == NULL) {
crm_trace("Calculating new digest");
ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version);
}
crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest);
if (!pcmk__str_eq(ping_digest, digest, pcmk__str_casei)) {
xmlNode *wrapper = pcmk__xe_first_child(pong, PCMK__XE_CIB_CALLDATA,
NULL, NULL);
xmlNode *remote_cib = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
const char *admin_epoch_s = NULL;
const char *epoch_s = NULL;
const char *num_updates_s = NULL;
if (remote_cib != NULL) {
admin_epoch_s = crm_element_value(remote_cib,
PCMK_XA_ADMIN_EPOCH);
epoch_s = crm_element_value(remote_cib, PCMK_XA_EPOCH);
num_updates_s = crm_element_value(remote_cib,
PCMK_XA_NUM_UPDATES);
}
crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p",
crm_element_value(the_cib, PCMK_XA_ADMIN_EPOCH),
crm_element_value(the_cib, PCMK_XA_EPOCH),
crm_element_value(the_cib, PCMK_XA_NUM_UPDATES),
ping_digest, host,
pcmk__s(admin_epoch_s, "_"),
pcmk__s(epoch_s, "_"),
pcmk__s(num_updates_s, "_"),
digest, remote_cib);
if(remote_cib && remote_cib->children) {
// Additional debug
xml_calculate_changes(the_cib, remote_cib);
pcmk__log_xml_changes(LOG_INFO, remote_cib);
crm_trace("End of differences");
}
free_xml(remote_cib);
sync_our_cib(reply, FALSE);
}
}
}
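/* Illustrative timeline of the digest exchange above (primary's view):
 *
 *   t0: cib_digester_cb() broadcasts CRM_OP_PING carrying ping_seq N
 *   t1: each peer answers with its digest for N
 *   t2: a reply is acted on only if its sequence matches N and the local
 *       CIB has not changed meanwhile (ping_modified_since is FALSE); if
 *       the digests then differ, the differences are logged and
 *       sync_our_cib() pushes the local CIB back out.
 */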
static void
local_notify_destroy_callback(gpointer data)
{
cib_local_notify_t *notify = data;
free_xml(notify->notify_src);
free(notify->client_id);
free(notify);
}
static void
check_local_notify(int bcast_id)
{
const cib_local_notify_t *notify = NULL;
if (!local_notify_queue) {
return;
}
notify = pcmk__intkey_table_lookup(local_notify_queue, bcast_id);
if (notify) {
do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply,
notify->from_peer);
pcmk__intkey_table_remove(local_notify_queue, bcast_id);
}
}
static void
queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
gboolean from_peer)
{
cib_local_notify_t *notify = pcmk__assert_alloc(1,
sizeof(cib_local_notify_t));
notify->notify_src = notify_src;
notify->client_id = pcmk__str_copy(client_id);
notify->sync_reply = sync_reply;
notify->from_peer = from_peer;
if (!local_notify_queue) {
local_notify_queue = pcmk__intkey_table(local_notify_destroy_callback);
}
pcmk__intkey_table_insert(local_notify_queue, cib_local_bcast_num, notify);
// cppcheck doesn't know notify will get freed when hash table is destroyed
// cppcheck-suppress memleak
}
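/* Editor's note: a hypothetical usage sketch of the notify-queue pair above.
 * A reply is parked under the current broadcast number and released by
 * check_local_notify() once cib_peer_callback() sees our own broadcast loop
 * back (via PCMK__XA_CIB_LOCAL_NOTIFY_ID, handled later in this file).
 */
#if 0
static void
example_queue_and_release(xmlNode *op_reply, const char *client_id)
{
    cib_local_bcast_num++;                   // tag the outgoing broadcast
    queue_local_notify(op_reply, client_id,
                       TRUE /* sync_reply */, FALSE /* from_peer */);
    // ... broadcast goes out via send_peer_reply(..., TRUE) ...
    check_local_notify(cib_local_bcast_num); // fires when it loops back
}
#endif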
static void
parse_local_options_v1(const pcmk__client_t *cib_client,
const cib__operation_t *operation, int call_options,
const char *host, const char *op, gboolean *local_notify,
gboolean *needs_reply, gboolean *process,
gboolean *needs_forward)
{
if (pcmk_is_set(operation->flags, cib__op_attr_modifies)
&& !pcmk_is_set(call_options, cib_inhibit_bcast)) {
/* we need to send an update anyway */
*needs_reply = TRUE;
} else {
*needs_reply = FALSE;
}
if (host == NULL && (call_options & cib_scope_local)) {
crm_trace("Processing locally scoped %s op from client %s",
op, pcmk__client_name(cib_client));
*local_notify = TRUE;
} else if ((host == NULL) && based_is_primary) {
crm_trace("Processing %s op locally from client %s as primary",
op, pcmk__client_name(cib_client));
*local_notify = TRUE;
} else if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) {
crm_trace("Processing locally addressed %s op from client %s",
op, pcmk__client_name(cib_client));
*local_notify = TRUE;
} else if (stand_alone) {
*needs_forward = FALSE;
*local_notify = TRUE;
*process = TRUE;
} else {
crm_trace("%s op from %s needs to be forwarded to client %s",
op, pcmk__client_name(cib_client),
pcmk__s(host, "the primary instance"));
*needs_forward = TRUE;
*process = FALSE;
}
}
static void
parse_local_options_v2(const pcmk__client_t *cib_client,
const cib__operation_t *operation, int call_options,
const char *host, const char *op, gboolean *local_notify,
gboolean *needs_reply, gboolean *process,
gboolean *needs_forward)
{
// Process locally and notify local client
*process = TRUE;
*needs_reply = FALSE;
*local_notify = TRUE;
*needs_forward = FALSE;
if (pcmk_is_set(operation->flags, cib__op_attr_local)) {
/* Always process locally if cib__op_attr_local is set.
*
* @COMPAT: Currently host is ignored. At a compatibility break, throw
* an error (from cib_process_request() or earlier) if host is not NULL or
* OUR_NODENAME.
*/
crm_trace("Processing always-local %s op from client %s",
op, pcmk__client_name(cib_client));
if (!pcmk__str_eq(host, OUR_NODENAME,
pcmk__str_casei|pcmk__str_null_matches)) {
crm_warn("Operation '%s' is always local but its target host is "
"set to '%s'",
op, host);
}
return;
}
if (pcmk_is_set(operation->flags, cib__op_attr_modifies)
|| !pcmk__str_eq(host, OUR_NODENAME,
pcmk__str_casei|pcmk__str_null_matches)) {
// Forward modifying and non-local requests via cluster
*process = FALSE;
*needs_reply = FALSE;
*local_notify = FALSE;
*needs_forward = TRUE;
crm_trace("%s op from %s needs to be forwarded to %s",
op, pcmk__client_name(cib_client),
pcmk__s(host, "all nodes"));
return;
}
if (stand_alone) {
crm_trace("Processing %s op from client %s (stand-alone)",
op, pcmk__client_name(cib_client));
} else {
crm_trace("Processing %saddressed %s op from client %s",
((host != NULL)? "locally " : "un"),
op, pcmk__client_name(cib_client));
}
}
static void
parse_local_options(const pcmk__client_t *cib_client,
const cib__operation_t *operation, int call_options,
const char *host, const char *op, gboolean *local_notify,
gboolean *needs_reply, gboolean *process,
gboolean *needs_forward)
{
if(cib_legacy_mode()) {
parse_local_options_v1(cib_client, operation, call_options, host,
op, local_notify, needs_reply, process,
needs_forward);
} else {
parse_local_options_v2(cib_client, operation, call_options, host,
op, local_notify, needs_reply, process,
needs_forward);
}
}
static gboolean
parse_peer_options_v1(const cib__operation_t *operation, xmlNode *request,
gboolean *local_notify, gboolean *needs_reply,
gboolean *process)
{
const char *op = NULL;
const char *host = NULL;
const char *delegated = NULL;
const char *originator = crm_element_value(request, PCMK__XA_SRC);
const char *reply_to = crm_element_value(request, PCMK__XA_CIB_ISREPLYTO);
gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei);
if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) {
*needs_reply = FALSE;
if (is_reply) {
*local_notify = TRUE;
crm_trace("Processing global/peer update from %s"
" that originated from us", originator);
} else {
crm_trace("Processing global/peer update from %s", originator);
}
return TRUE;
}
op = crm_element_value(request, PCMK__XA_CIB_OP);
crm_trace("Processing legacy %s request sent by %s", op, originator);
if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) {
/* Always process these */
*local_notify = FALSE;
if (reply_to == NULL || is_reply) {
*process = TRUE;
}
if (is_reply) {
*needs_reply = FALSE;
}
return *process;
}
if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) {
process_ping_reply(request);
return FALSE;
}
if (is_reply) {
crm_trace("Forward reply sent from %s to local clients", originator);
*process = FALSE;
*needs_reply = FALSE;
*local_notify = TRUE;
return TRUE;
}
host = crm_element_value(request, PCMK__XA_CIB_HOST);
if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) {
crm_trace("Processing %s request sent to us from %s", op, originator);
return TRUE;
} else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) {
crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator);
*needs_reply = TRUE;
return TRUE;
} else if ((host == NULL) && based_is_primary) {
crm_trace("Processing %s request sent to primary instance from %s",
op, originator);
return TRUE;
}
delegated = crm_element_value(request, PCMK__XA_CIB_DELEGATED_FROM);
if (delegated != NULL) {
crm_trace("Ignoring message for primary instance");
} else if (host != NULL) {
/* this is for a specific instance and we're not it */
crm_trace("Ignoring msg for instance on %s", host);
} else if ((reply_to == NULL) && !based_is_primary) {
// This is for the primary instance, and we're not it
crm_trace("Ignoring reply for primary instance");
} else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) {
if (reply_to != NULL) {
crm_debug("Processing %s from %s", op, originator);
*needs_reply = FALSE;
} else {
crm_debug("Processing %s reply from %s", op, originator);
}
return TRUE;
} else {
crm_err("Nothing for us to do?");
crm_log_xml_err(request, "Peer[inbound]");
}
return FALSE;
}
static gboolean
parse_peer_options_v2(const cib__operation_t *operation, xmlNode *request,
gboolean *local_notify, gboolean *needs_reply,
gboolean *process)
{
const char *host = NULL;
const char *delegated = crm_element_value(request,
PCMK__XA_CIB_DELEGATED_FROM);
const char *op = crm_element_value(request, PCMK__XA_CIB_OP);
const char *originator = crm_element_value(request, PCMK__XA_SRC);
const char *reply_to = crm_element_value(request, PCMK__XA_CIB_ISREPLYTO);
gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei);
if (originator == NULL) { // Shouldn't be possible
originator = "peer";
}
if (pcmk__str_eq(op, PCMK__CIB_REQUEST_REPLACE, pcmk__str_none)) {
// sync_our_cib() sets PCMK__XA_CIB_ISREPLYTO
if (reply_to) {
delegated = reply_to;
}
goto skip_is_reply;
} else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SYNC_TO_ALL,
pcmk__str_none)) {
// Nothing to do
} else if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) {
process_ping_reply(request);
return FALSE;
} else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_UPGRADE, pcmk__str_none)) {
/* Only the DC (node with the oldest software) should process
* this operation if PCMK__XA_CIB_SCHEMA_MAX is unset.
*
* If the DC is happy, it will then send out another
* PCMK__CIB_REQUEST_UPGRADE, which tells all nodes to perform the actual
* upgrade.
*
* Except this time PCMK__XA_CIB_SCHEMA_MAX will be set, which puts a
* limit on how far newer nodes will upgrade.
*/
const char *max = crm_element_value(request, PCMK__XA_CIB_SCHEMA_MAX);
const char *upgrade_rc = crm_element_value(request,
PCMK__XA_CIB_UPGRADE_RC);
crm_trace("Parsing upgrade %s for %s with max=%s and upgrade_rc=%s",
(is_reply? "reply" : "request"),
(based_is_primary? "primary" : "secondary"),
pcmk__s(max, "none"), pcmk__s(upgrade_rc, "none"));
if (upgrade_rc != NULL) {
// Our upgrade request was rejected by DC, notify clients of result
crm_xml_add(request, PCMK__XA_CIB_RC, upgrade_rc);
} else if ((max == NULL) && based_is_primary) {
/* We are the DC, check if this upgrade is allowed */
goto skip_is_reply;
} else if(max) {
/* Ok, go ahead and upgrade to 'max' */
goto skip_is_reply;
} else {
// Ignore broadcast client requests when we're not primary
return FALSE;
}
} else if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) {
crm_info("Detected legacy %s global update from %s", op, originator);
send_sync_request(NULL);
legacy_mode = TRUE;
return FALSE;
} else if (is_reply
&& pcmk_is_set(operation->flags, cib__op_attr_modifies)) {
crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator);
return FALSE;
} else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) {
*local_notify = FALSE;
if (reply_to == NULL) {
*process = TRUE;
} else { // Not possible?
crm_debug("Ignoring shutdown request from %s because reply_to=%s",
originator, reply_to);
}
return *process;
}
if (is_reply) {
crm_trace("Will notify local clients for %s reply from %s",
op, originator);
*process = FALSE;
*needs_reply = FALSE;
*local_notify = TRUE;
return TRUE;
}
skip_is_reply:
*process = TRUE;
*needs_reply = FALSE;
*local_notify = pcmk__str_eq(delegated, OUR_NODENAME, pcmk__str_casei);
host = crm_element_value(request, PCMK__XA_CIB_HOST);
if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) {
crm_trace("Processing %s request sent to us from %s", op, originator);
*needs_reply = TRUE;
return TRUE;
} else if (host != NULL) {
crm_trace("Ignoring %s request intended for CIB manager on %s",
op, host);
return FALSE;
} else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) {
*needs_reply = TRUE;
}
crm_trace("Processing %s request broadcast by %s call %s on %s "
"(local clients will%s be notified)", op,
pcmk__s(crm_element_value(request, PCMK__XA_CIB_CLIENTNAME),
"client"),
pcmk__s(crm_element_value(request, PCMK__XA_CIB_CALLID),
"without ID"),
originator, (*local_notify? "" : " not"));
return TRUE;
}
static gboolean
parse_peer_options(const cib__operation_t *operation, xmlNode *request,
gboolean *local_notify, gboolean *needs_reply,
gboolean *process)
{
/* TODO: What happens when an update comes in after node A
* requests the CIB from node B, but before it gets the reply (and
* sends out the replace operation)?
*/
if(cib_legacy_mode()) {
return parse_peer_options_v1(operation, request, local_notify,
needs_reply, process);
} else {
return parse_peer_options_v2(operation, request, local_notify,
needs_reply, process);
}
}
/*!
* \internal
* \brief Forward a CIB request to the appropriate target host(s)
*
* \param[in] request CIB request to forward
*/
static void
forward_request(xmlNode *request)
{
const char *op = crm_element_value(request, PCMK__XA_CIB_OP);
const char *section = crm_element_value(request, PCMK__XA_CIB_SECTION);
const char *host = crm_element_value(request, PCMK__XA_CIB_HOST);
const char *originator = crm_element_value(request, PCMK__XA_SRC);
const char *client_name = crm_element_value(request,
PCMK__XA_CIB_CLIENTNAME);
const char *call_id = crm_element_value(request, PCMK__XA_CIB_CALLID);
crm_node_t *peer = NULL;
int log_level = LOG_INFO;
if (pcmk__str_eq(op, PCMK__CIB_REQUEST_NOOP, pcmk__str_none)) {
log_level = LOG_DEBUG;
}
do_crm_log(log_level,
"Forwarding %s operation for section %s to %s (origin=%s/%s/%s)",
pcmk__s(op, "invalid"),
pcmk__s(section, "all"),
pcmk__s(host, (cib_legacy_mode()? "primary" : "all")),
pcmk__s(originator, "local"),
pcmk__s(client_name, "unspecified"),
pcmk__s(call_id, "unspecified"));
crm_xml_add(request, PCMK__XA_CIB_DELEGATED_FROM, OUR_NODENAME);
if (host != NULL) {
- peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster);
+ peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster_member);
}
send_cluster_message(peer, crm_msg_cib, request, FALSE);
// Return the request to its original state
pcmk__xe_remove_attr(request, PCMK__XA_CIB_DELEGATED_FROM);
}
static gboolean
send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast)
{
CRM_ASSERT(msg != NULL);
if (broadcast) {
/* @COMPAT: Legacy code
*
* This successful call modified the CIB, and the change needs to be
* broadcast (sent via cluster to all nodes).
*/
int diff_add_updates = 0;
int diff_add_epoch = 0;
int diff_add_admin_epoch = 0;
int diff_del_updates = 0;
int diff_del_epoch = 0;
int diff_del_admin_epoch = 0;
const char *digest = NULL;
int format = 1;
xmlNode *wrapper = NULL;
CRM_LOG_ASSERT(result_diff != NULL);
digest = crm_element_value(result_diff, PCMK__XA_DIGEST);
crm_element_value_int(result_diff, PCMK_XA_FORMAT, &format);
cib_diff_version_details(result_diff,
&diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
&diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s",
diff_del_admin_epoch, diff_del_epoch, diff_del_updates,
diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest);
crm_xml_add(msg, PCMK__XA_CIB_ISREPLYTO, originator);
pcmk__xe_set_bool_attr(msg, PCMK__XA_CIB_UPDATE, true);
crm_xml_add(msg, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_APPLY_PATCH);
crm_xml_add(msg, PCMK__XA_CIB_USER, CRM_DAEMON_USER);
if (format == 1) {
CRM_ASSERT(digest != NULL);
}
wrapper = pcmk__xe_create(msg, PCMK__XE_CIB_UPDATE_DIFF);
pcmk__xml_copy(wrapper, result_diff);
crm_log_xml_explicit(msg, "copy");
return send_cluster_message(NULL, crm_msg_cib, msg, TRUE);
} else if (originator != NULL) {
/* send reply via HA to originating node */
+ const crm_node_t *node =
+ pcmk__get_node(0, originator, NULL,
+ pcmk__node_search_cluster_member);
+
crm_trace("Sending request result to %s only", originator);
crm_xml_add(msg, PCMK__XA_CIB_ISREPLYTO, originator);
- return send_cluster_message(pcmk__get_node(0, originator, NULL,
- pcmk__node_search_cluster),
- crm_msg_cib, msg, FALSE);
+ return send_cluster_message(node, crm_msg_cib, msg, FALSE);
}
return FALSE;
}
/*!
* \internal
* \brief Handle an IPC or CPG message containing a request
*
* \param[in,out] request Request XML
* \param[in] privileged Whether privileged commands may be run
* (see cib_server_ops[] definition)
* \param[in] cib_client IPC client that sent request (or NULL if CPG)
*
* \return Legacy Pacemaker return code
*/
int
cib_process_request(xmlNode *request, gboolean privileged,
const pcmk__client_t *cib_client)
{
// @TODO: Break into multiple smaller functions
int call_options = 0;
gboolean process = TRUE; // Whether to process request locally now
gboolean is_update = TRUE; // Whether request would modify CIB
gboolean needs_reply = TRUE; // Whether to build a reply
gboolean local_notify = FALSE; // Whether to notify (local) requester
gboolean needs_forward = FALSE; // Whether to forward request somewhere else
xmlNode *op_reply = NULL;
xmlNode *result_diff = NULL;
int rc = pcmk_ok;
const char *op = crm_element_value(request, PCMK__XA_CIB_OP);
const char *originator = crm_element_value(request, PCMK__XA_SRC);
const char *host = crm_element_value(request, PCMK__XA_CIB_HOST);
const char *target = NULL;
const char *call_id = crm_element_value(request, PCMK__XA_CIB_CALLID);
const char *client_id = crm_element_value(request, PCMK__XA_CIB_CLIENTID);
const char *client_name = crm_element_value(request,
PCMK__XA_CIB_CLIENTNAME);
const char *reply_to = crm_element_value(request, PCMK__XA_CIB_ISREPLYTO);
const cib__operation_t *operation = NULL;
cib__op_fn_t op_function = NULL;
crm_element_value_int(request, PCMK__XA_CIB_CALLOPT, &call_options);
if ((host != NULL) && (*host == '\0')) {
host = NULL;
}
// @TODO: Improve trace messages. Target is accurate only for legacy mode.
if (host) {
target = host;
} else if (call_options & cib_scope_local) {
target = "local host";
} else {
target = "primary";
}
if (cib_client == NULL) {
crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)",
op, client_name, call_id, originator, target, reply_to);
} else {
crm_xml_add(request, PCMK__XA_SRC, OUR_NODENAME);
crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target);
}
rc = cib__get_operation(op, &operation);
rc = pcmk_rc2legacy(rc);
if (rc != pcmk_ok) {
/* TODO: construct error reply? */
crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc));
return rc;
}
op_function = based_get_op_function(operation);
if (op_function == NULL) {
crm_err("Operation %s not supported by CIB manager", op);
return -EOPNOTSUPP;
}
if (cib_client != NULL) {
parse_local_options(cib_client, operation, call_options, host, op,
&local_notify, &needs_reply, &process,
&needs_forward);
} else if (!parse_peer_options(operation, request, &local_notify,
&needs_reply, &process)) {
return rc;
}
if (pcmk_is_set(call_options, cib_transaction)) {
/* All requests in a transaction are processed locally against a working
* CIB copy, and we don't notify for individual requests because the
* entire transaction is atomic.
*
* We still call the option parser functions above, for the sake of log
* messages and checking whether we're the target for peer requests.
*/
process = TRUE;
needs_reply = FALSE;
local_notify = FALSE;
needs_forward = FALSE;
}
is_update = pcmk_is_set(operation->flags, cib__op_attr_modifies);
if (pcmk_is_set(call_options, cib_discard_reply)) {
/* If the request will modify the CIB, and we are in legacy mode, we
* need to build a reply so we can broadcast a diff, even if the
* requester doesn't want one.
*/
needs_reply = is_update && cib_legacy_mode();
local_notify = FALSE;
crm_trace("Client is not interested in the reply");
}
if (needs_forward) {
forward_request(request);
return rc;
}
if (cib_status != pcmk_ok) {
rc = cib_status;
crm_err("Operation ignored, cluster configuration is invalid."
" Please repair and restart: %s", pcmk_strerror(cib_status));
op_reply = create_cib_reply(op, call_id, client_id, call_options, rc,
the_cib);
} else if (process) {
time_t finished = 0;
time_t now = time(NULL);
int level = LOG_INFO;
const char *section = crm_element_value(request, PCMK__XA_CIB_SECTION);
const char *admin_epoch_s = NULL;
const char *epoch_s = NULL;
const char *num_updates_s = NULL;
rc = cib_process_command(request, operation, op_function, &op_reply,
&result_diff, privileged);
if (!is_update) {
level = LOG_TRACE;
} else if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) {
switch (rc) {
case pcmk_ok:
level = LOG_INFO;
break;
case -pcmk_err_old_data:
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
level = LOG_TRACE;
break;
default:
level = LOG_ERR;
}
} else if (rc != pcmk_ok) {
level = LOG_WARNING;
}
if (the_cib != NULL) {
admin_epoch_s = crm_element_value(the_cib, PCMK_XA_ADMIN_EPOCH);
epoch_s = crm_element_value(the_cib, PCMK_XA_EPOCH);
num_updates_s = crm_element_value(the_cib, PCMK_XA_NUM_UPDATES);
}
do_crm_log(level,
"Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)",
op, section ? section : "'all'", pcmk_strerror(rc), rc,
originator ? originator : "local", client_name, call_id,
pcmk__s(admin_epoch_s, "0"),
pcmk__s(epoch_s, "0"),
pcmk__s(num_updates_s, "0"));
finished = time(NULL);
if ((finished - now) > 3) {
crm_trace("%s operation took %lds to complete", op, (long)(finished - now));
crm_write_blackbox(0, NULL);
}
if (op_reply == NULL && (needs_reply || local_notify)) {
crm_err("Unexpected NULL reply to message");
crm_log_xml_err(request, "null reply");
needs_reply = FALSE;
local_notify = FALSE;
}
}
if (is_update && !cib_legacy_mode()) {
crm_trace("Completed pre-sync update from %s/%s/%s%s",
originator ? originator : "local", client_name, call_id,
local_notify?" with local notification":"");
} else if (!needs_reply || stand_alone) {
// This was a non-originating secondary update
crm_trace("Completed update as secondary");
} else if (cib_legacy_mode() &&
rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) {
gboolean broadcast = FALSE;
cib_local_bcast_num++;
crm_xml_add_int(request, PCMK__XA_CIB_LOCAL_NOTIFY_ID,
cib_local_bcast_num);
broadcast = send_peer_reply(request, result_diff, originator, TRUE);
if (broadcast && client_id && local_notify && op_reply) {
/* If we have been asked to sync the reply,
* and a bcast msg has gone out, we queue the local notify
* until we know the bcast message has been received */
local_notify = FALSE;
crm_trace("Queuing local %ssync notification for %s",
(call_options & cib_sync_call) ? "" : "a-", client_id);
queue_local_notify(op_reply, client_id,
pcmk_is_set(call_options, cib_sync_call),
(cib_client == NULL));
op_reply = NULL; /* the reply is queued, so don't free here */
}
} else if ((cib_client == NULL)
&& !pcmk_is_set(call_options, cib_discard_reply)) {
if (is_update == FALSE || result_diff == NULL) {
crm_trace("Request not broadcast: R/O call");
} else if (call_options & cib_inhibit_bcast) {
crm_trace("Request not broadcast: inhibited");
} else if (rc != pcmk_ok) {
crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc));
} else {
crm_trace("Directing reply to %s", originator);
}
send_peer_reply(op_reply, result_diff, originator, FALSE);
}
if (local_notify && client_id) {
crm_trace("Performing local %ssync notification for %s",
(pcmk_is_set(call_options, cib_sync_call)? "" : "a"),
client_id);
if (process == FALSE) {
do_local_notify(request, client_id,
pcmk_is_set(call_options, cib_sync_call),
(cib_client == NULL));
} else {
do_local_notify(op_reply, client_id,
pcmk_is_set(call_options, cib_sync_call),
(cib_client == NULL));
}
}
free_xml(op_reply);
free_xml(result_diff);
return rc;
}
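/* Editor's note: a sketch of the two entry paths into cib_process_request()
 * (hypothetical wrapper; the real call sites are cib_common_callback_worker()
 * for IPC clients and cib_peer_callback() below for cluster peers).
 */
#if 0
static void
example_entry_points(xmlNode *ipc_request, pcmk__client_t *client,
                     xmlNode *peer_msg)
{
    // Local IPC client: privilege depends on which IPC server it connected to
    cib_process_request(ipc_request, FALSE, client);

    // Cluster peer via CPG: always privileged, no local client
    cib_process_request(peer_msg, TRUE, NULL);
}
#endif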
/*!
* \internal
* \brief Get a CIB operation's input from the request XML
*
* \param[in] request CIB request XML
* \param[in] type CIB operation type
* \param[out] section Where to store CIB section name
*
* \return Input XML for CIB operation
*
* \note If not \c NULL, the return value is a non-const pointer to part of
* \p request. The caller should not free it directly.
*/
static xmlNode *
prepare_input(const xmlNode *request, enum cib__op_type type,
const char **section)
{
xmlNode *wrapper = NULL;
xmlNode *input = NULL;
*section = NULL;
switch (type) {
case cib__op_apply_patch:
{
const char *wrapper_name = PCMK__XE_CIB_CALLDATA;
if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) {
wrapper_name = PCMK__XE_CIB_UPDATE_DIFF;
}
wrapper = pcmk__xe_first_child(request, wrapper_name, NULL,
NULL);
input = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
}
break;
default:
wrapper = pcmk__xe_first_child(request, PCMK__XE_CIB_CALLDATA, NULL,
NULL);
input = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
*section = crm_element_value(request, PCMK__XA_CIB_SECTION);
break;
}
// Grab the specified section
if ((*section != NULL) && pcmk__xe_is(input, PCMK_XE_CIB)) {
input = pcmk_find_cib_element(input, *section);
}
return input;
}
// v1 and v2 patch formats
#define XPATH_CONFIG_CHANGE \
"//" PCMK_XE_CRM_CONFIG " | " \
"//" PCMK_XE_CHANGE \
"[contains(@" PCMK_XA_PATH ",'/" PCMK_XE_CRM_CONFIG "/')]"
static bool
contains_config_change(xmlNode *diff)
{
bool changed = false;
if (diff) {
xmlXPathObject *xpathObj = xpath_search(diff, XPATH_CONFIG_CHANGE);
if (numXpathResults(xpathObj) > 0) {
changed = true;
}
freeXpathObject(xpathObj);
}
return changed;
}
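/* Editor's note: a hypothetical patchset fragment that XPATH_CONFIG_CHANGE
 * above would match, causing cib_process_command() to re-read the
 * configuration:
 *
 *   <diff format="2">
 *     <change operation="modify"
 *             path="/cib/configuration/crm_config/..."/>
 *   </diff>
 *
 * Any v2 <change> whose path crosses /crm_config/, or any v1 diff containing
 * a <crm_config> element, counts as a configuration change.
 */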
static int
cib_process_command(xmlNode *request, const cib__operation_t *operation,
cib__op_fn_t op_function, xmlNode **reply,
xmlNode **cib_diff, bool privileged)
{
xmlNode *input = NULL;
xmlNode *output = NULL;
xmlNode *result_cib = NULL;
int call_options = 0;
const char *op = NULL;
const char *section = NULL;
const char *call_id = crm_element_value(request, PCMK__XA_CIB_CALLID);
const char *client_id = crm_element_value(request, PCMK__XA_CIB_CLIENTID);
const char *client_name = crm_element_value(request,
PCMK__XA_CIB_CLIENTNAME);
const char *originator = crm_element_value(request, PCMK__XA_SRC);
int rc = pcmk_ok;
bool config_changed = false;
bool manage_counters = true;
static mainloop_timer_t *digest_timer = NULL;
CRM_ASSERT(cib_status == pcmk_ok);
if(digest_timer == NULL) {
digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL);
}
*reply = NULL;
*cib_diff = NULL;
/* Start processing the request... */
op = crm_element_value(request, PCMK__XA_CIB_OP);
crm_element_value_int(request, PCMK__XA_CIB_CALLOPT, &call_options);
if (!privileged && pcmk_is_set(operation->flags, cib__op_attr_privileged)) {
rc = -EACCES;
crm_trace("Failed due to lack of privileges: %s", pcmk_strerror(rc));
goto done;
}
input = prepare_input(request, operation->type, &section);
if (!pcmk_is_set(operation->flags, cib__op_attr_modifies)) {
rc = cib_perform_op(NULL, op, call_options, op_function, true, section,
request, input, false, &config_changed, &the_cib,
&result_cib, NULL, &output);
CRM_CHECK(result_cib == NULL, free_xml(result_cib));
goto done;
}
/* @COMPAT: Handle a valid write action (legacy)
*
* @TODO: Re-evaluate whether this is all truly legacy. The cib_force_diff
* portion is. However, PCMK__XA_CIB_UPDATE may be set by a sync operation
* even in non-legacy mode, and manage_counters tells xml_create_patchset()
* whether to update version/epoch info.
*/
if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) {
manage_counters = false;
cib__set_call_options(call_options, "call", cib_force_diff);
crm_trace("Global update detected");
CRM_LOG_ASSERT(pcmk__str_any_of(op,
PCMK__CIB_REQUEST_APPLY_PATCH,
PCMK__CIB_REQUEST_REPLACE,
NULL));
}
ping_modified_since = TRUE;
if (pcmk_is_set(call_options, cib_inhibit_bcast)) {
crm_trace("Skipping update: inhibit broadcast");
manage_counters = false;
}
// result_cib must not be modified after cib_perform_op() returns
rc = cib_perform_op(NULL, op, call_options, op_function, false, section,
request, input, manage_counters, &config_changed,
&the_cib, &result_cib, cib_diff, &output);
// @COMPAT: Legacy code
if (!manage_counters) {
int format = 1;
// If the diff is NULL at this point, it's because nothing changed
if (*cib_diff != NULL) {
crm_element_value_int(*cib_diff, PCMK_XA_FORMAT, &format);
}
if (format == 1) {
config_changed = cib__config_changed_v1(NULL, NULL, cib_diff);
}
}
/* Always write to disk for successful ops with the flag set. This also
* negates the need to detect ordering changes.
*/
if ((rc == pcmk_ok)
&& pcmk_is_set(operation->flags, cib__op_attr_writes_through)) {
config_changed = true;
}
if ((rc == pcmk_ok)
&& !pcmk_any_flags_set(call_options, cib_dryrun|cib_transaction)) {
if (result_cib != the_cib) {
if (pcmk_is_set(operation->flags, cib__op_attr_writes_through)) {
config_changed = true;
}
crm_trace("Activating %s->%s%s",
crm_element_value(the_cib, PCMK_XA_NUM_UPDATES),
crm_element_value(result_cib, PCMK_XA_NUM_UPDATES),
(config_changed? " changed" : ""));
rc = activateCibXml(result_cib, config_changed, op);
if (rc != pcmk_ok) {
crm_err("Failed to activate new CIB: %s", pcmk_strerror(rc));
}
}
if ((rc == pcmk_ok) && contains_config_change(*cib_diff)) {
cib_read_config(config_hash, result_cib);
}
/* @COMPAT Nodes older than feature set 3.19.0 don't support
* transactions. In a mixed-version cluster with nodes <3.19.0, we must
* sync the updated CIB, so that the older nodes receive the changes.
* Any node that has already applied the transaction will ignore the
* synced CIB.
*
* To ensure the updated CIB is synced from only one node, we sync it
* from the originator.
*/
if ((operation->type == cib__op_commit_transact)
&& pcmk__str_eq(originator, OUR_NODENAME, pcmk__str_casei)
&& compare_version(crm_element_value(the_cib,
PCMK_XA_CRM_FEATURE_SET),
"3.19.0") < 0) {
sync_our_cib(request, TRUE);
}
mainloop_timer_stop(digest_timer);
mainloop_timer_start(digest_timer);
} else if (rc == -pcmk_err_schema_validation) {
CRM_ASSERT(result_cib != the_cib);
if (output != NULL) {
crm_log_xml_info(output, "cib:output");
free_xml(output);
}
output = result_cib;
} else {
crm_trace("Not activating %d %d %s", rc,
pcmk_is_set(call_options, cib_dryrun),
crm_element_value(result_cib, PCMK_XA_NUM_UPDATES));
if (result_cib != the_cib) {
free_xml(result_cib);
}
}
if (!pcmk_any_flags_set(call_options,
cib_dryrun|cib_inhibit_notify|cib_transaction)) {
crm_trace("Sending notifications %d",
pcmk_is_set(call_options, cib_dryrun));
cib_diff_notify(op, rc, call_id, client_id, client_name, originator,
input, *cib_diff);
}
pcmk__log_xml_patchset(LOG_TRACE, *cib_diff);
done:
if (!pcmk_is_set(call_options, cib_discard_reply) || cib_legacy_mode()) {
*reply = create_cib_reply(op, call_id, client_id, call_options, rc,
output);
}
if (output != the_cib) {
free_xml(output);
}
crm_trace("done");
return rc;
}
void
cib_peer_callback(xmlNode * msg, void *private_data)
{
const char *reason = NULL;
const char *originator = crm_element_value(msg, PCMK__XA_SRC);
if (cib_legacy_mode()
&& pcmk__str_eq(originator, OUR_NODENAME,
pcmk__str_casei|pcmk__str_null_matches)) {
/* message is from ourselves */
int bcast_id = 0;
if (crm_element_value_int(msg, PCMK__XA_CIB_LOCAL_NOTIFY_ID,
&bcast_id) == 0) {
check_local_notify(bcast_id);
}
return;
} else if (crm_peer_cache == NULL) {
reason = "membership not established";
goto bail;
}
if (crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME) == NULL) {
crm_xml_add(msg, PCMK__XA_CIB_CLIENTNAME, originator);
}
/* crm_log_xml_trace(msg, "Peer[inbound]"); */
cib_process_request(msg, TRUE, NULL);
return;
bail:
if (reason) {
const char *op = crm_element_value(msg, PCMK__XA_CIB_OP);
crm_warn("Discarding %s message from %s: %s", op, originator, reason);
}
}
static gboolean
cib_force_exit(gpointer data)
{
crm_notice("Forcing exit!");
terminate_cib(__func__, CRM_EX_ERROR);
return FALSE;
}
static void
disconnect_remote_client(gpointer key, gpointer value, gpointer user_data)
{
pcmk__client_t *a_client = value;
crm_err("Can't disconnect client %s: Not implemented",
pcmk__client_name(a_client));
}
static void
initiate_exit(void)
{
int active = 0;
xmlNode *leaving = NULL;
active = pcmk__cluster_num_active_nodes();
if (active < 2) { // This is the last active node
terminate_cib(__func__, 0);
return;
}
crm_info("Sending shutdown request to %d peers", active);
leaving = pcmk__xe_create(NULL, PCMK__XE_EXIT_NOTIFICATION);
crm_xml_add(leaving, PCMK__XA_T, PCMK__VALUE_CIB);
crm_xml_add(leaving, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_SHUTDOWN);
send_cluster_message(NULL, crm_msg_cib, leaving, TRUE);
free_xml(leaving);
g_timeout_add(EXIT_ESCALATION_MS, cib_force_exit, NULL);
}
void
cib_shutdown(int nsig)
{
struct qb_ipcs_stats srv_stats;
if (cib_shutdown_flag == FALSE) {
int disconnects = 0;
qb_ipcs_connection_t *c = NULL;
cib_shutdown_flag = TRUE;
c = qb_ipcs_connection_first_get(ipcs_rw);
while (c != NULL) {
qb_ipcs_connection_t *last = c;
c = qb_ipcs_connection_next_get(ipcs_rw, last);
crm_debug("Disconnecting r/w client %p...", last);
qb_ipcs_disconnect(last);
qb_ipcs_connection_unref(last);
disconnects++;
}
c = qb_ipcs_connection_first_get(ipcs_ro);
while (c != NULL) {
qb_ipcs_connection_t *last = c;
c = qb_ipcs_connection_next_get(ipcs_ro, last);
crm_debug("Disconnecting r/o client %p...", last);
qb_ipcs_disconnect(last);
qb_ipcs_connection_unref(last);
disconnects++;
}
c = qb_ipcs_connection_first_get(ipcs_shm);
while (c != NULL) {
qb_ipcs_connection_t *last = c;
c = qb_ipcs_connection_next_get(ipcs_shm, last);
crm_debug("Disconnecting non-blocking r/w client %p...", last);
qb_ipcs_disconnect(last);
qb_ipcs_connection_unref(last);
disconnects++;
}
disconnects += pcmk__ipc_client_count();
crm_debug("Disconnecting %d remote clients", pcmk__ipc_client_count());
pcmk__foreach_ipc_client(disconnect_remote_client, NULL);
crm_info("Disconnected %d clients", disconnects);
}
qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE);
if (pcmk__ipc_client_count() == 0) {
crm_info("All clients disconnected (%d)", srv_stats.active_connections);
initiate_exit();
} else {
crm_info("Waiting on %d clients to disconnect (%d)",
pcmk__ipc_client_count(), srv_stats.active_connections);
}
}
extern int remote_fd;
extern int remote_tls_fd;
/*!
* \internal
* \brief Close remote sockets, free the global CIB and quit
*
* \param[in] caller Name of calling function (for log message)
* \param[in] fast If positive, skip disconnect and exit immediately with
*                 that status; if 0, disconnect and quit via the main loop;
*                 if -1, quit via the main loop but defer the disconnect
*                 until after the main loop returns
*/
void
terminate_cib(const char *caller, int fast)
{
crm_info("%s: Exiting%s...", caller,
(fast > 0)? " fast" : mainloop ? " from mainloop" : "");
if (remote_fd > 0) {
close(remote_fd);
remote_fd = 0;
}
if (remote_tls_fd > 0) {
close(remote_tls_fd);
remote_tls_fd = 0;
}
uninitializeCib();
if (fast > 0) {
/* Quit fast on error */
pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
crm_exit(fast);
} else if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) {
/* Quit via returning from the main loop. If fast == -1, we skip the
* disconnect here, and it will be done when the main loop returns
* (this allows the peer status callback to avoid messing with the
* peer caches).
*/
if (fast == 0) {
pcmk_cluster_disconnect(crm_cluster);
}
g_main_loop_quit(mainloop);
} else {
/* Quit via clean exit. Even the peer status callback can disconnect
* here, because we're not returning control to the caller. */
pcmk_cluster_disconnect(crm_cluster);
pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
crm_exit(CRM_EX_OK);
}
}
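/* Editor's note: the three exit styles described above, as used at call sites
 * in this file and in pacemaker-based.c (illustrative summary only):
 *
 *   terminate_cib(__func__, CRM_EX_ERROR); // fast > 0: exit immediately
 *   terminate_cib(__func__, 0);            // disconnect, then quit main loop
 *   terminate_cib(__func__, -1);           // quit main loop; disconnect
 *                                          // happens after it returns
 */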
diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c
index 6607105cc3..5c336b46fd 100644
--- a/daemons/based/based_messages.c
+++ b/daemons/based/based_messages.c
@@ -1,539 +1,539 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <glib.h>
#include <libxml/tree.h>
#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/common/xml.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <pacemaker-based.h>
/* Maximum number of diffs to ignore while waiting for a resync */
#define MAX_DIFF_RETRY 5
bool based_is_primary = false;
xmlNode *the_cib = NULL;
int
cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req,
xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
xmlNode ** answer)
{
const char *host = crm_element_value(req, PCMK__XA_SRC);
*answer = NULL;
if (crm_element_value(req, PCMK__XA_CIB_ISREPLYTO) == NULL) {
crm_info("Peer %s is requesting to shut down", host);
return pcmk_ok;
}
if (cib_shutdown_flag == FALSE) {
crm_err("Peer %s mistakenly thinks we wanted to shut down", host);
return -EINVAL;
}
crm_info("Peer %s has acknowledged our shutdown request", host);
terminate_cib(__func__, 0);
return pcmk_ok;
}
// @COMPAT: Remove when PCMK__CIB_REQUEST_NOOP is removed
int
cib_process_noop(const char *op, int options, const char *section, xmlNode *req,
xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib,
xmlNode **answer)
{
crm_trace("Processing \"%s\" event", op);
*answer = NULL;
return pcmk_ok;
}
int
cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req,
xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
xmlNode ** answer)
{
int result = pcmk_ok;
crm_trace("Processing \"%s\" event", op);
if (pcmk__str_eq(op, PCMK__CIB_REQUEST_IS_PRIMARY, pcmk__str_none)) {
if (based_is_primary) {
result = pcmk_ok;
} else {
result = -EPERM;
}
return result;
}
if (pcmk__str_eq(op, PCMK__CIB_REQUEST_PRIMARY, pcmk__str_none)) {
if (!based_is_primary) {
crm_info("We are now in R/W mode");
based_is_primary = true;
} else {
crm_debug("We are still in R/W mode");
}
} else if (based_is_primary) {
crm_info("We are now in R/O mode");
based_is_primary = false;
}
return result;
}
/* Set to 1 when a sync is requested, incremented when a diff is ignored,
* reset to 0 when a sync is received
*/
static int sync_in_progress = 0;
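/* Editor's note: a condensed sketch (hypothetical helpers) of this counter's
 * lifecycle as implemented below: send_sync_request() arms it,
 * cib_server_process_diff() bumps it for each diff ignored while waiting,
 * and a successful full replace in cib_process_replace_svr() clears it.
 * Past MAX_DIFF_RETRY, diffs are applied again rather than ignored.
 */
#if 0
static void on_sync_requested(void)  { sync_in_progress = 1; }
static void on_diff_ignored(void)    { sync_in_progress++;   }
static void on_replace_applied(void) { sync_in_progress = 0; }
#endif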
void
send_sync_request(const char *host)
{
xmlNode *sync_me = pcmk__xe_create(NULL, "sync-me");
crm_node_t *peer = NULL;
crm_info("Requesting re-sync from %s", (host? host : "all peers"));
sync_in_progress = 1;
crm_xml_add(sync_me, PCMK__XA_T, PCMK__VALUE_CIB);
crm_xml_add(sync_me, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_SYNC_TO_ONE);
crm_xml_add(sync_me, PCMK__XA_CIB_DELEGATED_FROM,
stand_alone? "localhost" : crm_cluster->uname);
if (host != NULL) {
- peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster);
+ peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster_member);
}
send_cluster_message(peer, crm_msg_cib, sync_me, FALSE);
free_xml(sync_me);
}
int
cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
{
const char *host = crm_element_value(req, PCMK__XA_SRC);
const char *seq = crm_element_value(req, PCMK__XA_CIB_PING_ID);
char *digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET);
xmlNode *wrapper = NULL;
crm_trace("Processing \"%s\" event %s from %s", op, seq, host);
*answer = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE);
crm_xml_add(*answer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_xml_add(*answer, PCMK__XA_DIGEST, digest);
crm_xml_add(*answer, PCMK__XA_CIB_PING_ID, seq);
wrapper = pcmk__xe_create(*answer, PCMK__XE_CIB_CALLDATA);
if (the_cib != NULL) {
pcmk__if_tracing(
{
/* Append additional detail so the receiver can log the
* differences
*/
pcmk__xml_copy(wrapper, the_cib);
},
{
// Always include at least the version details
const char *name = (const char *) the_cib->name;
xmlNode *shallow = pcmk__xe_create(wrapper, name);
pcmk__xe_copy_attrs(shallow, the_cib, pcmk__xaf_none);
}
);
}
crm_info("Reporting our current digest to %s: %s for %s.%s.%s",
host, digest,
crm_element_value(existing_cib, PCMK_XA_ADMIN_EPOCH),
crm_element_value(existing_cib, PCMK_XA_EPOCH),
crm_element_value(existing_cib, PCMK_XA_NUM_UPDATES));
free(digest);
return pcmk_ok;
}
int
cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
{
return sync_our_cib(req, TRUE);
}
int
cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
{
int rc = pcmk_ok;
*answer = NULL;
if (crm_element_value(req, PCMK__XA_CIB_SCHEMA_MAX) != NULL) {
/* The originator of an upgrade request sends it to the DC, without
* PCMK__XA_CIB_SCHEMA_MAX. If an upgrade is needed, the DC
* re-broadcasts the request with PCMK__XA_CIB_SCHEMA_MAX, and each node
* performs the upgrade (and notifies its local clients) here.
*/
return cib_process_upgrade(
op, options, section, req, input, existing_cib, result_cib, answer);
} else {
xmlNode *scratch = pcmk__xml_copy(NULL, existing_cib);
const char *host = crm_element_value(req, PCMK__XA_SRC);
const char *original_schema = NULL;
const char *new_schema = NULL;
const char *client_id = crm_element_value(req, PCMK__XA_CIB_CLIENTID);
const char *call_opts = crm_element_value(req, PCMK__XA_CIB_CALLOPT);
const char *call_id = crm_element_value(req, PCMK__XA_CIB_CALLID);
crm_trace("Processing \"%s\" event", op);
original_schema = crm_element_value(existing_cib,
PCMK_XA_VALIDATE_WITH);
rc = pcmk__update_schema(&scratch, NULL, true, true);
rc = pcmk_rc2legacy(rc);
new_schema = crm_element_value(scratch, PCMK_XA_VALIDATE_WITH);
if (pcmk__cmp_schemas_by_name(new_schema, original_schema) > 0) {
xmlNode *up = pcmk__xe_create(NULL, __func__);
rc = pcmk_ok;
crm_notice("Upgrade request from %s verified", host);
crm_xml_add(up, PCMK__XA_T, PCMK__VALUE_CIB);
crm_xml_add(up, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_UPGRADE);
crm_xml_add(up, PCMK__XA_CIB_SCHEMA_MAX, new_schema);
crm_xml_add(up, PCMK__XA_CIB_DELEGATED_FROM, host);
crm_xml_add(up, PCMK__XA_CIB_CLIENTID, client_id);
crm_xml_add(up, PCMK__XA_CIB_CALLOPT, call_opts);
crm_xml_add(up, PCMK__XA_CIB_CALLID, call_id);
if (cib_legacy_mode() && based_is_primary) {
rc = cib_process_upgrade(
op, options, section, up, input, existing_cib, result_cib, answer);
} else {
send_cluster_message(NULL, crm_msg_cib, up, FALSE);
}
free_xml(up);
} else if(rc == pcmk_ok) {
rc = -pcmk_err_schema_unchanged;
}
if (rc != pcmk_ok) {
// Notify originating peer so it can notify its local clients
crm_node_t *origin = NULL;
origin = pcmk__search_node_caches(0, host,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
crm_info("Rejecting upgrade request from %s: %s "
CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc,
(origin? origin->uname : "lost"));
if (origin) {
xmlNode *up = pcmk__xe_create(NULL, __func__);
crm_xml_add(up, PCMK__XA_T, PCMK__VALUE_CIB);
crm_xml_add(up, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_UPGRADE);
crm_xml_add(up, PCMK__XA_CIB_DELEGATED_FROM, host);
crm_xml_add(up, PCMK__XA_CIB_ISREPLYTO, host);
crm_xml_add(up, PCMK__XA_CIB_CLIENTID, client_id);
crm_xml_add(up, PCMK__XA_CIB_CALLOPT, call_opts);
crm_xml_add(up, PCMK__XA_CIB_CALLID, call_id);
crm_xml_add_int(up, PCMK__XA_CIB_UPGRADE_RC, rc);
if (send_cluster_message(origin, crm_msg_cib, up, TRUE)
== FALSE) {
crm_warn("Could not send CIB upgrade result to %s", host);
}
free_xml(up);
}
}
free_xml(scratch);
}
return rc;
}
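/* Editor's note: schematic of the two-phase upgrade implemented above
 * (constants shown by macro name; flow per the comments in this function
 * and in parse_peer_options_v2()):
 *
 *   1. origin -> DC:       PCMK__CIB_REQUEST_UPGRADE without
 *                          PCMK__XA_CIB_SCHEMA_MAX
 *   2. DC validates; if a newer schema exists:
 *        DC -> all nodes:  PCMK__CIB_REQUEST_UPGRADE with
 *                          PCMK__XA_CIB_SCHEMA_MAX set
 *      otherwise:
 *        DC -> origin:     PCMK__XA_CIB_UPGRADE_RC carrying the failure code
 *   3. each node performs the upgrade locally and notifies its own clients
 */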
int
cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req,
xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
xmlNode ** answer)
{
return sync_our_cib(req, FALSE);
}
int
cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req,
xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
xmlNode ** answer)
{
int rc = pcmk_ok;
if (sync_in_progress > MAX_DIFF_RETRY) {
/* Don't ignore diffs forever; the last request may have been lost.
* If the diff fails, we'll ask for another full resync.
*/
sync_in_progress = 0;
}
// The primary instance should never ignore a diff
if (sync_in_progress && !based_is_primary) {
int diff_add_updates = 0;
int diff_add_epoch = 0;
int diff_add_admin_epoch = 0;
int diff_del_updates = 0;
int diff_del_epoch = 0;
int diff_del_admin_epoch = 0;
cib_diff_version_details(input,
&diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
&diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
sync_in_progress++;
crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)",
diff_del_admin_epoch, diff_del_epoch, diff_del_updates,
diff_add_admin_epoch, diff_add_epoch, diff_add_updates);
return -pcmk_err_diff_resync;
}
rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer);
crm_trace("result: %s (%d), %s", pcmk_strerror(rc), rc,
(based_is_primary? "primary": "secondary"));
if ((rc == -pcmk_err_diff_resync) && !based_is_primary) {
free_xml(*result_cib);
*result_cib = NULL;
send_sync_request(NULL);
} else if (rc == -pcmk_err_diff_resync) {
rc = -pcmk_err_diff_failed;
if (options & cib_force_diff) {
crm_warn("Not requesting full refresh in R/W mode");
}
} else if ((rc != pcmk_ok) && !based_is_primary && cib_legacy_mode()) {
crm_warn("Requesting full CIB refresh because update failed: %s"
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
pcmk__log_xml_patchset(LOG_INFO, input);
free_xml(*result_cib);
*result_cib = NULL;
send_sync_request(NULL);
}
return rc;
}
int
cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req,
xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
xmlNode ** answer)
{
int rc =
cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer);
if ((rc == pcmk_ok) && pcmk__xe_is(input, PCMK_XE_CIB)) {
sync_in_progress = 0;
}
return rc;
}
// @COMPAT: Remove when PCMK__CIB_REQUEST_ABS_DELETE is removed
int
cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req,
xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
xmlNode ** answer)
{
return -EINVAL;
}
static xmlNode *
cib_msg_copy(xmlNode *msg)
{
static const char *field_list[] = {
PCMK__XA_T,
PCMK__XA_CIB_CLIENTID,
PCMK__XA_CIB_CALLOPT,
PCMK__XA_CIB_CALLID,
PCMK__XA_CIB_OP,
PCMK__XA_CIB_ISREPLYTO,
PCMK__XA_CIB_SECTION,
PCMK__XA_CIB_HOST,
PCMK__XA_CIB_RC,
PCMK__XA_CIB_DELEGATED_FROM,
PCMK__XA_CIB_OBJECT,
PCMK__XA_CIB_OBJECT_TYPE,
PCMK__XA_CIB_UPDATE,
PCMK__XA_CIB_CLIENTNAME,
PCMK__XA_CIB_USER,
PCMK__XA_CIB_NOTIFY_TYPE,
PCMK__XA_CIB_NOTIFY_ACTIVATE,
};
xmlNode *copy = pcmk__xe_create(NULL, PCMK__XE_COPY);
for (int lpc = 0; lpc < PCMK__NELEM(field_list); lpc++) {
const char *field = field_list[lpc];
const char *value = crm_element_value(msg, field);
if (value != NULL) {
crm_xml_add(copy, field, value);
}
}
return copy;
}
int
sync_our_cib(xmlNode * request, gboolean all)
{
int result = pcmk_ok;
char *digest = NULL;
const char *host = crm_element_value(request, PCMK__XA_SRC);
const char *op = crm_element_value(request, PCMK__XA_CIB_OP);
crm_node_t *peer = NULL;
xmlNode *replace_request = NULL;
xmlNode *wrapper = NULL;
CRM_CHECK(the_cib != NULL, return -EINVAL);
CRM_CHECK(all || (host != NULL), return -EINVAL);
crm_debug("Syncing CIB to %s", all ? "all peers" : host);
replace_request = cib_msg_copy(request);
if (host != NULL) {
crm_xml_add(replace_request, PCMK__XA_CIB_ISREPLYTO, host);
}
if (all) {
pcmk__xe_remove_attr(replace_request, PCMK__XA_CIB_HOST);
}
crm_xml_add(replace_request, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_REPLACE);
// @TODO Keep for tracing, or drop?
crm_xml_add(replace_request, PCMK__XA_ORIGINAL_CIB_OP, op);
pcmk__xe_set_bool_attr(replace_request, PCMK__XA_CIB_UPDATE, true);
crm_xml_add(replace_request, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET);
crm_xml_add(replace_request, PCMK__XA_DIGEST, digest);
wrapper = pcmk__xe_create(replace_request, PCMK__XE_CIB_CALLDATA);
pcmk__xml_copy(wrapper, the_cib);
if (!all) {
- peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster);
+ peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster_member);
}
if (!send_cluster_message(peer, crm_msg_cib, replace_request, FALSE)) {
result = -ENOTCONN;
}
free_xml(replace_request);
free(digest);
return result;
}
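/* Editor's note: schematic of the replace request built above, with
 * attributes shown by macro name rather than their serialized strings
 * (values are placeholders):
 *
 *   <PCMK__XE_COPY
 *       PCMK__XA_T               = PCMK__VALUE_CIB
 *       PCMK__XA_CIB_OP          = PCMK__CIB_REQUEST_REPLACE
 *       PCMK__XA_CIB_UPDATE      = "true"
 *       PCMK__XA_CIB_ISREPLYTO   = <host, when not syncing to all>
 *       PCMK__XA_ORIGINAL_CIB_OP = <original op, kept for tracing>
 *       PCMK_XA_CRM_FEATURE_SET  = CRM_FEATURE_SET
 *       PCMK__XA_DIGEST          = <versioned digest of the_cib>>
 *     <PCMK__XE_CIB_CALLDATA>
 *       (full copy of the_cib)
 *     </PCMK__XE_CIB_CALLDATA>
 *   </PCMK__XE_COPY>
 */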
int
cib_process_commit_transaction(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input,
xmlNode *existing_cib, xmlNode **result_cib,
xmlNode **answer)
{
/* On success, our caller will activate *result_cib locally, trigger a
* replace notification if appropriate, and sync *result_cib to all nodes.
* On failure, our caller will free *result_cib.
*/
int rc = pcmk_rc_ok;
const char *client_id = crm_element_value(req, PCMK__XA_CIB_CLIENTID);
const char *origin = crm_element_value(req, PCMK__XA_SRC);
pcmk__client_t *client = pcmk__find_client_by_id(client_id);
rc = based_commit_transaction(input, client, origin, result_cib);
if (rc != pcmk_rc_ok) {
char *source = based_transaction_source_str(client, origin);
crm_err("Could not commit transaction for %s: %s",
source, pcmk_rc_str(rc));
free(source);
}
return pcmk_rc2legacy(rc);
}
int
cib_process_schemas(const char *op, int options, const char *section, xmlNode *req,
xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib,
xmlNode **answer)
{
xmlNode *wrapper = NULL;
xmlNode *data = NULL;
const char *after_ver = NULL;
GList *schemas = NULL;
GList *already_included = NULL;
*answer = pcmk__xe_create(NULL, PCMK__XA_SCHEMAS);
wrapper = pcmk__xe_first_child(req, PCMK__XE_CIB_CALLDATA, NULL, NULL);
data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
if (data == NULL) {
crm_warn("No data specified in request");
return -EPROTO;
}
after_ver = crm_element_value(data, PCMK_XA_VERSION);
if (after_ver == NULL) {
crm_warn("No version specified in request");
return -EPROTO;
}
/* The client requested all schemas after the latest one we know about, which
* means the client is fully up-to-date. Return a properly formatted reply
* with no schemas.
*/
if (pcmk__str_eq(after_ver, pcmk__highest_schema_name(), pcmk__str_none)) {
return pcmk_ok;
}
schemas = pcmk__schema_files_later_than(after_ver);
for (GList *iter = schemas; iter != NULL; iter = iter->next) {
pcmk__build_schema_xml_node(*answer, iter->data, &already_included);
}
g_list_free_full(schemas, free);
g_list_free_full(already_included, free);
return pcmk_ok;
}
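/* Editor's note: schematic of the exchange this handler implements (element
 * names shown by macro; the version string is hypothetical). The client
 * reports the newest schema it knows about, and the reply carries any newer
 * schema files, or none if the client is already current:
 *
 *   request:  <PCMK__XE_CIB_CALLDATA>
 *               <... PCMK_XA_VERSION="pacemaker-3.9" .../>
 *             </PCMK__XE_CIB_CALLDATA>
 *   reply:    <PCMK__XA_SCHEMAS>
 *               (one node per schema file newer than the given version)
 *             </PCMK__XA_SCHEMAS>
 */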
diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c
index 00e314137c..b6e761e488 100644
--- a/daemons/based/pacemaker-based.c
+++ b/daemons/based/pacemaker-based.c
@@ -1,450 +1,450 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <stdlib.h>
#include <pwd.h>
#include <grp.h>
#include <bzlib.h>
#include <sys/types.h>
#include <glib.h>
#include <libxml/tree.h>
#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/output_internal.h>
#include <crm/common/xml.h>
#include <pacemaker-based.h>
#define SUMMARY "daemon for managing the configuration of a Pacemaker cluster"
extern int init_remote_listener(int port, gboolean encrypted);
gboolean cib_shutdown_flag = FALSE;
int cib_status = pcmk_ok;
pcmk_cluster_t *crm_cluster = NULL;
GMainLoop *mainloop = NULL;
gchar *cib_root = NULL;
static gboolean preserve_status = FALSE;
gboolean cib_writes_enabled = TRUE;
gboolean stand_alone = FALSE;
int remote_fd = 0;
int remote_tls_fd = 0;
GHashTable *config_hash = NULL;
GHashTable *local_notify_queue = NULL;
static void cib_init(void);
void cib_shutdown(int nsig);
static bool startCib(const char *filename);
extern int write_cib_contents(gpointer p);
static crm_exit_t exit_code = CRM_EX_OK;
static void
cib_enable_writes(int nsig)
{
crm_info("(Re)enabling disk writes");
cib_writes_enabled = TRUE;
}
/*!
* \internal
* \brief Set up options, users, and groups for stand-alone mode
*
* \param[out] error GLib error object
*
* \return Standard Pacemaker return code
*/
static int
setup_stand_alone(GError **error)
{
int rc = 0;
struct passwd *pwentry = NULL;
preserve_status = TRUE;
cib_writes_enabled = FALSE;
errno = 0;
pwentry = getpwnam(CRM_DAEMON_USER);
if (pwentry == NULL) {
exit_code = CRM_EX_FATAL;
if (errno != 0) {
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Error getting password DB entry for %s: %s",
CRM_DAEMON_USER, strerror(errno));
return errno;
}
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Password DB entry for '%s' not found", CRM_DAEMON_USER);
return ENXIO;
}
rc = setgid(pwentry->pw_gid);
if (rc < 0) {
exit_code = CRM_EX_FATAL;
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Could not set group to %d: %s",
pwentry->pw_gid, strerror(errno));
return errno;
}
rc = initgroups(CRM_DAEMON_USER, pwentry->pw_gid);
if (rc < 0) {
exit_code = CRM_EX_FATAL;
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Could not setup groups for user %d: %s",
pwentry->pw_uid, strerror(errno));
return errno;
}
rc = setuid(pwentry->pw_uid);
if (rc < 0) {
exit_code = CRM_EX_FATAL;
g_set_error(error, PCMK__EXITC_ERROR, exit_code,
"Could not set user to %d: %s",
pwentry->pw_uid, strerror(errno));
return errno;
}
return pcmk_rc_ok;
}
/* @COMPAT Deprecated since 2.1.8. Use pcmk_list_cluster_options() or
* crm_attribute --list-options=cluster instead of querying daemon metadata.
*/
static int
based_metadata(pcmk__output_t *out)
{
return pcmk__daemon_metadata(out, "pacemaker-based",
"Cluster Information Base manager options",
"Cluster options used by Pacemaker's Cluster "
"Information Base manager",
pcmk__opt_based);
}
static GOptionEntry entries[] = {
{ "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
"(Advanced use only) Run in stand-alone mode", NULL },
{ "disk-writes", 'w', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE,
&cib_writes_enabled,
"(Advanced use only) Enable disk writes (enabled by default unless in "
"stand-alone mode)", NULL },
{ "cib-root", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME, &cib_root,
"(Advanced use only) Directory where the CIB XML file should be located "
"(default: " CRM_CONFIG_DIR ")", NULL },
{ NULL }
};
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
GOptionContext *context = NULL;
context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
pcmk__add_main_args(context, entries);
return context;
}
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
crm_ipc_t *old_instance = NULL;
pcmk__output_t *out = NULL;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "r");
GOptionContext *context = build_arg_context(args, &output_group);
crm_log_preinit(NULL, argc, argv);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
mainloop_add_signal(SIGTERM, cib_shutdown);
mainloop_add_signal(SIGPIPE, cib_enable_writes);
cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL);
if ((g_strv_length(processed_args) >= 2)
&& pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
rc = based_metadata(out);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Unable to display metadata: %s", pcmk_rc_str(rc));
}
goto done;
}
pcmk__cli_init_logging("pacemaker-based", args->verbosity);
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_notice("Starting Pacemaker CIB manager");
old_instance = crm_ipc_new(PCMK__SERVER_BASED_RO, 0);
if (old_instance == NULL) {
/* crm_ipc_new() will have already logged an error message with
* crm_err()
*/
exit_code = CRM_EX_FATAL;
goto done;
}
if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
/* IPC end-point already up */
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("pacemaker-based is already active, aborting startup");
goto done;
} else {
/* not up or not authentic, we'll proceed either way */
crm_ipc_destroy(old_instance);
old_instance = NULL;
}
if (stand_alone) {
rc = setup_stand_alone(&error);
if (rc != pcmk_rc_ok) {
goto done;
}
}
if (cib_root == NULL) {
cib_root = g_strdup(CRM_CONFIG_DIR);
} else {
crm_notice("Using custom config location: %s", cib_root);
}
if (!pcmk__daemon_can_write(cib_root, NULL)) {
exit_code = CRM_EX_FATAL;
crm_err("Terminating due to bad permissions on %s", cib_root);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Bad permissions on %s (see logs for details)", cib_root);
goto done;
}
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
// Read initial CIB, connect to cluster, and start IPC servers
cib_init();
// Run the main loop
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker CIB manager successfully started and accepting connections");
g_main_loop_run(mainloop);
/* If main loop returned, clean up and exit. We disconnect in case
* terminate_cib() was called with fast=-1.
*/
pcmk_cluster_disconnect(crm_cluster);
pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
- crm_peer_destroy();
+ pcmk__cluster_destroy_node_caches();
if (local_notify_queue != NULL) {
g_hash_table_destroy(local_notify_queue);
}
if (config_hash != NULL) {
g_hash_table_destroy(config_hash);
}
pcmk__client_cleanup();
pcmk_cluster_free(crm_cluster);
g_free(cib_root);
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
#if SUPPORT_COROSYNC
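/*!
 * \internal
 * \brief Dispatch a message arriving on the CIB manager's CPG connection
 *
 * Cluster-class messages are parsed into XML, tagged with the originating
 * node name, and passed on to cib_peer_callback().
 */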
static void
cib_cs_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = pcmk__xml_parse(data);
if (xml == NULL) {
crm_err("Invalid XML: '%.120s'", data);
free(data);
return;
}
crm_xml_add(xml, PCMK__XA_SRC, from);
cib_peer_callback(xml, NULL);
}
free_xml(xml);
free(data);
}
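/*!
 * \internal
 * \brief Handle the loss of the Corosync connection
 *
 * If a shutdown is in progress, the disconnection is expected; otherwise
 * treat it as fatal and terminate the CIB manager.
 */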
static void
cib_cs_destroy(gpointer user_data)
{
if (cib_shutdown_flag) {
crm_info("Corosync disconnection complete");
} else {
crm_crit("Lost connection to cluster layer, shutting down");
terminate_cib(__func__, CRM_EX_DISCONNECT);
}
}
#endif
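/*!
 * \internal
 * \brief Handle a peer status change
 *
 * Disables legacy mode once the affected peer's CPG process has left the
 * cluster, and terminates the CIB manager during shutdown once no other
 * active peers or IPC clients remain.
 */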
static void
cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
switch (type) {
case crm_status_processes:
if (cib_legacy_mode()
&& !pcmk_is_set(node->processes, crm_get_cluster_proc())) {
uint32_t old = data? *(const uint32_t *)data : 0;
if ((node->processes ^ old) & crm_proc_cpg) {
crm_info("Attempting to disable legacy mode after %s left the cluster",
node->uname);
legacy_mode = FALSE;
}
}
break;
case crm_status_uname:
case crm_status_nstate:
if (cib_shutdown_flag && (pcmk__cluster_num_active_nodes() < 2)
&& (pcmk__ipc_client_count() == 0)) {
crm_info("No more peers");
terminate_cib(__func__, -1);
}
break;
}
}
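/*!
 * \internal
 * \brief Read the on-disk CIB, connect to the cluster, and start IPC servers
 *
 * Exits immediately if the CIB cannot be activated or (when not running
 * stand-alone) the cluster connection fails.
 */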
static void
cib_init(void)
{
crm_cluster = pcmk_cluster_new();
#if SUPPORT_COROSYNC
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
pcmk_cluster_set_destroy_fn(crm_cluster, cib_cs_destroy);
pcmk_cpg_set_deliver_fn(crm_cluster, cib_cs_dispatch);
pcmk_cpg_set_confchg_fn(crm_cluster, pcmk_cpg_membership);
}
#endif // SUPPORT_COROSYNC
config_hash = pcmk__strkey_table(free, free);
if (startCib("cib.xml") == FALSE) {
crm_crit("Cannot start CIB... terminating");
crm_exit(CRM_EX_NOINPUT);
}
if (!stand_alone) {
crm_set_status_callback(&cib_peer_update_callback);
if (pcmk_cluster_connect(crm_cluster) != pcmk_rc_ok) {
crm_crit("Cannot sign in to the cluster... terminating");
crm_exit(CRM_EX_FATAL);
}
}
pcmk__serve_based_ipc(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks,
&ipc_rw_callbacks);
if (stand_alone) {
based_is_primary = true;
}
}
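/*!
 * \internal
 * \brief Read and activate a CIB file, then start any configured remote
 *        listeners
 *
 * \param[in] filename Name of the CIB file, relative to cib_root
 *
 * \return TRUE if the CIB was activated successfully, otherwise FALSE
 */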
static bool
startCib(const char *filename)
{
gboolean active = FALSE;
xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status);
if (activateCibXml(cib, TRUE, "start") == 0) {
int port = 0;
active = TRUE;
cib_read_config(config_hash, cib);
pcmk__scan_port(crm_element_value(cib, PCMK_XA_REMOTE_TLS_PORT), &port);
if (port >= 0) {
remote_tls_fd = init_remote_listener(port, TRUE);
}
pcmk__scan_port(crm_element_value(cib, PCMK_XA_REMOTE_CLEAR_PORT),
&port);
if (port >= 0) {
remote_fd = init_remote_listener(port, FALSE);
}
}
return active;
}
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index 7f9c1976ed..17fbe06e63 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,689 +1,689 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static qb_ipcs_service_t *ipcs = NULL;
static crm_trigger_t *config_read_trigger = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(pcmk_cluster_t *cluster);
#endif
static void crm_shutdown(int nsig);
static gboolean crm_read_options(gpointer user_data);
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static pcmk_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = pcmk_cluster_new();
}
if (action & A_HA_DISCONNECT) {
pcmk_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
#if SUPPORT_COROSYNC
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
registered = crm_connect_corosync(cluster);
}
#endif // SUPPORT_COROSYNC
if (registered) {
controld_election_init(cluster->uname);
controld_globals.our_nodename = cluster->uname;
controld_globals.our_uuid = cluster->uuid;
if(cluster->uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
}
if (!registered) {
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __func__);
controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
controld_set_fsa_input_flags(R_SHUTDOWN);
controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
controld_set_fsa_input_flags(R_SHUTDOWN);
//controld_set_fsa_input_flags(R_STAYDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
pcmk__s(controld_globals.dc_name, "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
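/*!
 * \internal
 * \brief Exit the controller immediately, without graceful cleanup
 *
 * The exit code is remapped to CRM_EX_FATAL if respawning must be inhibited,
 * or to CRM_EX_ERROR if recovery from an internal error failed.
 */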
void
crmd_fast_exit(crm_exit_t exit_code)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
exit_code, CRM_EX_FATAL);
exit_code = CRM_EX_FATAL;
} else if ((exit_code == CRM_EX_OK)
&& pcmk_is_set(controld_globals.fsa_input_register,
R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = CRM_EX_ERROR;
}
if (controld_globals.logger_out != NULL) {
controld_globals.logger_out->finish(controld_globals.logger_out,
exit_code, true, NULL);
pcmk__output_free(controld_globals.logger_out);
controld_globals.logger_out = NULL;
}
crm_exit(exit_code);
}
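/*!
 * \internal
 * \brief Tear down controller subsystems in preparation for a graceful exit
 *
 * Disconnects the IPC server and the attrd, scheduler, fencer, executor,
 * and CIB connections, frees as much memory as practical (to keep valgrind
 * reports useful), and drains the main loop. On error, exits immediately
 * via crmd_fast_exit().
 */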
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GMainLoop *mloop = controld_globals.mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
controld_shutdown_schedulerd_ipc();
controld_disconnect_fencer(TRUE);
if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
 * to report on - allowing real errors to stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
iter = iter->next) {
fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(controld_globals.fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
controld_clear_fsa_input_flags(R_MEMBERSHIP);
g_list_free(controld_globals.fsa_message_queue);
controld_globals.fsa_message_queue = NULL;
controld_free_node_pending_timers();
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
controld_disconnect_cib_manager();
verify_stopped(controld_globals.fsa_state, LOG_WARNING);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_destroy_all();
mainloop_destroy_trigger(config_read_trigger);
config_read_trigger = NULL;
controld_destroy_fsa_trigger();
controld_destroy_transition_trigger();
pcmk__client_cleanup();
- crm_peer_destroy();
+ pcmk__cluster_destroy_node_caches();
controld_free_fsa_timers();
te_cleanup_stonith_history_sync(NULL, TRUE);
controld_free_sched_timer();
free(controld_globals.our_nodename);
controld_globals.our_nodename = NULL;
free(controld_globals.our_uuid);
controld_globals.our_uuid = NULL;
free(controld_globals.dc_name);
controld_globals.dc_name = NULL;
free(controld_globals.dc_version);
controld_globals.dc_version = NULL;
free(controld_globals.cluster_name);
controld_globals.cluster_name = NULL;
free(controld_globals.te_uuid);
controld_globals.te_uuid = NULL;
free_max_generation();
controld_destroy_failed_sync_table();
controld_destroy_outside_events_table();
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
/* Don't re-enter this block */
controld_globals.mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(controld_globals.cib_conn);
controld_globals.cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_exit_t exit_code = CRM_EX_OK;
if (pcmk_is_set(action, A_EXIT_1)) {
exit_code = CRM_EX_ERROR;
crm_err("Exiting now due to errors");
}
verify_stopped(cur_state, LOG_ERR);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
crm_read_options, NULL);
controld_init_fsa_trigger();
controld_init_transition_trigger();
crm_debug("Creating CIB manager and executor objects");
controld_globals.cib_conn = cib_new();
lrm_state_init_local();
if (controld_init_fsa_timers() == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("Accepting new IPC client connection");
if (pcmk__new_client(c, uid, gid) == NULL) {
return -ENOMEM;
}
return 0;
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
if (msg == NULL) {
pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
CRM_EX_PROTOCOL);
return 0;
}
pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
CRM_EX_INDETERMINATE);
CRM_ASSERT(client->user != NULL);
pcmk__update_acl_user(msg, PCMK__XA_CRM_USER, client->user);
crm_xml_add(msg, PCMK__XA_CRM_SYS_FROM, client->id);
if (controld_authorize_ipc_message(msg, client, NULL)) {
crm_trace("Processing IPC message from client %s",
pcmk__client_name(client));
route_message(C_IPC_MESSAGE, msg);
}
controld_trigger_fsa();
free_xml(msg);
return 0;
}
static int32_t
ipc_client_disconnected(qb_ipcs_connection_t *c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client) {
crm_trace("Disconnecting %sregistered client %s (%p/%p)",
(client->userdata? "" : "un"), pcmk__client_name(client),
c, client);
free(client->userdata);
pcmk__free_client(client);
controld_trigger_fsa();
}
return 0;
}
static void
ipc_connection_destroyed(qb_ipcs_connection_t *c)
{
crm_trace("Connection %p", c);
ipc_client_disconnected(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = accept_controller_client,
.connection_created = NULL,
.msg_process = dispatch_controller_ipc,
.connection_closed = ipc_client_disconnected,
.connection_destroyed = ipc_connection_destroyed
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_MEMBERSHIP)) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_LRM_CONNECTED)) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_CIB_CONNECTED)) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_READ_CONFIG)) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else {
crm_notice("Pacemaker controller successfully started and accepting connections");
}
controld_set_fsa_input_flags(R_ST_REQUIRED);
controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));
controld_clear_fsa_input_flags(R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
controld_set_fsa_input_flags(R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
controld_set_fsa_input_flags(R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig != NULL) && !pcmk__xe_is(crmconfig, PCMK_XE_CRM_CONFIG)) {
crmconfig = pcmk__xe_first_child(crmconfig, PCMK_XE_CRM_CONFIG, NULL,
NULL);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " PCMK_XE_CRM_CONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = pcmk__strkey_table(free, free);
pe_unpack_nvpairs(crmconfig, crmconfig, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, FALSE, now,
NULL);
// Validate all options, and use defaults if not already present in hash
pcmk__validate_cluster_options(config_hash);
/* Validate the watchdog timeout in the context of the local node
* environment. If invalid, the controller will exit with a fatal error.
*
* We do this via a wrapper in the controller, so that we call
* pcmk__valid_stonith_watchdog_timeout() only if watchdog fencing is
* enabled for the local node. Otherwise, we may exit unnecessarily.
*
* A validator function in libcrmcommon can't act as such a wrapper, because
* it doesn't have a stonith API connection or the local node name.
*/
value = g_hash_table_lookup(config_hash, PCMK_OPT_STONITH_WATCHDOG_TIMEOUT);
controld_verify_stonith_watchdog_timeout(value);
value = g_hash_table_lookup(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)
&& (pcmk__locate_sbd() != 0)) {
controld_set_global_flags(controld_no_quorum_suicide);
}
value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK);
if (crm_is_true(value)) {
controld_set_global_flags(controld_shutdown_lock_enabled);
} else {
controld_clear_global_flags(controld_shutdown_lock_enabled);
}
value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
pcmk_parse_interval_spec(value, &controld_globals.shutdown_lock_limit);
controld_globals.shutdown_lock_limit /= 1000;
value = g_hash_table_lookup(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
pcmk_parse_interval_spec(value, &controld_globals.node_pending_timeout);
controld_globals.node_pending_timeout /= 1000;
value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME);
pcmk__str_update(&(controld_globals.cluster_name), value);
// Let subcomponents initialize their own static variables
controld_configure_election(config_hash);
controld_configure_fencing(config_hash);
controld_configure_fsa_timers(config_hash);
controld_configure_throttle(config_hash);
alerts = pcmk__xe_first_child(output, PCMK_XE_ALERTS, NULL, NULL);
crmd_unpack_alerts(alerts);
controld_set_fsa_input_flags(R_READ_CONFIG);
controld_trigger_fsa();
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
/*!
* \internal
* \brief Trigger read and processing of the configuration
*
* \param[in] fn Calling function name
* \param[in] line Line number where call occurred
*/
void
controld_trigger_config_as(const char *fn, int line)
{
if (config_read_trigger != NULL) {
crm_trace("%s:%d - Triggered config processing", fn, line);
mainloop_set_trigger(config_read_trigger);
}
}
gboolean
crm_read_options(gpointer user_data)
{
cib_t *cib_conn = controld_globals.cib_conn;
int call_id = cib_conn->cmds->query(cib_conn,
"//" PCMK_XE_CRM_CONFIG
" | //" PCMK_XE_ALERTS,
NULL, cib_xpath|cib_scope_local);
fsa_register_cib_callback(call_id, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
controld_trigger_config();
}
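/*!
 * \internal
 * \brief Handle a graceful shutdown request (SIGTERM)
 *
 * If a shutdown is already in progress, escalate it. Otherwise, register an
 * I_SHUTDOWN input and start the shutdown escalation countdown.
 */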
static void
crm_shutdown(int nsig)
{
const char *value = NULL;
guint default_period_ms = 0;
if ((controld_globals.mainloop == NULL)
|| !g_main_loop_is_running(controld_globals.mainloop)) {
crmd_exit(CRM_EX_OK);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
return;
}
controld_set_fsa_input_flags(R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
/* If shutdown timer doesn't have a period set, use the default
*
* @TODO: Evaluate whether this is still necessary. As long as
* config_query_callback() has been run at least once, it doesn't look like
* anything could have changed the timer period since then.
*/
value = pcmk__cluster_option(NULL, PCMK_OPT_SHUTDOWN_ESCALATION);
pcmk_parse_interval_spec(value, &default_period_ms);
controld_shutdown_start_countdown(default_period_ms);
}
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
index ef6142a1b9..b792a7d1d4 100644
--- a/daemons/controld/controld_corosync.c
+++ b/daemons/controld/controld_corosync.c
@@ -1,162 +1,162 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
#if SUPPORT_COROSYNC
extern void post_cache_update(int seq);
/* A_HA_CONNECT */
static void
crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
crm_node_t *peer = NULL;
xmlNode *xml = pcmk__xml_parse(data);
if (xml == NULL) {
crm_err("Could not parse message content (%d): %.100s", kind, data);
free(data);
return;
}
crm_xml_add(xml, PCMK__XA_SRC, from);
- peer = pcmk__get_node(0, from, NULL, pcmk__node_search_cluster);
+ peer = pcmk__get_node(0, from, NULL, pcmk__node_search_cluster_member);
if (!pcmk_is_set(peer->processes, crm_proc_cpg)) {
/* If we can still talk to our peer process on that node,
* then it must be part of the corosync membership
*/
crm_warn("Receiving messages from a node we think is dead: %s[%d]",
peer->uname, peer->id);
crm_update_peer_proc(__func__, peer, crm_proc_cpg,
PCMK_VALUE_ONLINE);
}
crmd_ha_msg_filter(xml);
free_xml(xml);
} else {
crm_err("Invalid message class (%d): %.100s", kind, data);
}
free(data);
}
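/*!
 * \internal
 * \brief Update quorum status and process the new membership ring
 */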
static gboolean
crmd_quorum_callback(unsigned long long seq, gboolean quorate)
{
crm_update_quorum(quorate, FALSE);
post_cache_update(seq);
return TRUE;
}
static void
crmd_cs_destroy(gpointer user_data)
{
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
crm_crit("Lost connection to cluster layer, shutting down");
crmd_exit(CRM_EX_DISCONNECT);
}
}
/*!
* \brief Handle a Corosync notification of a CPG configuration change
*
* \param[in] handle CPG connection
* \param[in] cpg_name CPG group name
* \param[in] member_list List of current CPG members
* \param[in] member_list_entries Number of entries in \p member_list
* \param[in] left_list List of CPG members that left
* \param[in] left_list_entries Number of entries in \p left_list
* \param[in] joined_list List of CPG members that joined
* \param[in] joined_list_entries Number of entries in \p joined_list
*/
static void
cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
const struct cpg_address *member_list,
size_t member_list_entries,
const struct cpg_address *left_list,
size_t left_list_entries,
const struct cpg_address *joined_list,
size_t joined_list_entries)
{
/* When nodes leave CPG, the DC clears their transient node attributes.
*
 * However, if there is no DC, or the DC is among the nodes that left, each
 * remaining node needs to do the clearing, to ensure it gets done.
* Otherwise, the attributes would persist when the nodes rejoin, which
* could have serious consequences for unfencing, agents that use attributes
* for internal logic, etc.
*
* Here, we set a global boolean if the DC is among the nodes that left, for
* use by the peer callback.
*/
if (controld_globals.dc_name != NULL) {
crm_node_t *peer = NULL;
peer = pcmk__search_node_caches(0, controld_globals.dc_name,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
if (peer != NULL) {
for (int i = 0; i < left_list_entries; ++i) {
if (left_list[i].nodeid == peer->id) {
controld_set_global_flags(controld_dc_left);
break;
}
}
}
}
// Process the change normally, which will call the peer callback as needed
pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries,
left_list, left_list_entries,
joined_list, joined_list_entries);
controld_clear_global_flags(controld_dc_left);
}
extern gboolean crm_connect_corosync(pcmk_cluster_t *cluster);
gboolean
crm_connect_corosync(pcmk_cluster_t *cluster)
{
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
crm_set_status_callback(&peer_update_callback);
pcmk_cluster_set_destroy_fn(cluster, crmd_cs_destroy);
pcmk_cpg_set_deliver_fn(cluster, crmd_cs_dispatch);
pcmk_cpg_set_confchg_fn(cluster, cpg_membership_callback);
if (pcmk_cluster_connect(cluster) == pcmk_rc_ok) {
pcmk__corosync_quorum_connect(crmd_quorum_callback,
crmd_cs_destroy);
return TRUE;
}
}
return FALSE;
}
#endif
diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c
index d924a9efac..1d4ff25d0f 100644
--- a/daemons/controld/controld_election.c
+++ b/daemons/controld/controld_election.c
@@ -1,289 +1,290 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/crm.h>
#include <pacemaker-controld.h>
static election_t *fsa_election = NULL;
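/*!
 * \internal
 * \brief Register an I_ELECTION_DC input after winning the DC election
 */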
static gboolean
election_win_cb(gpointer data)
{
register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL);
return FALSE;
}
void
controld_election_init(const char *uname)
{
fsa_election = election_init("DC", uname, 60000 /*60s*/, election_win_cb);
}
/*!
* \internal
* \brief Configure election options based on the CIB
*
* \param[in,out] options Name/value pairs for configured options
*/
void
controld_configure_election(GHashTable *options)
{
const char *value = g_hash_table_lookup(options, PCMK_OPT_ELECTION_TIMEOUT);
guint interval_ms = 0U;
pcmk_parse_interval_spec(value, &interval_ms);
election_timeout_set_period(fsa_election, interval_ms);
}
void
controld_remove_voter(const char *uname)
{
election_remove(fsa_election, uname);
if (pcmk__str_eq(uname, controld_globals.dc_name, pcmk__str_casei)) {
/* Clear any election dampening in effect. Otherwise, if the lost DC had
* just won, an immediate new election could fizzle out with no new DC.
*/
election_clear_dampening(fsa_election);
}
}
void
controld_election_fini(void)
{
election_fini(fsa_election);
fsa_election = NULL;
}
void
controld_stop_current_election_timeout(void)
{
election_timeout_stop(fsa_election);
}
/* A_ELECTION_VOTE */
void
do_election_vote(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean not_voting = FALSE;
/* don't vote if we're in one of these states or wanting to shut down */
switch (cur_state) {
case S_STARTING:
case S_RECOVERY:
case S_STOPPING:
case S_TERMINATE:
crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state));
not_voting = TRUE;
break;
case S_ELECTION:
case S_INTEGRATION:
case S_RELEASE_DC:
break;
default:
crm_err("Broken? Voting in state %s", fsa_state2string(cur_state));
break;
}
if (not_voting == FALSE) {
if (pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) {
not_voting = TRUE;
}
}
if (not_voting) {
if (AM_I_DC) {
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
} else {
register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
}
return;
}
election_vote(fsa_election);
return;
}
void
do_election_check(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (controld_globals.fsa_state == S_ELECTION) {
election_check(fsa_election);
} else {
crm_debug("Ignoring election check because we are not in an election");
}
}
/* A_ELECTION_COUNT */
void
do_election_count_vote(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
enum election_result rc = 0;
ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg);
if(crm_peer_cache == NULL) {
if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_err("Internal error, no peer cache");
}
return;
}
rc = election_count_vote(fsa_election, vote->msg, cur_state != S_STARTING);
switch(rc) {
case election_start:
election_reset(fsa_election);
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
break;
case election_lost:
update_dc(NULL);
if (pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)) {
cib_t *cib_conn = controld_globals.cib_conn;
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL);
cib_conn->cmds->set_secondary(cib_conn, cib_scope_local);
} else if (cur_state != S_STARTING) {
register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL);
}
break;
default:
crm_trace("Election message resulted in state %d", rc);
}
}
static void
feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_notice("Feature update failed: %s "CRM_XS" rc=%d",
pcmk_strerror(rc), rc);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
/*!
* \internal
* \brief Update a node attribute in the CIB during a DC takeover
*
* \param[in] name Name of attribute to update
* \param[in] value New attribute value
*/
#define dc_takeover_update_attr(name, value) do { \
cib__update_node_attr(controld_globals.logger_out, \
controld_globals.cib_conn, cib_none, \
PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL, \
name, value, NULL, NULL); \
} while (0)
/* A_DC_TAKEOVER */
void
do_dc_takeover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *cib = NULL;
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
pid_t watchdog = pcmk__locate_sbd();
crm_info("Taking over DC status for this partition");
controld_set_fsa_input_flags(R_THE_DC);
execute_stonith_cleanup();
election_reset(fsa_election);
controld_set_fsa_input_flags(R_JOIN_OK|R_INVOKE_PE);
controld_globals.cib_conn->cmds->set_primary(controld_globals.cib_conn,
cib_scope_local);
cib = pcmk__xe_create(NULL, PCMK_XE_CIB);
crm_xml_add(cib, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
controld_update_cib(PCMK_XE_CIB, cib, cib_none, feature_update_callback);
dc_takeover_update_attr(PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));
dc_takeover_update_attr(PCMK_OPT_DC_VERSION,
PACEMAKER_VERSION "-" BUILD_VERSION);
dc_takeover_update_attr(PCMK_OPT_CLUSTER_INFRASTRUCTURE, cluster_layer_s);
#if SUPPORT_COROSYNC
if ((controld_globals.cluster_name == NULL)
&& (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)) {
char *cluster_name = pcmk__corosync_cluster_name();
if (cluster_name != NULL) {
dc_takeover_update_attr(PCMK_OPT_CLUSTER_NAME, cluster_name);
}
free(cluster_name);
}
#endif
controld_trigger_config();
free_xml(cib);
}
/* A_DC_RELEASE */
void
do_dc_release(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
if (action & A_DC_RELEASE) {
crm_debug("Releasing the role of DC");
controld_clear_fsa_input_flags(R_THE_DC);
controld_expect_sched_reply(NULL);
} else if (action & A_DC_RELEASED) {
crm_info("DC role released");
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
xmlNode *update = NULL;
- crm_node_t *node = pcmk__get_node(0, controld_globals.our_nodename,
- NULL, pcmk__node_search_cluster);
+ crm_node_t *node =
+ pcmk__get_node(0, controld_globals.our_nodename,
+ NULL, pcmk__node_search_cluster_member);
pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
update = create_node_state_update(node, node_update_expected, NULL,
__func__);
/* Don't need a based response because controld will stop. */
fsa_cib_anon_update_discard_reply(PCMK_XE_STATUS, update);
free_xml(update);
}
register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL);
} else {
crm_err("Unknown DC action %s", fsa_action2string(action));
}
crm_trace("Am I still the DC? %s", pcmk__btoa(AM_I_DC));
}
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 58ecad0676..917c8c0b4d 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1,2454 +1,2454 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <regex.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <crm/crm.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_rsc_info_t, etc.
#include <crm/services.h>
#include <crm/common/xml.h>
#include <crm/pengine/rules.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
#define START_DELAY_THRESHOLD 5 * 60 * 1000
#define MAX_LRM_REG_FAILS 30
struct delete_event_s {
int rc;
const char *rsc;
lrm_state_t *lrm_state;
};
static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state,
const xmlNode *rsc_op,
const char *rsc_id,
const char *operation);
static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
xmlNode *msg, struct ra_metadata_s *md);
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
int log_level);
static void
lrm_connection_destroy(void)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
crm_crit("Lost connection to local executor");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
}
}
static char *
make_stop_id(const char *rsc, int call_id)
{
return crm_strdup_printf("%s:%d", rsc, call_id);
}
static void
copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") == NULL) {
pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
}
}
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") != NULL) {
pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
}
}
/*!
* \internal
* \brief Remove a recurring operation from a resource's history
*
* \param[in,out] history Resource history to modify
* \param[in] op Operation to remove
*
* \return TRUE if the operation was found and removed, FALSE otherwise
*/
static gboolean
history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_event_data_t *existing = iter->data;
if ((op->interval_ms == existing->interval_ms)
&& pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none)
&& pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) {
history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
lrmd_free_event(existing);
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Free all recurring operations in resource history
*
* \param[in,out] history Resource history to modify
*/
static void
history_free_recurring_ops(rsc_history_t *history)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_free_event(iter->data);
}
g_list_free(history->recurring_op_list);
history->recurring_op_list = NULL;
}
/*!
* \internal
* \brief Free resource history
*
* \param[in,out] history Resource history to free
*/
void
history_free(gpointer data)
{
rsc_history_t *history = (rsc_history_t*)data;
if (history->stop_params) {
g_hash_table_destroy(history->stop_params);
}
/* Don't need to free history->rsc.id because it's set to history->id */
free(history->rsc.type);
free(history->rsc.standard);
free(history->rsc.provider);
lrmd_free_event(history->failed);
lrmd_free_event(history->last);
free(history->id);
history_free_recurring_ops(history);
free(history);
}
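/*!
 * \internal
 * \brief Update the in-memory resource history for an executor event
 *
 * Creates a history entry for newly seen resources, records the most recent
 * failed and completed operations, caches parameters for later stops, and
 * keeps the recurring-operation list free of duplicates and stale entries.
 */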
static void
update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
int target_rc = 0;
rsc_history_t *entry = NULL;
if (op->rsc_deleted) {
crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
NULL, crmd_cib_smart_opt());
return;
}
if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
return;
}
crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
if (entry == NULL && rsc) {
entry = pcmk__assert_alloc(1, sizeof(rsc_history_t));
entry->id = pcmk__str_copy(op->rsc_id);
g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
entry->rsc.id = entry->id;
entry->rsc.type = pcmk__str_copy(rsc->type);
entry->rsc.standard = pcmk__str_copy(rsc->standard);
entry->rsc.provider = pcmk__str_copy(rsc->provider);
} else if (entry == NULL) {
crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
return;
}
entry->last_callid = op->call_id;
target_rc = rsc_op_expected_rc(op);
if (op->op_status == PCMK_EXEC_CANCELLED) {
if (op->interval_ms > 0) {
crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
history_remove_recurring_op(entry, op);
return;
} else {
crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
op->rsc_id, op->op_type, op->interval_ms, op->rc,
op->op_status);
}
} else if (did_rsc_op_fail(op, target_rc)) {
/* Store failed monitors here, otherwise the block below will cause them
* to be forgotten when a stop happens.
*/
if (entry->failed) {
lrmd_free_event(entry->failed);
}
entry->failed = lrmd_copy_event(op);
} else if (op->interval_ms == 0) {
if (entry->last) {
lrmd_free_event(entry->last);
}
entry->last = lrmd_copy_event(op);
if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START,
PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_MONITOR, NULL)) {
if (entry->stop_params) {
g_hash_table_destroy(entry->stop_params);
}
entry->stop_params = pcmk__strkey_table(free, free);
g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
}
}
if (op->interval_ms > 0) {
/* Ensure there are no duplicates */
history_remove_recurring_op(entry, op);
crm_trace("Adding recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
} else if ((entry->recurring_op_list != NULL)
&& !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
g_list_length(entry->recurring_op_list), op->rsc_id,
op->op_type, op->interval_ms);
history_free_recurring_ops(entry);
}
}
/*!
* \internal
* \brief Send a direct OK ack for a resource task
*
* \param[in] lrm_state LRM connection
* \param[in] input Input message being ack'ed
* \param[in] rsc_id ID of affected resource
* \param[in] rsc Affected resource (if available)
* \param[in] task Operation task being ack'ed
* \param[in] ack_host Name of host to send ack to
* \param[in] ack_sys IPC system name to ack
*/
static void
send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input,
const char *rsc_id, const lrmd_rsc_info_t *rsc,
const char *task, const char *ack_host, const char *ack_sys)
{
lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
lrmd_free_event(op);
}
static inline const char *
op_node_name(lrmd_event_data_t *op)
{
return pcmk__s(op->remote_nodename, controld_globals.our_nodename);
}
void
lrm_op_callback(lrmd_event_data_t * op)
{
CRM_CHECK(op != NULL, return);
switch (op->type) {
case lrmd_event_disconnect:
if (op->remote_nodename == NULL) {
/* If this is the local executor IPC connection, set the right
* bits in the controller when the connection goes down.
*/
lrm_connection_destroy();
}
break;
case lrmd_event_exec_complete:
{
lrm_state_t *lrm_state = lrm_state_find(op_node_name(op));
CRM_ASSERT(lrm_state != NULL);
process_lrm_event(lrm_state, op, NULL, NULL);
}
break;
default:
break;
}
}
static void
try_local_executor_connect(long long action, fsa_data_t *msg_data,
lrm_state_t *lrm_state)
{
int rc = pcmk_rc_ok;
crm_debug("Connecting to the local executor");
// If we can connect, great
rc = controld_connect_local_executor(lrm_state);
if (rc == pcmk_rc_ok) {
controld_set_fsa_input_flags(R_LRM_CONNECTED);
crm_info("Connection to the local executor established");
return;
}
// Otherwise, if we can try again, set a timer to do so
if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
crm_warn("Failed to connect to the local executor %d time%s "
"(%d max): %s", lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails),
MAX_LRM_REG_FAILS, pcmk_rc_str(rc));
controld_start_wait_timer();
crmd_fsa_stall(FALSE);
return;
}
// Otherwise give up
crm_err("Failed to connect to the executor the max allowed "
"%d time%s: %s", lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails),
pcmk_rc_str(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
/* A_LRM_CONNECT */
void
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* This only pertains to local executor connections. Remote connections are
* handled as resources within the scheduler. Connecting and disconnecting
* from remote executor instances is handled differently.
*/
lrm_state_t *lrm_state = NULL;
if (controld_globals.our_nodename == NULL) {
return; /* Nothing to do */
}
lrm_state = lrm_state_find_or_create(controld_globals.our_nodename);
if (lrm_state == NULL) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (action & A_LRM_DISCONNECT) {
if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
if (action == A_LRM_DISCONNECT) {
crmd_fsa_stall(FALSE);
return;
}
}
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_disconnect(lrm_state);
lrm_state_reset_tables(lrm_state, FALSE);
}
if (action & A_LRM_CONNECT) {
try_local_executor_connect(action, msg_data, lrm_state);
}
if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
static gboolean
lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
{
int counter = 0;
gboolean rc = TRUE;
const char *when = "lrm disconnect";
GHashTableIter gIter;
const char *key = NULL;
rsc_history_t *entry = NULL;
active_op_t *pending = NULL;
crm_debug("Checking for active resources before exit");
if (cur_state == S_TERMINATE) {
log_level = LOG_ERR;
when = "shutdown";
} else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
when = "shutdown... waiting";
}
if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) {
guint removed = g_hash_table_foreach_remove(lrm_state->active_ops,
stop_recurring_actions,
lrm_state);
guint nremaining = g_hash_table_size(lrm_state->active_ops);
if (removed || nremaining) {
crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
removed, pcmk__plural_s(removed), when, nremaining);
}
}
if (lrm_state->active_ops != NULL) {
g_hash_table_iter_init(&gIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
/* Ignore recurring actions in the shutdown calculations */
if (pending->interval_ms == 0) {
counter++;
}
}
}
if (counter > 0) {
do_crm_log(log_level, "%d pending executor operation%s at %s",
counter, pcmk__plural_s(counter), when);
if ((cur_state == S_TERMINATE)
|| !pcmk_is_set(controld_globals.fsa_input_register,
R_SENT_RSC_STOP)) {
g_hash_table_iter_init(&gIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
}
} else {
rc = FALSE;
}
return rc;
}
if (lrm_state->resource_history == NULL) {
return rc;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
/* At this point we're not waiting, we're just shutting down */
when = "shutdown";
}
counter = 0;
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
if (is_rsc_active(lrm_state, entry->id) == FALSE) {
continue;
}
counter++;
if (log_level == LOG_ERR) {
crm_info("Found %s active at %s", entry->id, when);
} else {
crm_trace("Found %s active at %s", entry->id, when);
}
if (lrm_state->active_ops != NULL) {
GHashTableIter hIter;
g_hash_table_iter_init(&hIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) {
crm_notice("%sction %s (%s) incomplete at %s",
pending->interval_ms == 0 ? "A" : "Recurring a",
key, pending->op_key, when);
}
}
}
}
if (counter) {
crm_err("%d resource%s active at %s",
counter, (counter == 1)? " was" : "s were", when);
}
return rc;
}
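/*!
 * \internal
 * \brief Check whether a resource is considered active on the local node
 *
 * Based on the resource's last completed operation: successful stops and
 * migrations away, "not running" results, and non-recurring "not configured"
 * results all count as inactive.
 */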
static gboolean
is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
{
rsc_history_t *entry = NULL;
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->last == NULL) {
return FALSE;
}
crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
entry->last->interval_ms, entry->last->rc);
if ((entry->last->rc == PCMK_OCF_OK)
&& pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP,
pcmk__str_casei)) {
return FALSE;
} else if (entry->last->rc == PCMK_OCF_OK
&& pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO,
pcmk__str_casei)) {
// A stricter check is too complex ... leave that to the scheduler
return FALSE;
} else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
return FALSE;
} else if ((entry->last->interval_ms == 0)
&& (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
/* Badly configured resources can't be reliably stopped */
return FALSE;
}
return TRUE;
}
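/*!
 * \internal
 * \brief Add the history of each known resource to an XML resource list
 *
 * \param[in]     lrm_state Executor state containing the resource history
 * \param[in,out] rsc_list  XML element to add PCMK__XE_LRM_RESOURCE
 *                          entries to
 */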
static gboolean
build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
{
GHashTableIter iter;
rsc_history_t *entry = NULL;
g_hash_table_iter_init(&iter, lrm_state->resource_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
GList *gIter = NULL;
xmlNode *xml_rsc = pcmk__xe_create(rsc_list, PCMK__XE_LRM_RESOURCE);
crm_xml_add(xml_rsc, PCMK_XA_ID, entry->id);
crm_xml_add(xml_rsc, PCMK_XA_TYPE, entry->rsc.type);
crm_xml_add(xml_rsc, PCMK_XA_CLASS, entry->rsc.standard);
crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, entry->rsc.provider);
if (entry->last && entry->last->params) {
static const char *name = CRM_META "_" PCMK__META_CONTAINER;
const char *container = g_hash_table_lookup(entry->last->params,
name);
if (container) {
crm_trace("Resource %s is a part of container resource %s", entry->id, container);
crm_xml_add(xml_rsc, PCMK__META_CONTAINER, container);
}
}
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed,
lrm_state->node_name);
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last,
lrm_state->node_name);
for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data,
lrm_state->node_name);
}
}
return FALSE;
}
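/*!
 * \internal
 * \brief Build a node state update containing the local executor history
 *
 * \return Newly allocated node state XML on success, otherwise NULL
 *
 * \note The caller is responsible for freeing the result.
 */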
xmlNode *
controld_query_executor_state(void)
{
xmlNode *xml_state = NULL;
xmlNode *xml_data = NULL;
xmlNode *rsc_list = NULL;
crm_node_t *peer = NULL;
lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename);
if (!lrm_state) {
crm_err("Could not find executor state for node %s",
controld_globals.our_nodename);
return NULL;
}
peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any);
CRM_CHECK(peer != NULL, return NULL);
xml_state = create_node_state_update(peer,
node_update_cluster|node_update_peer,
NULL, __func__);
if (xml_state == NULL) {
return NULL;
}
xml_data = pcmk__xe_create(xml_state, PCMK__XE_LRM);
crm_xml_add(xml_data, PCMK_XA_ID, peer->uuid);
rsc_list = pcmk__xe_create(xml_data, PCMK__XE_LRM_RESOURCES);
/* Build a list of active (not always running) resources */
build_active_RAs(lrm_state, rsc_list);
crm_log_xml_trace(xml_state, "Current executor state");
return xml_state;
}
/*!
* \internal
* \brief Map standard Pacemaker return code to operation status and OCF code
*
* \param[out] event Executor event whose status and return code should be set
* \param[in] rc Standard Pacemaker return code
*/
void
controld_rc2event(lrmd_event_data_t *event, int rc)
{
/* This is called for cleanup requests from controller peers/clients, not
* for resource actions, so no exit reason is needed.
*/
switch (rc) {
case pcmk_rc_ok:
lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
break;
case EACCES:
lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV,
PCMK_EXEC_ERROR, NULL);
break;
default:
lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
NULL);
break;
}
}
/*!
* \internal
* \brief Trigger a new transition after CIB status was deleted
*
* If a CIB status delete was not expected (as part of the transition graph),
* trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
* cluster property.
*
* \param[in] from_sys IPC name that requested the delete
* \param[in] rsc_id Resource whose status was deleted (for logging only)
*/
void
controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
{
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
cib__update_node_attr(controld_globals.logger_out,
controld_globals.cib_conn, cib_none,
PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL,
"last-lrm-refresh", now_s, NULL, NULL);
free(now_s);
}
}
static void
notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
{
lrmd_event_data_t *op = NULL;
const char *from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
const char *from_host = crm_element_value(input->msg, PCMK__XA_SRC);
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "localhost"), rsc_id,
((rc == pcmk_ok)? "" : " not"));
op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE);
controld_rc2event(op, pcmk_legacy2rc(rc));
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
struct delete_event_s *event = user_data;
struct pending_deletion_op_s *op = value;
if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) {
notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
return TRUE;
}
return FALSE;
}
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
const char *rsc = user_data;
active_op_t *pending = value;
if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) {
crm_info("Removing op %s:%d for deleted resource %s",
pending->op_key, pending->call_id, rsc);
return TRUE;
}
return FALSE;
}
static void
delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input,
const char *rsc_id, GHashTableIter *rsc_iter, int rc,
const char *user_name, bool from_cib)
{
struct delete_event_s event;
CRM_CHECK(rsc_id != NULL, return);
if (rc == pcmk_ok) {
char *rsc_id_copy = pcmk__str_copy(rsc_id);
if (rsc_iter) {
g_hash_table_iter_remove(rsc_iter);
} else {
g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
}
if (from_cib) {
controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
user_name, crmd_cib_smart_opt());
}
g_hash_table_foreach_remove(lrm_state->active_ops,
lrm_remove_deleted_op, rsc_id_copy);
free(rsc_id_copy);
}
if (input) {
notify_deleted(lrm_state, input, rsc_id, rc);
}
event.rc = rc;
event.rsc = rsc_id;
event.lrm_state = lrm_state;
g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
}
static inline gboolean
last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
{
if (entry == NULL) {
return FALSE;
}
if (op == NULL) {
return TRUE;
}
return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei)
&& (interval_ms == entry->failed->interval_ms));
}
/*!
* \internal
* \brief Clear a resource's last failure
*
* Erase a resource's last failure on a particular node from both the
* LRM resource history in the CIB, and the resource history remembered
* for the LRM state.
*
* \param[in] rsc_id Resource name
* \param[in] node_name Node name
* \param[in] operation If specified, only clear if matching this operation
* \param[in] interval_ms If operation is specified, it has this interval
*/
void
lrm_clear_last_failure(const char *rsc_id, const char *node_name,
const char *operation, guint interval_ms)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (lrm_state == NULL) {
return;
}
if (lrm_state->resource_history != NULL) {
rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
rsc_id);
if (last_failed_matches_op(entry, operation, interval_ms)) {
lrmd_free_event(entry->failed);
entry->failed = NULL;
}
}
}
/* Returns: gboolean - cancellation is in progress */
static gboolean
cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
{
int rc = pcmk_ok;
char *local_key = NULL;
active_op_t *pending = NULL;
CRM_CHECK(op != 0, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
if (key == NULL) {
local_key = make_stop_id(rsc_id, op);
key = local_key;
}
pending = g_hash_table_lookup(lrm_state->active_ops, key);
if (pending) {
if (remove && !pcmk_is_set(pending->flags, active_op_remove)) {
controld_set_active_op_flags(pending, active_op_remove);
crm_debug("Scheduling %s for removal", key);
}
if (pcmk_is_set(pending->flags, active_op_cancelled)) {
crm_debug("Operation %s already cancelled", key);
free(local_key);
return FALSE;
}
controld_set_active_op_flags(pending, active_op_cancelled);
} else {
crm_info("No pending op found for %s", key);
free(local_key);
return FALSE;
}
crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
pending->interval_ms);
if (rc == pcmk_ok) {
crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
free(local_key);
return TRUE;
}
crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
/* The caller needs to make sure the entry is removed from the active
 * operations list, usually by returning TRUE from the worker function
 * supplied to g_hash_table_foreach_remove().
 *
 * Leaving the entry in the active operations list would block the node
 * from shutting down.
 */
free(local_key);
return FALSE;
}
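// User data for cancel_action_by_key()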
struct cancel_data {
gboolean done;
gboolean remove;
const char *key;
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
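/*!
 * \internal
 * \brief Cancel a pending executor operation if its key matches
 *
 * Intended for use with g_hash_table_foreach_remove().
 *
 * \param[in] key       Stop ID of pending operation
 * \param[in] value     Pending operation (active_op_t)
 * \param[in] user_data struct cancel_data with key to match
 *
 * \return TRUE if the matched operation should be removed from the table
 */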
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct cancel_data *data = user_data;
active_op_t *op = value;
if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) {
data->done = TRUE;
remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
}
return remove;
}
static gboolean
cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
{
guint removed = 0;
struct cancel_data data;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(key != NULL, return FALSE);
data.key = key;
data.rsc = rsc;
data.done = FALSE;
data.remove = remove;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->active_ops,
cancel_action_by_key, &data);
crm_trace("Removed %u op cache entries, new size: %u",
removed, g_hash_table_size(lrm_state->active_ops));
return data.done;
}
/*!
* \internal
* \brief Retrieve resource information from LRM
*
* \param[in,out] lrm_state Executor connection state to use
* \param[in] rsc_xml XML containing resource configuration
* \param[in] do_create If true, register resource if not already
* \param[out] rsc_info Where to store information obtained from executor
*
* \retval pcmk_ok Success (and rsc_info holds newly allocated result)
* \retval -EINVAL Required information is missing from arguments
* \retval -ENOTCONN No active connection to LRM
* \retval -ENODEV Resource not found
* \retval -errno Error communicating with executor when registering resource
*
* \note Caller is responsible for freeing result on success.
*/
static int
get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
gboolean do_create, lrmd_rsc_info_t **rsc_info)
{
const char *id = pcmk__xe_id(rsc_xml);
CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
CRM_CHECK(id, return -EINVAL);
if (lrm_state_is_connected(lrm_state) == FALSE) {
return -ENOTCONN;
}
crm_trace("Retrieving resource information for %s from the executor", id);
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
// If resource isn't known by ID, try clone name, if provided
if (!*rsc_info) {
const char *long_id = crm_element_value(rsc_xml, PCMK__XA_LONG_ID);
if (long_id) {
*rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
}
}
if ((*rsc_info == NULL) && do_create) {
const char *class = crm_element_value(rsc_xml, PCMK_XA_CLASS);
const char *provider = crm_element_value(rsc_xml, PCMK_XA_PROVIDER);
const char *type = crm_element_value(rsc_xml, PCMK_XA_TYPE);
int rc;
crm_trace("Registering resource %s with the executor", id);
rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
lrmd_opt_drop_recurring);
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Could not register resource %s with the executor on %s: %s "
CRM_XS " rc=%d",
id, lrm_state->node_name, pcmk_strerror(rc), rc);
/* Register this as an internal error if this involves the local
* executor. Otherwise, we're likely dealing with an unresponsive
* remote node, which is not an FSA failure.
*/
if (lrm_state_is_local(lrm_state) == TRUE) {
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
return rc;
}
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
}
return *rsc_info? pcmk_ok : -ENODEV;
}
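/*!
 * \internal
 * \brief Remove a resource from the executor, then from the history
 *
 * \param[in,out] lrm_state  Executor state for resource's node
 * \param[in]     id         ID of resource to remove
 * \param[in,out] rsc        Executor information about resource, if any
 * \param[in,out] iter       If not NULL, iterator at resource's history entry
 * \param[in]     sys        Subsystem that requested the removal
 * \param[in]     user       User that requested the removal, if any
 * \param[in,out] request    Request that triggered the removal, if any
 * \param[in]     unregister Whether to unregister resource from executor
 * \param[in]     from_cib   Whether to also erase resource's CIB history
 */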
static void
delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
GHashTableIter *iter, const char *sys, const char *user,
ha_msg_input_t *request, bool unregister, bool from_cib)
{
int rc = pcmk_ok;
crm_info("Removing resource %s from executor for %s%s%s",
id, sys, (user? " as " : ""), (user? user : ""));
if (rsc && unregister) {
rc = lrm_state_unregister_rsc(lrm_state, id, 0);
}
if (rc == pcmk_ok) {
crm_trace("Resource %s deleted from executor", id);
} else if (rc == -EINPROGRESS) {
crm_info("Deletion of resource '%s' from executor is pending", id);
if (request) {
struct pending_deletion_op_s *op = NULL;
char *ref = crm_element_value_copy(request->msg, PCMK_XA_REFERENCE);
op = pcmk__assert_alloc(1, sizeof(struct pending_deletion_op_s));
op->rsc = pcmk__str_copy(rsc->id);
op->input = copy_ha_msg_input(request);
g_hash_table_insert(lrm_state->deletion_ops, ref, op);
}
return;
} else {
crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
CRM_XS " rc=%d", id, sys, (user? " as " : ""),
(user? user : ""), pcmk_strerror(rc), rc);
}
delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib);
}
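/*!
 * \internal
 * \brief Get a call ID to use for a synthesized operation result
 *
 * \param[in] lrm_state Executor state with resource history, if any
 * \param[in] rsc_id    ID of resource the result is for
 *
 * \return Positive call ID greater than the resource's last recorded one
 */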
static int
get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
{
int call_id = 999999999;
rsc_history_t *entry = NULL;
if(lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* Make sure the call ID is greater than the last successful operation's,
 * otherwise the failure could not trigger recovery of the resource, because
 * it would appear that the failure occurred before the successful start
 */
if (entry) {
call_id = entry->last_callid + 1;
}
if (call_id < 0) {
call_id = 1;
}
return call_id;
}
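/*!
 * \internal
 * \brief Fill a synthesized executor event with a fake result
 *
 * \param[in]     lrm_state   Executor state the event is for
 * \param[in,out] op          Event to fill
 * \param[in]     op_status   Execution status to use
 * \param[in]     op_exitcode Exit code to use
 * \param[in]     exit_reason Exit reason to use, if any
 */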
static void
fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
enum ocf_exitcode op_exitcode, const char *exit_reason)
{
op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
lrmd__set_result(op, op_exitcode, op_status, exit_reason);
}
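/*!
 * \internal
 * \brief Clear a node's entire resource history, so everything gets reprobed
 *
 * \param[in,out] lrm_state         Executor state for node to clear
 * \param[in]     from_sys          Subsystem that requested the reprobe
 * \param[in]     from_host         Host that requested the reprobe, if any
 * \param[in]     user_name         User that requested the reprobe, for ACLs
 * \param[in]     is_remote_node    Whether node is a Pacemaker Remote node
 * \param[in]     reprobe_all_nodes Whether all nodes are being reprobed
 */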
static void
force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node, bool reprobe_all_nodes)
{
GHashTableIter gIter;
rsc_history_t *entry = NULL;
crm_info("Clearing resource history on node %s", lrm_state->node_name);
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
/* Only unregister the resource during a reprobe if it is not a remote
 * connection resource, because unregistering the connection would
 * terminate remote-node membership
 */
bool unregister = true;
if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
unregister = false;
if (reprobe_all_nodes) {
lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
if (remote_lrm_state != NULL) {
/* If reprobing all nodes, be sure to reprobe the remote
* node before clearing its connection resource
*/
force_reprobe(remote_lrm_state, from_sys, from_host,
user_name, TRUE, reprobe_all_nodes);
}
}
}
/* Don't delete from the CIB, since we'll delete the whole node's LRM
* state from the CIB soon
*/
delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
user_name, NULL, unregister, false);
}
/* Now delete the copy in the CIB */
controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
cib_scope_local);
// @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false)
update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
}
/*!
* \internal
* \brief Fail a requested action without actually executing it
*
* For an action that can't be executed, process it similarly to an actual
* execution result, with specified error status (except for notify actions,
* which will always be treated as successful).
*
* \param[in,out] lrm_state Executor connection that action is for
* \param[in] action Action XML from request
* \param[in] rc Desired return code to use
* \param[in] op_status Desired operation status to use
* \param[in] exit_reason Human-friendly detail, if error
*/
static void
synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
int op_status, enum ocf_exitcode rc,
const char *exit_reason)
{
lrmd_event_data_t *op = NULL;
const char *operation = crm_element_value(action, PCMK_XA_OPERATION);
const char *target_node = crm_element_value(action, PCMK__META_ON_NODE);
xmlNode *xml_rsc = pcmk__xe_first_child(action, PCMK_XE_PRIMITIVE, NULL,
NULL);
if ((xml_rsc == NULL) || (pcmk__xe_id(xml_rsc) == NULL)) {
/* @TODO Should we do something else, like direct ack? */
crm_info("Can't fake %s failure (%d) on %s without resource configuration",
crm_element_value(action, PCMK__XA_OPERATION_KEY), rc,
target_node);
return;
} else if(operation == NULL) {
/* This probably came from crm_resource -C, nothing to do */
crm_info("Can't fake %s failure (%d) on %s without operation",
pcmk__xe_id(xml_rsc), rc, target_node);
return;
}
op = construct_op(lrm_state, action, pcmk__xe_id(xml_rsc), operation);
if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
// Notifications can't fail
fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
} else {
fake_op_status(lrm_state, op, op_status, rc, exit_reason);
}
crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
// Process the result as if it came from the LRM
process_lrm_event(lrm_state, op, NULL, action);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Get target of an LRM operation (replacing \p NULL with local node
* name)
*
* \param[in] xml LRM operation data XML
*
* \return LRM operation target node name (local node or Pacemaker Remote node)
*/
static const char *
lrm_op_target(const xmlNode *xml)
{
const char *target = NULL;
if (xml) {
target = crm_element_value(xml, PCMK__META_ON_NODE);
}
if (target == NULL) {
target = controld_globals.our_nodename;
}
return target;
}
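/*!
 * \internal
 * \brief Fail a resource by synthesizing an executor failure event for it
 *
 * \param[in,out] xml       Request XML containing resource configuration
 * \param[in,out] lrm_state Executor state for resource's node
 * \param[in]     user_name User that made the request
 * \param[in]     from_host Host that made the request, if any
 * \param[in]     from_sys  Subsystem that made the request
 */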
static void
fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
lrmd_event_data_t *op = NULL;
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = pcmk__xe_first_child(xml, PCMK_XE_PRIMITIVE, NULL, NULL);
CRM_CHECK(xml_rsc != NULL, return);
/* The executor simply executes operations and reports the results, without
* any concept of success or failure, so to fail a resource, we must fake
* what a failure looks like.
*
* To do this, we create a fake executor operation event for the resource,
* and pass that event to the executor client callback so it will be
* processed as if it came from the executor.
*/
op = construct_op(lrm_state, xml, pcmk__xe_id(xml_rsc), "asyncmon");
free((char*) op->user_data);
op->user_data = NULL;
op->interval_ms = 0;
if (user_name && !pcmk__is_privileged(user_name)) {
crm_err("%s does not have permission to fail %s",
user_name, pcmk__xe_id(xml_rsc));
fake_op_status(lrm_state, op, PCMK_EXEC_ERROR,
PCMK_OCF_INSUFFICIENT_PRIV,
"Unprivileged user cannot fail resources");
controld_ack_event_directly(from_host, from_sys, NULL, op,
pcmk__xe_id(xml_rsc));
lrmd_free_event(op);
return;
}
if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
crm_info("Failing resource %s...", rsc->id);
fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
"Simulated failure");
process_lrm_event(lrm_state, op, NULL, xml);
op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded
lrmd_free_rsc_info(rsc);
} else {
crm_info("Cannot find/create resource in order to fail it...");
crm_log_xml_warn(xml, "bad input");
fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR,
"Cannot fail unknown resource");
}
controld_ack_event_directly(from_host, from_sys, NULL, op,
pcmk__xe_id(xml_rsc));
lrmd_free_event(op);
}
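/*!
 * \internal
 * \brief Handle a reprobe request by rediscovering all resource states
 *
 * \param[in,out] lrm_state         Executor state for node to reprobe
 * \param[in]     from_sys          Subsystem that made the request
 * \param[in]     from_host         Host that made the request, if any
 * \param[in]     user_name         User that made the request, for ACLs
 * \param[in]     is_remote_node    Whether node is a Pacemaker Remote node
 * \param[in]     reprobe_all_nodes Whether all nodes are being reprobed
 */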
static void
handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node, bool reprobe_all_nodes)
{
crm_notice("Forcing the status of all resources to be redetected");
force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node,
reprobe_all_nodes);
if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
from_sys, CRM_SYSTEM_LRMD,
controld_globals.our_uuid);
crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(reply);
}
}
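/*!
 * \internal
 * \brief Handle a request to cancel a recurring or pending operation
 *
 * \param[in,out] input     Request to handle
 * \param[in,out] lrm_state Executor state the operation is on
 * \param[in,out] rsc       Executor information about the operation's resource
 * \param[in]     from_host Host that made the request, if any
 * \param[in]     from_sys  Subsystem that made the request
 *
 * \return TRUE if the request was handled, or FALSE if it was malformed
 */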
static bool
do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
{
char *op_key = NULL;
char *meta_key = NULL;
int call = 0;
const char *call_id = NULL;
const char *op_task = NULL;
guint interval_ms = 0;
gboolean in_progress = FALSE;
xmlNode *params = pcmk__xe_first_child(input->xml, PCMK__XE_ATTRIBUTES,
NULL, NULL);
CRM_CHECK(params != NULL, return FALSE);
meta_key = crm_meta_name(PCMK_XA_OPERATION);
op_task = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(op_task != NULL, return FALSE);
meta_key = crm_meta_name(PCMK_META_INTERVAL);
if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
free(meta_key);
return FALSE;
}
free(meta_key);
op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
meta_key = crm_meta_name(PCMK__XA_CALL_ID);
call_id = crm_element_value(params, meta_key);
free(meta_key);
crm_debug("Scheduler requested op %s (call=%s) be cancelled",
op_key, (call_id? call_id : "NA"));
pcmk__scan_min_int(call_id, &call, 0);
if (call == 0) {
// Normal case when the scheduler cancels a recurring op
in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
} else {
// Normal case when the scheduler cancels an orphan op
in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
}
// Acknowledge cancellation operation if for a remote connection resource
if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
char *op_id = make_stop_id(rsc->id, call);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
crm_info("Nothing known about operation %d for %s", call, op_key);
}
controld_delete_action_history_by_key(rsc->id, lrm_state->node_name,
op_key, call);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
/* needed at least for cancellation of a remote operation */
if (lrm_state->active_ops != NULL) {
g_hash_table_remove(lrm_state->active_ops, op_id);
}
free(op_id);
} else {
/* No ack is needed since abcdaa8, but peers with older versions
* in a rolling upgrade need one. We didn't bump the feature set
* at that commit, so we can only compare against the previous
* CRM version (3.0.8). If any peers have feature set 3.0.9 but
* not abcdaa8, they will time out waiting for the ack (no
* released versions of Pacemaker are affected).
*/
const char *peer_version = crm_element_value(params,
PCMK_XA_CRM_FEATURE_SET);
if (compare_version(peer_version, "3.0.8") <= 0) {
crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
op_key, from_host, peer_version);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
}
}
free(op_key);
return TRUE;
}
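/*!
 * \internal
 * \brief Handle a request to delete a resource and its history
 *
 * \param[in,out] input          Request to handle
 * \param[in,out] lrm_state      Executor state the resource is on
 * \param[in,out] rsc            Executor information about the resource
 * \param[in]     from_sys       Subsystem that made the request
 * \param[in]     from_host      Host that made the request, if any
 * \param[in]     crm_rsc_delete Whether request came from crm_resource
 * \param[in]     user_name      User that made the request, for ACLs
 */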
static void
do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
bool crm_rsc_delete, const char *user_name)
{
bool unregister = true;
int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
user_name,
cib_dryrun|cib_sync_call);
if (cib_rc != pcmk_rc_ok) {
lrmd_event_data_t *op = NULL;
op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE);
/* These are resource clean-ups, not actions, so no exit reason is
* needed.
*/
lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL);
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
lrmd_free_event(op);
return;
}
if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
unregister = false;
}
delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
user_name, input, unregister, true);
}
// User data for asynchronous metadata execution
struct metadata_cb_data {
lrmd_rsc_info_t *rsc; // Copy of resource information
xmlNode *input_xml; // Copy of FSA input XML
};
static struct metadata_cb_data *
new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
{
struct metadata_cb_data *data = NULL;
data = pcmk__assert_alloc(1, sizeof(struct metadata_cb_data));
data->input_xml = pcmk__xml_copy(NULL, input_xml);
data->rsc = lrmd_copy_rsc_info(rsc);
return data;
}
static void
free_metadata_cb_data(struct metadata_cb_data *data)
{
lrmd_free_rsc_info(data->rsc);
free_xml(data->input_xml);
free(data);
}
/*!
* \internal
* \brief Execute an action after metadata has been retrieved
*
* \param[in] pid Ignored
* \param[in] result Result of metadata action
* \param[in] user_data Metadata callback data
*/
static void
metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
{
struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
struct ra_metadata_s *md = NULL;
lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml));
if ((lrm_state != NULL) && pcmk__result_ok(result)) {
md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
result->action_stdout);
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
}
free_metadata_cb_data(data);
}
/* A_LRM_INVOKE */
void
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
lrm_state_t *lrm_state = NULL;
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *user_name = NULL;
const char *target_node = lrm_op_target(input->xml);
gboolean is_remote_node = FALSE;
bool crm_rsc_delete = FALSE;
// Message routed to the local node is targeting a specific, non-local node
is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename,
pcmk__str_casei);
lrm_state = lrm_state_find(target_node);
if ((lrm_state == NULL) && is_remote_node) {
crm_err("Failing action because local node has never had connection to remote node %s",
target_node);
synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR,
"Local node has no connection to remote");
return;
}
CRM_ASSERT(lrm_state != NULL);
user_name = pcmk__update_acl_user(input->msg, PCMK__XA_CRM_USER, NULL);
crm_op = crm_element_value(input->msg, PCMK__XA_CRM_TASK);
from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
from_host = crm_element_value(input->msg, PCMK__XA_SRC);
}
if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
crm_rsc_delete = TRUE; // from crm_resource
}
operation = PCMK_ACTION_DELETE;
} else if (input->xml != NULL) {
operation = crm_element_value(input->xml, PCMK_XA_OPERATION);
}
CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return);
crm_trace("'%s' execution request from %s as %s user",
pcmk__s(crm_op, operation),
pcmk__s(from_sys, "unknown subsystem"),
pcmk__s(user_name, "current"));
if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) {
fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
from_sys);
} else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) {
/* @COMPAT This can only be sent by crm_resource --refresh on a
* Pacemaker Remote node running Pacemaker 1.1.9, which is extremely
* unlikely. It previously would cause the controller to re-write its
* resource history to the CIB. Just ignore it.
*/
crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
// @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op
} else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) {
update_attrd(lrm_state->node_name, CRM_OP_PROBED, PCMK_VALUE_TRUE,
user_name, is_remote_node);
} else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none)
|| pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
const char *raw_target = NULL;
if (input->xml != NULL) {
// For CRM_OP_REPROBE, a NULL target means we're targeting all nodes
raw_target = crm_element_value(input->xml, PCMK__META_ON_NODE);
}
handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
is_remote_node, (raw_target == NULL));
} else if (operation != NULL) {
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = pcmk__xe_first_child(input->xml, PCMK_XE_PRIMITIVE,
NULL, NULL);
gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE,
pcmk__str_none);
int rc;
// We can't return anything meaningful without a resource ID
CRM_CHECK((xml_rsc != NULL) && (pcmk__xe_id(xml_rsc) != NULL), return);
rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
if (rc == -ENOTCONN) {
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR,
"Not connected to remote executor");
return;
} else if ((rc < 0) && !create_rsc) {
/* Delete of malformed or nonexistent resource
* (deleting something that does not exist is a success)
*/
crm_notice("Not registering resource '%s' for a %s event "
CRM_XS " get-rc=%d (%s) transition-key=%s",
pcmk__xe_id(xml_rsc), operation,
rc, pcmk_strerror(rc), pcmk__xe_id(input->xml));
delete_rsc_entry(lrm_state, input, pcmk__xe_id(xml_rsc), NULL,
pcmk_ok, user_name, true);
return;
} else if (rc == -EINVAL) {
// Resource operation on malformed resource
crm_err("Invalid resource definition for %s", pcmk__xe_id(xml_rsc));
crm_log_xml_warn(input->msg, "invalid resource");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
PCMK_OCF_NOT_CONFIGURED, // fatal error
"Invalid resource definition");
return;
} else if (rc < 0) {
// Error communicating with the executor
crm_err("Could not register resource '%s' with executor: %s "
CRM_XS " rc=%d",
pcmk__xe_id(xml_rsc), pcmk_strerror(rc), rc);
crm_log_xml_warn(input->msg, "failed registration");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
PCMK_OCF_INVALID_PARAM, // hard error
"Could not register resource with executor");
return;
}
if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) {
if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
crm_log_xml_warn(input->xml, "Bad command");
}
} else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE,
pcmk__str_none)) {
do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
crm_rsc_delete, user_name);
} else {
struct ra_metadata_s *md = NULL;
/* Getting metadata from cache is OK except for start actions --
* always refresh from the agent for those, in case the resource
* agent was updated.
*
* @TODO Only refresh metadata for starts if the agent actually
* changed (using something like inotify, or a hash or modification
* time of the agent executable).
*/
if (strcmp(operation, PCMK_ACTION_START) != 0) {
md = controld_get_rsc_metadata(lrm_state, rsc,
controld_metadata_from_cache);
}
if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
operation)) {
/* Most likely, we'll need the agent metadata to record the
* pending operation and the operation result. Get it now rather
* than wait until then, so the metadata action doesn't eat into
* the real action's timeout.
*
* @TODO Metadata is retrieved via direct execution of the
* agent, which has a couple of related issues: the executor
* should execute agents, not the controller; and metadata for
* Pacemaker Remote nodes should be collected on those nodes,
* not locally.
*/
struct metadata_cb_data *data = NULL;
data = new_metadata_cb_data(rsc, input->xml);
crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
rsc->id, rsc->standard,
((rsc->provider == NULL)? "" : ":"),
((rsc->provider == NULL)? "" : rsc->provider),
rsc->type);
(void) lrmd__metadata_async(rsc, metadata_complete,
(void *) data);
} else {
do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
}
}
lrmd_free_rsc_info(rsc);
} else {
crm_err("Invalid execution request: unknown command '%s' (bug?)",
crm_op);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
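/*!
 * \internal
 * \brief Create a new executor event based on an operation request
 *
 * \param[in] lrm_state Executor state for the operation's node, if any
 * \param[in] rsc_op    Operation request XML, if any (NULL is valid only for
 *                      stop requests synthesized by stop_all_resources())
 * \param[in] rsc_id    ID of resource the operation is for
 * \param[in] operation Name of the operation
 *
 * \return Newly created executor event
 *
 * \note The caller is responsible for freeing the result with
 *       lrmd_free_event().
 */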
static lrmd_event_data_t *
construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
const char *rsc_id, const char *operation)
{
lrmd_event_data_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
GHashTable *params = NULL;
xmlNode *primitive = NULL;
const char *class = NULL;
const char *transition = NULL;
CRM_ASSERT(rsc_id && operation);
op = lrmd_new_event(rsc_id, operation, 0);
op->type = lrmd_event_exec_complete;
op->timeout = 0;
op->start_delay = 0;
lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
if (rsc_op == NULL) {
CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP,
pcmk__str_casei));
op->user_data = NULL;
/* This is the stop_all_resources() case: by definition there is no DC
 * (or it would be shutting us down), so record our own feature set
 * version here.
 */
op->params = pcmk__strkey_table(free, free);
pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_trace("Constructed %s op for %s", operation, rsc_id);
return op;
}
params = xml2list(rsc_op);
g_hash_table_remove(params, CRM_META "_" PCMK__META_OP_TARGET_RC);
op_delay = crm_meta_value(params, PCMK_META_START_DELAY);
pcmk__scan_min_int(op_delay, &op->start_delay, 0);
op_timeout = crm_meta_value(params, PCMK_META_TIMEOUT);
pcmk__scan_min_int(op_timeout, &op->timeout, 0);
if (pcmk__guint_from_hash(params, CRM_META "_" PCMK_META_INTERVAL, 0,
&(op->interval_ms)) != pcmk_rc_ok) {
op->interval_ms = 0;
}
/* Use pcmk_monitor_timeout instead of the meta timeout for a fence
 * device's recurring monitor, if set
 */
primitive = pcmk__xe_first_child(rsc_op, PCMK_XE_PRIMITIVE, NULL, NULL);
class = crm_element_value(primitive, PCMK_XA_CLASS);
if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params)
&& pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei)
&& (op->interval_ms > 0)) {
op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout");
if (op_timeout != NULL) {
long long timeout_ms = crm_get_msec(op_timeout);
op->timeout = (int) QB_MIN(timeout_ms, INT_MAX);
}
}
if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) {
op->params = params;
} else {
rsc_history_t *entry = NULL;
if (lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* If we do not have stop parameters cached, use
* whatever we are given */
if (!entry || !entry->stop_params) {
op->params = params;
} else {
/* Copy the cached parameter list so that we stop the resource
* with the old attributes, not the new ones */
op->params = pcmk__strkey_table(free, free);
g_hash_table_foreach(params, copy_meta_keys, op->params);
g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
g_hash_table_destroy(params);
params = NULL;
}
}
/* Sanity-check the operation's timing values */
if (op->timeout <= 0) {
op->timeout = op->interval_ms;
}
if (op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, PCMK__XA_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = pcmk__str_copy(transition);
if (op->interval_ms != 0) {
if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP,
NULL)) {
crm_err("Start and Stop actions cannot have an interval: %u",
op->interval_ms);
op->interval_ms = 0;
}
}
crm_trace("Constructed %s op for %s: interval=%u",
operation, rsc_id, op->interval_ms);
return op;
}
/*!
* \internal
* \brief Send a (synthesized) event result
*
* Reply with a synthesized event result directly, as opposed to going through
* the executor.
*
* \param[in] to_host Host to send result to
* \param[in] to_sys IPC name to send result (NULL for transition engine)
* \param[in] rsc Type information about resource the result is for
* \param[in,out] op Event with result to send
* \param[in] rsc_id ID of resource the result is for
*/
void
controld_ack_event_directly(const char *to_host, const char *to_sys,
const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
const char *rsc_id)
{
xmlNode *reply = NULL;
xmlNode *update, *iter;
crm_node_t *peer = NULL;
CRM_CHECK(op != NULL, return);
if (op->rsc_id == NULL) {
// op->rsc_id is a (const char *) but lrmd_free_event() frees it
CRM_ASSERT(rsc_id != NULL);
op->rsc_id = pcmk__str_copy(rsc_id);
}
if (to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
peer = pcmk__get_node(0, controld_globals.our_nodename, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
update = create_node_state_update(peer, node_update_none, NULL,
__func__);
iter = pcmk__xe_create(update, PCMK__XE_LRM);
crm_xml_add(iter, PCMK_XA_ID, controld_globals.our_uuid);
iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCES);
iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCE);
crm_xml_add(iter, PCMK_XA_ID, op->rsc_id);
controld_add_resource_history_xml(iter, rsc, op,
controld_globals.our_nodename);
reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
crm_log_xml_trace(update, "[direct ACK]");
crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
op->rsc_id, op->op_type, op->interval_ms, op->user_data,
crm_element_value(reply, PCMK_XA_REFERENCE));
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
free_xml(update);
free_xml(reply);
}
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
gboolean res = TRUE;
GList *lrm_state_list = lrm_state_get_list();
GList *state_entry;
for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
lrm_state_t *lrm_state = state_entry->data;
if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
/* Keep iterating through all executor states even after one fails to verify */
res = FALSE;
}
}
controld_set_fsa_input_flags(R_SENT_RSC_STOP);
g_list_free(lrm_state_list);
lrm_state_list = NULL;
return res;
}
struct stop_recurring_action_s {
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct stop_recurring_action_s *event = user_data;
active_op_t *op = value;
if ((op->interval_ms != 0)
&& pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) {
crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
}
return remove;
}
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
lrm_state_t *lrm_state = user_data;
active_op_t *op = value;
if (op->interval_ms != 0) {
crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
(const char *) key);
remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
}
return remove;
}
/*!
* \internal
* \brief Check whether recurring actions should be cancelled before an action
*
* \param[in] rsc_id Resource that action is for
* \param[in] action Action being performed
* \param[in] interval_ms Operation interval of \p action (in milliseconds)
*
* \return true if recurring actions should be cancelled, otherwise false
*/
static bool
should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
{
if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
&& (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) {
/* Don't stop monitoring a migrating Pacemaker Remote connection
* resource until the entire migration has completed. We must detect if
* the connection is unexpectedly severed, even during a migration.
*/
return false;
}
// Cancel recurring actions before changing resource state
return (interval_ms == 0)
&& !pcmk__str_any_of(action, PCMK_ACTION_MONITOR,
PCMK_ACTION_NOTIFY, NULL);
}
/*!
* \internal
* \brief Check whether an action should not be performed at this time
*
* \param[in] operation Action to be performed
*
* \return Readable description of why action should not be performed,
* or NULL if it should be performed
*/
static const char *
should_nack_action(const char *action)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
&& pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) {
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
return "Not attempting start due to shutdown in progress";
}
switch (controld_globals.fsa_state) {
case S_NOT_DC:
case S_POLICY_ENGINE: // Recalculating
case S_TRANSITION_ENGINE:
break;
default:
if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) {
return "Controller cannot attempt actions at this time";
}
break;
}
return NULL;
}
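/*!
 * \internal
 * \brief Execute a requested operation for a resource
 *
 * \param[in,out] lrm_state Executor state to execute the operation on
 * \param[in,out] rsc       Executor information about the resource
 * \param[in,out] msg       Request XML for the operation
 * \param[in,out] md        Resource agent metadata, if known
 */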
static void
do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
struct ra_metadata_s *md)
{
int rc;
int call_id = 0;
char *op_id = NULL;
lrmd_event_data_t *op = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
const char *operation = NULL;
const char *nack_reason = NULL;
CRM_CHECK((rsc != NULL) && (msg != NULL), return);
operation = crm_element_value(msg, PCMK_XA_OPERATION);
CRM_CHECK(!pcmk__str_empty(operation), return);
transition = crm_element_value(msg, PCMK__XA_TRANSITION_KEY);
if (pcmk__str_empty(transition)) {
crm_log_xml_err(msg, "Missing transition number");
}
if (lrm_state == NULL) {
// This shouldn't be possible, but provide a failsafe just in case
crm_err("Cannot execute %s of %s: No executor connection "
CRM_XS " transition_key=%s",
operation, rsc->id, pcmk__s(transition, ""));
synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
PCMK_OCF_UNKNOWN_ERROR,
"No executor connection");
return;
}
if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
* will schedule reload-agent actions only. In either case, we need
* to map that to whatever the resource agent actually supports.
* Default to the OCF 1.1 name.
*/
if ((md != NULL)
&& pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
operation = PCMK_ACTION_RELOAD;
} else {
operation = PCMK_ACTION_RELOAD_AGENT;
}
}
op = construct_op(lrm_state, msg, rsc->id, operation);
CRM_CHECK(op != NULL, return);
if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) {
guint removed = 0;
struct stop_recurring_action_s data;
data.rsc = rsc;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->active_ops,
stop_recurring_action_by_rsc,
&data);
if (removed) {
crm_debug("Stopped %u recurring operation%s in preparation for "
PCMK__OP_FMT, removed, pcmk__plural_s(removed),
rsc->id, operation, op->interval_ms);
}
}
/* Now execute the operation */
crm_notice("Requesting local execution of %s operation for %s on %s "
CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT,
pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
lrm_state->node_name, pcmk__s(transition, ""), rsc->id,
operation, op->interval_ms);
nack_reason = should_nack_action(operation);
if (nack_reason != NULL) {
crm_notice("Discarding attempt to perform action %s on %s in state %s "
"(shutdown=%s)", operation, rsc->id,
fsa_state2string(controld_globals.fsa_state),
pcmk__flag_text(controld_globals.fsa_input_register,
R_SHUTDOWN));
lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
nack_reason);
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
lrmd_free_event(op);
free(op_id);
return;
}
controld_record_pending_op(lrm_state->node_name, rsc, op);
op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
if (op->interval_ms > 0) {
/* cancel it so we can then restart it without conflict */
cancel_op_key(lrm_state, rsc, op_id, FALSE);
}
rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type,
op->user_data, op->interval_ms,
op->timeout, op->start_delay,
op->params, &call_id);
if (rc == pcmk_rc_ok) {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
active_op_t *pending = NULL;
pending = pcmk__assert_alloc(1, sizeof(active_op_t));
crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
pending->call_id = call_id;
pending->interval_ms = op->interval_ms;
pending->op_type = pcmk__str_copy(operation);
pending->op_key = pcmk__str_copy(op_id);
pending->rsc_id = pcmk__str_copy(rsc->id);
pending->start_time = time(NULL);
pending->user_data = pcmk__str_copy(op->user_data);
if (crm_element_value_epoch(msg, PCMK_OPT_SHUTDOWN_LOCK,
&(pending->lock_time)) != pcmk_ok) {
pending->lock_time = 0;
}
g_hash_table_replace(lrm_state->active_ops, call_id_s, pending);
if ((op->interval_ms > 0)
&& (op->start_delay > START_DELAY_THRESHOLD)) {
int target_rc = PCMK_OCF_OK;
crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL);
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
}
pending->params = op->params;
op->params = NULL;
} else if (lrm_state_is_local(lrm_state)) {
crm_err("Could not initiate %s action for resource %s locally: %s "
CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc);
fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
process_lrm_event(lrm_state, op, NULL, NULL);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else {
crm_err("Could not initiate %s action for resource %s remotely on %s: "
"%s " CRM_XS " rc=%d",
operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
process_lrm_event(lrm_state, op, NULL, NULL);
}
free(op_id);
lrmd_free_event(op);
}
void
do_lrm_event(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
{
CRM_CHECK(FALSE, return);
}
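/*!
 * \internal
 * \brief Replace "\n" escape sequences in a string with real newlines
 *
 * \param[in] string String to unescape
 *
 * \return Newly allocated copy of \p string with newlines unescaped, or
 *         NULL if \p string is NULL
 *
 * \note The caller is responsible for freeing the result.
 */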
static char *
unescape_newlines(const char *string)
{
char *pch = NULL;
char *ret = NULL;
static const char *escaped_newline = "\\n";
if (!string) {
return NULL;
}
ret = pcmk__str_copy(string);
pch = strstr(ret, escaped_newline);
while (pch != NULL) {
/* Replace newline escape pattern with actual newline (and a space so we
* don't have to shuffle the rest of the buffer)
*/
pch[0] = '\n';
pch[1] = ' ';
pch = strstr(pch, escaped_newline);
}
return ret;
}
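/*!
 * \internal
 * \brief Check whether a resource's last recorded failure matches an operation
 *
 * \param[in] lrm_state   Executor state with the resource history
 * \param[in] rsc_id      ID of resource to check
 * \param[in] op_type     Name of operation to match
 * \param[in] interval_ms Interval (in milliseconds) of operation to match
 *
 * \return TRUE if the resource's last failure was the given operation
 */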
static bool
did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
const char * op_type, guint interval_ms)
{
rsc_history_t *entry = NULL;
CRM_CHECK(lrm_state != NULL, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
CRM_CHECK(op_type != NULL, return FALSE);
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->failed == NULL) {
return FALSE;
}
if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
&& pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
&& entry->failed->interval_ms == interval_ms) {
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Log the result of an executor action (actual or synthesized)
*
* \param[in] op Executor action to log result for
* \param[in] op_key Operation key for action
* \param[in] node_name Name of node action was performed on, if known
* \param[in] confirmed Whether to log that graph action was confirmed
*/
static void
log_executor_event(const lrmd_event_data_t *op, const char *op_key,
const char *node_name, gboolean confirmed)
{
int log_level = LOG_ERR;
GString *str = g_string_sized_new(100); // reasonable starting size
pcmk__g_strcat(str,
"Result of ",
pcmk__readable_action(op->op_type, op->interval_ms),
" operation for ", op->rsc_id, NULL);
if (node_name != NULL) {
pcmk__g_strcat(str, " on ", node_name, NULL);
}
switch (op->op_status) {
case PCMK_EXEC_DONE:
log_level = LOG_NOTICE;
pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL);
break;
case PCMK_EXEC_TIMEOUT:
pcmk__g_strcat(str,
": ", pcmk_exec_status_str(op->op_status), " after ",
pcmk__readable_interval(op->timeout), NULL);
break;
case PCMK_EXEC_CANCELLED:
log_level = LOG_INFO;
/* The order of the __attribute__ and the "Fall through" comment is
 * IMPORTANT! Do not change it without proper testing with both clang and
 * gcc, in multiple versions.
 * The __clang__ check allows building with all versions of clang.
 * The __has_c_attribute check works around a clang bug in RHEL 7:
 * __has_attribute would happily report the attribute as supported, and
 * then the build would fail on the next line.
 */
#ifdef __clang__
#ifdef __has_c_attribute
#if __has_attribute(fallthrough)
__attribute__((fallthrough));
#endif
#endif
#endif
// Fall through
default:
pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
NULL);
}
if ((op->exit_reason != NULL)
&& ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) {
pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL);
}
g_string_append(str, " " CRM_XS);
g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s",
(confirmed? "" : "un"), op->call_id, op_key);
if (op->op_status == PCMK_EXEC_DONE) {
g_string_append_printf(str, " rc=%d", op->rc);
}
do_crm_log(log_level, "%s", str->str);
g_string_free(str, TRUE);
/* The services library has already logged the output at info or debug
* level, so just raise to notice if it looks like a failure.
*/
if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
op->rsc_id, op->op_type,
op->interval_ms, node_name);
crm_log_output(LOG_NOTICE, prefix, op->output);
free(prefix);
}
}
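/*!
 * \internal
 * \brief Process an executor event (an actual result or a synthesized one)
 *
 * \param[in,out] lrm_state  Executor state the event occurred on, if any
 * \param[in,out] op         Event to process
 * \param[in,out] pending    Pending operation the event is for, if known
 * \param[in]     action_xml Action XML for the event, if any
 */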
void
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
active_op_t *pending, const xmlNode *action_xml)
{
char *op_id = NULL;
char *op_key = NULL;
gboolean remove = FALSE;
gboolean removed = FALSE;
bool need_direct_ack = FALSE;
lrmd_rsc_info_t *rsc = NULL;
const char *node_name = NULL;
CRM_CHECK(op != NULL, return);
CRM_CHECK(op->rsc_id != NULL, return);
// Remap new status codes for older DCs
if (compare_version(controld_globals.dc_version, "3.2.0") < 0) {
switch (op->op_status) {
case PCMK_EXEC_NOT_CONNECTED:
lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED,
PCMK_EXEC_ERROR, op->exit_reason);
break;
case PCMK_EXEC_INVALID:
lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR,
op->exit_reason);
break;
default:
break;
}
}
op_id = make_stop_id(op->rsc_id, op->call_id);
op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
// Get resource info if available (from executor state or action XML)
if (lrm_state) {
rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
}
if ((rsc == NULL) && action_xml) {
xmlNode *xml = pcmk__xe_first_child(action_xml, PCMK_XE_PRIMITIVE, NULL,
NULL);
const char *standard = crm_element_value(xml, PCMK_XA_CLASS);
const char *provider = crm_element_value(xml, PCMK_XA_PROVIDER);
const char *type = crm_element_value(xml, PCMK_XA_TYPE);
if (standard && type) {
crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
op->rsc_id, standard,
(provider? ":" : ""), (provider? provider : ""), type);
rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
} else {
crm_err("Can't process %s result because %s agent information not cached or in XML",
op_key, op->rsc_id);
}
}
// Get node name if available (from executor state or action XML)
if (lrm_state) {
node_name = lrm_state->node_name;
} else if (action_xml) {
node_name = crm_element_value(action_xml, PCMK__META_ON_NODE);
}
if(pending == NULL) {
remove = TRUE;
if (lrm_state) {
pending = g_hash_table_lookup(lrm_state->active_ops, op_id);
}
}
if (op->op_status == PCMK_EXEC_ERROR) {
switch(op->rc) {
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_PROMOTED:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_PROMOTED:
// Leave it to the TE/scheduler to decide if this is an error
op->op_status = PCMK_EXEC_DONE;
break;
default:
/* Nothing to do */
break;
}
}
if (op->op_status != PCMK_EXEC_CANCELLED) {
/* We might not record the result, so directly acknowledge it to the
* originator instead, so it doesn't time out waiting for the result
* (especially important if part of a transition).
*/
need_direct_ack = TRUE;
if (controld_action_is_recordable(op->op_type)) {
if (node_name && rsc) {
// We should record the result, and happily, we can
time_t lock_time = (pending == NULL)? 0 : pending->lock_time;
controld_update_resource_history(node_name, rsc, op, lock_time);
need_direct_ack = FALSE;
} else if (op->rsc_deleted) {
/* We shouldn't record the result (likely the resource was
* refreshed, cleaned, or removed while this operation was
* in flight).
*/
crm_notice("Not recording %s result in CIB because "
"resource information was removed since it was initiated",
op_key);
} else {
/* This shouldn't be possible; the executor didn't consider the
* resource deleted, but we couldn't find resource or node
* information.
*/
crm_err("Unable to record %s result in CIB: %s", op_key,
(node_name? "No resource information" : "No node name"));
}
}
} else if (op->interval_ms == 0) {
/* A non-recurring operation was cancelled. Most likely, the
* never-initiated action was removed from the executor's pending
* operations list upon resource removal.
*/
need_direct_ack = TRUE;
} else if (pending == NULL) {
/* This recurring operation was cancelled, but was not pending. No
* transition actions are waiting on it, nothing needs to be done.
*/
} else if (op->user_data == NULL) {
/* This recurring operation was cancelled and pending, but we don't
* have a transition key. This should never happen.
*/
crm_err("Recurring operation %s was cancelled without transition information",
op_key);
} else if (pcmk_is_set(pending->flags, active_op_remove)) {
/* This recurring operation was cancelled (by us) and pending, and we
* have been waiting for it to finish.
*/
if (lrm_state) {
controld_delete_action_history(op);
}
/* Directly acknowledge failed recurring actions here. The above call to
* controld_delete_action_history() will not erase any corresponding
* last_failure entry, which means that the DC won't confirm the
* cancellation via process_op_deletion(), and the transition would
* otherwise wait for the action timer to pop.
*/
if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
pending->op_type, pending->interval_ms)) {
need_direct_ack = TRUE;
}
} else if (op->rsc_deleted) {
/* This recurring operation was cancelled (but not by us, and the
* executor does not have resource information, likely due to resource
* cleanup, refresh, or removal) and pending.
*/
crm_debug("Recurring op %s was cancelled due to resource deletion",
op_key);
need_direct_ack = TRUE;
} else {
/* This recurring operation was cancelled (but not by us, likely by the
* executor before stopping the resource) and pending. We don't need to
* do anything special.
*/
}
if (need_direct_ack) {
controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
}
if(remove == FALSE) {
/* The caller will do this afterwards, but keep the logging consistent */
removed = TRUE;
} else if (lrm_state && ((op->interval_ms == 0)
|| (op->op_status == PCMK_EXEC_CANCELLED))) {
gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id);
if (op->interval_ms != 0) {
removed = TRUE;
} else if (found) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
op_key, op->call_id, op_id,
g_hash_table_size(lrm_state->active_ops));
}
}
log_executor_event(op, op_key, node_name, removed);
if (lrm_state) {
if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA,
pcmk__str_casei)) {
crmd_alert_resource_op(lrm_state->node_name, op);
} else if (rsc && (op->rc == PCMK_OCF_OK)) {
char *metadata = unescape_newlines(op->output);
controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
free(metadata);
}
}
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
if (lrm_state) {
delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL,
true);
}
}
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
controld_trigger_fsa();
if (lrm_state && rsc) {
update_history_cache(lrm_state, rsc, op);
}
lrmd_free_rsc_info(rsc);
free(op_key);
free(op_id);
}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 9f67ccce07..652672b63e 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1119 +1,1120 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <pacemaker-controld.h>
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
 * stonith failure counting
 *
 * We don't want to get stuck in a permanent fencing loop. Keep track of the
 * number of fencing failures for each target node, and cap the number of
 * times we will restart a transition because of them.
 */
struct st_fail_rec {
int count;
};
static bool fence_reaction_panic = false;
static unsigned long int stonith_max_attempts = 10;
static GHashTable *stonith_failures = NULL;
/*!
* \internal
* \brief Update max fencing attempts before giving up
*
* \param[in] value New max fencing attempts
*/
static void
update_stonith_max_attempts(const char *value)
{
int score = char2score(value);
/* char2score() may return a negative score; check it before storing it in
 * the unsigned counter, where a negative value would wrap to a huge one
 */
if (score < 1) {
stonith_max_attempts = 10UL;
} else {
stonith_max_attempts = (unsigned long int) score;
}
}
/*!
* \internal
* \brief Configure reaction to notification of local node being fenced
*
* \param[in] reaction_s Reaction type
*/
static void
set_fence_reaction(const char *reaction_s)
{
if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
fence_reaction_panic = true;
} else {
if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {
crm_warn("Invalid value '%s' for %s, using 'stop'",
reaction_s, PCMK_OPT_FENCE_REACTION);
}
fence_reaction_panic = false;
}
}
/*!
* \internal
* \brief Configure fencing options based on the CIB
*
* \param[in,out] options Name/value pairs for configured options
*/
void
controld_configure_fencing(GHashTable *options)
{
const char *value = NULL;
value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION);
set_fence_reaction(value);
value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS);
update_stonith_max_attempts(value);
}
static gboolean
too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *value = NULL;
if (stonith_failures == NULL) {
return FALSE;
}
if (target == NULL) {
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
if (value->count >= stonith_max_attempts) {
target = (const char*)key;
goto too_many;
}
}
} else {
value = g_hash_table_lookup(stonith_failures, target);
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
goto too_many;
}
}
return FALSE;
too_many:
crm_warn("Too many failures (%d) to fence %s, giving up",
value->count, target);
return TRUE;
}
/*!
* \internal
* \brief Reset a stonith fail count
*
* \param[in] target Name of node to reset, or NULL for all
*/
void
st_fail_count_reset(const char *target)
{
if (stonith_failures == NULL) {
return;
}
if (target) {
struct st_fail_rec *rec = NULL;
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count = 0;
}
} else {
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *rec = NULL;
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rec)) {
rec->count = 0;
}
}
}
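/*!
 * \internal
 * \brief Increment a target node's fencing failure count
 *
 * \param[in] target Name of node to increment failure count for
 */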
static void
st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;
if (stonith_failures == NULL) {
stonith_failures = pcmk__strkey_table(free, free);
}
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count++;
} else {
rec = malloc(sizeof(struct st_fail_rec));
if(rec == NULL) {
return;
}
rec->count = 1;
g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec);
}
}
/* end stonith fail count functions */
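/*!
 * \internal
 * \brief CIB callback for a fencing-related node state update
 *
 * If the update failed, abort the current transition so a new one can take
 * the failure into account.
 *
 * \param[in] msg       Ignored
 * \param[in] call_id   CIB call ID of the update
 * \param[in] rc        Result of the update
 * \param[in] output    Ignored
 * \param[in] user_data Name of the fenced node (for logging only)
 */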
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Fencing update %d for %s: failed - %s (%d)",
call_id, (char *)user_data, pcmk_strerror(rc), rc);
crm_log_xml_warn(msg, "Failed update");
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
"CIB update failed", NULL);
} else {
crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
}
}
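/*!
 * \internal
 * \brief Update the CIB to reflect that a node has been fenced
 *
 * \param[in] action Fencing graph action (ignored here)
 * \param[in] target Name of the fenced node
 * \param[in] uuid   UUID of the fenced node
 */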
static void
send_stonith_update(pcmk__graph_action_t *action, const char *target,
const char *uuid)
{
int rc = pcmk_ok;
crm_node_t *peer = NULL;
/* We (usually) rely on the membership layer to do node_update_cluster,
* and the peer status callback to do node_update_peer, because the node
* might have already rejoined before we get the stonith result here.
*/
int flags = node_update_join | node_update_expected;
/* zero out the node-status & remove all LRM status info */
xmlNode *node_state = NULL;
CRM_CHECK(target != NULL, return);
CRM_CHECK(uuid != NULL, return);
/* Make sure the membership and join caches are accurate.
 * Also try to find any existing node cache entry by node UUID, in case the
 * entry does not have a node name yet.
 */
peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= node_update_cluster;
}
if (peer->uuid == NULL) {
crm_info("Recording uuid '%s' for node '%s'", uuid, target);
peer->uuid = pcmk__str_copy(uuid);
}
crmd_peer_down(peer, TRUE);
/* Generate a node state update for the CIB */
node_state = create_node_state_update(peer, flags, NULL, __func__);
/* we have to mark whether or not remote nodes have already been fenced */
if (peer->flags & crm_remote_node) {
char *now_s = pcmk__ttoa(time(NULL));
crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s);
free(now_s);
}
/* Force our known ID */
crm_xml_add(node_state, PCMK_XA_ID, uuid);
rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
PCMK_XE_STATUS, node_state,
cib_scope_local
|cib_can_create);
/* Delay processing the trigger until the update completes */
crm_debug("Sending fencing update %d for %s", rc, target);
fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
// Make sure it sticks
/* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn,
* cib_scope_local);
*/
controld_delete_node_state(peer->uname, controld_section_all,
cib_scope_local);
free_xml(node_state);
return;
}
/*!
* \internal
* \brief Abort transition due to stonith failure
*
* \param[in] abort_action Whether to restart or stop transition
* \param[in] target Don't restart if this (NULL for any) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
static void
abort_for_stonith_failure(enum pcmk__graph_next abort_action,
const char *target, const xmlNode *reason)
{
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
abort_action = pcmk__graph_wait;
}
abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GList *stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target)
{
stonith_cleanup_list = g_list_append(stonith_cleanup_list,
pcmk__str_copy(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] target Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GList *iter = stonith_cleanup_list;
while (iter != NULL) {
GList *tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup(void)
{
if (stonith_cleanup_list) {
GList *iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup(void)
{
GList *iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
- crm_node_t *target_node = pcmk__get_node(0, target, NULL,
- pcmk__node_search_cluster);
+ crm_node_t *target_node =
+ pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member);
const char *uuid = crm_peer_uuid(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
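A minimal sketch (not part of this patch; the election hook is made up for illustration) of how the cleanup-list functions above fit together: a node that wins the DC election drains the list by resending the stonith updates, while a node that loses simply discards it.
/* Hypothetical caller, for illustration only */
static void
on_election_result(bool we_are_dc)
{
    if (we_are_dc) {
        execute_stonith_cleanup();  /* resend updates, then empty the list */
    } else {
        purge_stonith_cleanup();    /* discard entries without sending */
    }
}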
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
static stonith_t *stonith_api = NULL;
static mainloop_timer_t *controld_fencer_connect_timer = NULL;
static char *te_client_id = NULL;
static gboolean
fail_incompletable_stonith(pcmk__graph_t *graph)
{
GList *lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GList *lpc2 = NULL;
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
if ((action->type != pcmk__cluster_graph_action)
|| pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
}
task = crm_element_value(action->xml, PCMK_XA_OPERATION);
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
last_action = action->xml;
pcmk__update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, pcmk__xe_id(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
te_cleanup_stonith_history_sync(st, FALSE);
if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
crm_err("Lost fencer connection (will attempt to reconnect)");
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
} else {
crm_info("Disconnected from fencer");
}
if (stonith_api) {
/* the client API won't properly reconnect notifications
* if they are still in the table - so remove them
*/
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(st);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (AM_I_DC) {
fail_incompletable_stonith(controld_globals.transition_graph);
trigger_graph();
}
}
/*!
* \internal
* \brief Handle an event notification from the fencing API
*
* \param[in] st Fencing API connection (ignored)
* \param[in] event Fencing API event notification
*/
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
bool succeeded = true;
const char *executioner = "the cluster";
const char *client = "a client";
const char *reason = NULL;
int exec_status;
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (event == NULL) {
crm_err("Notify data not found");
return;
}
if (event->executioner != NULL) {
executioner = event->executioner;
}
if (event->client_origin != NULL) {
client = event->client_origin;
}
exec_status = stonith__event_execution_status(event);
if ((stonith__event_exit_status(event) != CRM_EX_OK)
|| (exec_status != PCMK_EXEC_DONE)) {
succeeded = false;
if (exec_status == PCMK_EXEC_DONE) {
exec_status = PCMK_EXEC_ERROR;
}
}
reason = stonith__event_exit_reason(event);
crmd_alert_fencing_op(event);
if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
// Unfencing doesn't need special handling, just a log message
if (succeeded) {
crm_notice("%s was unfenced by %s at the request of %s@%s",
event->target, executioner, client, event->origin);
} else {
crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
event->target, executioner,
pcmk_exec_status_str(exec_status),
((reason == NULL)? "" : ": "),
((reason == NULL)? "" : reason),
stonith__event_exit_status(event));
}
return;
}
if (succeeded
&& pcmk__str_eq(event->target, controld_globals.our_nodename,
pcmk__str_casei)) {
/* We were notified of our own fencing. Most likely, either fencing was
* misconfigured, or fabric fencing that doesn't cut cluster
* communication is in use.
*
* Either way, shutting down the local host is a good idea, to require
* administrator intervention. Also, other nodes would otherwise likely
* set our status to lost because of the fencing callback and discard
* our subsequent election votes as "not part of our cluster".
*/
crm_crit("We were allegedly just fenced by %s for %s!",
executioner, event->origin); // Dumps blackbox if enabled
if (fence_reaction_panic) {
pcmk__panic(__func__);
} else {
crm_exit(CRM_EX_FATAL);
}
return; // Should never get here
}
/* Update the count of fencing failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC) {
if (succeeded) {
st_fail_count_reset(event->target);
} else {
st_fail_count_increment(event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
"%s%s%s%s " CRM_XS " event=%s",
event->target, (succeeded? "" : " not"),
event->action, executioner, client, event->origin,
(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
((reason == NULL)? "" : " ("),
((reason == NULL)? "" : reason),
((reason == NULL)? "" : ")"),
event->id);
if (succeeded) {
- crm_node_t *peer = pcmk__search_node_caches(0, event->target,
- pcmk__node_search_any
- |pcmk__node_search_known);
+ const uint32_t flags = pcmk__node_search_any
+ |pcmk__node_search_cluster_cib;
+
+ crm_node_t *peer = pcmk__search_node_caches(0, event->target, flags);
const char *uuid = NULL;
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
if (AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via refresh_remote_nodes(). For now, we rely on the
* scheduler creating fence pseudo-events for the guests.
*/
if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
/* Abort the current transition if it wasn't the cluster that
* initiated fencing.
*/
crm_info("External fencing operation from %s fenced %s",
client, event->target);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"External Fencing Operation", NULL);
}
} else if (pcmk__str_eq(controld_globals.dc_name, event->target,
pcmk__str_null_matches|pcmk__str_casei)
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
// Assume the target was our DC if we don't currently have one
if (controld_globals.dc_name != NULL) {
crm_notice("Fencing target %s was our DC", event->target);
} else {
crm_notice("Fencing target %s may have been our DC",
event->target);
}
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (pcmk__str_eq(event->executioner, controld_globals.our_nodename,
pcmk__str_casei)) {
send_stonith_update(NULL, event->target, uuid);
}
add_stonith_cleanup(event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (pcmk_is_set(peer->flags, crm_remote_node)) {
remote_ra_fail(event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer
*
* \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
gboolean
controld_timer_fencer_connect(gpointer user_data)
{
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return G_SOURCE_REMOVE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return G_SOURCE_REMOVE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (controld_fencer_connect_timer == NULL) {
controld_fencer_connect_timer =
mainloop_timer_add("controld_fencer_connect", 1000,
TRUE, controld_timer_fencer_connect,
GINT_TO_POINTER(TRUE));
}
if (rc != pcmk_ok) {
if (pcmk_is_set(controld_globals.fsa_input_register,
R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
return G_SOURCE_CONTINUE;
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
return G_SOURCE_REMOVE;
}
}
if (rc == pcmk_ok) {
stonith_api_operations_t *cmds = stonith_api->cmds;
cmds->register_notification(stonith_api,
PCMK__VALUE_ST_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE,
handle_fence_notification);
cmds->register_notification(stonith_api,
PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
tengine_stonith_history_synced);
te_trigger_stonith_history_sync(TRUE);
crm_notice("Fencer successfully connected");
}
return G_SOURCE_REMOVE;
}
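A short sketch of the two retry modes documented above (these call sites are assumed for illustration): passing NULL blocks in stonith_api_connect_retry() for up to 30 attempts, while any non-NULL user_data makes a single attempt and defers further retries to the 1-second mainloop timer.
controld_timer_fencer_connect(NULL);                  /* block until connected (or 30 failures) */
controld_timer_fencer_connect(GINT_TO_POINTER(TRUE)); /* try once now, retry via mainloop timer */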
void
controld_disconnect_fencer(bool destroy)
{
if (stonith_api) {
// Prevent fencer connection from coming up again
controld_clear_fsa_input_flags(R_ST_REQUIRED);
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(stonith_api);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (destroy) {
if (stonith_api) {
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
if (controld_fencer_connect_timer) {
mainloop_timer_del(controld_fencer_connect_timer);
controld_fencer_connect_timer = NULL;
}
if (te_client_id) {
free(te_client_id);
te_client_id = NULL;
}
}
}
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
te_cleanup_stonith_history_sync(stonith_api, FALSE);
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
stonith_history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
pcmk__graph_action_t *action = NULL;
const char *target = NULL;
if ((data == NULL) || (data->userdata == NULL)) {
crm_err("Ignoring fence operation %d result: "
"No transition key given (bug?)",
((data == NULL)? -1 : data->call_id));
return;
}
if (!AM_I_DC) {
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
reason = pcmk_exec_status_str(stonith__execution_status(data));
}
crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
data->call_id, stonith__exit_status(data), reason,
(const char *) data->userdata);
return;
}
CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
&stonith_id, NULL),
goto bail);
if (controld_globals.transition_graph->complete || (stonith_id < 0)
|| !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
|| (controld_globals.transition_graph->id != transition_id)) {
crm_info("Ignoring fence operation %d result: "
"Not from current transition " CRM_XS
" complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
data->call_id,
pcmk__btoa(controld_globals.transition_graph->complete),
stonith_id, uuid, controld_globals.te_uuid, transition_id,
controld_globals.transition_graph->id);
goto bail;
}
action = controld_get_action(stonith_id);
if (action == NULL) {
crm_err("Ignoring fence operation %d result: "
"Action %d not found in transition graph (bug?) "
CRM_XS " uuid=%s transition=%d",
data->call_id, stonith_id, uuid, transition_id);
goto bail;
}
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
if (target == NULL) {
crm_err("Ignoring fence operation %d result: No target given (bug?)",
data->call_id);
goto bail;
}
stop_te_timer(action);
if (stonith__exit_status(data) == CRM_EX_OK) {
const char *uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
const char *op = crm_meta_value(action->params,
PCMK__META_STONITH_ACTION);
crm_info("Fence operation %d for %s succeeded", data->call_id, target);
if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
te_action_confirmed(action, NULL);
if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
const char *value = NULL;
char *now = pcmk__ttoa(time(NULL));
gboolean is_remote_node = FALSE;
/* This check is not 100% reliable, since this node is not
* guaranteed to have the remote node cached. However, it
* doesn't have to be reliable, since the attribute manager can
* learn a node's "remoteness" by other means sooner or later.
* This allows it to learn more quickly if this node does have
* the information.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
is_remote_node = TRUE;
}
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
is_remote_node);
free(now);
value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
is_remote_node);
value = crm_meta_value(action->params,
PCMK__META_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
send_stonith_update(action, target, uuid);
pcmk__set_graph_action_flags(action,
pcmk__graph_action_sent_update);
}
}
st_fail_count_reset(target);
} else {
enum pcmk__graph_next abort_action = pcmk__graph_restart;
int status = stonith__execution_status(data);
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
if (status == PCMK_EXEC_DONE) {
reason = "Agent returned error";
} else {
reason = pcmk_exec_status_str(status);
}
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
crm_warn("Fence operation %d for %s failed: %s "
"(aborting transition and giving up for now)",
data->call_id, target, reason);
abort_action = pcmk__graph_wait;
} else {
crm_notice("Fence operation %d for %s failed: %s "
"(aborting transition)", data->call_id, target, reason);
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in
* handle_fence_notification().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
pcmk__update_graph(controld_globals.transition_graph, action);
trigger_graph();
bail:
free(data->userdata);
free(uuid);
return;
}
static int
fence_with_delay(const char *target, const char *type, int delay)
{
uint32_t options = st_opt_none; // Group of enum stonith_call_options
int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout
/ 1000);
if (crmd_join_phase_count(crm_join_confirmed) == 1) {
stonith__set_call_options(options, target, st_opt_allow_suicide);
}
return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
type, timeout_sec, 0, delay);
}
/*!
* \internal
* \brief Execute a fencing action from a transition graph
*
* \param[in] graph Transition graph being executed (ignored)
* \param[in] action Fencing action to execute
*
* \return Standard Pacemaker return code
*/
int
controld_execute_fence_action(pcmk__graph_t *graph,
pcmk__graph_action_t *action)
{
int rc = 0;
const char *id = pcmk__xe_id(action->xml);
const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *type = crm_meta_value(action->params,
PCMK__META_STONITH_ACTION);
char *transition_key = NULL;
const char *priority_delay = NULL;
int delay_i = 0;
gboolean invalid_action = FALSE;
int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout
/ 1000);
CRM_CHECK(id != NULL, invalid_action = TRUE);
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
CRM_CHECK(type != NULL, invalid_action = TRUE);
CRM_CHECK(target != NULL, invalid_action = TRUE);
if (invalid_action) {
crm_log_xml_warn(action->xml, "BadAction");
return EPROTO;
}
priority_delay = crm_meta_value(action->params,
PCMK_OPT_PRIORITY_FENCING_DELAY);
crm_notice("Requesting fencing (%s) targeting node %s "
CRM_XS " action=%s timeout=%i%s%s",
type, target, id, stonith_timeout,
priority_delay ? " priority_delay=" : "",
priority_delay ? priority_delay : "");
/* Passing NULL means block until we can connect... */
controld_timer_fencer_connect(NULL);
pcmk__scan_min_int(priority_delay, &delay_i, 0);
rc = fence_with_delay(target, type, delay_i);
transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, 0,
controld_globals.te_uuid);
stonith_api->cmds->register_callback(stonith_api, rc,
(stonith_timeout
+ (delay_i > 0 ? delay_i : 0)),
st_opt_timeout_updates, transition_key,
"tengine_stonith_callback",
tengine_stonith_callback);
return pcmk_rc_ok;
}
bool
controld_verify_stonith_watchdog_timeout(const char *value)
{
long long st_timeout = (value != NULL)? crm_get_msec(value) : 0;
const char *our_nodename = controld_globals.our_nodename;
if (st_timeout == 0
|| (stonith_api && (stonith_api->state != stonith_disconnected) &&
stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
our_nodename))) {
return pcmk__valid_stonith_watchdog_timeout(value);
}
return true;
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
static crm_trigger_t *stonith_history_sync_trigger = NULL;
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
if (free_timers) {
mainloop_timer_del(stonith_history_sync_timer_short);
stonith_history_sync_timer_short = NULL;
mainloop_timer_del(stonith_history_sync_timer_long);
stonith_history_sync_timer_long = NULL;
} else {
mainloop_timer_stop(stonith_history_sync_timer_short);
mainloop_timer_stop(stonith_history_sync_timer_long);
}
if (st) {
st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED);
}
}
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
te_cleanup_stonith_history_sync(st, FALSE);
crm_debug("Fence-history synced - cancel all timers");
}
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
void
te_trigger_stonith_history_sync(bool long_timeout)
{
/* Trigger a sync in 5s, to give more nodes a chance to show up so that
 * we don't create unnecessary stonith-history-sync traffic.
 *
 * The long timeout of 30s is a fallback: after a successful connection
 * to the fencer, we wait up to 30s for the DC to trigger a history
 * sync. If that doesn't happen (e.g. the fencer segfaulted and was
 * restarted by pacemakerd), we trigger a sync locally.
 */
/* Since do_stonith_history_sync() ultimately checks the fencer
 * connection, it's fine to leave the sync timers and
 * stonith_history_sync_trigger around.
 */
if (stonith_history_sync_trigger == NULL) {
stonith_history_sync_trigger =
mainloop_add_trigger(G_PRIORITY_LOW,
do_stonith_history_sync, NULL);
}
if (long_timeout) {
if(stonith_history_sync_timer_long == NULL) {
stonith_history_sync_timer_long =
mainloop_timer_add("history_sync_long", 30000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
mainloop_timer_start(stonith_history_sync_timer_long);
} else {
if(stonith_history_sync_timer_short == NULL) {
stonith_history_sync_timer_short =
mainloop_timer_add("history_sync_short", 5000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
mainloop_timer_start(stonith_history_sync_timer_short);
}
}
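For illustration, the two trigger modes described in the comments above (the second call site is assumed, not taken from this patch):
te_trigger_stonith_history_sync(TRUE);   /* 30s fallback window after connecting to the fencer */
te_trigger_stonith_history_sync(FALSE);  /* 5s window, e.g. after a membership change */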
/* end stonith history synchronization functions */
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
index 97e6685a28..e4dcacc6e9 100644
--- a/daemons/controld/controld_join_client.c
+++ b/daemons/controld/controld_join_client.c
@@ -1,366 +1,366 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
/*!
* \internal
* \brief Remember if DC is shutting down as we join
*
* If we're joining while the current DC is shutting down, update its expected
* state, so we don't fence it if we become the new DC. (We weren't a peer
* when it broadcast its shutdown request.)
*
* \param[in] msg A join message from the DC
*/
static void
update_dc_expected(const xmlNode *msg)
{
if ((controld_globals.dc_name != NULL)
&& pcmk__xe_attr_is_true(msg, PCMK__XA_DC_LEAVING)) {
crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN);
}
}
/* A_CL_JOIN_QUERY */
/* is there a DC out there? */
void
do_cl_join_query(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
sleep(1); // Give the cluster layer time to propagate to the DC
update_dc(NULL); /* Unset any existing value so that the result is not discarded */
crm_debug("Querying for a DC");
send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
free_xml(req);
}
/* A_CL_JOIN_ANNOUNCE */
/* This is essentially a workaround for the fact that we may not be
 * running (or may otherwise be unable to reply) when the DC sends out
 * A_DC_JOIN_OFFER_ALL
 */
void
do_cl_join_announce(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* don't announce if we're in one of these states */
if (cur_state != S_PENDING) {
crm_warn("Not announcing cluster join because in state %s",
fsa_state2string(cur_state));
return;
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) {
/* send as a broadcast */
xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
crm_debug("Announcing availability");
update_dc(NULL);
send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
free_xml(req);
} else {
/* Delay announce until we have finished local startup */
crm_warn("Delaying announce of cluster join until local startup is complete");
return;
}
}
static int query_call_id = 0;
/* A_CL_JOIN_REQUEST */
/* aka. accept the welcome offer */
void
do_cl_join_offer_respond(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
cib_t *cib_conn = controld_globals.cib_conn;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *welcome_from;
const char *join_id;
CRM_CHECK(input != NULL, return);
welcome_from = crm_element_value(input->msg, PCMK__XA_SRC);
join_id = crm_element_value(input->msg, PCMK__XA_JOIN_ID);
crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s",
welcome_from, crm_element_value(input->msg, PCMK__XA_JOIN_ID));
/* we only ever want the last one */
if (query_call_id > 0) {
crm_trace("Cancelling previous join query: %d", query_call_id);
remove_cib_op_callback(query_call_id, FALSE);
query_call_id = 0;
}
if (update_dc(input->msg) == FALSE) {
crm_warn("Discarding cluster join offer from node %s (expected %s)",
welcome_from, controld_globals.dc_name);
return;
}
update_dc_expected(input->msg);
query_call_id = cib_conn->cmds->query(cib_conn, NULL, NULL,
cib_scope_local|cib_no_children);
fsa_register_cib_callback(query_call_id, pcmk__str_copy(join_id),
join_query_callback);
crm_trace("Registered join query callback: %d", query_call_id);
controld_set_fsa_action_flags(A_DC_TIMER_STOP);
controld_trigger_fsa();
}
void
join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
char *join_id = user_data;
xmlNode *generation = pcmk__xe_create(NULL, PCMK__XE_GENERATION_TUPLE);
CRM_LOG_ASSERT(join_id != NULL);
if (query_call_id != call_id) {
crm_trace("Query %d superseded", call_id);
goto done;
}
query_call_id = 0;
if(rc != pcmk_ok || output == NULL) {
crm_err("Could not retrieve version details for join-%s: %s (%d)",
join_id, pcmk_strerror(rc), rc);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
} else if (controld_globals.dc_name == NULL) {
crm_debug("Membership is in flux, not continuing join-%s", join_id);
} else {
xmlNode *reply = NULL;
crm_debug("Respond to join offer join-%s from %s",
join_id, controld_globals.dc_name);
pcmk__xe_copy_attrs(generation, output, pcmk__xaf_none);
reply = create_request(CRM_OP_JOIN_REQUEST, generation,
controld_globals.dc_name, CRM_SYSTEM_DC,
CRM_SYSTEM_CRMD, NULL);
crm_xml_add(reply, PCMK__XA_JOIN_ID, join_id);
crm_xml_add(reply, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL,
- pcmk__node_search_cluster),
+ pcmk__node_search_cluster_member),
crm_msg_crmd, reply, TRUE);
free_xml(reply);
}
done:
free_xml(generation);
}
void
set_join_state(const char *start_state, const char *node_name, const char *node_uuid,
bool remote)
{
if (pcmk__str_eq(start_state, PCMK_VALUE_STANDBY, pcmk__str_casei)) {
crm_notice("Forcing node %s to join in %s state per configured "
"environment", node_name, start_state);
cib__update_node_attr(controld_globals.logger_out,
controld_globals.cib_conn, cib_sync_call,
PCMK_XE_NODES, node_uuid,
NULL, NULL, NULL, PCMK_NODE_ATTR_STANDBY,
PCMK_VALUE_TRUE, NULL,
(remote? PCMK_VALUE_REMOTE : NULL));
} else if (pcmk__str_eq(start_state, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
crm_notice("Forcing node %s to join in %s state per configured "
"environment", node_name, start_state);
cib__update_node_attr(controld_globals.logger_out,
controld_globals.cib_conn, cib_sync_call,
PCMK_XE_NODES, node_uuid,
NULL, NULL, NULL, PCMK_NODE_ATTR_STANDBY,
PCMK_VALUE_FALSE, NULL,
(remote? PCMK_VALUE_REMOTE : NULL));
} else if (pcmk__str_eq(start_state, PCMK_VALUE_DEFAULT, pcmk__str_casei)) {
crm_debug("Not forcing a starting state on node %s", node_name);
} else {
crm_warn("Unrecognized start state '%s', using "
"'" PCMK_VALUE_DEFAULT "' (%s)",
start_state, node_name);
}
}
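A sketch of the caller's pattern (mirroring do_cl_join_finalize_respond() below): on a node's first join, the configured start state from the environment is applied to the local node; any value other than "standby", "online", or "default" falls through to the warning branch above.
set_join_state(pcmk__env_option(PCMK__ENV_NODE_START_STATE),
               controld_globals.our_nodename, controld_globals.our_uuid,
               false);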
static int
update_conn_host_cache(xmlNode *node, void *userdata)
{
const char *remote = crm_element_value(node, PCMK_XA_ID);
const char *conn_host = crm_element_value(node, PCMK__XA_CONNECTION_HOST);
const char *state = crm_element_value(node, PCMK__XA_NODE_STATE);
crm_node_t *remote_peer = pcmk__cluster_lookup_remote_node(remote);
if (remote_peer == NULL) {
return pcmk_rc_ok;
}
if (conn_host != NULL) {
pcmk__str_update(&remote_peer->conn_host, conn_host);
}
if (state != NULL) {
pcmk__update_peer_state(__func__, remote_peer, state, 0);
}
return pcmk_rc_ok;
}
/* A_CL_JOIN_RESULT */
/* aka. this is notification that we have (or have not) been accepted */
void
do_cl_join_finalize_respond(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *tmp1 = NULL;
gboolean was_nack = TRUE;
static gboolean first_join = TRUE;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
int join_id = -1;
const char *op = crm_element_value(input->msg, PCMK__XA_CRM_TASK);
const char *welcome_from = crm_element_value(input->msg, PCMK__XA_SRC);
if (!pcmk__str_eq(op, CRM_OP_JOIN_ACKNAK, pcmk__str_casei)) {
crm_trace("Ignoring op=%s message", op);
return;
}
/* Determine whether this was an ack or a nack */
if (pcmk__xe_attr_is_true(input->msg, CRM_OP_JOIN_ACKNAK)) {
was_nack = FALSE;
}
crm_element_value_int(input->msg, PCMK__XA_JOIN_ID, &join_id);
if (was_nack) {
crm_err("Shutting down because cluster join with leader %s failed "
CRM_XS" join-%d NACK'd", welcome_from, join_id);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
controld_set_fsa_input_flags(R_STAYDOWN);
return;
}
if (!AM_I_DC
&& pcmk__str_eq(welcome_from, controld_globals.our_nodename,
pcmk__str_casei)) {
crm_warn("Discarding our own welcome - we're no longer the DC");
return;
}
if (update_dc(input->msg) == FALSE) {
crm_warn("Discarding %s from node %s (expected from %s)",
op, welcome_from, controld_globals.dc_name);
return;
}
update_dc_expected(input->msg);
/* record the node's feature set as a transient attribute */
update_attrd(controld_globals.our_nodename, CRM_ATTR_FEATURE_SET,
CRM_FEATURE_SET, NULL, FALSE);
/* send our status section to the DC */
tmp1 = controld_query_executor_state();
if (tmp1 != NULL) {
xmlNode *remotes = NULL;
xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1,
controld_globals.dc_name, CRM_SYSTEM_DC,
CRM_SYSTEM_CRMD, NULL);
crm_xml_add_int(reply, PCMK__XA_JOIN_ID, join_id);
crm_debug("Confirming join-%d: sending local operation history to %s",
join_id, controld_globals.dc_name);
/*
* If this is the node's first join since the controller started on it,
* set its initial state (standby or member) according to the user's
* preference.
*
* We do not clear the LRM history here. Even if the DC failed to do it
* when we last left, removing them here creates a race condition if the
* controller is being recovered. Instead of a list of active resources
* from the executor, we may end up with a blank status section. If we
* are _NOT_ lucky, we will probe for the "wrong" instance of anonymous
* clones and end up with multiple active instances on the machine.
*/
if (first_join
&& !pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
first_join = FALSE;
if (start_state) {
set_join_state(start_state, controld_globals.our_nodename,
controld_globals.our_uuid, false);
}
}
send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL,
- pcmk__node_search_cluster),
+ pcmk__node_search_cluster_member),
crm_msg_crmd, reply, TRUE);
free_xml(reply);
if (AM_I_DC == FALSE) {
register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE,
__func__);
}
free_xml(tmp1);
/* Update the remote node cache with information about which node
* is hosting the connection.
*/
remotes = pcmk__xe_first_child(input->msg, PCMK_XE_NODES, NULL, NULL);
if (remotes != NULL) {
pcmk__xe_foreach_child(remotes, PCMK_XE_NODE,
update_conn_host_cache, NULL);
}
} else {
crm_err("Could not confirm join-%d with %s: Local operation history "
"failed", join_id, controld_globals.dc_name);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index bf06422361..3356cf8cc2 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -1,1045 +1,1047 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-controld.h>
static char *max_generation_from = NULL;
static xmlNodePtr max_generation_xml = NULL;
/*!
* \internal
* \brief Nodes from which a CIB sync has failed since the peer joined
*
* This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
* the name of a client node from which a CIB \p sync_from() call has failed in
* \p do_dc_join_finalize() since the client joined the cluster as a peer.
* \p join_id is the ID of the join round in which the \p sync_from() failed,
* and is intended for use in nack log messages.
*/
static GHashTable *failed_sync_nodes = NULL;
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* Numeric counter used to identify join rounds (an unsigned int would be
* appropriate, except we get and set it in XML as int)
*/
static int current_join_id = 0;
/*!
* \internal
* \brief Destroy the hash table containing failed sync nodes
*/
void
controld_destroy_failed_sync_table(void)
{
if (failed_sync_nodes != NULL) {
g_hash_table_destroy(failed_sync_nodes);
failed_sync_nodes = NULL;
}
}
/*!
* \internal
* \brief Remove a node from the failed sync nodes table if present
*
* \param[in] node_name Node name to remove
*/
void
controld_remove_failed_sync_node(const char *node_name)
{
if (failed_sync_nodes != NULL) {
g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
}
}
/*!
* \internal
* \brief Add to a hash table a node whose CIB failed to sync
*
* \param[in] node_name Name of node whose CIB failed to sync
* \param[in] join_id Join round when the failure occurred
*/
static void
record_failed_sync_node(const char *node_name, gint join_id)
{
if (failed_sync_nodes == NULL) {
failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
}
/* If the node is already in the table then we failed to nack it during the
* filter offer step
*/
CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
GINT_TO_POINTER(join_id)));
}
/*!
* \internal
* \brief Look up a node name in the failed sync table
*
* \param[in] node_name Name of node to look up
* \param[out] join_id Where to store the join ID of when the sync failed
*
* \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
* node name was found, or \p pcmk_rc_node_unknown otherwise.
* \note \p *join_id is set to -1 if the node is not found.
*/
static int
lookup_failed_sync_node(const char *node_name, gint *join_id)
{
*join_id = -1;
if (failed_sync_nodes != NULL) {
gpointer result = g_hash_table_lookup(failed_sync_nodes,
(gchar *) node_name);
if (result != NULL) {
*join_id = GPOINTER_TO_INT(result);
return pcmk_rc_ok;
}
}
return pcmk_rc_node_unknown;
}
void
crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase)
{
enum crm_join_phase last = 0;
CRM_CHECK(node != NULL, return);
/* Remote nodes do not participate in joins */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return;
}
last = node->join;
if(phase == last) {
crm_trace("Node %s join-%d phase is still %s "
CRM_XS " nodeid=%u source=%s",
node->uname, current_join_id, crm_join_phase_str(last),
node->id, source);
} else if ((phase <= crm_join_none) || (phase == (last + 1))) {
node->join = phase;
crm_trace("Node %s join-%d phase is now %s (was %s) "
CRM_XS " nodeid=%u source=%s",
node->uname, current_join_id, crm_join_phase_str(phase),
crm_join_phase_str(last), node->id, source);
} else {
crm_warn("Rejecting join-%d phase update for node %s because "
"can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
current_join_id, node->uname, crm_join_phase_str(last),
crm_join_phase_str(phase), node->id, source);
}
}
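An illustration of the phase rule enforced above: a peer's join phase may only advance one step at a time, though a reset (or nack) is always allowed. The sequence below is hypothetical:
crm_update_peer_join(__func__, node, crm_join_welcomed);   /* OK: none -> welcomed */
crm_update_peer_join(__func__, node, crm_join_finalized);  /* rejected: skips integrated */
crm_update_peer_join(__func__, node, crm_join_none);       /* OK: reset is always allowed */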
static void
start_join_round(void)
{
GHashTableIter iter;
crm_node_t *peer = NULL;
crm_debug("Starting new join round join-%d", current_join_id);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
crm_update_peer_join(__func__, peer, crm_join_none);
}
if (max_generation_from != NULL) {
free(max_generation_from);
max_generation_from = NULL;
}
if (max_generation_xml != NULL) {
free_xml(max_generation_xml);
max_generation_xml = NULL;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
}
/*!
* \internal
* \brief Create a join message from the DC
*
* \param[in] join_op Join operation name
* \param[in] host_to Recipient of message
*/
static xmlNode *
create_dc_message(const char *join_op, const char *host_to)
{
xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
CRM_SYSTEM_DC, NULL);
/* Identify which election this is a part of */
crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
/* Add a field specifying whether the DC is shutting down. This keeps the
* joining node from fencing the old DC if it becomes the new DC.
*/
pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
pcmk_is_set(controld_globals.fsa_input_register,
R_SHUTDOWN));
return msg;
}
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *offer = NULL;
crm_node_t *member = (crm_node_t *)value;
CRM_ASSERT(member != NULL);
if (!pcmk__cluster_is_node_active(member)) {
crm_info("Not making join-%d offer to inactive node %s",
current_join_id,
(member->uname? member->uname : "with unknown name"));
if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) {
/* You would think this unsafe, but in fact this plus an
* active resource is what causes it to be fenced.
*
* Yes, this does mean that any node that dies at the same
* time as the old DC and is not (still) running a resource
* won't be fenced.
*
* I'm not happy about this either.
*/
pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
}
return;
}
if (member->uname == NULL) {
crm_info("Not making join-%d offer to node uuid %s with unknown name",
current_join_id, member->uuid);
return;
}
if (controld_globals.membership_id != crm_peer_seq) {
controld_globals.membership_id = crm_peer_seq;
crm_info("Making join-%d offers based on membership event %llu",
current_join_id, crm_peer_seq);
}
if(user_data && member->join > crm_join_none) {
crm_info("Not making join-%d offer to already known node %s (%s)",
current_join_id, member->uname,
crm_join_phase_str(member->join));
return;
}
crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none);
offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
// Advertise our feature set so the joining node can bail if not compatible
crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
send_cluster_message(member, crm_msg_crmd, offer, TRUE);
free_xml(offer);
crm_update_peer_join(__func__, member, crm_join_welcomed);
}
/* A_DC_JOIN_OFFER_ALL */
void
do_dc_join_offer_all(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int count;
/* Reset everyone's status back to down or in_ccm in the CIB.
* Any nodes that are active in the CIB but not in the cluster membership
* will be seen as offline by the scheduler anyway.
*/
current_join_id++;
start_join_round();
update_dc(NULL);
if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
crm_info("A new node joined the cluster");
}
g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
count = crmd_join_phase_count(crm_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
/* A_DC_JOIN_OFFER_ONE */
void
do_dc_join_offer_one(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_node_t *member;
ha_msg_input_t *welcome = NULL;
int count;
const char *join_to = NULL;
if (msg_data->data == NULL) {
crm_info("Making join-%d offers to any unconfirmed nodes "
"because an unknown node joined", current_join_id);
g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
check_join_state(cur_state, __func__);
return;
}
welcome = fsa_typed_data(fsa_dt_ha_msg);
if (welcome == NULL) {
// fsa_typed_data() already logged an error
return;
}
join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
if (join_to == NULL) {
crm_err("Can't make join-%d offer to unknown node", current_join_id);
return;
}
- member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster);
+ member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
/* It is possible that a node will have been sick or starting up when the
* original offer was made. However, it will either re-announce itself in
* due course, or we can re-store the original offer on the client.
*/
crm_update_peer_join(__func__, member, crm_join_none);
join_make_offer(NULL, member, NULL);
/* If the offer isn't to the local node, make an offer to the local node as
* well, to ensure the correct value for max_generation_from.
*/
if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
member = pcmk__get_node(0, controld_globals.our_nodename, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
join_make_offer(NULL, member, NULL);
}
/* This was a genuine join request; cancel any existing transition and
* invoke the scheduler.
*/
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
NULL);
count = crmd_join_phase_count(crm_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
const char *elem_l = crm_element_value(left, field);
const char *elem_r = crm_element_value(right, field);
long long int_elem_l;
long long int_elem_r;
pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
if (int_elem_l < int_elem_r) {
return -1;
} else if (int_elem_l > int_elem_r) {
return 1;
}
return 0;
}
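A minimal sketch of how do_dc_join_filter_offer() below ranks CIB generations with this helper: fields are compared in order of significance, so PCMK_XA_ADMIN_EPOCH dominates PCMK_XA_EPOCH, which dominates PCMK_XA_NUM_UPDATES. This wrapper function is hypothetical; the real code inlines the loop.
static int
compare_generations(xmlNode *left, xmlNode *right)
{
    const char *attributes[] = {
        PCMK_XA_ADMIN_EPOCH,
        PCMK_XA_EPOCH,
        PCMK_XA_NUM_UPDATES,
    };
    int cmp = 0;

    for (int lpc = 0; (cmp == 0) && (lpc < PCMK__NELEM(attributes)); lpc++) {
        cmp = compare_int_fields(left, right, attributes[lpc]);
    }
    return cmp;  /* <0: right is newer; >0: left is newer */
}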
/* A_DC_JOIN_PROCESS_REQ */
void
do_dc_join_filter_offer(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *generation = NULL;
int cmp = 0;
int join_id = -1;
int count = 0;
gint value = 0;
gboolean ack_nack_bool = TRUE;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
const char *join_version = crm_element_value(join_ack->msg,
PCMK_XA_CRM_FEATURE_SET);
crm_node_t *join_node = NULL;
if (join_from == NULL) {
crm_err("Ignoring invalid join request without node name");
return;
}
- join_node = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster);
+ join_node = pcmk__get_node(0, join_from, NULL,
+ pcmk__node_search_cluster_member);
crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
if (join_id != current_join_id) {
crm_debug("Ignoring join-%d request from %s because we are on join-%d",
join_id, join_from, current_join_id);
check_join_state(cur_state, __func__);
return;
}
generation = join_ack->xml;
if (max_generation_xml != NULL && generation != NULL) {
int lpc = 0;
const char *attributes[] = {
PCMK_XA_ADMIN_EPOCH,
PCMK_XA_EPOCH,
PCMK_XA_NUM_UPDATES,
};
/* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
* element from the join client. The "if" guard is for clarity.
*/
if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
cmp = compare_int_fields(max_generation_xml, generation,
attributes[lpc]);
}
} else { // Should always be PCMK__XE_GENERATION_TUPLE
CRM_LOG_ASSERT(false);
}
}
if (ref == NULL) {
ref = "none"; // for logging only
}
if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
crm_err("Rejecting join-%d request from node %s because we failed to "
"sync its CIB in join-%d " CRM_XS " ref=%s",
join_id, join_from, value, ref);
ack_nack_bool = FALSE;
} else if (!pcmk__cluster_is_node_active(join_node)) {
if (match_down_event(join_from) != NULL) {
/* The join request was received after the node was fenced or
* otherwise shutdown in a way that we're aware of. No need to log
* an error in this rare occurrence; we know the client was recently
* shut down, and receiving a lingering in-flight request is not
* cause for alarm.
*/
crm_debug("Rejecting join-%d request from inactive node %s "
CRM_XS " ref=%s", join_id, join_from, ref);
} else {
crm_err("Rejecting join-%d request from inactive node %s "
CRM_XS " ref=%s", join_id, join_from, ref);
}
ack_nack_bool = FALSE;
} else if (generation == NULL) {
crm_err("Rejecting invalid join-%d request from node %s "
"missing CIB generation " CRM_XS " ref=%s",
join_id, join_from, ref);
ack_nack_bool = FALSE;
} else if ((join_version == NULL)
|| !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
crm_err("Rejecting join-%d request from node %s because feature set %s"
" is incompatible with ours (%s) " CRM_XS " ref=%s",
join_id, join_from, (join_version? join_version : "pre-3.1.0"),
CRM_FEATURE_SET, ref);
ack_nack_bool = FALSE;
} else if (max_generation_xml == NULL) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with first CIB "
"generation) due to unknown schema version %s "
CRM_XS " ref=%s",
join_id, join_from, pcmk__s(validation, "(missing)"), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with first CIB "
"generation) " CRM_XS " ref=%s",
join_id, join_from, ref);
max_generation_xml = pcmk__xml_copy(NULL, generation);
pcmk__str_update(&max_generation_from, join_from);
}
} else if ((cmp < 0)
|| ((cmp == 0)
&& pcmk__str_eq(join_from, controld_globals.our_nodename,
pcmk__str_casei))) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with better CIB "
"generation than current best from %s) due to unknown "
"schema version %s " CRM_XS " ref=%s",
join_id, join_from, max_generation_from,
pcmk__s(validation, "(missing)"), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with better CIB "
"generation than current best from %s) " CRM_XS " ref=%s",
join_id, join_from, max_generation_from, ref);
crm_log_xml_debug(max_generation_xml, "Old max generation");
crm_log_xml_debug(generation, "New max generation");
free_xml(max_generation_xml);
max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
pcmk__str_update(&max_generation_from, join_from);
}
} else {
crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
join_id, join_from, ref);
}
if (!ack_nack_bool) {
if (compare_version(join_version, "3.17.0") < 0) {
/* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely
 * after a nack message, so don't send one
 */
crm_update_peer_join(__func__, join_node, crm_join_nack_quiet);
} else {
crm_update_peer_join(__func__, join_node, crm_join_nack);
}
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
} else {
crm_update_peer_join(__func__, join_node, crm_join_integrated);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
}
count = crmd_join_phase_count(crm_join_integrated);
crm_debug("%d node%s currently integrated in join-%d",
count, pcmk__plural_s(count), join_id);
if (check_join_state(cur_state, __func__) == FALSE) {
// Don't waste time by invoking the scheduler yet
count = crmd_join_phase_count(crm_join_welcomed);
crm_debug("Waiting on join-%d requests from %d outstanding node%s",
join_id, count, pcmk__plural_s(count));
}
}
/* A_DC_JOIN_FINALIZE */
void
do_dc_join_finalize(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
char *sync_from = NULL;
int rc = pcmk_ok;
int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
int count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ crmd_join_phase_count(crm_join_nack)
+ crmd_join_phase_count(crm_join_nack_quiet);
/* We can do this straight away, to avoid clients timing us out
 * while we compute the latest CIB
 */
if (count_welcomed != 0) {
crm_debug("Waiting on join-%d requests from %d outstanding node%s "
"before finalizing join", current_join_id, count_welcomed,
pcmk__plural_s(count_welcomed));
crmd_join_phase_log(LOG_DEBUG);
/* crmd_fsa_stall(FALSE); Needed? */
return;
} else if (count_finalizable == 0) {
crm_debug("Finalization not needed for join-%d at the current time",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
check_join_state(controld_globals.fsa_state, __func__);
return;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
pcmk__str_null_matches|pcmk__str_casei)) {
controld_set_fsa_input_flags(R_HAVE_CIB);
}
if (!controld_globals.transition_graph->complete) {
crm_warn("Delaying join-%d finalization while transition in progress",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
crmd_fsa_stall(FALSE);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
// Send our CIB out to everyone
sync_from = pcmk__str_copy(controld_globals.our_nodename);
crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable));
crm_log_xml_debug(max_generation_xml, "Requested CIB version");
} else {
// Ask for the agreed best CIB
sync_from = pcmk__str_copy(max_generation_from);
crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable), sync_from);
crm_log_xml_notice(max_generation_xml, "Requested CIB version");
}
crmd_join_phase_log(LOG_DEBUG);
rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
sync_from, NULL, cib_none);
fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
}
void
free_max_generation(void)
{
free(max_generation_from);
max_generation_from = NULL;
free_xml(max_generation_xml);
max_generation_xml = NULL;
}
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
CRM_LOG_ASSERT(-EPERM != rc);
if (rc != pcmk_ok) {
const char *sync_from = (const char *) user_data;
do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
"Could not sync CIB from %s in join-%d: %s",
sync_from, current_join_id, pcmk_strerror(rc));
if (rc != -pcmk_err_old_data) {
record_failed_sync_node(sync_from, current_join_id);
}
/* restart the whole join process */
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
__func__);
} else if (!AM_I_DC) {
crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
} else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
"(%s)", current_join_id,
fsa_state2string(controld_globals.fsa_state));
} else {
controld_set_fsa_input_flags(R_HAVE_CIB);
/* make sure dc_uuid is re-set to us */
if (!check_join_state(controld_globals.fsa_state, __func__)) {
int count_finalizable = 0;
count_finalizable = crmd_join_phase_count(crm_join_integrated)
+ crmd_join_phase_count(crm_join_nack)
+ crmd_join_phase_count(crm_join_nack_quiet);
crm_debug("Notifying %d node%s of join-%d results",
count_finalizable, pcmk__plural_s(count_finalizable),
current_join_id);
g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
}
}
}
static void
join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data)
{
const char *node = user_data;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL; // for register_fsa_error() macro
crm_crit("join-%d node history update (via CIB call %d) for node %s "
"failed: %s",
current_join_id, call_id, node, pcmk_strerror(rc));
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
crm_debug("join-%d node history update (via CIB call %d) for node %s "
"complete",
current_join_id, call_id, node);
check_join_state(controld_globals.fsa_state, __func__);
}
/* A_DC_JOIN_PROCESS_ACK */
void
do_dc_join_ack(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int join_id = -1;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
crm_node_t *peer = NULL;
enum controld_section_e section = controld_section_lrm;
char *xpath = NULL;
xmlNode *state = join_ack->xml;
xmlNode *execd_state = NULL;
cib_t *cib = controld_globals.cib_conn;
int rc = pcmk_ok;
// Sanity checks
if (join_from == NULL) {
crm_warn("Ignoring message received without node identification");
goto done;
}
if (op == NULL) {
crm_warn("Ignoring message received from %s without task", join_from);
goto done;
}
if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
op, join_from, CRM_OP_JOIN_CONFIRM);
goto done;
}
if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
crm_warn("Ignoring join confirmation from %s without valid join ID",
join_from);
goto done;
}
- peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster);
+ peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
if (peer->join != crm_join_finalized) {
crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
"(currently %s not %s)",
join_id, join_from, crm_join_phase_str(peer->join),
crm_join_phase_str(crm_join_finalized));
goto done;
}
if (join_id != current_join_id) {
crm_err("Rejecting join-%d confirmation from %s "
"because currently on join-%d",
join_id, join_from, current_join_id);
crm_update_peer_join(__func__, peer, crm_join_nack);
goto done;
}
crm_update_peer_join(__func__, peer, crm_join_confirmed);
/* Update CIB with node's current executor state. A new transition will be
* triggered later, when the CIB manager notifies us of the change.
*
* The delete and modify requests are part of an atomic transaction.
*/
rc = cib->cmds->init_transaction(cib);
if (rc != pcmk_ok) {
goto done;
}
// Delete relevant parts of node's current executor state from CIB
if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
section = controld_section_lrm_unlocked;
}
controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
rc = cib->cmds->remove(cib, xpath, NULL,
cib_scope_local
|cib_xpath
|cib_multiple
|cib_transaction);
if (rc != pcmk_ok) {
goto done;
}
// Update CIB with node's latest known executor state
if (pcmk__str_eq(join_from, controld_globals.our_nodename,
pcmk__str_casei)) {
// Use the latest possible state if processing our own join ack
execd_state = controld_query_executor_state();
if (execd_state != NULL) {
crm_debug("Updating local node history for join-%d from query "
"result",
current_join_id);
state = execd_state;
} else {
crm_warn("Updating local node history from join-%d confirmation "
"because query failed",
current_join_id);
}
} else {
crm_debug("Updating node history for %s from join-%d confirmation",
join_from, current_join_id);
}
rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
cib_scope_local|cib_can_create|cib_transaction);
free_xml(execd_state);
if (rc != pcmk_ok) {
goto done;
}
// Commit the transaction
rc = cib->cmds->end_transaction(cib, true, cib_scope_local);
fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
if (rc > 0) {
// join_from will be freed after callback
join_from = NULL;
rc = pcmk_ok;
}
done:
if (rc != pcmk_ok) {
crm_crit("join-%d node history update for node %s failed: %s",
current_join_id, join_from, pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free(join_from);
free(xpath);
}
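/*!
* \internal
* \brief Ack or nack a node's join request (hash table iteration callback)
*
* \param[in] key Ignored
* \param[in] value Node whose join should be finalized (crm_node_t *)
* \param[in] user_data Ignored
*/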
void
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *acknak = NULL;
xmlNode *tmp1 = NULL;
crm_node_t *join_node = value;
const char *join_to = join_node->uname;
bool integrated = false;
switch (join_node->join) {
case crm_join_integrated:
integrated = true;
break;
case crm_join_nack:
case crm_join_nack_quiet:
break;
default:
crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
"for join-%d", join_to,
crm_join_phase_str(join_node->join), current_join_id);
return;
}
/* Update the <node> element with the node's name and UUID, in case they
* weren't known before
*/
crm_trace("Updating node name and UUID in CIB for %s", join_to);
tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
crm_xml_add(tmp1, PCMK_XA_ID, crm_peer_uuid(join_node));
crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
free_xml(tmp1);
if (join_node->join == crm_join_nack_quiet) {
crm_trace("Not sending nack message to node %s with feature set older "
"than 3.17.0", join_to);
return;
}
- join_node = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster);
+ join_node = pcmk__get_node(0, join_to, NULL,
+ pcmk__node_search_cluster_member);
if (!pcmk__cluster_is_node_active(join_node)) {
/*
* NACK'ing nodes that the membership layer doesn't know about yet
* simply creates more churn
*
* Better to leave them waiting and let the join restart when
* the new membership event comes in
*
* All other NACKs (due to versions etc) should still be processed
*/
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
return;
}
// Acknowledge or nack node's join request
crm_debug("%sing join-%d request from %s",
integrated? "Acknowledg" : "Nack", current_join_id, join_to);
acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
if (integrated) {
// No change needed for a nacked node
crm_update_peer_join(__func__, join_node, crm_join_finalized);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
/* Iterate through the remote peer cache, and add information to the ACK
* message about which cluster node hosts each remote node. This keeps new
* controllers in sync with what has already happened.
*/
if (pcmk__cluster_num_remote_nodes() > 0) {
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
g_hash_table_iter_init(&iter, crm_remote_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *remote = NULL;
if (!node->conn_host) {
continue;
}
remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
pcmk__xe_set_props(remote,
PCMK_XA_ID, node->uname,
PCMK__XA_NODE_STATE, node->state,
PCMK__XA_CONNECTION_HOST, node->conn_host,
NULL);
}
}
}
send_cluster_message(join_node, crm_msg_crmd, acknak, TRUE);
free_xml(acknak);
return;
}
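/*!
* \internal
* \brief Check whether the current join round can progress, and register the
* appropriate FSA input if so
*
* \param[in] cur_state Current FSA state
* \param[in] source Caller's function name (for logging)
*
* \return TRUE if integration or finalization is complete (or finalization
* must wait for the CIB), otherwise FALSE
*/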
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
static unsigned long long highest_seq = 0;
if (controld_globals.membership_id != crm_peer_seq) {
crm_debug("join-%d: Membership changed from %llu to %llu "
CRM_XS " highest=%llu state=%s for=%s",
current_join_id, controld_globals.membership_id, crm_peer_seq,
highest_seq, fsa_state2string(cur_state), source);
if(highest_seq < crm_peer_seq) {
/* Don't spam the FSA with duplicates */
highest_seq = crm_peer_seq;
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
} else if (cur_state == S_INTEGRATION) {
if (crmd_join_phase_count(crm_join_welcomed) == 0) {
int count = crmd_join_phase_count(crm_join_integrated);
crm_debug("join-%d: Integration of %d peer%s complete "
CRM_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
return TRUE;
}
} else if (cur_state == S_FINALIZE_JOIN) {
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
crm_debug("join-%d: Delaying finalization until we have CIB "
CRM_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
return TRUE;
} else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
int count = crmd_join_phase_count(crm_join_welcomed);
crm_debug("join-%d: Still waiting on %d welcomed node%s "
CRM_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(crm_join_integrated) != 0) {
int count = crmd_join_phase_count(crm_join_integrated);
crm_debug("join-%d: Still waiting on %d integrated node%s "
CRM_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(crm_join_finalized) != 0) {
int count = crmd_join_phase_count(crm_join_finalized);
crm_debug("join-%d: Still waiting on %d finalized node%s "
CRM_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else {
crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
return TRUE;
}
}
return FALSE;
}
void
do_dc_join_final(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
crm_update_quorum(crm_have_quorum, TRUE);
}
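/*!
* \brief Get the number of peer cache entries in a given join phase
*
* \param[in] phase Join phase to count
*
* \return Number of cluster nodes currently in \p phase
*/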
int crmd_join_phase_count(enum crm_join_phase phase)
{
int count = 0;
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
if(peer->join == phase) {
count++;
}
}
return count;
}
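/*!
* \brief Log the join phase of every known cluster node
*
* \param[in] level Log level to use
*/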
void crmd_join_phase_log(int level)
{
crm_node_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname,
crm_join_phase_str(peer->join));
}
}
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index 262b442c5d..ba7c9f14c9 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -1,1358 +1,1358 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <string.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cib.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static enum crmd_fsa_input handle_message(xmlNode *msg,
enum crmd_fsa_cause cause);
static void handle_response(xmlNode *stored_msg);
static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
enum crmd_fsa_cause cause);
static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
static void send_msg_via_ipc(xmlNode * msg, const char *sys);
/* Used for debugging only; wrapping on overflow is harmless */
static int last_data_id = 0;
void
register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
fsa_data_t * cur_data, void *new_data, const char *raised_from)
{
/* save the current actions if any */
if (controld_globals.fsa_actions != A_NOTHING) {
register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
I_NULL, cur_data ? cur_data->data : NULL,
controld_globals.fsa_actions, TRUE, __func__);
}
/* reset the action list */
crm_info("Resetting the current action list");
fsa_dump_actions(controld_globals.fsa_actions, "Drop");
controld_globals.fsa_actions = A_NOTHING;
/* register the error */
register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
}
void
register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
void *data, uint64_t with_actions,
gboolean prepend, const char *raised_from)
{
unsigned old_len = g_list_length(controld_globals.fsa_message_queue);
fsa_data_t *fsa_data = NULL;
if (raised_from == NULL) {
raised_from = "<unknown>";
}
if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
/* no point doing anything */
crm_err("Cannot add entry to queue: no input and no action");
return;
}
if (input == I_WAIT_FOR_EVENT) {
controld_set_global_flags(controld_fsa_is_stalled);
crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
raised_from, fsa_cause2string(cause), data, old_len);
if (old_len > 0) {
fsa_dump_queue(LOG_TRACE);
prepend = FALSE;
}
if (data == NULL) {
controld_set_fsa_action_flags(with_actions);
fsa_dump_actions(with_actions, "Restored");
return;
}
/* Store everything in the new event and reset
* controld_globals.fsa_actions
*/
with_actions |= controld_globals.fsa_actions;
controld_globals.fsa_actions = A_NOTHING;
}
last_data_id++;
crm_trace("%s %s FSA input %d (%s) due to %s, %s data",
raised_from, (prepend? "prepended" : "appended"), last_data_id,
fsa_input2string(input), fsa_cause2string(cause),
(data? "with" : "without"));
fsa_data = pcmk__assert_alloc(1, sizeof(fsa_data_t));
fsa_data->id = last_data_id;
fsa_data->fsa_input = input;
fsa_data->fsa_cause = cause;
fsa_data->origin = raised_from;
fsa_data->data = NULL;
fsa_data->data_type = fsa_dt_none;
fsa_data->actions = with_actions;
if (with_actions != A_NOTHING) {
crm_trace("Adding actions %.16llx to input",
(unsigned long long) with_actions);
}
if (data != NULL) {
switch (cause) {
case C_FSA_INTERNAL:
case C_CRMD_STATUS_CALLBACK:
case C_IPC_MESSAGE:
case C_HA_MESSAGE:
CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
crm_err("Bogus data from %s", raised_from));
crm_trace("Copying %s data from %s as cluster message data",
fsa_cause2string(cause), raised_from);
fsa_data->data = copy_ha_msg_input(data);
fsa_data->data_type = fsa_dt_ha_msg;
break;
case C_LRM_OP_CALLBACK:
crm_trace("Copying %s data from %s as lrmd_event_data_t",
fsa_cause2string(cause), raised_from);
fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
fsa_data->data_type = fsa_dt_lrm;
break;
case C_TIMER_POPPED:
case C_SHUTDOWN:
case C_UNKNOWN:
case C_STARTUP:
crm_crit("Copying %s data (from %s) is not yet implemented",
fsa_cause2string(cause), raised_from);
crmd_exit(CRM_EX_SOFTWARE);
break;
}
}
/* make sure to free it properly later */
if (prepend) {
controld_globals.fsa_message_queue
= g_list_prepend(controld_globals.fsa_message_queue, fsa_data);
} else {
controld_globals.fsa_message_queue
= g_list_append(controld_globals.fsa_message_queue, fsa_data);
}
crm_trace("FSA message queue length is %d",
g_list_length(controld_globals.fsa_message_queue));
/* fsa_dump_queue(LOG_TRACE); */
if (old_len == g_list_length(controld_globals.fsa_message_queue)) {
crm_err("Couldn't add message to the queue");
}
if (input != I_WAIT_FOR_EVENT) {
controld_trigger_fsa();
}
}
void
fsa_dump_queue(int log_level)
{
int offset = 0;
for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
iter = iter->next) {
fsa_data_t *data = (fsa_data_t *) iter->data;
do_crm_log_unlikely(log_level,
"queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
offset++, data->id, fsa_input2string(data->fsa_input),
data->origin, data->data, data->data_type,
fsa_cause2string(data->fsa_cause));
}
}
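/*!
* \brief Deep-copy an FSA message input
*
* \param[in] orig Input to copy (may be NULL)
*
* \return Newly allocated copy, whose \c xml member points to the message
* data wrapped inside the copied \c msg
*/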
ha_msg_input_t *
copy_ha_msg_input(ha_msg_input_t * orig)
{
xmlNode *wrapper = NULL;
ha_msg_input_t *copy = pcmk__assert_alloc(1, sizeof(ha_msg_input_t));
copy->msg = (orig != NULL)? pcmk__xml_copy(NULL, orig->msg) : NULL;
wrapper = pcmk__xe_first_child(copy->msg, PCMK__XE_CRM_XML, NULL, NULL);
copy->xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
return copy;
}
void
delete_fsa_input(fsa_data_t * fsa_data)
{
lrmd_event_data_t *op = NULL;
xmlNode *foo = NULL;
if (fsa_data == NULL) {
return;
}
crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));
if (fsa_data->data != NULL) {
switch (fsa_data->data_type) {
case fsa_dt_ha_msg:
delete_ha_msg_input(fsa_data->data);
break;
case fsa_dt_xml:
foo = fsa_data->data;
free_xml(foo);
break;
case fsa_dt_lrm:
op = (lrmd_event_data_t *) fsa_data->data;
lrmd_free_event(op);
break;
case fsa_dt_none:
if (fsa_data->data != NULL) {
crm_err("Don't know how to free %s data from %s",
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
crmd_exit(CRM_EX_SOFTWARE);
}
break;
}
crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
}
free(fsa_data);
}
/* returns the next message */
fsa_data_t *
get_message(void)
{
fsa_data_t *message
= (fsa_data_t *) controld_globals.fsa_message_queue->data;
controld_globals.fsa_message_queue
= g_list_remove(controld_globals.fsa_message_queue, message);
crm_trace("Processing input %d", message->id);
return message;
}
void *
fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
{
void *ret_val = NULL;
if (fsa_data == NULL) {
crm_err("%s: No FSA data available", caller);
} else if (fsa_data->data == NULL) {
crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
} else if (fsa_data->data_type != a_type) {
crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
caller, fsa_data->data_type, a_type, fsa_data->origin);
CRM_ASSERT(fsa_data->data_type == a_type);
} else {
ret_val = fsa_data->data;
}
return ret_val;
}
/* A_MSG_ROUTE */
void
do_msg_route(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
route_message(msg_data->fsa_cause, input->msg);
}
void
route_message(enum crmd_fsa_cause cause, xmlNode * input)
{
ha_msg_input_t fsa_input;
enum crmd_fsa_input result = I_NULL;
fsa_input.msg = input;
CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);
/* try passing the buck first */
if (relay_message(input, cause == C_IPC_MESSAGE)) {
return;
}
/* handle locally */
result = handle_message(input, cause);
/* done or process later? */
switch (result) {
case I_NULL:
case I_CIB_OP:
case I_ROUTER:
case I_NODE_JOIN:
case I_JOIN_REQUEST:
case I_JOIN_RESULT:
break;
default:
/* Deferring local processing of message */
register_fsa_input_later(cause, result, &fsa_input);
return;
}
if (result != I_NULL) {
/* add to the front of the queue */
register_fsa_input(cause, result, &fsa_input);
}
}
gboolean
relay_message(xmlNode * msg, gboolean originated_locally)
{
enum crm_ais_msg_types dest = crm_msg_ais;
bool is_for_dc = false;
bool is_for_dcib = false;
bool is_for_te = false;
bool is_for_crm = false;
bool is_for_cib = false;
bool is_local = false;
bool broadcast = false;
const char *host_to = NULL;
const char *sys_to = NULL;
const char *sys_from = NULL;
const char *type = NULL;
const char *task = NULL;
const char *ref = NULL;
crm_node_t *node_to = NULL;
CRM_CHECK(msg != NULL, return TRUE);
host_to = crm_element_value(msg, PCMK__XA_CRM_HOST_TO);
sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
type = crm_element_value(msg, PCMK__XA_T);
task = crm_element_value(msg, PCMK__XA_CRM_TASK);
ref = crm_element_value(msg, PCMK_XA_REFERENCE);
broadcast = pcmk__str_empty(host_to);
if (ref == NULL) {
ref = "without reference ID";
}
if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
crm_trace("Received hello %s from %s (no processing needed)",
ref, pcmk__s(sys_from, "unidentified source"));
crm_log_xml_trace(msg, "hello");
return TRUE;
}
// Require message type (set by create_request())
if (!pcmk__str_eq(type, PCMK__VALUE_CRMD, pcmk__str_none)) {
crm_warn("Ignoring invalid message %s with type '%s' "
"(not '" PCMK__VALUE_CRMD "')",
ref, pcmk__s(type, ""));
crm_log_xml_trace(msg, "ignored");
return TRUE;
}
// Require a destination subsystem (also set by create_request())
if (sys_to == NULL) {
crm_warn("Ignoring invalid message %s with no " PCMK__XA_CRM_SYS_TO,
ref);
crm_log_xml_trace(msg, "ignored");
return TRUE;
}
// Get the message type appropriate to the destination subsystem
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
dest = text2msg_type(sys_to);
if ((dest < crm_msg_ais) || (dest > crm_msg_stonith_ng)) {
/* Unrecognized value, use a sane default
*
* @TODO Maybe we should bail instead
*/
dest = crm_msg_crmd;
}
}
is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
// Check whether message should be processed locally
is_local = false;
if (broadcast) {
if (is_for_dc || is_for_te) {
is_local = false;
} else if (is_for_crm) {
if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
PCMK__CONTROLD_CMD_NODES, NULL)) {
/* Node info requests do not specify a host, which is normally
* treated as "all hosts", because the whole point is that the
* client may not know the local node name. Always handle these
* requests locally.
*/
is_local = true;
} else {
is_local = !originated_locally;
}
} else {
is_local = true;
}
} else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
pcmk__str_casei)) {
is_local = true;
} else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL,
NULL);
xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
// Local delete of an offline node's resource history
is_local = true;
}
}
// Check whether message should be relayed
if (is_for_dc || is_for_dcib || is_for_te) {
if (AM_I_DC) {
if (is_for_te) {
crm_trace("Route message %s locally as transition request",
ref);
crm_log_xml_trace(msg, sys_to);
send_msg_via_ipc(msg, sys_to);
return TRUE; // No further processing of message is needed
}
crm_trace("Route message %s locally as DC request", ref);
return FALSE; // More to be done by caller
}
if (originated_locally
&& !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
CRM_SYSTEM_TENGINE, NULL)) {
crm_trace("Relay message %s to DC (via %s)",
ref, pcmk__s(host_to, "broadcast"));
crm_log_xml_trace(msg, "relayed");
if (!broadcast) {
node_to = pcmk__get_node(0, host_to, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
}
send_cluster_message(node_to, dest, msg, TRUE);
return TRUE;
}
/* Transition engine and scheduler messages are sent only to the DC on
* the same node. If we are no longer the DC, discard this message.
*/
crm_trace("Ignoring message %s because we are no longer DC", ref);
crm_log_xml_trace(msg, "ignored");
return TRUE; // No further processing of message is needed
}
if (is_local) {
if (is_for_crm || is_for_cib) {
crm_trace("Route message %s locally as controller request", ref);
return FALSE; // More to be done by caller
}
crm_trace("Relay message %s locally to %s", ref, sys_to);
crm_log_xml_trace(msg, "IPC-relay");
send_msg_via_ipc(msg, sys_to);
return TRUE;
}
if (!broadcast) {
node_to = pcmk__search_node_caches(0, host_to,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
if (node_to == NULL) {
crm_warn("Ignoring message %s because node %s is unknown",
ref, host_to);
crm_log_xml_trace(msg, "ignored");
return TRUE;
}
}
crm_trace("Relay message %s to %s",
ref, pcmk__s(host_to, "all peers"));
crm_log_xml_trace(msg, "relayed");
send_cluster_message(node_to, dest, msg, TRUE);
return TRUE;
}
// Return true if the given field contains a non-negative integer
static bool
authorize_version(xmlNode *message_data, const char *field,
const char *client_name, const char *ref, const char *uuid)
{
const char *version = crm_element_value(message_data, field);
long long version_num;
if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok)
|| (version_num < 0LL)) {
crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s "
CRM_XS " ref=%s uuid=%s",
client_name, ((version == NULL)? "" : version),
field, (ref? ref : "none"), uuid);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether a client IPC message is acceptable
*
* If a given client IPC message is a hello, "authorize" it by ensuring it has
* valid information such as a protocol version, and return false indicating
* that nothing further needs to be done with the message. If the message is not
* a hello, just return true to indicate it needs further processing.
*
* \param[in] client_msg XML of IPC message
* \param[in,out] curr_client If IPC is not proxied, client that sent message
* \param[in] proxy_session If IPC is proxied, the session ID
*
* \return true if message needs further processing, false if it doesn't
*/
bool
controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client,
const char *proxy_session)
{
xmlNode *wrapper = NULL;
xmlNode *message_data = NULL;
const char *client_name = NULL;
const char *op = crm_element_value(client_msg, PCMK__XA_CRM_TASK);
const char *ref = crm_element_value(client_msg, PCMK_XA_REFERENCE);
const char *uuid = (curr_client? curr_client->id : proxy_session);
if (uuid == NULL) {
crm_warn("IPC message from client rejected: No client identifier "
CRM_XS " ref=%s", (ref? ref : "none"));
goto rejected;
}
if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) {
// Only hello messages need to be authorized
return true;
}
wrapper = pcmk__xe_first_child(client_msg, PCMK__XE_CRM_XML, NULL, NULL);
message_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
client_name = crm_element_value(message_data, PCMK__XA_CLIENT_NAME);
if (pcmk__str_empty(client_name)) {
crm_warn("IPC hello from client rejected: No client name",
CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
goto rejected;
}
if (!authorize_version(message_data, PCMK__XA_MAJOR_VERSION, client_name,
ref, uuid)) {
goto rejected;
}
if (!authorize_version(message_data, PCMK__XA_MINOR_VERSION, client_name,
ref, uuid)) {
goto rejected;
}
crm_trace("Validated IPC hello from client %s", client_name);
crm_log_xml_trace(client_msg, "hello");
if (curr_client) {
curr_client->userdata = pcmk__str_copy(client_name);
}
controld_trigger_fsa();
return false;
rejected:
crm_log_xml_trace(client_msg, "rejected");
if (curr_client) {
qb_ipcs_disconnect(curr_client->ipcs);
}
return false;
}
static enum crmd_fsa_input
handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
{
const char *type = NULL;
CRM_CHECK(msg != NULL, return I_NULL);
type = crm_element_value(msg, PCMK__XA_SUBT);
if (pcmk__str_eq(type, PCMK__VALUE_REQUEST, pcmk__str_none)) {
return handle_request(msg, cause);
}
if (pcmk__str_eq(type, PCMK__VALUE_RESPONSE, pcmk__str_none)) {
handle_response(msg);
return I_NULL;
}
crm_warn("Ignoring message with unknown " PCMK__XA_SUBT" '%s'",
pcmk__s(type, ""));
crm_log_xml_trace(msg, "bad");
return I_NULL;
}
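/*!
* \brief Handle a CRM_OP_CLEAR_FAILCOUNT message
*
* \param[in] stored_msg Message XML
*
* \return Next FSA input
*/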
static enum crmd_fsa_input
handle_failcount_op(xmlNode * stored_msg)
{
const char *rsc = NULL;
const char *uname = NULL;
const char *op = NULL;
char *interval_spec = NULL;
guint interval_ms = 0;
gboolean is_remote_node = FALSE;
xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL,
NULL);
xmlNode *xml_op = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
if (xml_op) {
xmlNode *xml_rsc = pcmk__xe_first_child(xml_op, PCMK_XE_PRIMITIVE, NULL,
NULL);
xmlNode *xml_attrs = pcmk__xe_first_child(xml_op, PCMK__XE_ATTRIBUTES,
NULL, NULL);
if (xml_rsc) {
rsc = pcmk__xe_id(xml_rsc);
}
if (xml_attrs) {
op = crm_element_value(xml_attrs,
CRM_META "_" PCMK__META_CLEAR_FAILURE_OP);
crm_element_value_ms(xml_attrs,
CRM_META "_" PCMK__META_CLEAR_FAILURE_INTERVAL,
&interval_ms);
}
}
uname = crm_element_value(xml_op, PCMK__META_ON_NODE);
if ((rsc == NULL) || (uname == NULL)) {
crm_log_xml_warn(stored_msg, "invalid failcount op");
return I_NULL;
}
if (crm_element_value(xml_op, PCMK__XA_ROUTER_NODE)) {
is_remote_node = TRUE;
}
crm_debug("Clearing failures for %s-interval %s on %s "
"from attribute manager, CIB, and executor state",
pcmk__readable_interval(interval_ms), rsc, uname);
if (interval_ms) {
interval_spec = crm_strdup_printf("%ums", interval_ms);
}
update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
free(interval_spec);
controld_cib_delete_last_failure(rsc, uname, op, interval_ms);
lrm_clear_last_failure(rsc, uname, op, interval_ms);
return I_NULL;
}
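/*!
* \brief Handle a CRM_OP_LRM_DELETE message
*
* \param[in] stored_msg Message XML
*
* \return Next FSA input (I_ROUTER if the operation is relayed to the
* affected node)
*/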
static enum crmd_fsa_input
handle_lrm_delete(xmlNode *stored_msg)
{
const char *mode = NULL;
xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL,
NULL);
xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
CRM_CHECK(msg_data != NULL, return I_NULL);
/* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
* relay the operation to the affected node, which will unregister the
* resource from the local executor, clear the resource's history from the
* CIB, and do some bookkeeping in the controller.
*
* However, if the affected node is offline, the client will specify
* mode=PCMK__VALUE_CIB which means the controller receiving the operation
* should clear the resource's history from the CIB and nothing else. This
* is used to clear shutdown locks.
*/
mode = crm_element_value(msg_data, PCMK__XA_MODE);
if (!pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
// Relay to affected node
crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else {
// Delete CIB history locally (compare with do_lrm_delete())
const char *from_sys = NULL;
const char *user_name = NULL;
const char *rsc_id = NULL;
const char *node = NULL;
xmlNode *rsc_xml = NULL;
int rc = pcmk_rc_ok;
rsc_xml = pcmk__xe_first_child(msg_data, PCMK_XE_PRIMITIVE, NULL, NULL);
CRM_CHECK(rsc_xml != NULL, return I_NULL);
rsc_id = pcmk__xe_id(rsc_xml);
from_sys = crm_element_value(stored_msg, PCMK__XA_CRM_SYS_FROM);
node = crm_element_value(msg_data, PCMK__META_ON_NODE);
user_name = pcmk__update_acl_user(stored_msg, PCMK__XA_CRM_USER, NULL);
crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
"(clearing CIB resource history only)", rsc_id, node,
(user_name? " for user " : ""), (user_name? user_name : ""));
rc = controld_delete_resource_history(rsc_id, node, user_name,
cib_dryrun|cib_sync_call);
if (rc == pcmk_rc_ok) {
rc = controld_delete_resource_history(rsc_id, node, user_name,
crmd_cib_smart_opt());
}
/* Notify client. Also notify tengine if mode=PCMK__VALUE_CIB and
* op=CRM_OP_LRM_DELETE.
*/
if (from_sys) {
lrmd_event_data_t *op = NULL;
const char *from_host = crm_element_value(stored_msg, PCMK__XA_SRC);
const char *transition;
if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
transition = crm_element_value(msg_data,
PCMK__XA_TRANSITION_KEY);
} else {
transition = crm_element_value(stored_msg,
PCMK__XA_TRANSITION_KEY);
}
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "local node"), rsc_id,
((rc == pcmk_rc_ok)? "" : " not"));
op = lrmd_new_event(rsc_id, PCMK_ACTION_DELETE, 0);
op->type = lrmd_event_exec_complete;
op->user_data = pcmk__str_copy(pcmk__s(transition, FAKE_TE_ID));
op->params = pcmk__strkey_table(free, free);
pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET,
CRM_FEATURE_SET);
controld_rc2event(op, rc);
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
return I_NULL;
}
}
/*!
* \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_remote_state(const xmlNode *msg)
{
const char *conn_host = NULL;
const char *remote_uname = pcmk__xe_id(msg);
crm_node_t *remote_peer;
bool remote_is_up = false;
int rc = pcmk_rc_ok;
rc = pcmk__xe_get_bool_attr(msg, PCMK__XA_IN_CCM, &remote_is_up);
CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL);
remote_peer = pcmk__cluster_lookup_remote_node(remote_uname);
CRM_CHECK(remote_peer, return I_NULL);
pcmk__update_peer_state(__func__, remote_peer,
remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST,
0);
conn_host = crm_element_value(msg, PCMK__XA_CONNECTION_HOST);
if (conn_host) {
pcmk__str_update(&remote_peer->conn_host, conn_host);
} else if (remote_peer->conn_host) {
free(remote_peer->conn_host);
remote_peer->conn_host = NULL;
}
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_PING message
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_ping(const xmlNode *msg)
{
const char *value = NULL;
xmlNode *ping = NULL;
xmlNode *reply = NULL;
// Build reply
ping = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE);
value = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
crm_xml_add(ping, PCMK__XA_CRM_SUBSYSTEM, value);
// Add controller state
value = fsa_state2string(controld_globals.fsa_state);
crm_xml_add(ping, PCMK__XA_CRMD_STATE, value);
crm_notice("Current ping state: %s", value); // CTS needs this
// Add controller health
// @TODO maybe do some checks to determine meaningful status
crm_xml_add(ping, PCMK_XA_RESULT, "ok");
// Send reply
reply = create_reply(msg, ping);
free_xml(ping);
if (reply != NULL) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a PCMK__CONTROLD_CMD_NODES message
*
* \param[in] request Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_list(const xmlNode *request)
{
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *reply = NULL;
xmlNode *reply_data = NULL;
// Create message data for reply
reply_data = pcmk__xe_create(NULL, PCMK_XE_NODES);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
xmlNode *xml = pcmk__xe_create(reply_data, PCMK_XE_NODE);
crm_xml_add_ll(xml, PCMK_XA_ID, (long long) node->id); // uint32_t
crm_xml_add(xml, PCMK_XA_UNAME, node->uname);
crm_xml_add(xml, PCMK__XA_IN_CCM, node->state);
}
// Create and send reply
reply = create_reply(request, reply_data);
free_xml(reply_data);
if (reply) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_NODE_INFO request
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_info_request(const xmlNode *msg)
{
const char *value = NULL;
crm_node_t *node = NULL;
int node_id = 0;
xmlNode *reply = NULL;
xmlNode *reply_data = NULL;
// Build reply
reply_data = pcmk__xe_create(NULL, PCMK_XE_NODE);
crm_xml_add(reply_data, PCMK__XA_CRM_SUBSYSTEM, CRM_SYSTEM_CRMD);
// Add whether current partition has quorum
pcmk__xe_set_bool_attr(reply_data, PCMK_XA_HAVE_QUORUM,
pcmk_is_set(controld_globals.flags,
controld_has_quorum));
// Check whether client requested node info by ID and/or name
crm_element_value_int(msg, PCMK_XA_ID, &node_id);
if (node_id < 0) {
node_id = 0;
}
value = crm_element_value(msg, PCMK_XA_UNAME);
// Default to local node if none given
if ((node_id == 0) && (value == NULL)) {
value = controld_globals.our_nodename;
}
node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any);
if (node) {
crm_xml_add(reply_data, PCMK_XA_ID, node->uuid);
crm_xml_add(reply_data, PCMK_XA_UNAME, node->uname);
crm_xml_add(reply_data, PCMK_XA_CRMD, node->state);
pcmk__xe_set_bool_attr(reply_data, PCMK_XA_REMOTE_NODE,
pcmk_is_set(node->flags, crm_remote_node));
}
// Send reply
reply = create_reply(msg, reply_data);
free_xml(reply_data);
if (reply != NULL) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
static void
verify_feature_set(xmlNode *msg)
{
const char *dc_version = crm_element_value(msg, PCMK_XA_CRM_FEATURE_SET);
if (dc_version == NULL) {
/* All we really know is that the DC feature set is older than 3.1.0,
* but that's also all that really matters.
*/
dc_version = "3.0.14";
}
if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) {
crm_trace("Local feature set (%s) is compatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
} else {
crm_err("Local feature set (%s) is incompatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
// Nothing is likely to improve without administrator involvement
controld_set_fsa_input_flags(R_STAYDOWN);
crmd_exit(CRM_EX_FATAL);
}
}
// DC gets own shutdown all-clear
static enum crmd_fsa_input
handle_shutdown_self_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
// The expected case -- we initiated own shutdown sequence
crm_info("Shutting down controller");
return I_STOP;
}
if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) {
// Must be logic error -- DC confirming its own unrequested shutdown
crm_err("Shutting down controller immediately due to "
"unexpected shutdown confirmation");
return I_TERMINATE;
}
if (controld_globals.fsa_state != S_STOPPING) {
// Shouldn't happen -- non-DC confirming unrequested shutdown
crm_err("Starting new DC election because %s is "
"confirming shutdown we did not request",
(host_from? host_from : "another node"));
return I_ELECTION;
}
// Shouldn't happen, but we are already stopping anyway
crm_debug("Ignoring unexpected shutdown confirmation from %s",
(host_from? host_from : "another node"));
return I_NULL;
}
// Non-DC gets shutdown all-clear from DC
static enum crmd_fsa_input
handle_shutdown_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
if (host_from == NULL) {
crm_warn("Ignoring shutdown request without origin specified");
return I_NULL;
}
if (pcmk__str_eq(host_from, controld_globals.dc_name,
pcmk__str_null_matches|pcmk__str_casei)) {
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_info("Shutting down controller after confirmation from %s",
host_from);
} else {
crm_err("Shutting down controller after unexpected "
"shutdown request from %s", host_from);
controld_set_fsa_input_flags(R_STAYDOWN);
}
return I_STOP;
}
crm_warn("Ignoring shutdown request from %s because DC is %s",
host_from, controld_globals.dc_name);
return I_NULL;
}
static enum crmd_fsa_input
handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
{
xmlNode *msg = NULL;
const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);
/* Optimize this for the DC - it has the most to do */
crm_log_xml_trace(stored_msg, "request");
if (op == NULL) {
crm_warn("Ignoring request without " PCMK__XA_CRM_TASK);
return I_NULL;
}
if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
const char *from = crm_element_value(stored_msg, PCMK__XA_SRC);
- crm_node_t *node = pcmk__search_node_caches(0, from,
- pcmk__node_search_cluster);
+ crm_node_t *node =
+ pcmk__search_node_caches(0, from, pcmk__node_search_cluster_member);
pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
if(AM_I_DC == FALSE) {
return I_NULL; /* Done */
}
}
/*========== DC-Only Actions ==========*/
if (AM_I_DC) {
if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
return I_NODE_JOIN;
} else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
return I_JOIN_REQUEST;
} else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_self_ack(stored_msg);
} else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
// Another controller wants to shut down its node
return handle_shutdown_request(stored_msg);
}
}
/*========== common actions ==========*/
if (strcmp(op, CRM_OP_NOVOTE) == 0) {
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
__func__);
} else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
/* a remote connection host is letting us know the node state */
return handle_remote_state(stored_msg);
} else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
throttle_update(stored_msg);
if (AM_I_DC && (controld_globals.transition_graph != NULL)
&& !controld_globals.transition_graph->complete) {
crm_debug("The throttle changed. Trigger a graph.");
trigger_graph();
}
return I_NULL;
} else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
return handle_failcount_op(stored_msg);
} else if (strcmp(op, CRM_OP_VOTE) == 0) {
/* count the vote and decide what to do after that */
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
__func__);
/* Sometimes we _must_ go into S_ELECTION */
if (controld_globals.fsa_state == S_HALT) {
crm_debug("Forcing an election from S_HALT");
return I_ELECTION;
}
} else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
verify_feature_set(stored_msg);
crm_debug("Raising I_JOIN_OFFER: join-%s",
crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
return I_JOIN_OFFER;
} else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
crm_debug("Raising I_JOIN_RESULT: join-%s",
crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
return handle_lrm_delete(stored_msg);
} else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
|| (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
|| (strcmp(op, CRM_OP_REPROBE) == 0)) {
crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else if (strcmp(op, CRM_OP_NOOP) == 0) {
return I_NULL;
} else if (strcmp(op, CRM_OP_PING) == 0) {
return handle_ping(stored_msg);
} else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
return handle_node_info_request(stored_msg);
} else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
int id = 0;
const char *name = NULL;
crm_element_value_int(stored_msg, PCMK_XA_ID, &id);
name = crm_element_value(stored_msg, PCMK_XA_UNAME);
if(cause == C_IPC_MESSAGE) {
msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
} else {
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
}
free_xml(msg);
} else {
pcmk__cluster_forget_cluster_node(id, name);
/* If we're forgetting this node, also forget any failures to fence
* it, so we don't carry that over to any node added later with the
* same name.
*/
st_fail_count_reset(name);
}
} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML,
NULL, NULL);
xmlNode *xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
remote_ra_process_maintenance_nodes(xml);
} else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) {
return handle_node_list(stored_msg);
/*========== (NOT_DC)-Only Actions ==========*/
} else if (!AM_I_DC) {
if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_ack(stored_msg);
}
} else {
crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
crm_log_xml_err(stored_msg, "Unexpected");
}
return I_NULL;
}
static void
handle_response(xmlNode *stored_msg)
{
const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);
crm_log_xml_trace(stored_msg, "reply");
if (op == NULL) {
crm_warn("Ignoring reply without " PCMK__XA_CRM_TASK);
} else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
// Check whether the scheduler's answer has been superseded by a subsequent request
const char *msg_ref = crm_element_value(stored_msg, PCMK_XA_REFERENCE);
if (msg_ref == NULL) {
crm_err("%s - Ignoring calculation with no reference", op);
} else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
pcmk__str_none)) {
ha_msg_input_t fsa_input;
controld_stop_sched_timer();
fsa_input.msg = stored_msg;
register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
} else {
crm_info("%s calculation %s is obsolete", op, msg_ref);
}
} else if (strcmp(op, CRM_OP_VOTE) == 0
|| strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
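// These replies are expected and need no further processing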
} else {
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
op, host_from, AM_I_DC ? "DC" : "controller");
}
}
static enum crmd_fsa_input
handle_shutdown_request(xmlNode * stored_msg)
{
/* Handle this here to avoid potential version issues, in case the
* shutdown message or procedure changes in later versions.
*
* This way, the DC is always in control of the shutdown.
*/
char *now_s = NULL;
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
if (host_from == NULL) {
/* We are the DC, and we are shutting down */
host_from = controld_globals.our_nodename;
}
crm_info("Creating shutdown request for %s (state=%s)", host_from,
fsa_state2string(controld_globals.fsa_state));
crm_log_xml_trace(stored_msg, "message");
now_s = pcmk__ttoa(time(NULL));
update_attrd(host_from, PCMK__NODE_ATTR_SHUTDOWN, now_s, NULL, FALSE);
free(now_s);
/* Will be picked up by the TE as long as it's running */
return I_NULL;
}
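/*!
* \internal
* \brief Deliver a message to a local subsystem
*
* Sends via IPC if the subsystem is a connected client; otherwise, handles
* transitioner and executor messages internally and forwards proxied sessions.
*
* \param[in,out] msg Message XML to deliver
* \param[in] sys Destination subsystem (client ID or system name)
*/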
static void
send_msg_via_ipc(xmlNode * msg, const char *sys)
{
pcmk__client_t *client_channel = NULL;
CRM_CHECK(sys != NULL, return);
client_channel = pcmk__find_client_by_id(sys);
if (crm_element_value(msg, PCMK__XA_SRC) == NULL) {
crm_xml_add(msg, PCMK__XA_SRC, controld_globals.our_nodename);
}
if (client_channel != NULL) {
/* Transient clients such as crmadmin */
pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event);
} else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL,
NULL);
xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
process_te_message(msg, data);
} else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) {
fsa_data_t fsa_data;
ha_msg_input_t fsa_input;
xmlNode *wrapper = NULL;
fsa_input.msg = msg;
wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL);
fsa_input.xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
fsa_data.id = 0;
fsa_data.actions = 0;
fsa_data.data = &fsa_input;
fsa_data.fsa_input = I_MESSAGE;
fsa_data.fsa_cause = C_IPC_MESSAGE;
fsa_data.origin = __func__;
fsa_data.data_type = fsa_dt_ha_msg;
do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state,
I_MESSAGE, &fsa_data);
} else if (crmd_is_proxy_session(sys)) {
crmd_proxy_send(sys, msg);
} else {
crm_info("Received invalid request: unknown subsystem '%s'", sys);
}
}
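/*!
* \brief Free an FSA message input
*
* \param[in,out] orig Input to free (its \c xml member points into \c msg,
* so only \c msg itself is freed)
*/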
void
delete_ha_msg_input(ha_msg_input_t * orig)
{
if (orig == NULL) {
return;
}
free_xml(orig->msg);
free(orig);
}
/*!
* \internal
* \brief Notify the cluster of a remote node state change
*
* \param[in] node_name Node's name
* \param[in] node_up true if node is up, false if down
*/
void
broadcast_remote_state_message(const char *node_name, bool node_up)
{
xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL,
CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
crm_info("Notifying cluster of Pacemaker Remote node %s %s",
node_name, node_up? "coming up" : "going down");
crm_xml_add(msg, PCMK_XA_ID, node_name);
pcmk__xe_set_bool_attr(msg, PCMK__XA_IN_CCM, node_up);
if (node_up) {
crm_xml_add(msg, PCMK__XA_CONNECTION_HOST,
controld_globals.our_nodename);
}
send_cluster_message(NULL, crm_msg_crmd, msg, TRUE);
free_xml(msg);
}
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 83b5d2bb1c..4bbf80c41f 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1471 +1,1471 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>
#include <pacemaker-controld.h>
#define REMOTE_LRMD_RA "remote"
/* The max start timeout before cmd retry */
#define MAX_START_TIMEOUT_MS 10000
#define cmd_set_flags(cmd, flags_to_set) do { \
(cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define cmd_clear_flags(cmd, flags_to_clear) do { \
(cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
enum remote_cmd_status {
cmd_reported_success = (1 << 0),
cmd_cancel = (1 << 1),
};
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
int remaining_timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
int monitor_timeout_id;
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
pcmk__action_result_t result;
int call_id;
time_t start_time;
uint32_t status;
} remote_ra_cmd_t;
#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
enum remote_status {
expect_takeover = (1 << 0),
takeover_complete = (1 << 1),
remote_active = (1 << 2),
/* Maintenance mode is difficult to determine from the controller's context,
* so we have it signalled back with the transition from the scheduler.
*/
remote_in_maint = (1 << 3),
/* Similarly for whether we are controlling a guest node or a remote node.
* Fortunately, the transition already includes a meta-attribute, and since
* the situation doesn't change over time, we can record the information
* at resource start for later use, when the attributes aren't at hand.
*/
controlling_guest = (1 << 4),
};
typedef struct remote_ra_data_s {
crm_trigger_t *work;
remote_ra_cmd_t *cur_cmd;
GList *cmds;
GList *recurring_cmds;
uint32_t status;
} remote_ra_data_t;
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
static void
free_cmd(gpointer user_data)
{
remote_ra_cmd_t *cmd = user_data;
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
}
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
}
if (cmd->takeover_timeout_id) {
g_source_remove(cmd->takeover_timeout_id);
}
free(cmd->owner);
free(cmd->rsc_id);
free(cmd->action);
free(cmd->userdata);
pcmk__reset_result(&(cmd->result));
lrmd_key_value_freeall(cmd->params);
free(cmd);
}
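// Return the next remote RA call ID, wrapping around to 1 on overflow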
static int
generate_callid(void)
{
static int remote_ra_callid = 0;
remote_ra_callid++;
if (remote_ra_callid <= 0) {
remote_ra_callid = 1;
}
return remote_ra_callid;
}
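// Timer callback to move a recurring command back onto the work queue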
static gboolean
recurring_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->interval_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
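// Timer callback to kick the work queue once a command's start delay expires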
static gboolean
start_delay_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->delay_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
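/*!
* \internal
* \brief Check whether a lost remote node's attributes should be purged
*
* \param[in] node Lost remote node
*
* \return true if the attributes should be purged (that is, unless the
* node's connection resource was already running on its connection
* host before the node was lost)
*/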
static bool
should_purge_attributes(crm_node_t *node)
{
bool purge = true;
crm_node_t *conn_node = NULL;
lrm_state_t *connection_rsc = NULL;
if (!node->conn_host) {
return purge;
}
/* Get the node that was hosting the remote connection resource from the
* peer cache. That's the one we really care about here.
*/
conn_node = pcmk__get_node(0, node->conn_host, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
if (conn_node == NULL) {
return purge;
}
/* Check the uptime of connection_rsc. If it hasn't been running long
* enough, set purge=true. "Long enough" means it started running earlier
* than the timestamp when we noticed it went away in the first place.
*/
connection_rsc = lrm_state_find(node->uname);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
time_t uptime = lrmd__uptime(lrm);
time_t now = time(NULL);
/* Add 20s of fuzziness to give corosync a while to notice the remote
* host is gone. On various error conditions (failure to get uptime,
* peer_lost isn't set) we default to purging.
*/
if (uptime > 0 &&
conn_node->peer_lost > 0 &&
uptime + 20 >= now - conn_node->peer_lost) {
purge = false;
}
}
return purge;
}
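// Map the purge decision (and shutdown-lock setting) to a node-state section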
static enum controld_section_e
section_to_delete(bool purge)
{
if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
if (purge) {
return controld_section_all_unlocked;
} else {
return controld_section_lrm_unlocked;
}
} else {
if (purge) {
return controld_section_all;
} else {
return controld_section_lrm;
}
}
}
static void
purge_remote_node_attrs(int call_opt, crm_node_t *node)
{
bool purge = should_purge_attributes(node);
enum controld_section_e section = section_to_delete(purge);
/* Purge node from attrd's memory */
if (purge) {
update_attrd_remote_node_removed(node->uname, NULL);
}
controld_delete_node_state(node->uname, section, call_opt);
}
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node joining
*
* \param[in] node_name Name of newly integrated pacemaker_remote node
*/
static void
remote_node_up(const char *node_name)
{
int call_opt;
xmlNode *update, *state;
crm_node_t *node;
lrm_state_t *connection_rsc = NULL;
CRM_CHECK(node_name != NULL, return);
crm_info("Announcing Pacemaker Remote node %s", node_name);
call_opt = crmd_cib_smart_opt();
/* Delete node's probe_complete attribute. This serves two purposes:
*
* - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it
* - deleting it (or any attribute for that matter) here ensures the
* attribute manager learns the node is remote
*/
update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
/* Ensure node is in the remote peer cache with member status */
node = pcmk__cluster_lookup_remote_node(node_name);
CRM_CHECK(node != NULL, return);
purge_remote_node_attrs(call_opt, node);
pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
/* Apply any start state that we were given from the environment on the
* remote node.
*/
connection_rsc = lrm_state_find(node->uname);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
const char *start_state = lrmd__node_start_state(lrm);
if (start_state) {
set_join_state(start_state, node->uname, node->uuid, true);
}
}
/* pacemaker_remote nodes don't participate in the membership layer,
* so cluster nodes don't automatically get notified when they come and go.
* We send a cluster message to the DC, and update the CIB node state entry,
* so the DC will get it sooner (via message) or later (via CIB refresh),
* and any other interested parties can query the CIB.
*/
broadcast_remote_state_message(node_name, true);
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
state = create_node_state_update(node, node_update_cluster, update,
__func__);
/* Clear the PCMK__XA_NODE_FENCED flag in the node state. If the node ever
* needs to be fenced, this flag will allow various actions to determine
* whether the fencing has happened yet.
*/
crm_xml_add(state, PCMK__XA_NODE_FENCED, "0");
/* TODO: If the remote connection drops, and this (async) CIB update either
* failed or has not yet completed, later actions could mistakenly think the
* node has already been fenced (if the PCMK__XA_NODE_FENCED attribute was
* previously set, because it won't have been cleared). This could prevent
* actual fencing or allow recurring monitor failures to be cleared too
* soon. Ideally, we wouldn't rely on the CIB for the fenced status.
*/
controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
free_xml(update);
}
enum down_opts {
DOWN_KEEP_LRM,
DOWN_ERASE_LRM
};
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node leaving
*
* \param[in] node_name Name of lost node
* \param[in] opts Whether to keep or erase LRM history
*/
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
xmlNode *update;
int call_opt = crmd_cib_smart_opt();
crm_node_t *node;
/* Purge node from attrd's memory */
update_attrd_remote_node_removed(node_name, NULL);
/* Normally, only node attributes should be erased, and the resource history
* should be kept until the node comes back up. However, after a successful
* fence, we want to clear the history as well, so we don't think resources
* are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
controld_delete_node_state(node_name, controld_section_all, call_opt);
} else {
controld_delete_node_state(node_name, controld_section_attrs, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
node = pcmk__cluster_lookup_remote_node(node_name);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
/* Notify DC */
broadcast_remote_state_message(node_name, false);
/* Update CIB node state */
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
create_node_state_update(node, node_update_cluster, update, __func__);
controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
free_xml(update);
}
/*!
* \internal
* \brief Handle effects of a remote RA command on node state
*
* \param[in] cmd Completed remote RA command
*/
static void
check_remote_node_state(const remote_ra_cmd_t *cmd)
{
/* Only successful actions can change node state */
if (!pcmk__result_ok(&(cmd->result))) {
return;
}
if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
remote_node_up(cmd->rsc_id);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_casei)) {
/* After a successful migration, we don't need to do remote_node_up()
* because the DC already knows the node is up, and we don't want to
* clear LRM history etc. We do need to add the remote node to this
* host's remote peer cache, because (unless it happens to be DC)
* it hasn't been tracking the remote node, and other code relies on
* the cache to distinguish remote nodes from unseen cluster nodes.
*/
crm_node_t *node = pcmk__cluster_lookup_remote_node(cmd->rsc_id);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
if (ra_data) {
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Stop means down if we didn't successfully migrate elsewhere */
remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
} else if (AM_I_DC == FALSE) {
/* Only the connection host and DC track node state,
* so if the connection migrated elsewhere and we aren't DC,
* un-cache the node, so we don't have stale info
*/
pcmk__cluster_forget_remote_node(cmd->rsc_id);
}
}
}
/* We don't do anything for successful monitors, which is correct for
* routine recurring monitors, and for monitors on nodes where the
* connection isn't supposed to be (the cluster will stop the connection in
* that case). However, if the initial probe finds the connection already
* active on the node where we want it, we probably should do
* remote_node_up(). Unfortunately, we can't distinguish that case here.
* Given that connections have to be initiated by the cluster, the chance of
* that should be close to zero.
*/
}
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
lrmd_event_data_t op = { 0, };
check_remote_node_state(cmd);
op.type = lrmd_event_exec_complete;
op.rsc_id = cmd->rsc_id;
op.op_type = cmd->action;
op.user_data = cmd->userdata;
op.timeout = cmd->timeout;
op.interval_ms = cmd->interval_ms;
op.t_run = (unsigned int) cmd->start_time;
op.t_rcchange = (unsigned int) cmd->start_time;
lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
cmd->result.exit_reason);
if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
op.t_rcchange = (unsigned int) time(NULL);
/* This edge case will likely never occur, but if it does, the
* consequence is that a failure will not be processed correctly. It is
* only remotely possible because we can detect that a connection
* resource's TCP connection has failed at any moment after start has
* completed, while the actual recurring operation is just a
* connectivity ping.
*
* Basically, we are not guaranteed that the first successful monitor op
* and a subsequent failed monitor op won't occur in the same timestamp,
* but we have to make it look like the operations occurred at separate
* times. */
if (op.t_rcchange == op.t_run) {
op.t_rcchange++;
}
}
if (cmd->params) {
lrmd_key_value_t *tmp;
op.params = pcmk__strkey_table(free, free);
for (tmp = cmd->params; tmp; tmp = tmp->next) {
pcmk__insert_dup(op.params, tmp->key, tmp->value);
}
}
op.call_id = cmd->call_id;
op.remote_nodename = cmd->owner;
lrm_op_callback(&op);
if (op.params) {
g_hash_table_destroy(op.params);
}
lrmd__reset_result(&op);
}
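/*!
* \internal
* \brief Recalculate a command's remaining timeout from elapsed wall time
*
* \param[in,out] cmd Remote RA command to update
*
* \note For example, with a 20000ms timeout and 5 seconds elapsed since
*       start_time, remaining_timeout becomes (20 - 5) * 1000 = 15000ms.
*/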
static void
update_remaining_timeout(remote_ra_cmd_t * cmd)
{
cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
}
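/*!
* \internal
* \brief Timer callback to retry a remote connection start or migrate_from
*
* \param[in,out] data Executor state object for the remote connection
*
* \return FALSE (to tell glib not to re-run this one-shot timer)
*/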
static gboolean
retry_start_cmd_cb(gpointer data)
{
lrm_state_t *lrm_state = data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd = NULL;
int rc = ETIME;
if (!ra_data || !ra_data->cur_cmd) {
return FALSE;
}
cmd = ra_data->cur_cmd;
if (!pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
return FALSE;
}
update_remaining_timeout(cmd);
if (cmd->remaining_timeout > 0) {
rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
} else {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Not enough time remains to retry remote connection");
}
if (rc != pcmk_rc_ok) {
report_remote_ra_result(cmd);
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
ra_data->cur_cmd = NULL;
free_cmd(cmd);
} else {
/* wait for connection event */
}
return FALSE;
}
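/*!
* \internal
* \brief Timer callback for a stop whose takeover event never arrived
*
* \param[in,out] data Stop command that was waiting on a takeover event
*
* \return FALSE (to tell glib not to re-run this one-shot timer)
*/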
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
crm_info("takeover event timed out for node %s", cmd->rsc_id);
cmd->takeover_timeout_id = 0;
lrm_state = lrm_state_find(cmd->rsc_id);
handle_remote_ra_stop(lrm_state, cmd);
free_cmd(cmd);
return FALSE;
}
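/*!
* \internal
* \brief Timer callback for a remote poke that got no response in time
*
* \param[in,out] data Monitor command that was waiting on a poke response
*
* \return FALSE (to tell glib not to re-run this one-shot timer)
*/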
static gboolean
monitor_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
lrm_state = lrm_state_find(cmd->rsc_id);
crm_info("Timed out waiting for remote poke response from %s%s",
cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
cmd->monitor_timeout_id = 0;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
"Remote executor did not respond");
if (lrm_state && lrm_state->remote_ra_data) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (ra_data->cur_cmd == cmd) {
ra_data->cur_cmd = NULL;
}
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
}
report_remote_ra_result(cmd);
free_cmd(cmd);
if(lrm_state) {
lrm_state_disconnect(lrm_state);
}
return FALSE;
}
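/*!
* \internal
* \brief Synthesize and process a successful executor event
*
* \param[in,out] lrm_state Executor state to process the event with
*                          (NULL to use the local node's executor state)
* \param[in]     rsc_id    ID of resource the event is for
* \param[in]     op_type   Action name for the synthesized event
*/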
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
lrmd_event_data_t op = { 0, };
if (lrm_state == NULL) {
/* If no lrm_state was given, assume the local node's executor state */
lrm_state = lrm_state_find(controld_globals.our_nodename);
}
CRM_ASSERT(lrm_state != NULL);
op.type = lrmd_event_exec_complete;
op.rsc_id = rsc_id;
op.op_type = op_type;
op.t_run = (unsigned int) time(NULL);
op.t_rcchange = op.t_run;
op.call_id = generate_callid();
lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
process_lrm_event(lrm_state, &op, NULL, NULL);
}
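/*!
* \internal
* \brief Handle an executor event for a Pacemaker Remote connection
*
* \param[in,out] op Event to handle
*/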
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
services_ocf_exitcode_str(op->rc), op->rc,
pcmk_exec_status_str(op->op_status), op->op_status);
lrm_state = lrm_state_find(op->remote_nodename);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (pcmk_is_set(ra_data->status, expect_takeover)) {
// Great, we knew this was coming
lrm_remote_clear_flags(lrm_state, expect_takeover);
lrm_remote_set_flags(lrm_state, takeover_complete);
} else {
crm_err("Disconnecting from Pacemaker Remote node %s due to "
"unexpected client takeover", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under the
* control of the mainloop. Do not free lrm_state->conn yet; it will be
* freed in the following stop action. */
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* Pass all exec-complete events up to the normal result handler */
if (op->type == lrmd_event_exec_complete) {
if (pcmk_is_set(ra_data->status, takeover_complete)) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (!pcmk_is_set(ra_data->status, remote_active)) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name,
PCMK_ACTION_STOP);
}
return;
}
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start and migrate_from actions complete only after the connection
* comes back to us. */
if ((op->type == lrmd_event_connect)
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
if (op->connection_rc < 0) {
update_remaining_timeout(cmd);
if ((op->connection_rc == -ENOKEY)
|| (op->connection_rc == -EKEYREJECTED)) {
// Hard error, don't retry
pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
PCMK_EXEC_ERROR,
pcmk_strerror(op->connection_rc));
} else if (cmd->remaining_timeout > 3000) {
crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("can't reschedule start, remaining timeout too small %d",
cmd->remaining_timeout);
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"%s without enough time to retry",
pcmk_strerror(op->connection_rc));
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
lrm_remote_set_flags(lrm_state, remote_active);
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_poke)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time; after that, only worry about
* failures. If we get the poke back at all, it is always a success --
* pokes fail only if the send fails or the response times out. */
if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
cmd_set_flags(cmd, cmd_reported_success);
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* On success, keep rescheduling if an interval is present */
if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = g_timeout_add(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_disconnect)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (pcmk_is_set(ra_data->status, remote_active) &&
!pcmk_is_set(cmd->status, cmd_cancel)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Remote connection unexpectedly dropped "
"during monitor");
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_new_client)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
handle_remote_ra_stop(lrm_state, cmd);
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
free_cmd(cmd);
}
}
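/*!
* \internal
* \brief Handle an intentional stop of a remote connection
*
* \param[in,out] lrm_state Executor state for the connection being stopped
* \param[in,out] cmd       Stop command to report the result for (can be
*                          NULL, for example when faking a stop after an
*                          unmanaged disconnect)
*/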
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
remote_ra_data_t *ra_data = NULL;
CRM_ASSERT(lrm_state);
ra_data = lrm_state->remote_ra_data;
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Delete pending ops whenever the remote connection is intentionally stopped */
g_hash_table_remove_all(lrm_state->active_ops);
} else {
/* We no longer hold the history if this connection has been migrated;
* however, we keep the metadata cache for future use */
lrm_state_reset_tables(lrm_state, FALSE);
}
lrm_remote_clear_flags(lrm_state, remote_active);
lrm_state_disconnect(lrm_state);
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
ra_data->cmds = NULL;
ra_data->recurring_cmds = NULL;
ra_data->cur_cmd = NULL;
if (cmd) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
}
// \return Standard Pacemaker return code
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
const char *server = NULL;
lrmd_key_value_t *tmp = NULL;
int port = 0;
int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
int rc = pcmk_rc_ok;
for (tmp = cmd->params; tmp; tmp = tmp->next) {
if (pcmk__strcase_any_of(tmp->key,
PCMK_REMOTE_RA_ADDR, PCMK_REMOTE_RA_SERVER,
NULL)) {
server = tmp->value;
} else if (pcmk__str_eq(tmp->key, PCMK_REMOTE_RA_PORT,
pcmk__str_none)) {
port = atoi(tmp->value);
} else if (pcmk__str_eq(tmp->key, CRM_META "_" PCMK__META_CONTAINER,
pcmk__str_none)) {
lrm_remote_set_flags(lrm_state, controlling_guest);
}
}
rc = controld_connect_remote_executor(lrm_state, server, port,
timeout_used);
if (rc != pcmk_rc_ok) {
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Could not connect to Pacemaker Remote node %s: %s",
lrm_state->node_name, pcmk_rc_str(rc));
}
return rc;
}
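/*!
* \internal
* \brief Mainloop trigger callback that dispatches queued remote RA commands
*
* \param[in,out] user_data Executor state object for the remote connection
*
* \return TRUE (to keep the trigger registered with the mainloop)
*/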
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
if (handle_remote_ra_start(lrm_state, cmd,
cmd->timeout) == pcmk_rc_ok) {
/* take care of this later when we get async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, pcmk_strerror(rc));
}
} else {
rc = -1;
pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
PCMK_EXEC_DONE, "Remote connection inactive");
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) {
if (pcmk_is_set(ra_data->status, expect_takeover)) {
/* Briefly wait on stop for the takeover event to occur. If the
* takeover event does not occur during the wait period, that's fine;
* it just means that the remote node's lrm_status section will be
* cleared, which will require all resources running on the remote node
* to be explicitly re-detected via probe actions. If the takeover does
* occur successfully, we can leave the status section intact. */
cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
handle_remote_ra_stop(lrm_state, cmd);
} else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) {
lrm_remote_clear_flags(lrm_state, takeover_complete);
lrm_remote_set_flags(lrm_state, expect_takeover);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
} else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Currently the only reloadable parameter is
* PCMK_REMOTE_RA_RECONNECT_INTERVAL, which is only used by the
* scheduler via the CIB, so reloads are a no-op.
*
* @COMPAT DC <2.1.0: We only need to check for "reload" in case
* we're in a rolling upgrade with a DC scheduling "reload" instead
* of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway,
* so this would work for that purpose as well.
*/
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
free_cmd(cmd);
}
return TRUE;
}
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = NULL;
if (lrm_state->remote_ra_data) {
return;
}
ra_data = pcmk__assert_alloc(1, sizeof(remote_ra_data_t));
ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
lrm_state->remote_ra_data = ra_data;
}
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (!ra_data) {
return;
}
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
mainloop_destroy_trigger(ra_data->work);
free(ra_data);
lrm_state->remote_ra_data = NULL;
}
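/*!
* \internal
* \brief Check whether an agent spec or resource ID is a remote connection
*
* \param[in] agent    Resource agent name (if checking an agent spec)
* \param[in] provider Resource agent provider (if checking an agent spec)
* \param[in] id       Resource ID (if checking a known connection resource)
*
* \return TRUE if this is the internal remote connection agent or the ID of
*         a known remote connection resource, otherwise FALSE
*/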
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
return TRUE;
}
if ((id != NULL) && (lrm_state_find(id) != NULL)
&& !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) {
return TRUE;
}
return FALSE;
}
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
lrmd_rsc_info_t *info = NULL;
if ((lrm_state_find(rsc_id))) {
info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t));
info->id = pcmk__str_copy(rsc_id);
info->type = pcmk__str_copy(REMOTE_LRMD_RA);
info->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_OCF);
info->provider = pcmk__str_copy("pacemaker");
}
return info;
}
static gboolean
is_remote_ra_supported_action(const char *action)
{
return pcmk__str_any_of(action,
PCMK_ACTION_START,
PCMK_ACTION_STOP,
PCMK_ACTION_MONITOR,
PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_RELOAD,
NULL);
}
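/*!
* \internal
* \brief Fail and remove all recurring monitor commands in a list
*
* \param[in,out] list List of remote RA commands to check
*
* \return Updated list with the failed monitor commands removed
*/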
static GList *
fail_all_monitor_cmds(GList * list)
{
GList *rm_list = NULL;
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms > 0)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
rm_list = g_list_append(rm_list, cmd);
}
}
for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, "Lost connection to remote executor");
crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
report_remote_ra_result(cmd);
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
/* frees only the list data, not the cmds */
g_list_free(rm_list);
return list;
}
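/*!
* \internal
* \brief Remove a command matching an action and interval from a list
*
* \param[in,out] list        List of remote RA commands to check
* \param[in]     action      Action name to match
* \param[in]     interval_ms Action interval (in milliseconds) to match
*
* \return Updated list with the matching command (if any) removed and freed
*/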
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
break;
}
cmd = NULL;
}
if (cmd) {
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
return list;
}
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_data_t *ra_data = NULL;
connection_rsc = lrm_state_find(rsc_id);
if (!connection_rsc || !connection_rsc->remote_ra_data) {
return -EINVAL;
}
ra_data = connection_rsc->remote_ra_data;
ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
interval_ms);
if (ra_data->cur_cmd &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
(pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
}
return 0;
}
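/*!
* \internal
* \brief Merge a newly requested recurring monitor with any duplicate
*
* \param[in,out] ra_data     Remote RA data to search for a duplicate
* \param[in]     interval_ms Interval (in milliseconds) of the new monitor
* \param[in]     userdata    Updated user data for the monitor
*
* \return Existing command the new monitor was merged into, or NULL if none
*/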
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
const char *userdata)
{
GList *gIter = NULL;
remote_ra_cmd_t *cmd = NULL;
/* There are three places a potential duplicate monitor operation
* could exist:
* 1. the recurring_cmds list, where the op is waiting for its next interval
* 2. the cmds list, where the op is queued to be executed immediately
* 3. cur_cmd, which means the monitor op is in flight right now
*/
if (interval_ms == 0) {
return NULL;
}
if (ra_data->cur_cmd &&
!pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
(ra_data->cur_cmd->interval_ms == interval_ms)
&& pcmk__str_eq(ra_data->cur_cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
cmd = ra_data->cur_cmd;
goto handle_dup;
}
for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
return NULL;
handle_dup:
crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
cmd->rsc_id, PCMK_ACTION_MONITOR, interval_ms);
/* update the userdata */
if (userdata) {
free(cmd->userdata);
cmd->userdata = pcmk__str_copy(userdata);
}
/* if we've already reported success, generate a new call id */
if (pcmk_is_set(cmd->status, cmd_reported_success)) {
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
cmd_clear_flags(cmd, cmd_reported_success);
}
/* If we have an interval_id set, we are in the process of waiting for
* this cmd's next interval. Instead of waiting, cancel the timer and
* execute the action immediately. */
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
cmd->interval_id = 0;
recurring_helper(cmd);
}
return cmd;
}
/*!
* \internal
* \brief Execute an action using the (internal) ocf:pacemaker:remote agent
*
* \param[in] lrm_state Executor state object for remote connection
* \param[in] rsc_id Connection resource ID
* \param[in] action Action to execute
* \param[in] userdata String to copy and pass to execution callback
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] timeout_ms Action timeout (in milliseconds)
* \param[in] start_delay_ms Delay (in milliseconds) before executing action
* \param[in,out] params Connection resource parameters
* \param[out] call_id Where to store call ID on success
*
* \return Standard Pacemaker return code
* \note This takes ownership of \p params, which should not be used or freed
* after calling this function.
*/
int
controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
const char *action, const char *userdata,
guint interval_ms, int timeout_ms,
int start_delay_ms, lrmd_key_value_t *params,
int *call_id)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_cmd_t *cmd = NULL;
remote_ra_data_t *ra_data = NULL;
*call_id = 0;
CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
&& (userdata != NULL) && (call_id != NULL),
lrmd_key_value_freeall(params); return EINVAL);
if (!is_remote_ra_supported_action(action)) {
lrmd_key_value_freeall(params);
return EOPNOTSUPP;
}
connection_rsc = lrm_state_find(rsc_id);
if (connection_rsc == NULL) {
lrmd_key_value_freeall(params);
return ENOTCONN;
}
remote_ra_data_init(connection_rsc);
ra_data = connection_rsc->remote_ra_data;
cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
if (cmd) {
*call_id = cmd->call_id;
lrmd_key_value_freeall(params);
return pcmk_rc_ok;
}
cmd = pcmk__assert_alloc(1, sizeof(remote_ra_cmd_t));
cmd->owner = pcmk__str_copy(lrm_state->node_name);
cmd->rsc_id = pcmk__str_copy(rsc_id);
cmd->action = pcmk__str_copy(action);
cmd->userdata = pcmk__str_copy(userdata);
cmd->interval_ms = interval_ms;
cmd->timeout = timeout_ms;
cmd->start_delay = start_delay_ms;
cmd->params = params;
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
*call_id = cmd->call_id;
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Immediately fail all monitors of a remote node, if proxied here
*
* \param[in] node_name Name of pacemaker_remote node
*/
void
remote_ra_fail(const char *node_name)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (lrm_state && lrm_state_is_connected(lrm_state)) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
}
/* A guest node fencing implied by host fencing looks like:
*
* <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
* on_node="lxc1" on_node_uuid="lxc1">
* <attributes CRM_meta_on_node="lxc1" CRM_meta_on_node_uuid="lxc1"
* CRM_meta_stonith_action="off" crm_feature_set="3.0.12"/>
* <downed>
* <node id="lxc1"/>
* </downed>
* </pseudo_event>
*/
#define XPATH_PSEUDO_FENCE "/" PCMK__XE_PSEUDO_EVENT \
"[@" PCMK_XA_OPERATION "='stonith']/" PCMK__XE_DOWNED "/" PCMK_XE_NODE
/*!
* \internal
* \brief Check a pseudo-action for Pacemaker Remote node side effects
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_pseudo(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
if (numXpathResults(search) == 1) {
xmlNode *result = getXpathResult(search, 0);
/* Normally, we handle the necessary side effects of a guest node stop
* action when reporting the remote agent's result. However, if the stop
* is implied due to fencing, it will be a fencing pseudo-event, and
* there won't be a result to report. Handle that case here.
*
* This will result in a duplicate call to remote_node_down() if the
* guest stop was real instead of implied, but that shouldn't hurt.
*
* There is still one corner case that isn't handled: if a guest node
* isn't running any resources when its host is fenced, it will appear
* to be cleanly stopped, so there will be no pseudo-fence, and our
* peer cache state will be incorrect unless and until the guest is
* recovered.
*/
if (result) {
const char *remote = pcmk__xe_id(result);
if (remote) {
remote_node_down(remote, DOWN_ERASE_LRM);
}
}
}
freeXpathObject(search);
}
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
xmlNode *update, *state;
int call_opt;
crm_node_t *node;
call_opt = crmd_cib_smart_opt();
node = pcmk__cluster_lookup_remote_node(lrm_state->node_name);
CRM_CHECK(node != NULL, return);
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
state = create_node_state_update(node, node_update_none, update,
__func__);
crm_xml_add(state, PCMK__XA_NODE_IN_MAINTENANCE, (maintenance? "1" : "0"));
if (controld_update_cib(PCMK_XE_STATUS, update, call_opt,
NULL) == pcmk_rc_ok) {
/* TODO: still not 100% sure that async update will succeed ... */
if (maintenance) {
lrm_remote_set_flags(lrm_state, remote_in_maint);
} else {
lrm_remote_clear_flags(lrm_state, remote_in_maint);
}
}
free_xml(update);
}
#define XPATH_PSEUDO_MAINTENANCE "//" PCMK__XE_PSEUDO_EVENT \
"[@" PCMK_XA_OPERATION "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \
PCMK__XE_MAINTENANCE
/*!
* \internal
* \brief Check a pseudo-action holding updates for maintenance state
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
if (numXpathResults(search) == 1) {
xmlNode *node;
int cnt = 0, cnt_remote = 0;
for (node = pcmk__xe_first_child(getXpathResult(search, 0),
PCMK_XE_NODE, NULL, NULL);
node != NULL; node = pcmk__xe_next_same(node)) {
lrm_state_t *lrm_state = lrm_state_find(pcmk__xe_id(node));
cnt++;
if (lrm_state && lrm_state->remote_ra_data &&
pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
const char *in_maint_s = NULL;
int in_maint;
cnt_remote++;
in_maint_s = crm_element_value(node,
PCMK__XA_NODE_IN_MAINTENANCE);
pcmk__scan_min_int(in_maint_s, &in_maint, 0);
remote_ra_maintenance(lrm_state, in_maint);
}
}
crm_trace("Action holds %d nodes (%d remotes found) adjusting "
PCMK_OPT_MAINTENANCE_MODE,
cnt, cnt_remote);
}
freeXpathObject(search);
}
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, remote_in_maint);
}
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, controlling_guest);
}
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index 387aa58808..762a0fed2c 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -1,760 +1,762 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
static GHashTable *te_targets = NULL;
void send_rsc_command(pcmk__graph_action_t *action);
static void te_update_job_count(pcmk__graph_action_t *action, int offset);
static void
te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
action->timer = g_timeout_add(action->timeout + graph->network_delay,
action_timer_callback, (void *) action);
CRM_ASSERT(action->timer != 0);
}
/*!
* \internal
* \brief Execute a graph pseudo-action
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] pseudo Pseudo-action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
{
const char *task = crm_element_value(pseudo->xml, PCMK_XA_OPERATION);
/* send to peers as well? */
if (pcmk__str_eq(task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_casei)) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *cmd = NULL;
if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
pcmk__str_casei)) {
continue;
}
cmd = create_request(task, pseudo->xml, node->uname,
CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
free_xml(cmd);
}
remote_ra_process_maintenance_nodes(pseudo->xml);
} else {
/* Check action for Pacemaker Remote node side effects */
remote_ra_process_pseudo(pseudo->xml);
}
crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
crm_element_value(pseudo->xml, PCMK__XA_OPERATION_KEY));
te_action_confirmed(pseudo, graph);
return pcmk_rc_ok;
}
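/*!
* \internal
* \brief Get the expected result code from a graph action's parameters
*
* \param[in,out] action Graph action to check
*
* \return Expected result code for \p action
*/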
static int
get_target_rc(pcmk__graph_action_t *action)
{
int exit_status;
pcmk__scan_min_int(crm_meta_value(action->params, PCMK__META_OP_TARGET_RC),
&exit_status, 0);
return exit_status;
}
/*!
* \internal
* \brief Execute a cluster action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Cluster action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
char *counter = NULL;
xmlNode *cmd = NULL;
gboolean is_local = FALSE;
const char *id = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
id = pcmk__xe_id(action->xml);
CRM_CHECK(!pcmk__str_empty(id), return EPROTO);
task = crm_element_value(action->xml, PCMK_XA_OPERATION);
CRM_CHECK(!pcmk__str_empty(task), return EPROTO);
on_node = crm_element_value(action->xml, PCMK__META_ON_NODE);
CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);
router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if (router_node == NULL) {
router_node = on_node;
if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
router_node = controld_globals.our_nodename;
}
}
}
if (pcmk__str_eq(router_node, controld_globals.our_nodename,
pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_info("Handling controller request '%s' (%s on %s)%s%s",
id, task, on_node, (is_local? " locally" : ""),
(no_wait? " without waiting" : ""));
if (is_local
&& pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
/* defer until everything else completes */
crm_info("Controller request '%s' is a local shutdown", id);
graph->completion_action = pcmk__graph_shutdown;
graph->abort_reason = "local shutdown";
te_action_confirmed(action, graph);
return pcmk_rc_ok;
} else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
crm_node_t *peer = pcmk__get_node(0, router_node, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
}
cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
counter = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, get_target_rc(action),
controld_globals.te_uuid);
crm_xml_add(cmd, PCMK__XA_TRANSITION_KEY, counter);
rc = send_cluster_message(pcmk__get_node(0, router_node, NULL,
- pcmk__node_search_cluster),
+ pcmk__node_search_cluster_member),
crm_msg_crmd, cmd, TRUE);
free(counter);
free_xml(cmd);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return ECOMM;
} else if (no_wait) {
te_action_confirmed(action, graph);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_start_action_timer(graph, action);
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Synthesize an executor event for a resource action timeout
*
* \param[in] action Resource action that timed out
* \param[in] target_rc Expected result of action that timed out
*
* Synthesize an executor event for a resource action timeout. (If the executor
* gets a timeout while waiting for a resource action to complete, that will be
* reported via the usual callback. This timeout means we didn't hear from the
* executor itself or the controller that relayed the action to the executor.)
*
* \return Newly created executor event for result of \p action
* \note The caller is responsible for freeing the return value using
* lrmd_free_event().
*/
static lrmd_event_data_t *
synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *reason = NULL;
char *dynamic_reason = NULL;
if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
reason = "Local executor did not return result in time";
} else {
const char *router_node = NULL;
router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if (router_node == NULL) {
router_node = target;
}
dynamic_reason = crm_strdup_printf("Controller on %s did not return "
"result in time", router_node);
reason = dynamic_reason;
}
op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
PCMK_OCF_UNKNOWN_ERROR, reason);
op->call_id = -1;
op->user_data = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, target_rc,
controld_globals.te_uuid);
free(dynamic_reason);
return op;
}
static void
controld_record_action_event(pcmk__graph_action_t *action,
lrmd_event_data_t *op)
{
cib_t *cib_conn = controld_globals.cib_conn;
xmlNode *state = NULL;
xmlNode *rsc = NULL;
xmlNode *action_rsc = NULL;
int rc = pcmk_ok;
const char *rsc_id = NULL;
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *task_uuid = crm_element_value(action->xml,
PCMK__XA_OPERATION_KEY);
const char *target_uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
int target_rc = get_target_rc(action);
action_rsc = pcmk__xe_first_child(action->xml, PCMK_XE_PRIMITIVE, NULL,
NULL);
if (action_rsc == NULL) {
return;
}
rsc_id = pcmk__xe_id(action_rsc);
CRM_CHECK(rsc_id != NULL,
crm_log_xml_err(action->xml, "Bad:action"); return);
/*
update the CIB
<node_state id="hadev">
<lrm>
<lrm_resources>
<lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
*/
state = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
crm_xml_add(state, PCMK_XA_ID, target_uuid);
crm_xml_add(state, PCMK_XA_UNAME, target);
rsc = pcmk__xe_create(state, PCMK__XE_LRM);
crm_xml_add(rsc, PCMK_XA_ID, target_uuid);
rsc = pcmk__xe_create(rsc, PCMK__XE_LRM_RESOURCES);
rsc = pcmk__xe_create(rsc, PCMK__XE_LRM_RESOURCE);
crm_xml_add(rsc, PCMK_XA_ID, rsc_id);
crm_copy_xml_element(action_rsc, rsc, PCMK_XA_TYPE);
crm_copy_xml_element(action_rsc, rsc, PCMK_XA_CLASS);
crm_copy_xml_element(action_rsc, rsc, PCMK_XA_PROVIDER);
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
__func__);
rc = cib_conn->cmds->modify(cib_conn, PCMK_XE_STATUS, state,
cib_scope_local);
fsa_register_cib_callback(rc, NULL, cib_action_updated);
free_xml(state);
crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
rc, action->id, task_uuid, target);
pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
void
controld_record_action_timeout(pcmk__graph_action_t *action)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *task_uuid = crm_element_value(action->xml,
PCMK__XA_OPERATION_KEY);
int target_rc = get_target_rc(action);
crm_warn("%s %d: %s on %s timed out",
action->xml->name, action->id, task_uuid, target);
op = synthesize_timeout_event(action, target_rc);
controld_record_action_event(action, op);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Execute a resource action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Resource action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
/* never overwrite stop actions in the CIB with
* anything other than completed results
*
* Writing pending stops makes it look like the
* resource is running again
*/
xmlNode *cmd = NULL;
xmlNode *rsc_op = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
gboolean is_local = FALSE;
char *counter = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
const char *task_uuid = NULL;
CRM_ASSERT(action != NULL);
CRM_ASSERT(action->xml != NULL);
pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);
on_node = crm_element_value(action->xml, PCMK__META_ON_NODE);
CRM_CHECK(!pcmk__str_empty(on_node),
crm_err("Corrupted command(id=%s) %s: no node",
pcmk__xe_id(action->xml), pcmk__s(task, "without task"));
return pcmk_rc_node_unknown);
rsc_op = action->xml;
task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
task_uuid = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
router_node = crm_element_value(rsc_op, PCMK__XA_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
}
counter = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, get_target_rc(action),
controld_globals.te_uuid);
crm_xml_add(rsc_op, PCMK__XA_TRANSITION_KEY, counter);
if (pcmk__str_eq(router_node, controld_globals.our_nodename,
pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
task, task_uuid, (is_local? " locally" : ""), on_node,
(no_wait? " without waiting" : ""), action->id);
cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
if (is_local) {
/* shortcut local resource commands */
ha_msg_input_t data = {
.msg = cmd,
.xml = rsc_op,
};
fsa_data_t msg = {
.id = 0,
.data = &data,
.data_type = fsa_dt_ha_msg,
.fsa_input = I_NULL,
.fsa_cause = C_FSA_INTERNAL,
.actions = A_LRM_INVOKE,
.origin = __func__,
};
do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state,
I_NULL, &msg);
} else {
- rc = send_cluster_message(pcmk__get_node(0, router_node, NULL,
- pcmk__node_search_cluster),
- crm_msg_lrmd, cmd, TRUE);
+ const crm_node_t *node =
+ pcmk__get_node(0, router_node, NULL,
+ pcmk__node_search_cluster_member);
+
+ rc = send_cluster_message(node, crm_msg_lrmd, cmd, TRUE);
}
free(counter);
free_xml(cmd);
pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return ECOMM;
} else if (no_wait) {
/* Just mark confirmed. Don't bump the job count only to immediately
* decrement it.
*/
crm_info("Action %d confirmed - no wait", action->id);
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(controld_globals.transition_graph, action);
trigger_graph();
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
action->id, task, task_uuid, on_node, action->timeout);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_update_job_count(action, 1);
te_start_action_timer(graph, action);
}
return pcmk_rc_ok;
}
struct te_peer_s
{
char *name;
int jobs;
int migrate_jobs;
};
static void te_peer_free(gpointer p)
{
struct te_peer_s *peer = p;
free(peer->name);
free(peer);
}
void te_reset_job_counts(void)
{
GHashTableIter iter;
struct te_peer_s *peer = NULL;
if(te_targets == NULL) {
te_targets = pcmk__strkey_table(NULL, te_peer_free);
}
g_hash_table_iter_init(&iter, te_targets);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
peer->jobs = 0;
peer->migrate_jobs = 0;
}
}
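/*!
* \internal
* \brief Adjust a node's in-flight job count (and optionally migration count)
*
* \param[in] target  Name of node whose counts should be adjusted
* \param[in] offset  Amount to adjust the job count by (may be negative)
* \param[in] migrate Whether to adjust the migration job count as well
*/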
static void
te_update_job_count_on(const char *target, int offset, bool migrate)
{
struct te_peer_s *r = NULL;
if(target == NULL || te_targets == NULL) {
return;
}
r = g_hash_table_lookup(te_targets, target);
if(r == NULL) {
r = pcmk__assert_alloc(1, sizeof(struct te_peer_s));
r->name = pcmk__str_copy(target);
g_hash_table_insert(te_targets, r->name, r);
}
r->jobs += offset;
if(migrate) {
r->migrate_jobs += offset;
}
crm_trace("jobs[%s] = %d", target, r->jobs);
}
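/*!
* \internal
* \brief Adjust the job counts of the node(s) that a graph action affects
*
* \param[in,out] action Resource action being counted
* \param[in]     offset Amount to adjust job counts by (may be negative)
*/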
static void
te_update_job_count(pcmk__graph_action_t *action, int offset)
{
const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
/* No limit on these */
return;
}
/* If we have a router node, the action is being performed on a remote
* node. For now, we count all actions occurring on a remote node
* against the job list of the cluster node hosting the connection
* resource */
target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if ((target == NULL)
&& pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
const char *t1 = crm_meta_value(action->params,
PCMK__META_MIGRATE_SOURCE);
const char *t2 = crm_meta_value(action->params,
PCMK__META_MIGRATE_TARGET);
te_update_job_count_on(t1, offset, TRUE);
te_update_job_count_on(t2, offset, TRUE);
return;
} else if (target == NULL) {
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
}
te_update_job_count_on(target, offset, FALSE);
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed on a node
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
* \param[in] target Name of node where action should be executed
*
* \return true if action is allowed, otherwise false
*/
static bool
allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
const char *target)
{
int limit = 0;
struct te_peer_s *r = NULL;
const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
const char *id = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
if(target == NULL) {
/* No limit on these */
return true;
} else if(te_targets == NULL) {
return false;
}
r = g_hash_table_lookup(te_targets, target);
limit = throttle_get_job_limit(target);
if(r == NULL) {
r = pcmk__assert_alloc(1, sizeof(struct te_peer_s));
r->name = pcmk__str_copy(target);
g_hash_table_insert(te_targets, r->name, r);
}
if(limit <= r->jobs) {
crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
target, limit, r->jobs, id);
return false;
} else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
if (pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
target, graph->migration_limit, r->migrate_jobs, id);
return false;
}
}
crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
return true;
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
*
* \return true if action is allowed, otherwise false
*/
static bool
graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *target = NULL;
const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
if (action->type != pcmk__rsc_graph_action) {
/* No limit on these */
return true;
}
/* If we have a router node, the action is being performed on a remote
* node. For now, we count all actions occurring on a remote node
* against the job list of the cluster node hosting the connection
* resource */
target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if ((target == NULL)
&& pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
target = crm_meta_value(action->params, PCMK__META_MIGRATE_SOURCE);
if (!allowed_on_node(graph, action, target)) {
return false;
}
target = crm_meta_value(action->params, PCMK__META_MIGRATE_TARGET);
} else if (target == NULL) {
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
}
return allowed_on_node(graph, action, target);
}
/*!
* \brief Confirm a graph action (and optionally update graph)
*
* \param[in,out] action Action to confirm
* \param[in,out] graph Update and trigger this graph (if non-NULL)
*/
void
te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
{
if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
if ((action->type == pcmk__rsc_graph_action)
&& (crm_element_value(action->xml, PCMK__META_ON_NODE) != NULL)) {
te_update_job_count(action, -1);
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
}
if (graph) {
pcmk__update_graph(graph, action);
trigger_graph();
}
}
static pcmk__graph_functions_t te_graph_fns = {
execute_pseudo_action,
execute_rsc_action,
execute_cluster_action,
controld_execute_fence_action,
graph_action_allowed,
};
/*!
* \internal
* \brief Register the transitioner's graph functions with \p libpacemaker
*/
void
controld_register_graph_functions(void)
{
pcmk__set_graph_functions(&te_graph_fns);
}
void
notify_crmd(pcmk__graph_t *graph)
{
const char *type = "unknown";
enum crmd_fsa_input event = I_NULL;
crm_debug("Processing transition completion in state %s",
fsa_state2string(controld_globals.fsa_state));
CRM_CHECK(graph->complete, graph->complete = true);
switch (graph->completion_action) {
case pcmk__graph_wait:
type = "stop";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case pcmk__graph_done:
type = "done";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case pcmk__graph_restart:
type = "restart";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
if (controld_get_period_transition_timer() > 0) {
controld_stop_transition_timer();
controld_start_transition_timer();
} else {
event = I_PE_CALC;
}
} else if (controld_globals.fsa_state == S_POLICY_ENGINE) {
controld_set_fsa_action_flags(A_PE_INVOKE);
controld_trigger_fsa();
}
break;
case pcmk__graph_shutdown:
type = "shutdown";
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
event = I_STOP;
} else {
crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
event = I_TERMINATE;
}
}
crm_debug("Transition %d status: %s - %s", graph->id, type,
pcmk__s(graph->abort_reason, "unspecified reason"));
graph->abort_reason = NULL;
graph->completion_action = pcmk__graph_done;
if (event != I_NULL) {
register_fsa_input(C_FSA_INTERNAL, event, NULL);
} else {
controld_trigger_fsa();
}
}
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
index 363d0a549c..e8509c0c88 100644
--- a/daemons/controld/controld_te_events.c
+++ b/daemons/controld/controld_te_events.c
@@ -1,612 +1,613 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
#include <crm/common/attrs_internal.h>
#include <crm/common/ipc_attrd_internal.h>
/*!
* \internal
* \brief Action numbers of outside events processed in current update diff
*
* This table is to be used as a set. It should be empty when the transitioner
* begins processing a CIB update diff. It ensures that if there are multiple
* events (for example, "_last_0" and "_last_failure_0") for the same action,
* only one of them updates the failcount. Events that originate outside the
* cluster can't be confirmed, since they're not in the transition graph.
*/
static GHashTable *outside_events = NULL;
/*!
* \internal
* \brief Empty the hash table containing action numbers of outside events
*/
void
controld_remove_all_outside_events(void)
{
if (outside_events != NULL) {
g_hash_table_remove_all(outside_events);
}
}
/*!
* \internal
* \brief Destroy the hash table containing action numbers of outside events
*/
void
controld_destroy_outside_events_table(void)
{
if (outside_events != NULL) {
g_hash_table_destroy(outside_events);
outside_events = NULL;
}
}
/*!
* \internal
* \brief Add an outside event's action number to a set
*
* \param[in] action_num Action number of the outside event
*
* \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
*         event was not already in the set, or \p pcmk_rc_already otherwise.
*/
static int
record_outside_event(gint action_num)
{
if (outside_events == NULL) {
outside_events = g_hash_table_new(NULL, NULL);
}
if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) {
return pcmk_rc_ok;
}
return pcmk_rc_already;
}
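/*!
* \internal
* \brief Fail graph actions that can no longer complete because a node left
*
* \param[in,out] graph     Transition graph being executed
* \param[in]     down_node UUID of node that is down
*
* \return TRUE if any actions were failed (aborting the transition),
*         otherwise FALSE
*/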
gboolean
fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
{
const char *target_uuid = NULL;
const char *router = NULL;
const char *router_uuid = NULL;
xmlNode *last_action = NULL;
GList *gIter = NULL;
GList *gIter2 = NULL;
if (graph == NULL || graph->complete) {
return FALSE;
}
gIter = graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
/* We've already been here */
continue;
}
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
if ((action->type == pcmk__pseudo_graph_action)
|| pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
} else if (action->type == pcmk__cluster_graph_action) {
const char *task = crm_element_value(action->xml,
PCMK_XA_OPERATION);
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
continue;
}
}
target_uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
router = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if (router) {
- crm_node_t *node = pcmk__get_node(0, router, NULL,
- pcmk__node_search_cluster);
+ const crm_node_t *node =
+ pcmk__get_node(0, router, NULL,
+ pcmk__node_search_cluster_member);
if (node) {
router_uuid = node->uuid;
}
}
if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
last_action = action->xml;
stop_te_timer(action);
pcmk__update_graph(graph, action);
if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
crm_notice("Action %d (%s) was pending on %s (offline)",
action->id,
crm_element_value(action->xml,
PCMK__XA_OPERATION_KEY),
down_node);
} else {
crm_info("Action %d (%s) is scheduled for %s (offline)",
action->id,
crm_element_value(action->xml, PCMK__XA_OPERATION_KEY),
down_node);
}
}
}
}
if (last_action != NULL) {
crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Node failure", last_action);
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Update failure-related node attributes if warranted
*
* \param[in] event XML describing operation that (maybe) failed
* \param[in] event_node_uuid Node that event occurred on
* \param[in] rc Actual operation return code
* \param[in] target_rc Expected operation return code
* \param[in] do_update If TRUE, do update regardless of operation type
* \param[in] ignore_failures If TRUE, update last failure but not fail count
*
* \return TRUE if this was not a direct nack, success or lrm status refresh
*/
static gboolean
update_failcount(const xmlNode *event, const char *event_node_uuid, int rc,
int target_rc, gboolean do_update, gboolean ignore_failures)
{
guint interval_ms = 0;
char *task = NULL;
char *rsc_id = NULL;
const char *value = NULL;
const char *id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
const char *on_uname = crm_peer_uname(event_node_uuid);
const char *origin = crm_element_value(event, PCMK_XA_CRM_DEBUG_ORIGIN);
// Nothing needs to be done for success or status refresh
if (rc == target_rc) {
return FALSE;
} else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
id, rc, on_uname);
return FALSE;
}
/* Sanity check */
CRM_CHECK(on_uname != NULL, return TRUE);
CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
crm_err("Couldn't parse: %s", pcmk__xe_id(event)); goto bail);
/* Decide whether update is necessary and what value to use */
if ((interval_ms > 0)
|| pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_none)
|| pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_none)) {
do_update = TRUE;
} else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_none)) {
do_update = TRUE;
value = pcmk__s(controld_globals.transition_graph->failed_start_offset,
PCMK_VALUE_INFINITY);
} else if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_none)) {
do_update = TRUE;
value = pcmk__s(controld_globals.transition_graph->failed_stop_offset,
PCMK_VALUE_INFINITY);
}
if (do_update) {
pcmk__attrd_query_pair_t *fail_pair = NULL;
pcmk__attrd_query_pair_t *last_pair = NULL;
char *fail_name = NULL;
char *last_name = NULL;
GList *attrs = NULL;
uint32_t opts = pcmk__node_attr_none;
char *now = pcmk__ttoa(time(NULL));
// Fail count will be either incremented or set to infinity
if (!pcmk_str_is_infinity(value)) {
value = PCMK_XA_VALUE "++";
}
if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
opts |= pcmk__node_attr_remote;
}
crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
(ignore_failures? "last failure" : "failcount"),
rsc_id, on_uname, task, rc, value, now);
/* Update the fail count, if we're not ignoring failures */
if (!ignore_failures) {
fail_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
fail_name = pcmk__failcount_name(rsc_id, task, interval_ms);
fail_pair->name = fail_name;
fail_pair->value = value;
fail_pair->node = on_uname;
attrs = g_list_prepend(attrs, fail_pair);
}
/* Update the last failure time (even if we're ignoring failures,
* so that failure can still be detected and shown, e.g. by crm_mon)
*/
last_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
last_pair->name = last_name;
last_pair->value = now;
last_pair->node = on_uname;
attrs = g_list_prepend(attrs, last_pair);
update_attrd_list(attrs, opts);
free(fail_name);
free(fail_pair);
free(last_name);
free(last_pair);
g_list_free(attrs);
free(now);
}
bail:
free(rsc_id);
free(task);
return TRUE;
}
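/*!
* \internal
* \brief Find the action in the current transition graph with a given ID
*
* \param[in] id Action ID to search for
*
* \return Matching action if found, otherwise NULL
*/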
pcmk__graph_action_t *
controld_get_action(int id)
{
for (GList *item = controld_globals.transition_graph->synapses;
item != NULL; item = item->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
if (action->id == id) {
return action;
}
}
}
return NULL;
}
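/*!
* \internal
* \brief Find a cancellation of a given operation in the current graph
*
* \param[in] id   Operation key of operation being cancelled
* \param[in] node UUID of node the operation is on (NULL to match any node)
*
* \return Matching cancel action if found, otherwise NULL
*/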
pcmk__graph_action_t *
get_cancel_action(const char *id, const char *node)
{
GList *gIter = NULL;
GList *gIter2 = NULL;
gIter = controld_globals.transition_graph->synapses;
for (; gIter != NULL; gIter = gIter->next) {
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
gIter2 = synapse->actions;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
const char *task = NULL;
const char *target = NULL;
pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
task = crm_element_value(action->xml, PCMK_XA_OPERATION);
if (!pcmk__str_eq(PCMK_ACTION_CANCEL, task, pcmk__str_casei)) {
continue;
}
task = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
crm_trace("Wrong key %s for %s on %s", task, id, node);
continue;
}
target = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
crm_trace("Wrong node %s for %s on %s", target, id, node);
continue;
}
crm_trace("Found %s on %s", id, node);
return action;
}
}
return NULL;
}
bool
confirm_cancel_action(const char *id, const char *node_id)
{
const char *op_key = NULL;
const char *node_name = NULL;
pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);
if (cancel == NULL) {
return FALSE;
}
op_key = crm_element_value(cancel->xml, PCMK__XA_OPERATION_KEY);
node_name = crm_element_value(cancel->xml, PCMK__META_ON_NODE);
stop_te_timer(cancel);
te_action_confirmed(cancel, controld_globals.transition_graph);
crm_info("Cancellation of %s on %s confirmed (action %d)",
op_key, node_name, cancel->id);
return TRUE;
}
/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
#define XPATH_DOWNED "//" PCMK__XE_DOWNED \
"/" PCMK_XE_NODE "[@" PCMK_XA_ID "='%s']"
/*!
* \brief Find a transition event that would have made a specified node down
*
* \param[in] target UUID of node to match
*
* \return Matching event if found, NULL otherwise
*/
pcmk__graph_action_t *
match_down_event(const char *target)
{
pcmk__graph_action_t *match = NULL;
xmlXPathObjectPtr xpath_ret = NULL;
GList *gIter, *gIter2;
char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
for (gIter = controld_globals.transition_graph->synapses;
gIter != NULL && match == NULL;
gIter = gIter->next) {
        for (gIter2 = ((pcmk__graph_synapse_t *) gIter->data)->actions;
gIter2 != NULL && match == NULL;
gIter2 = gIter2->next) {
match = (pcmk__graph_action_t *) gIter2->data;
if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
xpath_ret = xpath_search(match->xml, xpath);
if (numXpathResults(xpath_ret) < 1) {
match = NULL;
}
freeXpathObject(xpath_ret);
} else {
// Only actions that were actually started can match
match = NULL;
}
}
}
free(xpath);
if (match != NULL) {
crm_debug("Shutdown action %d (%s) found for node %s", match->id,
crm_element_value(match->xml, PCMK__XA_OPERATION_KEY),
target);
} else {
crm_debug("No reason to expect node %s to be down", target);
}
return match;
}
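/* Example (based on the XPATH_DOWNED macro above): for target "1a2b3c", the
 * search expression expands to
 *
 *   //downed/node[@id='1a2b3c']
 *
 * which matches an executed action whose XML contains
 *
 *   <downed>
 *     <node id="1a2b3c"/>
 *   </downed>
 */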
void
process_graph_event(xmlNode *event, const char *event_node)
{
int rc = -1; // Actual result
int target_rc = -1; // Expected result
int status = -1; // Executor status
int callid = -1; // Executor call ID
int transition_num = -1; // Transition number
int action_num = -1; // Action number within transition
char *update_te_uuid = NULL;
bool ignore_failures = FALSE;
const char *id = NULL;
const char *desc = NULL;
const char *magic = NULL;
const char *uname = NULL;
CRM_ASSERT(event != NULL);
/*
<lrm_rsc_op id="rsc_east-05_last_0" operation_key="rsc_east-05_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0.6" transition-key="9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" transition-magic="0:7;9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1355361636" exec-time="128" queue-time="0" op-digest="c81f5f40b1c9e859c992e800b1aa6972"/>
*/
magic = crm_element_value(event, PCMK__XA_TRANSITION_KEY);
if (magic == NULL) {
/* non-change */
return;
}
crm_element_value_int(event, PCMK__XA_OP_STATUS, &status);
if (status == PCMK_EXEC_PENDING) {
return;
}
id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
crm_element_value_int(event, PCMK__XA_RC_CODE, &rc);
crm_element_value_int(event, PCMK__XA_CALL_ID, &callid);
rc = pcmk__effective_rc(rc);
if (decode_transition_key(magic, &update_te_uuid, &transition_num,
&action_num, &target_rc) == FALSE) {
// decode_transition_key() already logged the bad key
crm_err("Can't process action %s result: Incompatible versions? "
CRM_XS " call-id=%d", id, callid);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Bad event", event);
return;
}
if (transition_num == -1) {
// E.g. crm_resource --fail
if (record_outside_event(action_num) != pcmk_rc_ok) {
crm_debug("Outside event with transition key '%s' has already been "
"processed", magic);
goto bail;
}
desc = "initiated outside of the cluster";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Unexpected event", event);
} else if ((action_num < 0)
|| !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid,
pcmk__str_none)) {
desc = "initiated by a different DC";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Foreign event", event);
} else if ((controld_globals.transition_graph->id != transition_num)
|| controld_globals.transition_graph->complete) {
// Action is not from currently active transition
guint interval_ms = 0;
if (parse_op_key(id, NULL, NULL, &interval_ms)
&& (interval_ms != 0)) {
/* Recurring actions have the transition number they were first
* scheduled in.
*/
if (status == PCMK_EXEC_CANCELLED) {
confirm_cancel_action(id, get_node_id(event));
goto bail;
}
desc = "arrived after initial scheduling";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Change in recurring result", event);
} else if (controld_globals.transition_graph->id != transition_num) {
desc = "arrived really late";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Old event", event);
} else {
desc = "arrived late";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Inactive graph", event);
}
} else {
// Event is result of an action from currently active transition
pcmk__graph_action_t *action = controld_get_action(action_num);
if (action == NULL) {
// Should never happen
desc = "unknown";
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Unknown event", event);
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
            /* Nothing further needs to be done if the action has already been
             * confirmed. This can happen e.g. when processing an "xxx_last_0"
             * or "xxx_last_failure_0" record as well as the main history
             * record, which would otherwise result in incorrectly bumping the
             * fail count twice.
             */
crm_log_xml_debug(event, "Event already confirmed:");
goto bail;
} else {
/* An action result needs to be confirmed.
* (This is the only case where desc == NULL.)
*/
if (pcmk__str_eq(crm_meta_value(action->params, PCMK_META_ON_FAIL),
PCMK_VALUE_IGNORE, pcmk__str_casei)) {
ignore_failures = TRUE;
} else if (rc != target_rc) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
}
stop_te_timer(action);
te_action_confirmed(action, controld_globals.transition_graph);
if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
abort_transition(action->synapse->priority + 1,
pcmk__graph_restart, "Event failed", event);
}
}
}
if (id == NULL) {
id = "unknown action";
}
uname = crm_element_value(event, PCMK__META_ON_NODE);
if (uname == NULL) {
uname = "unknown node";
}
if (status == PCMK_EXEC_INVALID) {
// We couldn't attempt the action
crm_info("Transition %d action %d (%s on %s): %s",
transition_num, action_num, id, uname,
pcmk_exec_status_str(status));
} else if (desc && update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), FALSE)) {
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid, desc);
} else if (desc) {
crm_info("Transition %d action %d (%s on %s): %s "
CRM_XS " rc=%d target-rc=%d call-id=%d",
transition_num, action_num, id, uname,
desc, rc, target_rc, callid);
} else if (rc == target_rc) {
crm_info("Transition %d action %d (%s on %s) confirmed: %s "
CRM_XS " rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(rc), rc, callid);
} else {
update_failcount(event, event_node, rc, target_rc,
(transition_num == -1), ignore_failures);
crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
CRM_XS " target-rc=%d rc=%d call-id=%d",
transition_num, action_num, id, uname,
services_ocf_exitcode_str(target_rc),
services_ocf_exitcode_str(rc),
target_rc, rc, callid);
}
bail:
free(update_te_uuid);
}
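/* Example (hedged): in the sample lrm_rsc_op comment above, transition-key
 * "9:2:7:be2e97d9-05e2-439d-863e-48f7aecab2aa" decodes as action 9 of
 * transition 2 with expected rc 7 (not running, as expected for a probe),
 * created by the DC whose te_uuid is the trailing UUID; transition-magic
 * prefixes the same key with "<op-status>:<rc>;".
 */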
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
index 847f6b3327..fc0a8fd892 100644
--- a/daemons/controld/controld_utils.c
+++ b/daemons/controld/controld_utils.c
@@ -1,837 +1,837 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdlib.h>
#include <stdint.h> // uint64_t
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
const char *
fsa_input2string(enum crmd_fsa_input input)
{
const char *inputAsText = NULL;
switch (input) {
case I_NULL:
inputAsText = "I_NULL";
break;
case I_CIB_OP:
inputAsText = "I_CIB_OP (unused)";
break;
case I_CIB_UPDATE:
inputAsText = "I_CIB_UPDATE";
break;
case I_DC_TIMEOUT:
inputAsText = "I_DC_TIMEOUT";
break;
case I_ELECTION:
inputAsText = "I_ELECTION";
break;
case I_PE_CALC:
inputAsText = "I_PE_CALC";
break;
case I_RELEASE_DC:
inputAsText = "I_RELEASE_DC";
break;
case I_ELECTION_DC:
inputAsText = "I_ELECTION_DC";
break;
case I_ERROR:
inputAsText = "I_ERROR";
break;
case I_FAIL:
inputAsText = "I_FAIL";
break;
case I_INTEGRATED:
inputAsText = "I_INTEGRATED";
break;
case I_FINALIZED:
inputAsText = "I_FINALIZED";
break;
case I_NODE_JOIN:
inputAsText = "I_NODE_JOIN";
break;
case I_JOIN_OFFER:
inputAsText = "I_JOIN_OFFER";
break;
case I_JOIN_REQUEST:
inputAsText = "I_JOIN_REQUEST";
break;
case I_JOIN_RESULT:
inputAsText = "I_JOIN_RESULT";
break;
case I_NOT_DC:
inputAsText = "I_NOT_DC";
break;
case I_RECOVERED:
inputAsText = "I_RECOVERED";
break;
case I_RELEASE_FAIL:
inputAsText = "I_RELEASE_FAIL";
break;
case I_RELEASE_SUCCESS:
inputAsText = "I_RELEASE_SUCCESS";
break;
case I_RESTART:
inputAsText = "I_RESTART";
break;
case I_PE_SUCCESS:
inputAsText = "I_PE_SUCCESS";
break;
case I_ROUTER:
inputAsText = "I_ROUTER";
break;
case I_SHUTDOWN:
inputAsText = "I_SHUTDOWN";
break;
case I_STARTUP:
inputAsText = "I_STARTUP";
break;
case I_TE_SUCCESS:
inputAsText = "I_TE_SUCCESS";
break;
case I_STOP:
inputAsText = "I_STOP";
break;
case I_DC_HEARTBEAT:
inputAsText = "I_DC_HEARTBEAT";
break;
case I_WAIT_FOR_EVENT:
inputAsText = "I_WAIT_FOR_EVENT";
break;
case I_LRM_EVENT:
inputAsText = "I_LRM_EVENT";
break;
case I_PENDING:
inputAsText = "I_PENDING";
break;
case I_HALT:
inputAsText = "I_HALT";
break;
case I_TERMINATE:
inputAsText = "I_TERMINATE";
break;
case I_ILLEGAL:
inputAsText = "I_ILLEGAL";
break;
}
if (inputAsText == NULL) {
crm_err("Input %d is unknown", input);
inputAsText = "<UNKNOWN_INPUT>";
}
return inputAsText;
}
const char *
fsa_state2string(enum crmd_fsa_state state)
{
const char *stateAsText = NULL;
switch (state) {
case S_IDLE:
stateAsText = "S_IDLE";
break;
case S_ELECTION:
stateAsText = "S_ELECTION";
break;
case S_INTEGRATION:
stateAsText = "S_INTEGRATION";
break;
case S_FINALIZE_JOIN:
stateAsText = "S_FINALIZE_JOIN";
break;
case S_NOT_DC:
stateAsText = "S_NOT_DC";
break;
case S_POLICY_ENGINE:
stateAsText = "S_POLICY_ENGINE";
break;
case S_RECOVERY:
stateAsText = "S_RECOVERY";
break;
case S_RELEASE_DC:
stateAsText = "S_RELEASE_DC";
break;
case S_PENDING:
stateAsText = "S_PENDING";
break;
case S_STOPPING:
stateAsText = "S_STOPPING";
break;
case S_TERMINATE:
stateAsText = "S_TERMINATE";
break;
case S_TRANSITION_ENGINE:
stateAsText = "S_TRANSITION_ENGINE";
break;
case S_STARTING:
stateAsText = "S_STARTING";
break;
case S_HALT:
stateAsText = "S_HALT";
break;
case S_ILLEGAL:
stateAsText = "S_ILLEGAL";
break;
}
if (stateAsText == NULL) {
crm_err("State %d is unknown", state);
stateAsText = "<UNKNOWN_STATE>";
}
return stateAsText;
}
const char *
fsa_cause2string(enum crmd_fsa_cause cause)
{
const char *causeAsText = NULL;
switch (cause) {
case C_UNKNOWN:
causeAsText = "C_UNKNOWN";
break;
case C_STARTUP:
causeAsText = "C_STARTUP";
break;
case C_IPC_MESSAGE:
causeAsText = "C_IPC_MESSAGE";
break;
case C_HA_MESSAGE:
causeAsText = "C_HA_MESSAGE";
break;
case C_TIMER_POPPED:
causeAsText = "C_TIMER_POPPED";
break;
case C_SHUTDOWN:
causeAsText = "C_SHUTDOWN";
break;
case C_LRM_OP_CALLBACK:
causeAsText = "C_LRM_OP_CALLBACK";
break;
case C_CRMD_STATUS_CALLBACK:
causeAsText = "C_CRMD_STATUS_CALLBACK";
break;
case C_FSA_INTERNAL:
causeAsText = "C_FSA_INTERNAL";
break;
}
if (causeAsText == NULL) {
crm_err("Cause %d is unknown", cause);
causeAsText = "<UNKNOWN_CAUSE>";
}
return causeAsText;
}
const char *
fsa_action2string(long long action)
{
const char *actionAsText = NULL;
switch (action) {
case A_NOTHING:
actionAsText = "A_NOTHING";
break;
case A_ELECTION_START:
actionAsText = "A_ELECTION_START";
break;
case A_DC_JOIN_FINAL:
actionAsText = "A_DC_JOIN_FINAL";
break;
case A_READCONFIG:
actionAsText = "A_READCONFIG";
break;
case O_RELEASE:
actionAsText = "O_RELEASE";
break;
case A_STARTUP:
actionAsText = "A_STARTUP";
break;
case A_STARTED:
actionAsText = "A_STARTED";
break;
case A_HA_CONNECT:
actionAsText = "A_HA_CONNECT";
break;
case A_HA_DISCONNECT:
actionAsText = "A_HA_DISCONNECT";
break;
case A_LRM_CONNECT:
actionAsText = "A_LRM_CONNECT";
break;
case A_LRM_EVENT:
actionAsText = "A_LRM_EVENT";
break;
case A_LRM_INVOKE:
actionAsText = "A_LRM_INVOKE";
break;
case A_LRM_DISCONNECT:
actionAsText = "A_LRM_DISCONNECT";
break;
case O_LRM_RECONNECT:
actionAsText = "O_LRM_RECONNECT";
break;
case A_CL_JOIN_QUERY:
actionAsText = "A_CL_JOIN_QUERY";
break;
case A_DC_TIMER_STOP:
actionAsText = "A_DC_TIMER_STOP";
break;
case A_DC_TIMER_START:
actionAsText = "A_DC_TIMER_START";
break;
case A_INTEGRATE_TIMER_START:
actionAsText = "A_INTEGRATE_TIMER_START";
break;
case A_INTEGRATE_TIMER_STOP:
actionAsText = "A_INTEGRATE_TIMER_STOP";
break;
case A_FINALIZE_TIMER_START:
actionAsText = "A_FINALIZE_TIMER_START";
break;
case A_FINALIZE_TIMER_STOP:
actionAsText = "A_FINALIZE_TIMER_STOP";
break;
case A_ELECTION_COUNT:
actionAsText = "A_ELECTION_COUNT";
break;
case A_ELECTION_VOTE:
actionAsText = "A_ELECTION_VOTE";
break;
case A_ELECTION_CHECK:
actionAsText = "A_ELECTION_CHECK";
break;
case A_CL_JOIN_ANNOUNCE:
actionAsText = "A_CL_JOIN_ANNOUNCE";
break;
case A_CL_JOIN_REQUEST:
actionAsText = "A_CL_JOIN_REQUEST";
break;
case A_CL_JOIN_RESULT:
actionAsText = "A_CL_JOIN_RESULT";
break;
case A_DC_JOIN_OFFER_ALL:
actionAsText = "A_DC_JOIN_OFFER_ALL";
break;
case A_DC_JOIN_OFFER_ONE:
actionAsText = "A_DC_JOIN_OFFER_ONE";
break;
case A_DC_JOIN_PROCESS_REQ:
actionAsText = "A_DC_JOIN_PROCESS_REQ";
break;
case A_DC_JOIN_PROCESS_ACK:
actionAsText = "A_DC_JOIN_PROCESS_ACK";
break;
case A_DC_JOIN_FINALIZE:
actionAsText = "A_DC_JOIN_FINALIZE";
break;
case A_MSG_PROCESS:
actionAsText = "A_MSG_PROCESS";
break;
case A_MSG_ROUTE:
actionAsText = "A_MSG_ROUTE";
break;
case A_RECOVER:
actionAsText = "A_RECOVER";
break;
case A_DC_RELEASE:
actionAsText = "A_DC_RELEASE";
break;
case A_DC_RELEASED:
actionAsText = "A_DC_RELEASED";
break;
case A_DC_TAKEOVER:
actionAsText = "A_DC_TAKEOVER";
break;
case A_SHUTDOWN:
actionAsText = "A_SHUTDOWN";
break;
case A_SHUTDOWN_REQ:
actionAsText = "A_SHUTDOWN_REQ";
break;
case A_STOP:
actionAsText = "A_STOP ";
break;
case A_EXIT_0:
actionAsText = "A_EXIT_0";
break;
case A_EXIT_1:
actionAsText = "A_EXIT_1";
break;
case O_CIB_RESTART:
actionAsText = "O_CIB_RESTART";
break;
case A_CIB_START:
actionAsText = "A_CIB_START";
break;
case A_CIB_STOP:
actionAsText = "A_CIB_STOP";
break;
case A_TE_INVOKE:
actionAsText = "A_TE_INVOKE";
break;
case O_TE_RESTART:
actionAsText = "O_TE_RESTART";
break;
case A_TE_START:
actionAsText = "A_TE_START";
break;
case A_TE_STOP:
actionAsText = "A_TE_STOP";
break;
case A_TE_HALT:
actionAsText = "A_TE_HALT";
break;
case A_TE_CANCEL:
actionAsText = "A_TE_CANCEL";
break;
case A_PE_INVOKE:
actionAsText = "A_PE_INVOKE";
break;
case O_PE_RESTART:
actionAsText = "O_PE_RESTART";
break;
case A_PE_START:
actionAsText = "A_PE_START";
break;
case A_PE_STOP:
actionAsText = "A_PE_STOP";
break;
case A_NODE_BLOCK:
actionAsText = "A_NODE_BLOCK";
break;
case A_UPDATE_NODESTATUS:
actionAsText = "A_UPDATE_NODESTATUS";
break;
case A_LOG:
actionAsText = "A_LOG ";
break;
case A_ERROR:
actionAsText = "A_ERROR ";
break;
case A_WARN:
actionAsText = "A_WARN ";
break;
/* Composite actions */
case A_DC_TIMER_START | A_CL_JOIN_QUERY:
actionAsText = "A_DC_TIMER_START|A_CL_JOIN_QUERY";
break;
}
if (actionAsText == NULL) {
crm_err("Action %.16llx is unknown", action);
actionAsText = "<UNKNOWN_ACTION>";
}
return actionAsText;
}
void
fsa_dump_inputs(int log_level, const char *text, long long input_register)
{
if (input_register == A_NOTHING) {
return;
}
if (text == NULL) {
text = "Input register contents:";
}
if (pcmk_is_set(input_register, R_THE_DC)) {
crm_trace("%s %.16llx (R_THE_DC)", text, R_THE_DC);
}
if (pcmk_is_set(input_register, R_STARTING)) {
crm_trace("%s %.16llx (R_STARTING)", text, R_STARTING);
}
if (pcmk_is_set(input_register, R_SHUTDOWN)) {
crm_trace("%s %.16llx (R_SHUTDOWN)", text, R_SHUTDOWN);
}
if (pcmk_is_set(input_register, R_STAYDOWN)) {
crm_trace("%s %.16llx (R_STAYDOWN)", text, R_STAYDOWN);
}
if (pcmk_is_set(input_register, R_JOIN_OK)) {
crm_trace("%s %.16llx (R_JOIN_OK)", text, R_JOIN_OK);
}
if (pcmk_is_set(input_register, R_READ_CONFIG)) {
crm_trace("%s %.16llx (R_READ_CONFIG)", text, R_READ_CONFIG);
}
if (pcmk_is_set(input_register, R_INVOKE_PE)) {
crm_trace("%s %.16llx (R_INVOKE_PE)", text, R_INVOKE_PE);
}
if (pcmk_is_set(input_register, R_CIB_CONNECTED)) {
crm_trace("%s %.16llx (R_CIB_CONNECTED)", text, R_CIB_CONNECTED);
}
if (pcmk_is_set(input_register, R_PE_CONNECTED)) {
crm_trace("%s %.16llx (R_PE_CONNECTED)", text, R_PE_CONNECTED);
}
if (pcmk_is_set(input_register, R_TE_CONNECTED)) {
crm_trace("%s %.16llx (R_TE_CONNECTED)", text, R_TE_CONNECTED);
}
if (pcmk_is_set(input_register, R_LRM_CONNECTED)) {
crm_trace("%s %.16llx (R_LRM_CONNECTED)", text, R_LRM_CONNECTED);
}
if (pcmk_is_set(input_register, R_CIB_REQUIRED)) {
crm_trace("%s %.16llx (R_CIB_REQUIRED)", text, R_CIB_REQUIRED);
}
if (pcmk_is_set(input_register, R_PE_REQUIRED)) {
crm_trace("%s %.16llx (R_PE_REQUIRED)", text, R_PE_REQUIRED);
}
if (pcmk_is_set(input_register, R_TE_REQUIRED)) {
crm_trace("%s %.16llx (R_TE_REQUIRED)", text, R_TE_REQUIRED);
}
if (pcmk_is_set(input_register, R_REQ_PEND)) {
crm_trace("%s %.16llx (R_REQ_PEND)", text, R_REQ_PEND);
}
if (pcmk_is_set(input_register, R_PE_PEND)) {
crm_trace("%s %.16llx (R_PE_PEND)", text, R_PE_PEND);
}
if (pcmk_is_set(input_register, R_TE_PEND)) {
crm_trace("%s %.16llx (R_TE_PEND)", text, R_TE_PEND);
}
if (pcmk_is_set(input_register, R_RESP_PEND)) {
crm_trace("%s %.16llx (R_RESP_PEND)", text, R_RESP_PEND);
}
if (pcmk_is_set(input_register, R_CIB_DONE)) {
crm_trace("%s %.16llx (R_CIB_DONE)", text, R_CIB_DONE);
}
if (pcmk_is_set(input_register, R_HAVE_CIB)) {
crm_trace("%s %.16llx (R_HAVE_CIB)", text, R_HAVE_CIB);
}
if (pcmk_is_set(input_register, R_MEMBERSHIP)) {
crm_trace("%s %.16llx (R_MEMBERSHIP)", text, R_MEMBERSHIP);
}
if (pcmk_is_set(input_register, R_PEER_DATA)) {
crm_trace("%s %.16llx (R_PEER_DATA)", text, R_PEER_DATA);
}
if (pcmk_is_set(input_register, R_IN_RECOVERY)) {
crm_trace("%s %.16llx (R_IN_RECOVERY)", text, R_IN_RECOVERY);
}
}
void
fsa_dump_actions(uint64_t action, const char *text)
{
if (pcmk_is_set(action, A_READCONFIG)) {
crm_trace("Action %.16llx (A_READCONFIG) %s", A_READCONFIG, text);
}
if (pcmk_is_set(action, A_STARTUP)) {
crm_trace("Action %.16llx (A_STARTUP) %s", A_STARTUP, text);
}
if (pcmk_is_set(action, A_STARTED)) {
crm_trace("Action %.16llx (A_STARTED) %s", A_STARTED, text);
}
if (pcmk_is_set(action, A_HA_CONNECT)) {
crm_trace("Action %.16llx (A_CONNECT) %s", A_HA_CONNECT, text);
}
if (pcmk_is_set(action, A_HA_DISCONNECT)) {
crm_trace("Action %.16llx (A_DISCONNECT) %s", A_HA_DISCONNECT, text);
}
if (pcmk_is_set(action, A_LRM_CONNECT)) {
crm_trace("Action %.16llx (A_LRM_CONNECT) %s", A_LRM_CONNECT, text);
}
if (pcmk_is_set(action, A_LRM_EVENT)) {
crm_trace("Action %.16llx (A_LRM_EVENT) %s", A_LRM_EVENT, text);
}
if (pcmk_is_set(action, A_LRM_INVOKE)) {
crm_trace("Action %.16llx (A_LRM_INVOKE) %s", A_LRM_INVOKE, text);
}
if (pcmk_is_set(action, A_LRM_DISCONNECT)) {
crm_trace("Action %.16llx (A_LRM_DISCONNECT) %s", A_LRM_DISCONNECT, text);
}
if (pcmk_is_set(action, A_DC_TIMER_STOP)) {
crm_trace("Action %.16llx (A_DC_TIMER_STOP) %s", A_DC_TIMER_STOP, text);
}
if (pcmk_is_set(action, A_DC_TIMER_START)) {
crm_trace("Action %.16llx (A_DC_TIMER_START) %s", A_DC_TIMER_START, text);
}
if (pcmk_is_set(action, A_INTEGRATE_TIMER_START)) {
crm_trace("Action %.16llx (A_INTEGRATE_TIMER_START) %s", A_INTEGRATE_TIMER_START, text);
}
if (pcmk_is_set(action, A_INTEGRATE_TIMER_STOP)) {
crm_trace("Action %.16llx (A_INTEGRATE_TIMER_STOP) %s", A_INTEGRATE_TIMER_STOP, text);
}
if (pcmk_is_set(action, A_FINALIZE_TIMER_START)) {
crm_trace("Action %.16llx (A_FINALIZE_TIMER_START) %s", A_FINALIZE_TIMER_START, text);
}
if (pcmk_is_set(action, A_FINALIZE_TIMER_STOP)) {
crm_trace("Action %.16llx (A_FINALIZE_TIMER_STOP) %s", A_FINALIZE_TIMER_STOP, text);
}
if (pcmk_is_set(action, A_ELECTION_COUNT)) {
crm_trace("Action %.16llx (A_ELECTION_COUNT) %s", A_ELECTION_COUNT, text);
}
if (pcmk_is_set(action, A_ELECTION_VOTE)) {
crm_trace("Action %.16llx (A_ELECTION_VOTE) %s", A_ELECTION_VOTE, text);
}
if (pcmk_is_set(action, A_ELECTION_CHECK)) {
crm_trace("Action %.16llx (A_ELECTION_CHECK) %s", A_ELECTION_CHECK, text);
}
if (pcmk_is_set(action, A_CL_JOIN_ANNOUNCE)) {
crm_trace("Action %.16llx (A_CL_JOIN_ANNOUNCE) %s", A_CL_JOIN_ANNOUNCE, text);
}
if (pcmk_is_set(action, A_CL_JOIN_REQUEST)) {
crm_trace("Action %.16llx (A_CL_JOIN_REQUEST) %s", A_CL_JOIN_REQUEST, text);
}
if (pcmk_is_set(action, A_CL_JOIN_RESULT)) {
crm_trace("Action %.16llx (A_CL_JOIN_RESULT) %s", A_CL_JOIN_RESULT, text);
}
if (pcmk_is_set(action, A_DC_JOIN_OFFER_ALL)) {
crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ALL) %s", A_DC_JOIN_OFFER_ALL, text);
}
if (pcmk_is_set(action, A_DC_JOIN_OFFER_ONE)) {
crm_trace("Action %.16llx (A_DC_JOIN_OFFER_ONE) %s", A_DC_JOIN_OFFER_ONE, text);
}
if (pcmk_is_set(action, A_DC_JOIN_PROCESS_REQ)) {
crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_REQ) %s", A_DC_JOIN_PROCESS_REQ, text);
}
if (pcmk_is_set(action, A_DC_JOIN_PROCESS_ACK)) {
crm_trace("Action %.16llx (A_DC_JOIN_PROCESS_ACK) %s", A_DC_JOIN_PROCESS_ACK, text);
}
if (pcmk_is_set(action, A_DC_JOIN_FINALIZE)) {
crm_trace("Action %.16llx (A_DC_JOIN_FINALIZE) %s", A_DC_JOIN_FINALIZE, text);
}
if (pcmk_is_set(action, A_MSG_PROCESS)) {
crm_trace("Action %.16llx (A_MSG_PROCESS) %s", A_MSG_PROCESS, text);
}
if (pcmk_is_set(action, A_MSG_ROUTE)) {
crm_trace("Action %.16llx (A_MSG_ROUTE) %s", A_MSG_ROUTE, text);
}
if (pcmk_is_set(action, A_RECOVER)) {
crm_trace("Action %.16llx (A_RECOVER) %s", A_RECOVER, text);
}
if (pcmk_is_set(action, A_DC_RELEASE)) {
crm_trace("Action %.16llx (A_DC_RELEASE) %s", A_DC_RELEASE, text);
}
if (pcmk_is_set(action, A_DC_RELEASED)) {
crm_trace("Action %.16llx (A_DC_RELEASED) %s", A_DC_RELEASED, text);
}
if (pcmk_is_set(action, A_DC_TAKEOVER)) {
crm_trace("Action %.16llx (A_DC_TAKEOVER) %s", A_DC_TAKEOVER, text);
}
if (pcmk_is_set(action, A_SHUTDOWN)) {
crm_trace("Action %.16llx (A_SHUTDOWN) %s", A_SHUTDOWN, text);
}
if (pcmk_is_set(action, A_SHUTDOWN_REQ)) {
crm_trace("Action %.16llx (A_SHUTDOWN_REQ) %s", A_SHUTDOWN_REQ, text);
}
if (pcmk_is_set(action, A_STOP)) {
crm_trace("Action %.16llx (A_STOP ) %s", A_STOP, text);
}
if (pcmk_is_set(action, A_EXIT_0)) {
crm_trace("Action %.16llx (A_EXIT_0) %s", A_EXIT_0, text);
}
if (pcmk_is_set(action, A_EXIT_1)) {
crm_trace("Action %.16llx (A_EXIT_1) %s", A_EXIT_1, text);
}
if (pcmk_is_set(action, A_CIB_START)) {
crm_trace("Action %.16llx (A_CIB_START) %s", A_CIB_START, text);
}
if (pcmk_is_set(action, A_CIB_STOP)) {
crm_trace("Action %.16llx (A_CIB_STOP) %s", A_CIB_STOP, text);
}
if (pcmk_is_set(action, A_TE_INVOKE)) {
crm_trace("Action %.16llx (A_TE_INVOKE) %s", A_TE_INVOKE, text);
}
if (pcmk_is_set(action, A_TE_START)) {
crm_trace("Action %.16llx (A_TE_START) %s", A_TE_START, text);
}
if (pcmk_is_set(action, A_TE_STOP)) {
crm_trace("Action %.16llx (A_TE_STOP) %s", A_TE_STOP, text);
}
if (pcmk_is_set(action, A_TE_CANCEL)) {
crm_trace("Action %.16llx (A_TE_CANCEL) %s", A_TE_CANCEL, text);
}
if (pcmk_is_set(action, A_PE_INVOKE)) {
crm_trace("Action %.16llx (A_PE_INVOKE) %s", A_PE_INVOKE, text);
}
if (pcmk_is_set(action, A_PE_START)) {
crm_trace("Action %.16llx (A_PE_START) %s", A_PE_START, text);
}
if (pcmk_is_set(action, A_PE_STOP)) {
crm_trace("Action %.16llx (A_PE_STOP) %s", A_PE_STOP, text);
}
if (pcmk_is_set(action, A_NODE_BLOCK)) {
crm_trace("Action %.16llx (A_NODE_BLOCK) %s", A_NODE_BLOCK, text);
}
if (pcmk_is_set(action, A_UPDATE_NODESTATUS)) {
crm_trace("Action %.16llx (A_UPDATE_NODESTATUS) %s", A_UPDATE_NODESTATUS, text);
}
if (pcmk_is_set(action, A_LOG)) {
crm_trace("Action %.16llx (A_LOG ) %s", A_LOG, text);
}
if (pcmk_is_set(action, A_ERROR)) {
crm_trace("Action %.16llx (A_ERROR ) %s", A_ERROR, text);
}
if (pcmk_is_set(action, A_WARN)) {
crm_trace("Action %.16llx (A_WARN ) %s", A_WARN, text);
}
}
gboolean
update_dc(xmlNode * msg)
{
char *last_dc = controld_globals.dc_name;
const char *dc_version = NULL;
const char *welcome_from = NULL;
if (msg != NULL) {
gboolean invalid = FALSE;
dc_version = crm_element_value(msg, PCMK_XA_VERSION);
welcome_from = crm_element_value(msg, PCMK__XA_SRC);
CRM_CHECK(dc_version != NULL, return FALSE);
CRM_CHECK(welcome_from != NULL, return FALSE);
if (AM_I_DC
&& !pcmk__str_eq(welcome_from, controld_globals.our_nodename,
pcmk__str_casei)) {
invalid = TRUE;
} else if ((controld_globals.dc_name != NULL)
&& !pcmk__str_eq(welcome_from, controld_globals.dc_name,
pcmk__str_casei)) {
invalid = TRUE;
}
if (invalid) {
if (AM_I_DC) {
crm_err("Not updating DC to %s (%s): we are also a DC",
welcome_from, dc_version);
} else {
crm_warn("New DC %s is not %s",
welcome_from, controld_globals.dc_name);
}
controld_set_fsa_action_flags(A_CL_JOIN_QUERY | A_DC_TIMER_START);
controld_trigger_fsa();
return FALSE;
}
}
controld_globals.dc_name = NULL; // freed as last_dc
pcmk__str_update(&(controld_globals.dc_name), welcome_from);
pcmk__str_update(&(controld_globals.dc_version), dc_version);
if (pcmk__str_eq(controld_globals.dc_name, last_dc, pcmk__str_casei)) {
/* do nothing */
} else if (controld_globals.dc_name != NULL) {
crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
crm_info("Set DC to %s (%s)",
controld_globals.dc_name,
pcmk__s(controld_globals.dc_version, "unknown version"));
pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_MEMBER);
} else if (last_dc != NULL) {
crm_info("Unset DC (was %s)", last_dc);
}
free(last_dc);
return TRUE;
}
void crmd_peer_down(crm_node_t *peer, bool full)
{
    if (full && (peer->state == NULL)) {
pcmk__update_peer_state(__func__, peer, CRM_NODE_LOST, 0);
crm_update_peer_proc(__func__, peer, crm_proc_none, NULL);
}
crm_update_peer_join(__func__, peer, crm_join_none);
pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
}
/*!
* \internal
* \brief Check feature set compatibility of DC and joining node
*
* Return true if a joining node's CRM feature set is compatible with the
* current DC's. The feature sets are compatible if they have the same major
* version number, and the DC's minor version number is the same or older than
* the joining node's. The minor-minor version is intended solely to allow
* resource agents to detect feature support, and so is ignored.
*
* \param[in] dc_version DC's feature set
 * \param[in] join_version Joining node's feature set
 *
 * \return true if the feature sets are compatible, otherwise false
 */
bool
feature_set_compatible(const char *dc_version, const char *join_version)
{
char *dc_minor = NULL;
char *join_minor = NULL;
long dc_v = 0;
long join_v = 0;
// Get DC's major version
errno = 0;
dc_v = strtol(dc_version, &dc_minor, 10);
if (errno) {
return FALSE;
}
// Get joining node's major version
errno = 0;
join_v = strtol(join_version, &join_minor, 10);
if (errno) {
return FALSE;
}
// Major version component must be identical
if (dc_v != join_v) {
return FALSE;
}
// Get DC's minor version
if (*dc_minor == '.') {
++dc_minor;
}
errno = 0;
dc_v = strtol(dc_minor, NULL, 10);
if (errno) {
return FALSE;
}
// Get joining node's minor version
if (*join_minor == '.') {
++join_minor;
}
errno = 0;
join_v = strtol(join_minor, NULL, 10);
if (errno) {
return FALSE;
}
// DC's minor version must be the same or older
return dc_v <= join_v;
}
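/* Worked examples of the rule above (a sketch, using feature-set-style
 * "major.minor.minor-minor" strings):
 *
 *   DC "3.19.0" vs joining "3.19.6" -> compatible (same major and minor;
 *                                      minor-minor is ignored)
 *   DC "3.18.2" vs joining "3.19.0" -> compatible (DC minor is older)
 *   DC "3.19.0" vs joining "3.18.2" -> incompatible (DC minor is newer)
 *   DC "2.9.9"  vs joining "3.0.0"  -> incompatible (majors differ)
 */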
const char *
get_node_id(xmlNode *lrm_rsc_op)
{
xmlNode *node = lrm_rsc_op;
while ((node != NULL) && !pcmk__xe_is(node, PCMK__XE_NODE_STATE)) {
node = node->parent;
}
CRM_CHECK(node != NULL, return NULL);
return pcmk__xe_id(node);
}
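/* For reference, the ancestry walked above looks roughly like this in CIB
 * status XML (a minimal sketch; most attributes trimmed):
 *
 *   <node_state id="1" uname="node1">
 *     <lrm id="1">
 *       <lrm_resources>
 *         <lrm_resource id="myrsc">
 *           <lrm_rsc_op id="myrsc_last_0" .../>
 *
 * Starting from the lrm_rsc_op element, the loop climbs to the enclosing
 * node_state and returns its id ("1" here).
 */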
diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c
index bbe6de2feb..913518bc9f 100644
--- a/daemons/controld/pacemaker-controld.c
+++ b/daemons/controld/pacemaker-controld.c
@@ -1,223 +1,223 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <crm/crm.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/output_internal.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
#define SUMMARY "daemon for coordinating a Pacemaker cluster's response " \
"to events"
_Noreturn void crmd_init(void);
extern void init_dotfile(void);
controld_globals_t controld_globals = {
// Automatic initialization to 0, false, or NULL is fine for most members
.fsa_state = S_STARTING,
.fsa_actions = A_NOTHING,
};
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
/* @COMPAT Deprecated since 2.1.8. Use pcmk_list_cluster_options() or
* crm_attribute --list-options=cluster instead of querying daemon metadata.
*/
static int
controld_metadata(pcmk__output_t *out)
{
return pcmk__daemon_metadata(out, "pacemaker-controld",
"Pacemaker controller options",
"Cluster options used by Pacemaker's "
"controller",
pcmk__opt_controld);
}
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
return pcmk__build_arg_context(args, "text (default), xml", group, NULL);
}
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
crm_exit_t exit_code = CRM_EX_OK;
bool initialize = true;
crm_ipc_t *old_instance = NULL;
pcmk__output_t *out = NULL;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, NULL);
GOptionContext *context = build_arg_context(args, &output_group);
crm_log_preinit(NULL, argc, argv);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
if (args->version) {
out->version(out, false);
initialize = false;
goto done;
}
if ((g_strv_length(processed_args) >= 2)
&& pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
initialize = false;
rc = controld_metadata(out);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Unable to display metadata: %s", pcmk_rc_str(rc));
}
goto done;
}
pcmk__cli_init_logging("pacemaker-controld", args->verbosity);
crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_notice("Starting Pacemaker controller");
old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0);
if (old_instance == NULL) {
/* crm_ipc_new will have already printed an error message with crm_err. */
exit_code = CRM_EX_FATAL;
goto done;
}
if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
/* IPC end-point already up */
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("pacemaker-controld is already active, aborting startup");
initialize = false;
goto done;
} else {
/* not up or not authentic, we'll proceed either way */
crm_ipc_destroy(old_instance);
old_instance = NULL;
}
if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) {
exit_code = CRM_EX_FATAL;
crm_err("Terminating due to bad permissions on " PE_STATE_DIR);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Bad permissions on " PE_STATE_DIR
" (see logs for details)");
goto done;
} else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) {
exit_code = CRM_EX_FATAL;
crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR);
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Bad permissions on " CRM_CONFIG_DIR
" (see logs for details)");
goto done;
}
if (pcmk__log_output_new(&(controld_globals.logger_out)) != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
goto done;
}
pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
if ((exit_code == CRM_EX_OK) && initialize) {
// Does not return
crmd_init();
}
crm_exit(exit_code);
}
void
crmd_init(void)
{
crm_exit_t exit_code = CRM_EX_OK;
enum crmd_fsa_state state;
init_dotfile();
register_fsa_input(C_STARTUP, I_STARTUP, NULL);
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
state = s_crmd_fsa(C_STARTUP);
if (state == S_PENDING || state == S_STARTING) {
/* Create the mainloop and run it... */
crm_trace("Starting %s's mainloop", crm_system_name);
controld_globals.mainloop = g_main_loop_new(NULL, FALSE);
g_main_loop_run(controld_globals.mainloop);
if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
crm_info("Inhibiting automated respawn");
exit_code = CRM_EX_FATAL;
}
} else {
crm_err("Startup of %s failed. Current state: %s",
crm_system_name, fsa_state2string(state));
exit_code = CRM_EX_ERROR;
}
crm_info("%s[%lu] exiting with status %d (%s)",
crm_system_name, (unsigned long) getpid(), exit_code,
crm_exit_str(exit_code));
crmd_fast_exit(exit_code);
}
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 9e0e647192..9143af23d9 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1,3709 +1,3709 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <pacemaker-fenced.h>
GHashTable *device_list = NULL;
GHashTable *topology = NULL;
static GList *cmd_list = NULL;
static GHashTable *fenced_handlers = NULL;
struct device_search_s {
/* target of fence action */
char *host;
/* requested fence action */
char *action;
/* timeout to use if a device is queried dynamically for possible targets */
int per_device_timeout;
/* number of registered fencing devices at time of request */
int replies_needed;
/* number of device replies received so far */
int replies_received;
/* whether the target is eligible to perform requested action (or off) */
bool allow_suicide;
/* private data to pass to search callback function */
void *user_data;
/* function to call when all replies have been received */
void (*callback) (GList * devices, void *user_data);
/* devices capable of performing requested action (or off if remapping) */
GList *capable;
/* Whether to perform searches that support the action */
uint32_t support_action_only;
};
static gboolean stonith_device_dispatch(gpointer user_data);
static void st_child_done(int pid, const pcmk__action_result_t *result,
void *user_data);
static void search_devices_record_result(struct device_search_s *search, const char *device,
gboolean can_fence);
static int get_agent_metadata(const char *agent, xmlNode **metadata);
static void read_action_metadata(stonith_device_t *device);
static enum fenced_target_by unpack_level_kind(const xmlNode *level);
typedef struct async_command_s {
int id;
int pid;
int fd_stdout;
int options;
int default_timeout; /* seconds */
int timeout; /* seconds */
int start_delay; // seconds (-1 means disable static/random fencing delays)
int delay_id;
char *op;
char *origin;
char *client;
char *client_name;
char *remote_op_id;
char *target;
uint32_t target_nodeid;
char *action;
char *device;
GList *device_list;
GList *next_device_iter; // device_list entry for next device to execute
void *internal_user_data;
void (*done_cb) (int pid, const pcmk__action_result_t *result,
void *user_data);
guint timer_sigterm;
guint timer_sigkill;
/*! If the operation timed out, this is the last signal
* we sent to the process to get it to terminate */
int last_timeout_signo;
stonith_device_t *active_on;
stonith_device_t *activating_on;
} async_command_t;
static xmlNode *construct_async_reply(const async_command_t *cmd,
const pcmk__action_result_t *result);
static gboolean
is_action_required(const char *action, const stonith_device_t *device)
{
return (device != NULL) && device->automatic_unfencing
&& pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none);
}
static int
get_action_delay_max(const stonith_device_t *device, const char *action)
{
const char *value = NULL;
guint delay_max = 0U;
if (!pcmk__is_fencing_action(action)) {
return 0;
}
value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
if (value) {
pcmk_parse_interval_spec(value, &delay_max);
delay_max /= 1000;
}
return (int) delay_max;
}
static int
get_action_delay_base(const stonith_device_t *device, const char *action,
const char *target)
{
char *hash_value = NULL;
guint delay_base = 0U;
if (!pcmk__is_fencing_action(action)) {
return 0;
}
hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);
if (hash_value) {
char *value = pcmk__str_copy(hash_value);
char *valptr = value;
if (target != NULL) {
for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) {
char *mapval = strchr(val, ':');
if (mapval == NULL || mapval[1] == 0) {
crm_err("pcmk_delay_base: empty value in mapping", val);
continue;
}
if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) {
value = mapval + 1;
crm_debug("pcmk_delay_base mapped to %s for %s",
value, target);
break;
}
}
}
        if (strchr(value, ':') == NULL) {
pcmk_parse_interval_spec(value, &delay_base);
delay_base /= 1000;
}
free(valptr);
}
return (int) delay_base;
}
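/* Examples of the PCMK_STONITH_DELAY_BASE parsing above (illustrative):
 *
 *   pcmk_delay_base="5s"                  -> 5 for any target
 *   pcmk_delay_base="node1:2s;node2:10s"  -> 2 for target "node1",
 *                                            10 for target "node2"
 *
 * When a mapping exists but no entry matches the target, the remaining
 * string still contains ':' and is not parsed, so the result is 0.
 */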
/*!
* \internal
* \brief Override STONITH timeout with pcmk_*_timeout if available
*
* \param[in] device STONITH device to use
* \param[in] action STONITH action name
* \param[in] default_timeout Timeout to use if device does not have
* a pcmk_*_timeout parameter for action
*
* \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
* \note For consistency, it would be nice if reboot/off/on timeouts could be
* set the same way as start/stop/monitor timeouts, i.e. with an
* <operation> entry in the fencing resource configuration. However that
* is insufficient because fencing devices may be registered directly via
* the fencer's register_device() API instead of going through the CIB
* (e.g. stonith_admin uses it for its -R option, and the executor uses it
* to ensure a device is registered when a command is issued). As device
* properties, pcmk_*_timeout parameters can be grabbed by the fencer when
* the device is registered, whether by CIB change or API call.
*/
static int
get_action_timeout(const stonith_device_t *device, const char *action,
int default_timeout)
{
if (action && device && device->params) {
char buffer[64] = { 0, };
const char *value = NULL;
/* If "reboot" was requested but the device does not support it,
* we will remap to "off", so check timeout for "off" instead
*/
if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)
&& !pcmk_is_set(device->flags, st_device_supports_reboot)) {
crm_trace("%s doesn't support reboot, using timeout for off instead",
device->id);
action = PCMK_ACTION_OFF;
}
/* If the device config specified an action-specific timeout, use it */
snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
value = g_hash_table_lookup(device->params, buffer);
if (value) {
long long timeout_ms = crm_get_msec(value);
return (int) QB_MIN(timeout_ms / 1000, INT_MAX);
}
}
return default_timeout;
}
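/* Example (sketch): with device parameters
 *
 *   pcmk_reboot_timeout="120s"
 *   pcmk_off_timeout="60s"
 *
 * a "reboot" request returns 120; if the agent lacks reboot support, the
 * action is first remapped to "off" and the result is 60. Without either
 * parameter, default_timeout is returned unchanged.
 */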
/*!
* \internal
* \brief Get the currently executing device for a fencing operation
*
* \param[in] cmd Fencing operation to check
*
* \return Currently executing device for \p cmd if any, otherwise NULL
*/
static stonith_device_t *
cmd_device(const async_command_t *cmd)
{
if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) {
return NULL;
}
return g_hash_table_lookup(device_list, cmd->device);
}
/*!
* \internal
* \brief Return the configured reboot action for a given device
*
* \param[in] device_id Device ID
*
* \return Configured reboot action for \p device_id
*/
const char *
fenced_device_reboot_action(const char *device_id)
{
const char *action = NULL;
if ((device_list != NULL) && (device_id != NULL)) {
stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
if ((device != NULL) && (device->params != NULL)) {
action = g_hash_table_lookup(device->params, "pcmk_reboot_action");
}
}
return pcmk__s(action, PCMK_ACTION_REBOOT);
}
/*!
* \internal
* \brief Check whether a given device supports the "on" action
*
* \param[in] device_id Device ID
*
* \return true if \p device_id supports "on", otherwise false
*/
bool
fenced_device_supports_on(const char *device_id)
{
if ((device_list != NULL) && (device_id != NULL)) {
stonith_device_t *device = g_hash_table_lookup(device_list, device_id);
if (device != NULL) {
return pcmk_is_set(device->flags, st_device_supports_on);
}
}
return false;
}
static void
free_async_command(async_command_t * cmd)
{
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
cmd_list = g_list_remove(cmd_list, cmd);
g_list_free_full(cmd->device_list, free);
free(cmd->device);
free(cmd->action);
free(cmd->target);
free(cmd->remote_op_id);
free(cmd->client);
free(cmd->client_name);
free(cmd->origin);
free(cmd->op);
free(cmd);
}
/*!
* \internal
* \brief Create a new asynchronous fencing operation from request XML
*
* \param[in] msg Fencing request XML (from IPC or CPG)
*
* \return Newly allocated fencing operation on success, otherwise NULL
*
* \note This asserts on memory errors, so a NULL return indicates an
* unparseable message.
*/
static async_command_t *
create_async_command(xmlNode *msg)
{
xmlNode *op = NULL;
async_command_t *cmd = NULL;
if (msg == NULL) {
return NULL;
}
op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR);
if (op == NULL) {
return NULL;
}
cmd = pcmk__assert_alloc(1, sizeof(async_command_t));
// All messages must include these
cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION);
cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP);
cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID);
if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) {
free_async_command(cmd);
return NULL;
}
crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id));
crm_element_value_int(msg, PCMK__XA_ST_CALLOPT, &(cmd->options));
crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay));
crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout));
cmd->timeout = cmd->default_timeout;
cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC);
cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP);
cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME);
cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET);
cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID);
cmd->done_cb = st_child_done;
// Track in global command list
cmd_list = g_list_append(cmd_list, cmd);
return cmd;
}
static int
get_action_limit(stonith_device_t * device)
{
const char *value = NULL;
int action_limit = 1;
value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT);
if ((value == NULL)
|| (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
|| (action_limit == 0)) {
action_limit = 1;
}
return action_limit;
}
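/* Examples of how PCMK_STONITH_ACTION_LIMIT is interpreted above (sketch):
 *
 *   "2"            -> 2 concurrent actions
 *   "-1"           -> -1, which stonith_device_execute() treats as unlimited
 *   "0" or unparsable -> falls back to the default of 1
 */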
static int
get_active_cmds(stonith_device_t * device)
{
int counter = 0;
GList *gIter = NULL;
GList *gIterNext = NULL;
CRM_CHECK(device != NULL, return 0);
for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
async_command_t *cmd = gIter->data;
gIterNext = gIter->next;
if (cmd->active_on == device) {
counter++;
}
}
return counter;
}
static void
fork_cb(int pid, void *user_data)
{
async_command_t *cmd = (async_command_t *) user_data;
    stonith_device_t *device =
        /* In case of a retry, the move from activating_on to active_on
         * has already been done
         */
        cmd->activating_on? cmd->activating_on : cmd->active_on;
CRM_ASSERT(device);
crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
cmd->action, pid,
((cmd->target == NULL)? "" : " targeting "),
pcmk__s(cmd->target, ""), device->id, cmd->timeout);
cmd->active_on = device;
cmd->activating_on = NULL;
}
static int
get_agent_metadata_cb(gpointer data) {
stonith_device_t *device = data;
guint period_ms;
switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
case pcmk_rc_ok:
if (device->agent_metadata) {
read_action_metadata(device);
stonith__device_parameter_flags(&(device->flags), device->id,
device->agent_metadata);
}
return G_SOURCE_REMOVE;
case EAGAIN:
period_ms = pcmk__mainloop_timer_get_period(device->timer);
if (period_ms < 160 * 1000) {
mainloop_timer_set_period(device->timer, 2 * period_ms);
}
return G_SOURCE_CONTINUE;
default:
return G_SOURCE_REMOVE;
}
}
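/* The retry path above implements a simple backoff: on EAGAIN the timer
 * period doubles on each attempt (e.g. 10s, 20s, 40s, 80s, 160s, assuming a
 * 10s initial period, which is set elsewhere and not shown here) and stops
 * doubling once it reaches 160s, after which retries continue at that fixed
 * period.
 */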
/*!
* \internal
* \brief Call a command's action callback for an internal (not library) result
*
* \param[in,out] cmd Command to report result for
 * \param[in] exit_status Exit status to use for result
 * \param[in] execution_status Execution status to use for result
* \param[in] exit_reason Exit reason to use for result
*/
static void
report_internal_result(async_command_t *cmd, int exit_status,
int execution_status, const char *exit_reason)
{
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
pcmk__set_result(&result, exit_status, execution_status, exit_reason);
cmd->done_cb(0, &result, cmd);
pcmk__reset_result(&result);
}
static gboolean
stonith_device_execute(stonith_device_t * device)
{
int exec_rc = 0;
const char *action_str = NULL;
const char *host_arg = NULL;
async_command_t *cmd = NULL;
stonith_action_t *action = NULL;
int active_cmds = 0;
int action_limit = 0;
GList *gIter = NULL;
GList *gIterNext = NULL;
CRM_CHECK(device != NULL, return FALSE);
active_cmds = get_active_cmds(device);
action_limit = get_action_limit(device);
if (action_limit > -1 && active_cmds >= action_limit) {
crm_trace("%s is over its action limit of %d (%u active action%s)",
device->id, action_limit, active_cmds,
pcmk__plural_s(active_cmds));
return TRUE;
}
for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
async_command_t *pending_op = gIter->data;
gIterNext = gIter->next;
if (pending_op && pending_op->delay_id) {
crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
"waiting for start delay of %ds",
pending_op->action,
((pending_op->target == NULL)? "" : " targeting "),
pcmk__s(pending_op->target, ""),
device->id, pending_op->start_delay);
continue;
}
device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
g_list_free_1(gIter);
cmd = pending_op;
break;
}
if (cmd == NULL) {
crm_trace("No actions using %s are needed", device->id);
return TRUE;
}
if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
if (pcmk__is_fencing_action(cmd->action)) {
if (node_does_watchdog_fencing(stonith_our_uname)) {
pcmk__panic(__func__);
goto done;
}
} else {
crm_info("Faking success for %s watchdog operation", cmd->action);
report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
goto done;
}
}
#if SUPPORT_CIBSECRETS
exec_rc = pcmk__substitute_secrets(device->id, device->params);
if (exec_rc != pcmk_rc_ok) {
if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) {
crm_info("Proceeding with stop operation for %s "
"despite being unable to load CIB secrets (%s)",
device->id, pcmk_rc_str(exec_rc));
} else {
crm_err("Considering %s unconfigured "
"because unable to load CIB secrets: %s",
device->id, pcmk_rc_str(exec_rc));
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
"Failed to get CIB secrets");
goto done;
}
}
#endif
action_str = cmd->action;
if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none)
&& !pcmk_is_set(device->flags, st_device_supports_reboot)) {
crm_notice("Remapping 'reboot' action%s%s using %s to 'off' "
"because agent '%s' does not support reboot",
((cmd->target == NULL)? "" : " targeting "),
pcmk__s(cmd->target, ""), device->id, device->agent);
action_str = PCMK_ACTION_OFF;
}
if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
host_arg = "port";
} else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
host_arg = "plug";
}
action = stonith__action_create(device->agent, action_str, cmd->target,
cmd->target_nodeid, cmd->timeout,
device->params, device->aliases, host_arg);
    /* For async execution, exec_rc is negative on an early error exit;
     * otherwise, success and errors are handled via callbacks.
     */
cmd->activating_on = device;
exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
fork_cb);
if (exec_rc < 0) {
cmd->activating_on = NULL;
cmd->done_cb(0, stonith__action_result(action), cmd);
stonith__destroy_action(action);
}
done:
/* Device might get triggered to work by multiple fencing commands
* simultaneously. Trigger the device again to make sure any
* remaining concurrent commands get executed. */
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
return TRUE;
}
static gboolean
stonith_device_dispatch(gpointer user_data)
{
return stonith_device_execute(user_data);
}
static gboolean
start_delay_helper(gpointer data)
{
async_command_t *cmd = data;
stonith_device_t *device = cmd_device(cmd);
cmd->delay_id = 0;
if (device) {
mainloop_set_trigger(device->work);
}
return FALSE;
}
static void
schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
{
int delay_max = 0;
int delay_base = 0;
int requested_delay = cmd->start_delay;
CRM_CHECK(cmd != NULL, return);
CRM_CHECK(device != NULL, return);
if (cmd->device) {
free(cmd->device);
}
if (device->include_nodeid && (cmd->target != NULL)) {
crm_node_t *node = pcmk__get_node(0, cmd->target, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
cmd->target_nodeid = node->id;
}
cmd->device = pcmk__str_copy(device->id);
cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
if (cmd->remote_op_id) {
crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
"with op id %.8s and timeout %ds",
cmd->action,
(cmd->target == NULL)? "" : " targeting ",
pcmk__s(cmd->target, ""),
device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
} else {
crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
cmd->action,
(cmd->target == NULL)? "" : " targeting ",
pcmk__s(cmd->target, ""),
device->id, cmd->client, cmd->timeout);
}
device->pending_ops = g_list_append(device->pending_ops, cmd);
mainloop_set_trigger(device->work);
// Value -1 means disable any static/random fencing delays
if (requested_delay < 0) {
return;
}
delay_max = get_action_delay_max(device, cmd->action);
delay_base = get_action_delay_base(device, cmd->action, cmd->target);
if (delay_max == 0) {
delay_max = delay_base;
}
if (delay_max < delay_base) {
crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
"(limiting to maximum delay)",
delay_base, delay_max, cmd->action, device->id);
delay_base = delay_max;
}
if (delay_max > 0) {
// coverity[dontcall] It doesn't matter here if rand() is predictable
cmd->start_delay +=
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
+ delay_base;
}
if (cmd->start_delay > 0) {
crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS
" timeout=%ds requested_delay=%ds base=%ds max=%ds",
cmd->action,
(cmd->target == NULL)? "" : " targeting ",
pcmk__s(cmd->target, ""),
device->id, cmd->start_delay, cmd->timeout,
requested_delay, delay_base, delay_max);
cmd->delay_id =
g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
}
}
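/* Worked example of the random-delay arithmetic above (a sketch): with
 * delay_base=2 and delay_max=5, rand() % (5 - 2) yields 0..2, so the added
 * delay lands in [2, 4] seconds; delay_max itself is reachable only when it
 * equals delay_base. With only delay_base set, delay_max is raised to match
 * it and the delay is exactly delay_base.
 */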
static void
free_device(gpointer data)
{
GList *gIter = NULL;
stonith_device_t *device = data;
g_hash_table_destroy(device->params);
g_hash_table_destroy(device->aliases);
for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
async_command_t *cmd = gIter->data;
crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Device was removed before action could be executed");
}
g_list_free(device->pending_ops);
g_list_free_full(device->targets, free);
if (device->timer) {
mainloop_timer_stop(device->timer);
mainloop_timer_del(device->timer);
}
mainloop_destroy_trigger(device->work);
free_xml(device->agent_metadata);
free(device->namespace);
if (device->on_target_actions != NULL) {
g_string_free(device->on_target_actions, TRUE);
}
free(device->agent);
free(device->id);
free(device);
}
void free_device_list(void)
{
if (device_list != NULL) {
g_hash_table_destroy(device_list);
device_list = NULL;
}
}
void
init_device_list(void)
{
if (device_list == NULL) {
device_list = pcmk__strkey_table(NULL, free_device);
}
}
static GHashTable *
build_port_aliases(const char *hostmap, GList ** targets)
{
char *name = NULL;
int last = 0, lpc = 0, max = 0, added = 0;
GHashTable *aliases = pcmk__strikey_table(free, free);
if (hostmap == NULL) {
return aliases;
}
max = strlen(hostmap);
for (; lpc <= max; lpc++) {
switch (hostmap[lpc]) {
/* Skip escaped chars */
case '\\':
lpc++;
break;
/* Assignment chars */
case '=':
case ':':
if (lpc > last) {
free(name);
name = pcmk__assert_alloc(1, 1 + lpc - last);
memcpy(name, hostmap + last, lpc - last);
}
last = lpc + 1;
break;
            /* Delimiter chars */
/* case ',': Potentially used to specify multiple ports */
case 0:
case ';':
case ' ':
case '\t':
if (name) {
char *value = NULL;
int k = 0;
value = pcmk__assert_alloc(1, 1 + lpc - last);
memcpy(value, hostmap + last, lpc - last);
for (int i = 0; value[i] != '\0'; i++) {
if (value[i] != '\\') {
value[k++] = value[i];
}
}
value[k] = '\0';
crm_debug("Adding alias '%s'='%s'", name, value);
g_hash_table_replace(aliases, name, value);
if (targets) {
*targets = g_list_append(*targets, pcmk__str_copy(value));
}
value = NULL;
name = NULL;
added++;
} else if (lpc > last) {
crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
}
last = lpc + 1;
break;
}
if (hostmap[lpc] == 0) {
break;
}
}
if (added == 0) {
crm_info("No host mappings detected in '%s'", hostmap);
}
free(name);
return aliases;
}
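/* Example of the hostmap parsing above (illustrative):
 *
 *   hostmap = "node1:1;node2:2"
 *
 * yields aliases { "node1" -> "1", "node2" -> "2" } and, when targets is
 * non-NULL, appends "1" and "2" to the targets list. '=' works like ':',
 * entries are separated by ';', space, or tab, and a backslash prevents the
 * next character from being treated as an assignment or separator.
 */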
GHashTable *metadata_cache = NULL;
void
free_metadata_cache(void) {
if (metadata_cache != NULL) {
g_hash_table_destroy(metadata_cache);
metadata_cache = NULL;
}
}
static void
init_metadata_cache(void) {
if (metadata_cache == NULL) {
metadata_cache = pcmk__strkey_table(free, free);
}
}
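/*!
 * \internal
 * \brief Retrieve a fence agent's metadata, using a local cache
 *
 * \param[in]  agent     Fence agent name
 * \param[out] metadata  Where to store parsed metadata XML (left NULL for the
 *                       internal watchdog agent)
 *
 * \return pcmk_rc_ok on success, EINVAL if \p metadata is NULL, or EAGAIN if
 *         the metadata could not be retrieved (the caller may retry later)
 */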
int
get_agent_metadata(const char *agent, xmlNode ** metadata)
{
char *buffer = NULL;
if (metadata == NULL) {
return EINVAL;
}
*metadata = NULL;
if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
return pcmk_rc_ok;
}
init_metadata_cache();
buffer = g_hash_table_lookup(metadata_cache, agent);
if (buffer == NULL) {
stonith_t *st = stonith_api_new();
int rc;
if (st == NULL) {
crm_warn("Could not get agent meta-data: "
"API memory allocation failed");
return EAGAIN;
}
rc = st->cmds->metadata(st, st_opt_sync_call, agent,
NULL, &buffer, 10);
stonith_api_delete(st);
if (rc || !buffer) {
crm_err("Could not retrieve metadata for fencing agent %s", agent);
return EAGAIN;
}
g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer);
}
*metadata = pcmk__xml_parse(buffer);
return pcmk_rc_ok;
}
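/*!
 * \internal
 * \brief Check whether agent metadata advertises a "nodeid" parameter
 *
 * \param[in] xml  Agent metadata XML (may be NULL)
 *
 * \return TRUE if a "nodeid" parameter is advertised and we are not running
 *         stand-alone, otherwise FALSE
 */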
static gboolean
is_nodeid_required(xmlNode * xml)
{
xmlXPathObjectPtr xpath = NULL;
if (stand_alone) {
return FALSE;
}
if (!xml) {
return FALSE;
}
xpath = xpath_search(xml,
"//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']");
if (numXpathResults(xpath) <= 0) {
freeXpathObject(xpath);
return FALSE;
}
freeXpathObject(xpath);
return TRUE;
}
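/*!
 * \internal
 * \brief Update device flags based on the actions in its agent metadata
 *
 * Set the appropriate st_device_supports_* flags, detect automatic unfencing,
 * and record any actions that must be executed on the fence target itself.
 *
 * \param[in,out] device  Device to update
 */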
static void
read_action_metadata(stonith_device_t *device)
{
xmlXPathObjectPtr xpath = NULL;
int max = 0;
int lpc = 0;
if (device->agent_metadata == NULL) {
return;
}
xpath = xpath_search(device->agent_metadata, "//action");
max = numXpathResults(xpath);
if (max <= 0) {
freeXpathObject(xpath);
return;
}
for (lpc = 0; lpc < max; lpc++) {
const char *action = NULL;
xmlNode *match = getXpathResult(xpath, lpc);
CRM_LOG_ASSERT(match != NULL);
        if (match == NULL) { continue; }
action = crm_element_value(match, PCMK_XA_NAME);
if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
stonith__set_device_flags(device->flags, device->id,
st_device_supports_list);
} else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) {
stonith__set_device_flags(device->flags, device->id,
st_device_supports_status);
} else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
stonith__set_device_flags(device->flags, device->id,
st_device_supports_reboot);
} else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
/* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it
* joins.
*
* @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for
* PCMK_XA_AUTOMATIC.
*/
if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC)
|| pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) {
device->automatic_unfencing = TRUE;
}
stonith__set_device_flags(device->flags, device->id,
st_device_supports_on);
}
if ((action != NULL)
&& pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) {
pcmk__add_word(&(device->on_target_actions), 64, action);
}
}
freeXpathObject(xpath);
}
/*!
* \internal
* \brief Set a pcmk_*_action parameter if not already set
*
* \param[in,out] params Device parameters
* \param[in] action Name of action
* \param[in] value Value to use if action is not already set
*/
static void
map_action(GHashTable *params, const char *action, const char *value)
{
char *key = crm_strdup_printf("pcmk_%s_action", action);
if (g_hash_table_lookup(params, key)) {
crm_warn("Ignoring %s='%s', see %s instead",
STONITH_ATTR_ACTION_OP, value, key);
free(key);
} else {
crm_warn("Mapping %s='%s' to %s='%s'",
STONITH_ATTR_ACTION_OP, value, key, value);
g_hash_table_insert(params, key, pcmk__str_copy(value));
}
}
/*!
* \internal
* \brief Create device parameter table from XML
*
* \param[in] name Device name (used for logging only)
* \param[in] dev XML containing device parameters
*/
static GHashTable *
xml2device_params(const char *name, const xmlNode *dev)
{
GHashTable *params = xml2list(dev);
const char *value;
/* Action should never be specified in the device configuration,
* but we support it for users who are familiar with other software
* that worked that way.
*/
value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
if (value != NULL) {
crm_warn("%s has '%s' parameter, which should never be specified in configuration",
name, STONITH_ATTR_ACTION_OP);
if (*value == '\0') {
crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) {
crm_warn("Ignoring %s='reboot' (see " PCMK_OPT_STONITH_ACTION
" cluster property instead)",
STONITH_ATTR_ACTION_OP);
} else if (strcmp(value, PCMK_ACTION_OFF) == 0) {
map_action(params, PCMK_ACTION_REBOOT, value);
} else {
map_action(params, PCMK_ACTION_OFF, value);
map_action(params, PCMK_ACTION_REBOOT, value);
}
g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
}
return params;
}
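/*!
 * \internal
 * \brief Determine how a device's eligible targets should be checked
 *
 * \param[in] dev  Fence device to check
 *
 * \return The device's pcmk_host_check value if set, otherwise a default
 *         inferred from its other parameters and supported actions
 */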
static const char *
target_list_type(stonith_device_t * dev)
{
const char *check_type = NULL;
check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK);
if (check_type == NULL) {
if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) {
check_type = PCMK_VALUE_STATIC_LIST;
} else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) {
check_type = PCMK_VALUE_STATIC_LIST;
} else if (pcmk_is_set(dev->flags, st_device_supports_list)) {
check_type = PCMK_VALUE_DYNAMIC_LIST;
} else if (pcmk_is_set(dev->flags, st_device_supports_status)) {
check_type = PCMK_VALUE_STATUS;
} else {
check_type = PCMK_VALUE_NONE;
}
}
return check_type;
}
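/*!
 * \internal
 * \brief Create a new fence device entry from an XML definition
 *
 * \param[in,out] dev  XML containing the device definition
 *
 * \return Newly allocated device entry, or NULL if no agent was specified
 * \note The caller is responsible for freeing the result with free_device().
 */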
static stonith_device_t *
build_device_from_xml(xmlNode *dev)
{
const char *value;
stonith_device_t *device = NULL;
char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT);
CRM_CHECK(agent != NULL, return device);
device = pcmk__assert_alloc(1, sizeof(stonith_device_t));
device->id = crm_element_value_copy(dev, PCMK_XA_ID);
device->agent = agent;
device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE);
device->params = xml2device_params(device->id, dev);
value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
if (value) {
device->targets = stonith__parse_targets(value);
}
value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
device->aliases = build_port_aliases(value, &(device->targets));
value = target_list_type(device);
if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)
&& (device->targets != NULL)) {
// device->targets is necessary only with PCMK_VALUE_STATIC_LIST
g_list_free_full(device->targets, free);
device->targets = NULL;
}
switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
case pcmk_rc_ok:
if (device->agent_metadata) {
read_action_metadata(device);
stonith__device_parameter_flags(&(device->flags), device->id,
device->agent_metadata);
}
break;
case EAGAIN:
if (device->timer == NULL) {
device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
TRUE, get_agent_metadata_cb, device);
}
if (!mainloop_timer_running(device->timer)) {
mainloop_timer_start(device->timer);
}
break;
default:
break;
}
value = g_hash_table_lookup(device->params, "nodeid");
if (!value) {
device->include_nodeid = is_nodeid_required(device->agent_metadata);
}
value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES);
if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) {
device->automatic_unfencing = TRUE;
}
if (is_action_required(PCMK_ACTION_ON, device)) {
crm_info("Fencing device '%s' requires unfencing", device->id);
}
if (device->on_target_actions != NULL) {
crm_info("Fencing device '%s' requires actions (%s) to be executed "
"on target", device->id,
(const char *) device->on_target_actions->str);
}
device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
/* TODO: Hook up priority */
return device;
}
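/*!
 * \internal
 * \brief Schedule an internally generated fence agent action
 *
 * \param[in]     origin              Name of calling function (for logging)
 * \param[in,out] device              Device to execute the action
 * \param[in]     action              Action to execute
 * \param[in]     target              Target of action (may be NULL)
 * \param[in]     timeout             Timeout in seconds (60 is used if 0)
 * \param[in]     internal_user_data  Data that \p done_cb can retrieve from
 *                                    the command
 * \param[in]     done_cb             Function to call when action completes
 */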
static void
schedule_internal_command(const char *origin,
stonith_device_t * device,
const char *action,
const char *target,
int timeout,
void *internal_user_data,
void (*done_cb) (int pid,
const pcmk__action_result_t *result,
void *user_data))
{
async_command_t *cmd = NULL;
cmd = pcmk__assert_alloc(1, sizeof(async_command_t));
cmd->id = -1;
cmd->default_timeout = timeout ? timeout : 60;
cmd->timeout = cmd->default_timeout;
cmd->action = pcmk__str_copy(action);
cmd->target = pcmk__str_copy(target);
cmd->device = pcmk__str_copy(device->id);
cmd->origin = pcmk__str_copy(origin);
cmd->client = pcmk__str_copy(crm_system_name);
cmd->client_name = pcmk__str_copy(crm_system_name);
cmd->internal_user_data = internal_user_data;
cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
schedule_stonith_command(cmd, device);
}
// Fence agent status commands use custom exit status codes
enum fence_status_code {
fence_status_invalid = -1,
fence_status_active = 0,
fence_status_unknown = 1,
fence_status_inactive = 2,
};
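/*!
 * \internal
 * \brief Process the result of a "status" action in a device search
 *
 * Record whether the device can fence the search target, based on the
 * action's execution status and the agent's custom exit status code.
 *
 * \param[in]     pid        Process ID of action (unused)
 * \param[in]     result     Result of action
 * \param[in,out] user_data  Command that completed (async_command_t *)
 */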
static void
status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd_device(cmd);
gboolean can = FALSE;
free_async_command(cmd);
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
if (result->execution_status != PCMK_EXEC_DONE) {
crm_warn("Assuming %s cannot fence %s "
"because status could not be executed: %s%s%s%s",
dev->id, search->host,
pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
search_devices_record_result(search, dev->id, FALSE);
return;
}
switch (result->exit_status) {
case fence_status_unknown:
crm_trace("%s reported it cannot fence %s", dev->id, search->host);
break;
case fence_status_active:
case fence_status_inactive:
crm_trace("%s reported it can fence %s", dev->id, search->host);
can = TRUE;
break;
default:
crm_warn("Assuming %s cannot fence %s "
"(status returned unknown code %d)",
dev->id, search->host, result->exit_status);
break;
}
search_devices_record_result(search, dev->id, can);
}
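/*!
 * \internal
 * \brief Process the result of a "list" action in a device search
 *
 * On success, refresh the device's cached target list from the action's
 * output, then record whether the search target (or its alias) appears in
 * the list.
 *
 * \param[in]     pid        Process ID of action (unused)
 * \param[in]     result     Result of action
 * \param[in,out] user_data  Command that completed (async_command_t *)
 */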
static void
dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
void *user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
stonith_device_t *dev = cmd_device(cmd);
gboolean can_fence = FALSE;
free_async_command(cmd);
    /* The host or its alias must appear in the list output to be eligible to
     * be fenced.
     *
     * This will cause problems if nodes that are down aren't listed, or (for
     * virtual nodes) if a guest is still listed despite having been moved to
     * another machine.
     */
if (!dev) {
search_devices_record_result(search, NULL, FALSE);
return;
}
mainloop_set_trigger(dev->work);
if (pcmk__result_ok(result)) {
crm_info("Refreshing target list for %s", dev->id);
g_list_free_full(dev->targets, free);
dev->targets = stonith__parse_targets(result->action_stdout);
dev->targets_age = time(NULL);
} else if (dev->targets != NULL) {
if (result->execution_status == PCMK_EXEC_DONE) {
crm_info("Reusing most recent target list for %s "
"because list returned error code %d",
dev->id, result->exit_status);
} else {
crm_info("Reusing most recent target list for %s "
"because list could not be executed: %s%s%s%s",
dev->id, pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
}
} else { // We have never successfully executed list
if (result->execution_status == PCMK_EXEC_DONE) {
crm_warn("Assuming %s cannot fence %s "
"because list returned error code %d",
dev->id, search->host, result->exit_status);
} else {
crm_warn("Assuming %s cannot fence %s "
"because list could not be executed: %s%s%s%s",
dev->id, search->host,
pcmk_exec_status_str(result->execution_status),
((result->exit_reason == NULL)? "" : " ("),
((result->exit_reason == NULL)? "" : result->exit_reason),
((result->exit_reason == NULL)? "" : ")"));
}
/* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't
* explicitly specify PCMK_VALUE_DYNAMIC_LIST
*/
if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) {
crm_notice("Switching to pcmk_host_check='status' for %s", dev->id);
pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK,
PCMK_VALUE_STATUS);
}
}
if (dev->targets) {
const char *alias = g_hash_table_lookup(dev->aliases, search->host);
if (!alias) {
alias = search->host;
}
if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) {
can_fence = TRUE;
}
}
search_devices_record_result(search, dev->id, can_fence);
}
/*!
 * \internal
 * \brief Check whether one device parameter table differs from another
 *
 * CRM_meta and crm_feature_set entries are ignored in the comparison.
 *
 * \param[in] first   Table whose keys should be checked
 * \param[in] second  Table to compare against
 *
 * \return 1 if any key in \p first is missing from \p second or has a
 *         different value there, otherwise 0
 */
static int
device_params_diff(GHashTable *first, GHashTable *second) {
char *key = NULL;
char *value = NULL;
GHashTableIter gIter;
g_hash_table_iter_init(&gIter, first);
while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
if(strstr(key, "CRM_meta") == key) {
continue;
} else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) {
continue;
} else {
char *other_value = g_hash_table_lookup(second, key);
if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
crm_trace("Different value for %s: %s != %s", key, other_value, value);
return 1;
}
}
}
return 0;
}
/*!
 * \internal
 * \brief Check whether an identical device is already in the device list
 *
 * \param[in] device  Device to check for
 *
 * \return Matching device list entry if found, otherwise NULL
 */
static stonith_device_t *
device_has_duplicate(const stonith_device_t *device)
{
stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
if (!dup) {
crm_trace("No match for %s", device->id);
return NULL;
} else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
crm_trace("Different agent: %s != %s", dup->agent, device->agent);
return NULL;
}
/* Use calculate_operation_digest() here? */
if (device_params_diff(device->params, dup->params) ||
device_params_diff(dup->params, device->params)) {
return NULL;
}
crm_trace("Match");
return dup;
}
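/*!
 * \internal
 * \brief Register a fence device from an XML definition
 *
 * \param[in] dev       XML containing the device definition
 * \param[in] from_cib  Whether the registration request came from the CIB
 *                      (as opposed to the fencer API)
 *
 * \return pcmk_ok on success, or a negative legacy error code otherwise
 */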
int
stonith_device_register(xmlNode *dev, gboolean from_cib)
{
stonith_device_t *dup = NULL;
stonith_device_t *device = build_device_from_xml(dev);
guint ndevices = 0;
int rv = pcmk_ok;
CRM_CHECK(device != NULL, return -ENOMEM);
/* do we have a watchdog-device? */
if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
if (stonith_watchdog_timeout_ms <= 0) {
crm_err("Ignoring watchdog fence device without "
PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set.");
rv = -ENODEV;
/* fall through to cleanup & return */
} else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
crm_err("Ignoring watchdog fence device with unknown "
"agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
device->agent?device->agent:"");
rv = -ENODEV;
/* fall through to cleanup & return */
} else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
pcmk__str_none)) {
crm_err("Ignoring watchdog fence device "
"named %s !='"STONITH_WATCHDOG_ID"'.",
device->id?device->id:"");
rv = -ENODEV;
/* fall through to cleanup & return */
} else {
if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
pcmk__str_none)) {
            /* The device's target list is either empty or holds the targets
             * configured for watchdog fencing, so adopt it as the watchdog
             * target list
             */
g_list_free_full(stonith_watchdog_targets, free);
stonith_watchdog_targets = device->targets;
device->targets = NULL;
}
if (node_does_watchdog_fencing(stonith_our_uname)) {
g_list_free_full(device->targets, free);
device->targets = stonith__parse_targets(stonith_our_uname);
pcmk__insert_dup(device->params,
PCMK_STONITH_HOST_LIST, stonith_our_uname);
/* proceed as with any other stonith-device */
break;
}
crm_debug("Skip registration of watchdog fence device on node not in host-list.");
        /* Clean up here, then fall through to the final cleanup and return */
device->targets = NULL;
stonith_device_remove(device->id, from_cib);
}
free_device(device);
return rv;
} while (0);
dup = device_has_duplicate(device);
if (dup) {
ndevices = g_hash_table_size(device_list);
crm_debug("Device '%s' already in device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
free_device(device);
        device = dup;
        device->dirty = FALSE;
} else {
stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
if (from_cib && old && old->api_registered) {
/* If the cib is writing over an entry that is shared with a stonith client,
* copy any pending ops that currently exist on the old entry to the new one.
* Otherwise the pending ops will be reported as failures
*/
crm_info("Overwriting existing entry for %s from CIB", device->id);
device->pending_ops = old->pending_ops;
device->api_registered = TRUE;
old->pending_ops = NULL;
if (device->pending_ops) {
mainloop_set_trigger(device->work);
}
}
g_hash_table_replace(device_list, device->id, device);
ndevices = g_hash_table_size(device_list);
crm_notice("Added '%s' to device list (%d active device%s)",
device->id, ndevices, pcmk__plural_s(ndevices));
}
if (from_cib) {
device->cib_registered = TRUE;
} else {
device->api_registered = TRUE;
}
return pcmk_ok;
}
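/*!
 * \internal
 * \brief Remove a device from the device list if no longer registered
 *
 * Clear the device's CIB or API registration flag, as specified, and remove
 * the device only once it is registered via neither source.
 *
 * \param[in] id        ID of device to remove
 * \param[in] from_cib  Whether removal request came from the CIB
 */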
void
stonith_device_remove(const char *id, bool from_cib)
{
stonith_device_t *device = g_hash_table_lookup(device_list, id);
guint ndevices = 0;
if (!device) {
ndevices = g_hash_table_size(device_list);
crm_info("Device '%s' not found (%d active device%s)",
id, ndevices, pcmk__plural_s(ndevices));
return;
}
if (from_cib) {
device->cib_registered = FALSE;
} else {
device->verified = FALSE;
device->api_registered = FALSE;
}
if (!device->cib_registered && !device->api_registered) {
g_hash_table_remove(device_list, id);
ndevices = g_hash_table_size(device_list);
crm_info("Removed '%s' from device list (%d active device%s)",
id, ndevices, pcmk__plural_s(ndevices));
} else {
crm_trace("Not removing '%s' from device list (%d active) because "
"still registered via:%s%s",
id, g_hash_table_size(device_list),
(device->cib_registered? " cib" : ""),
(device->api_registered? " api" : ""));
}
}
/*!
* \internal
* \brief Return the number of stonith levels registered for a node
*
* \param[in] tp Node's topology table entry
*
* \return Number of non-NULL levels in topology entry
* \note This function is used only for log messages.
*/
static int
count_active_levels(const stonith_topology_t *tp)
{
int lpc = 0;
int count = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
count++;
}
}
return count;
}
static void
free_topology_entry(gpointer data)
{
stonith_topology_t *tp = data;
int lpc = 0;
for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
if (tp->levels[lpc] != NULL) {
g_list_free_full(tp->levels[lpc], free);
}
}
free(tp->target);
free(tp->target_value);
free(tp->target_pattern);
free(tp->target_attribute);
free(tp);
}
void
free_topology_list(void)
{
if (topology != NULL) {
g_hash_table_destroy(topology);
topology = NULL;
}
}
void
init_topology_list(void)
{
if (topology == NULL) {
topology = pcmk__strkey_table(NULL, free_topology_entry);
}
}
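/*!
 * \internal
 * \brief Build a key that identifies a topology level's target
 *
 * \param[in] level  Topology level XML
 * \param[in] mode   How to identify the target (parsed from \p level if
 *                   fenced_target_by_unknown)
 *
 * \return Newly allocated string key for the level's target
 * \note The caller is responsible for freeing the result.
 */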
char *
stonith_level_key(const xmlNode *level, enum fenced_target_by mode)
{
if (mode == fenced_target_by_unknown) {
mode = unpack_level_kind(level);
}
switch (mode) {
case fenced_target_by_name:
return crm_element_value_copy(level, PCMK_XA_TARGET);
case fenced_target_by_pattern:
return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN);
case fenced_target_by_attribute:
return crm_strdup_printf("%s=%s",
crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE),
crm_element_value(level, PCMK_XA_TARGET_VALUE));
default:
return crm_strdup_printf("unknown-%s", pcmk__xe_id(level));
}
}
/*!
* \internal
* \brief Parse target identification from topology level XML
*
* \param[in] level Topology level XML to parse
*
* \return How to identify target of \p level
*/
static enum fenced_target_by
unpack_level_kind(const xmlNode *level)
{
if (crm_element_value(level, PCMK_XA_TARGET) != NULL) {
return fenced_target_by_name;
}
if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) {
return fenced_target_by_pattern;
}
if (!stand_alone /* if standalone, there's no attribute manager */
&& (crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL)
&& (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) {
return fenced_target_by_attribute;
}
return fenced_target_by_unknown;
}
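/*!
 * \internal
 * \brief Parse a comma-separated list of device IDs
 *
 * \param[in] devices  Comma-separated list of device IDs (may be NULL)
 *
 * \return Newly allocated list of entries (NULL if \p devices is NULL)
 * \note The caller is responsible for freeing the result with
 *       stonith_key_value_freeall().
 */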
static stonith_key_value_t *
parse_device_list(const char *devices)
{
int lpc = 0;
int max = 0;
int last = 0;
stonith_key_value_t *output = NULL;
if (devices == NULL) {
return output;
}
max = strlen(devices);
for (lpc = 0; lpc <= max; lpc++) {
if (devices[lpc] == ',' || devices[lpc] == 0) {
char *line = strndup(devices + last, lpc - last);
output = stonith_key_value_add(output, NULL, line);
free(line);
last = lpc + 1;
}
}
return output;
}
/*!
* \internal
* \brief Unpack essential information from topology request XML
*
* \param[in] xml Request XML to search
* \param[out] mode If not NULL, where to store level kind
* \param[out] target If not NULL, where to store representation of target
* \param[out] id If not NULL, where to store level number
* \param[out] desc If not NULL, where to store log-friendly level description
*
* \return Topology level XML from within \p xml, or NULL if not found
* \note The caller is responsible for freeing \p *target and \p *desc if set.
*/
static xmlNode *
unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target,
int *id, char **desc)
{
enum fenced_target_by local_mode = fenced_target_by_unknown;
char *local_target = NULL;
int local_id = 0;
/* The level element can be the top element or lower. If top level, don't
* search by xpath, because it might give multiple hits if the XML is the
* CIB.
*/
if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) {
xml = get_xpath_object("//" PCMK_XE_FENCING_LEVEL, xml, LOG_WARNING);
}
if (xml == NULL) {
if (desc != NULL) {
*desc = crm_strdup_printf("missing");
}
} else {
local_mode = unpack_level_kind(xml);
local_target = stonith_level_key(xml, local_mode);
crm_element_value_int(xml, PCMK_XA_INDEX, &local_id);
if (desc != NULL) {
*desc = crm_strdup_printf("%s[%d]", local_target, local_id);
}
}
if (mode != NULL) {
*mode = local_mode;
}
if (id != NULL) {
*id = local_id;
}
if (target != NULL) {
*target = local_target;
} else {
free(local_target);
}
return xml;
}
/*!
* \internal
* \brief Register a fencing topology level for a target
*
* Given an XML request specifying the target name, level index, and device IDs
* for the level, this will create an entry for the target in the global topology
* table if one does not already exist, then append the specified device IDs to
* the entry's device list for the specified level.
*
* \param[in] msg XML request for STONITH level registration
* \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
* \param[out] result Where to set result of registration
*/
void
fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
{
int id = 0;
xmlNode *level;
enum fenced_target_by mode;
char *target;
stonith_topology_t *tp;
stonith_key_value_t *dIter = NULL;
stonith_key_value_t *devices = NULL;
CRM_CHECK((msg != NULL) && (result != NULL), return);
level = unpack_level_request(msg, &mode, &target, &id, desc);
if (level == NULL) {
fenced_set_protocol_error(result);
return;
}
// Ensure an ID was given (even the client API adds an ID)
if (pcmk__str_empty(pcmk__xe_id(level))) {
crm_warn("Ignoring registration for topology level without ID");
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Topology level is invalid without ID");
return;
}
// Ensure a valid target was specified
if (mode == fenced_target_by_unknown) {
crm_warn("Ignoring registration for topology level '%s' "
"without valid target", pcmk__xe_id(level));
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid target for topology level '%s'",
pcmk__xe_id(level));
return;
}
// Ensure level ID is in allowed range
if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
crm_warn("Ignoring topology registration for %s with invalid level %d",
target, id);
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid level number '%s' for topology level '%s'",
pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
""),
pcmk__xe_id(level));
return;
}
/* Find or create topology table entry */
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t));
tp->kind = mode;
tp->target = target;
tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE);
tp->target_pattern = crm_element_value_copy(level,
PCMK_XA_TARGET_PATTERN);
tp->target_attribute = crm_element_value_copy(level,
PCMK_XA_TARGET_ATTRIBUTE);
g_hash_table_replace(topology, tp->target, tp);
crm_trace("Added %s (%d) to the topology (%d active entries)",
target, (int) mode, g_hash_table_size(topology));
} else {
free(target);
}
if (tp->levels[id] != NULL) {
crm_info("Adding to the existing %s[%d] topology entry",
tp->target, id);
}
devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES));
for (dIter = devices; dIter; dIter = dIter->next) {
const char *device = dIter->value;
crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device));
}
stonith_key_value_freeall(devices, 1, 1);
{
int nlevels = count_active_levels(tp);
crm_info("Target %s has %d active fencing level%s",
tp->target, nlevels, pcmk__plural_s(nlevels));
}
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
/*!
* \internal
* \brief Unregister a fencing topology level for a target
*
* Given an XML request specifying the target name and level index (or 0 for all
* levels), this will remove any corresponding entry for the target from the
* global topology table.
*
 * \param[in] msg XML request for STONITH level unregistration
* \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]"
* \param[out] result Where to set result of unregistration
*/
void
fenced_unregister_level(xmlNode *msg, char **desc,
pcmk__action_result_t *result)
{
int id = -1;
stonith_topology_t *tp;
char *target;
xmlNode *level = NULL;
CRM_CHECK(result != NULL, return);
level = unpack_level_request(msg, NULL, &target, &id, desc);
if (level == NULL) {
fenced_set_protocol_error(result);
return;
}
// Ensure level ID is in allowed range
if ((id < 0) || (id >= ST_LEVEL_MAX)) {
crm_warn("Ignoring topology unregistration for %s with invalid level %d",
target, id);
free(target);
crm_log_xml_trace(level, "Bad level");
pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
"Invalid level number '%s' for topology level %s",
pcmk__s(crm_element_value(level, PCMK_XA_INDEX),
"<null>"),
// Client API doesn't add ID to unregistration XML
pcmk__s(pcmk__xe_id(level), ""));
return;
}
tp = g_hash_table_lookup(topology, target);
if (tp == NULL) {
guint nentries = g_hash_table_size(topology);
crm_info("No fencing topology found for %s (%d active %s)",
target, nentries,
pcmk__plural_alt(nentries, "entry", "entries"));
} else if (id == 0 && g_hash_table_remove(topology, target)) {
guint nentries = g_hash_table_size(topology);
crm_info("Removed all fencing topology entries related to %s "
"(%d active %s remaining)", target, nentries,
pcmk__plural_alt(nentries, "entry", "entries"));
} else if (tp->levels[id] != NULL) {
guint nlevels;
g_list_free_full(tp->levels[id], free);
tp->levels[id] = NULL;
nlevels = count_active_levels(tp);
crm_info("Removed level %d from fencing topology for %s "
"(%d active level%s remaining)",
id, target, nlevels, pcmk__plural_s(nlevels));
}
free(target);
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
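/*!
 * \internal
 * \brief Join a list of strings into one newly allocated string
 *
 * \param[in] list                  List of strings to join
 * \param[in] delim                 Delimiter to insert between entries
 * \param[in] terminate_with_delim  Whether to also append \p delim after the
 *                                  final entry
 *
 * \return Newly allocated string (the caller is responsible for freeing it)
 */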
static char *
list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
{
int max = g_list_length(list);
size_t delim_len = delim?strlen(delim):0;
size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
char *rv;
GList *gIter;
char *pos = NULL;
const char *lead_delim = "";
for (gIter = list; gIter != NULL; gIter = gIter->next) {
const char *value = (const char *) gIter->data;
alloc_size += strlen(value);
}
rv = pcmk__assert_alloc(alloc_size, sizeof(char));
pos = rv;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
const char *value = (const char *) gIter->data;
pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
lead_delim = delim;
}
if (max && terminate_with_delim) {
sprintf(pos, "%s", delim);
}
return rv;
}
/*!
* \internal
* \brief Execute a fence agent action directly (and asynchronously)
*
* Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
* directly on a specified device. Only list, monitor, and status actions are
* expected to use this call, though it should work with any agent command.
*
* \param[in] msg Request XML specifying action
* \param[out] result Where to store result of action
*
* \note If the action is monitor, the device must be registered via the API
* (CIB registration is not sufficient), because monitor should not be
* possible unless the device is "started" (API registered).
*/
static void
execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
{
xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, msg, LOG_ERR);
xmlNode *op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg,
LOG_ERR);
const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION);
async_command_t *cmd = NULL;
stonith_device_t *device = NULL;
if ((id == NULL) || (action == NULL)) {
crm_info("Malformed API action request: device %s, action %s",
(id? id : "not specified"),
(action? action : "not specified"));
fenced_set_protocol_error(result);
return;
}
if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
// Watchdog agent actions are implemented internally
if (stonith_watchdog_timeout_ms <= 0) {
pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Watchdog fence device not configured");
return;
} else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) {
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_result_output(result,
list_to_string(stonith_watchdog_targets,
"\n", TRUE),
NULL);
return;
} else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) {
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return;
}
}
device = g_hash_table_lookup(device_list, id);
if (device == NULL) {
crm_info("Ignoring API '%s' action request because device %s not found",
action, id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"'%s' not found", id);
return;
} else if (!device->api_registered
&& (strcmp(action, PCMK_ACTION_MONITOR) == 0)) {
// Monitors may run only on "started" (API-registered) devices
crm_info("Ignoring API '%s' action request because device %s not active",
action, id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"'%s' not active", id);
return;
}
cmd = create_async_command(msg);
if (cmd == NULL) {
crm_log_xml_warn(msg, "invalid");
fenced_set_protocol_error(result);
return;
}
schedule_stonith_command(cmd, device);
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
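/*!
 * \internal
 * \brief Record one device's result in an ongoing capable-device search
 *
 * Add the device to the search's list of capable devices if it can fence the
 * target (and supports any required action). Once all expected replies have
 * arrived, invoke the search callback with the results and free the search.
 *
 * \param[in,out] search     Search in progress
 * \param[in]     device     ID of device that replied (may be NULL)
 * \param[in]     can_fence  Whether the device can fence the target
 */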
static void
search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
{
    search->replies_received++;
    if (can_fence && device) {
        gboolean supported = TRUE;
        if (search->support_action_only != st_device_supports_none) {
            stonith_device_t *dev = g_hash_table_lookup(device_list, device);
            if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) {
                /* Don't record an unsupported device as capable, but fall
                 * through so the completion check below still runs even when
                 * this is the final expected reply
                 */
                supported = FALSE;
            }
        }
        if (supported) {
            search->capable = g_list_append(search->capable,
                                            pcmk__str_copy(device));
        }
    }
if (search->replies_needed == search->replies_received) {
guint ndevices = g_list_length(search->capable);
crm_debug("Search found %d device%s that can perform '%s' targeting %s",
ndevices, pcmk__plural_s(ndevices),
(search->action? search->action : "unknown action"),
(search->host? search->host : "any node"));
search->callback(search->capable, search->user_data);
free(search->host);
free(search->action);
free(search);
}
}
/*!
* \internal
* \brief Check whether the local host is allowed to execute a fencing action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Hostname of fence target
* \param[in] allow_suicide Whether self-fencing is allowed for this operation
*
* \return TRUE if local host is allowed to execute action, FALSE otherwise
*/
static gboolean
localhost_is_eligible(const stonith_device_t *device, const char *action,
const char *target, gboolean allow_suicide)
{
gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname,
pcmk__str_casei);
if ((device != NULL) && (action != NULL)
&& (device->on_target_actions != NULL)
&& (strstr((const char*) device->on_target_actions->str,
action) != NULL)) {
if (!localhost_is_target) {
crm_trace("Operation '%s' using %s can only be executed for local "
"host, not %s", action, device->id, target);
return FALSE;
}
} else if (localhost_is_target && !allow_suicide) {
crm_trace("'%s' operation does not support self-fencing", action);
return FALSE;
}
return TRUE;
}
/*!
* \internal
* \brief Check if local node is allowed to execute (possibly remapped) action
*
* \param[in] device Fence device to check
* \param[in] action Fence action to check
* \param[in] target Node name of fence target
* \param[in] allow_self Whether self-fencing is allowed for this operation
*
* \return true if local node is allowed to execute \p action or any actions it
* might be remapped to, otherwise false
*/
static bool
localhost_is_eligible_with_remap(const stonith_device_t *device,
const char *action, const char *target,
gboolean allow_self)
{
// Check exact action
if (localhost_is_eligible(device, action, target, allow_self)) {
return true;
}
// Check potential remaps
if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
/* "reboot" might get remapped to "off" then "on", so even if reboot is
* disallowed, return true if either of those is allowed. We'll report
* the disallowed actions with the results. We never allow self-fencing
* for remapped "on" actions because the target is off at that point.
*/
if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self)
|| localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) {
return true;
}
}
return false;
}
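/*!
 * \internal
 * \brief Check whether a device can fence a search's target, recording result
 *
 * Depending on the device's pcmk_host_check type, the result may be recorded
 * immediately, or a "list" or "status" action may be scheduled whose callback
 * records the result asynchronously.
 *
 * \param[in,out] dev     Device to check
 * \param[in,out] search  Search in progress
 */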
static void
can_fence_host_with_device(stonith_device_t *dev,
struct device_search_s *search)
{
gboolean can = FALSE;
const char *check_type = "Internal bug";
const char *target = NULL;
const char *alias = NULL;
const char *dev_id = "Unspecified device";
const char *action = (search == NULL)? NULL : search->action;
CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results);
if (dev->id != NULL) {
dev_id = dev->id;
}
target = search->host;
if (target == NULL) {
can = TRUE;
check_type = "No target";
goto search_report_results;
}
/* Answer immediately if the device does not support the action
* or the local node is not allowed to perform it
*/
if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)
&& !pcmk_is_set(dev->flags, st_device_supports_on)) {
check_type = "Agent does not support 'on'";
goto search_report_results;
} else if (!localhost_is_eligible_with_remap(dev, action, target,
search->allow_suicide)) {
check_type = "This node is not allowed to execute action";
goto search_report_results;
}
// Check eligibility as specified by pcmk_host_check
check_type = target_list_type(dev);
alias = g_hash_table_lookup(dev->aliases, target);
if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) {
can = TRUE;
} else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST,
pcmk__str_casei)) {
if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) {
can = TRUE;
} else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
&& g_hash_table_lookup(dev->aliases, target)) {
can = TRUE;
}
} else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST,
pcmk__str_casei)) {
time_t now = time(NULL);
if (dev->targets == NULL || dev->targets_age + 60 < now) {
int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST,
search->per_device_timeout);
if (device_timeout > search->per_device_timeout) {
crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s "
"is larger than " PCMK_OPT_STONITH_TIMEOUT
" (%ds), timeout may occur",
device_timeout, dev_id, search->per_device_timeout);
}
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev_id, target, action);
schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL,
search->per_device_timeout, search, dynamic_list_search_cb);
/* we'll respond to this search request async in the cb */
return;
}
if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets,
pcmk__str_casei)) {
can = TRUE;
}
} else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) {
int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout);
if (device_timeout > search->per_device_timeout) {
crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is "
"larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), "
"timeout may occur",
device_timeout, dev_id, search->per_device_timeout);
}
crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
check_type, dev_id, target, action);
schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target,
search->per_device_timeout, search, status_search_cb);
/* we'll respond to this search request async in the cb */
return;
} else {
crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
}
search_report_results:
crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s",
dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"),
pcmk__s(target, "unspecified target"),
(alias == NULL)? "" : " (as '", pcmk__s(alias, ""),
(alias == NULL)? "" : "')", check_type);
search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can);
}
static void
search_devices(gpointer key, gpointer value, gpointer user_data)
{
stonith_device_t *dev = value;
struct device_search_s *search = user_data;
can_fence_host_with_device(dev, search);
}
#define DEFAULT_QUERY_TIMEOUT 20
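/*!
 * \internal
 * \brief Search all registered devices for those capable of an action
 *
 * \param[in]     host                 Name of node to fence (may be NULL)
 * \param[in]     action               Fence action to check for
 * \param[in]     timeout              Per-device timeout in seconds
 * \param[in]     suicide              Whether self-fencing is allowed
 * \param[in,out] user_data            Data to pass to \p callback
 * \param[in]     callback             Function to call with the list of
 *                                     capable devices once the search is done
 * \param[in]     support_action_only  If not st_device_supports_none, report
 *                                     only devices with this flag set
 */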
static void
get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
void (*callback) (GList * devices, void *user_data), uint32_t support_action_only)
{
struct device_search_s *search;
guint ndevices = g_hash_table_size(device_list);
if (ndevices == 0) {
callback(NULL, user_data);
return;
}
search = pcmk__assert_alloc(1, sizeof(struct device_search_s));
search->host = pcmk__str_copy(host);
search->action = pcmk__str_copy(action);
search->per_device_timeout = timeout;
search->allow_suicide = suicide;
search->callback = callback;
search->user_data = user_data;
search->support_action_only = support_action_only;
/* We are guaranteed this many replies, even if a device is
* unregistered while the search is in progress.
*/
search->replies_needed = ndevices;
crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
ndevices, pcmk__plural_s(ndevices),
(search->action? search->action : "unknown action"),
(search->host? search->host : "any node"));
g_hash_table_foreach(device_list, search_devices, search);
}
struct st_query_data {
xmlNode *reply;
char *remote_peer;
char *client_id;
char *target;
char *action;
int call_options;
};
/*!
* \internal
* \brief Add action-specific attributes to query reply XML
*
* \param[in,out] xml XML to add attributes to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
*/
static void
add_action_specific_attributes(xmlNode *xml, const char *action,
const stonith_device_t *device,
const char *target)
{
int action_specific_timeout;
int delay_max;
int delay_base;
CRM_CHECK(xml && action && device, return);
// PCMK__XA_ST_REQUIRED is currently used only for unfencing
if (is_action_required(action, device)) {
crm_trace("Action '%s' is required using %s", action, device->id);
crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1);
}
// pcmk_<action>_timeout if configured
action_specific_timeout = get_action_timeout(device, action, 0);
if (action_specific_timeout) {
crm_trace("Action '%s' has timeout %ds using %s",
action, action_specific_timeout, device->id);
crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
action_specific_timeout);
}
delay_max = get_action_delay_max(device, action);
if (delay_max > 0) {
crm_trace("Action '%s' has maximum random delay %ds using %s",
action, delay_max, device->id);
crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max);
}
delay_base = get_action_delay_base(device, action, target);
if (delay_base > 0) {
crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base);
}
if ((delay_max > 0) && (delay_base == 0)) {
crm_trace("Action '%s' has maximum random delay %ds using %s",
action, delay_max, device->id);
} else if ((delay_max == 0) && (delay_base > 0)) {
crm_trace("Action '%s' has a static delay of %ds using %s",
action, delay_base, device->id);
} else if ((delay_max > 0) && (delay_base > 0)) {
crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen "
"maximum delay of %ds using %s",
action, delay_base, delay_max, device->id);
}
}
/*!
* \internal
* \brief Add "disallowed" attribute to query reply XML if appropriate
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device,
const char *target, gboolean allow_suicide)
{
if (!localhost_is_eligible(device, action, target, allow_suicide)) {
crm_trace("Action '%s' using %s is disallowed for local host",
action, device->id);
pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true);
}
}
/*!
* \internal
* \brief Add child element with action-specific values to query reply XML
*
* \param[in,out] xml XML to add attribute to
* \param[in] action Fence action
* \param[in] device Fence device
* \param[in] target Fence target
* \param[in] allow_suicide Whether self-fencing is allowed
*/
static void
add_action_reply(xmlNode *xml, const char *action,
const stonith_device_t *device, const char *target,
gboolean allow_suicide)
{
xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION);
crm_xml_add(child, PCMK_XA_ID, action);
add_action_specific_attributes(child, action, device, target);
add_disallowed(child, action, device, target, allow_suicide);
}
/*!
* \internal
* \brief Send a reply to a CPG peer or IPC client
*
* \param[in] reply XML reply to send
* \param[in] call_options Send synchronously if st_opt_sync_call is set
* \param[in] remote_peer If not NULL, name of peer node to send CPG reply
* \param[in,out] client If not NULL, client to send IPC reply
*/
static void
stonith_send_reply(const xmlNode *reply, int call_options,
const char *remote_peer, pcmk__client_t *client)
{
CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
return);
if (remote_peer == NULL) {
do_local_reply(reply, client, call_options);
} else {
send_cluster_message(pcmk__get_node(0, remote_peer, NULL,
- pcmk__node_search_cluster),
+ pcmk__node_search_cluster_member),
crm_msg_stonith_ng, reply, FALSE);
}
}
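/*!
 * \internal
 * \brief Pack the results of a capable-device search into a query reply
 *
 * For each device found, add a child element with the device's details and
 * action-specific values, then send the reply to the requester.
 *
 * \param[in]     devices    List of IDs of capable devices
 * \param[in,out] user_data  Query data (struct st_query_data *)
 */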
static void
stonith_query_capable_device_cb(GList * devices, void *user_data)
{
struct st_query_data *query = user_data;
int available_devices = 0;
xmlNode *dev = NULL;
xmlNode *list = NULL;
GList *lpc = NULL;
pcmk__client_t *client = NULL;
if (query->client_id != NULL) {
client = pcmk__find_client_by_id(query->client_id);
if ((client == NULL) && (query->remote_peer == NULL)) {
crm_trace("Skipping reply to %s: no longer a client",
query->client_id);
goto done;
}
}
/* Pack the results into XML */
list = pcmk__xe_create(NULL, __func__);
crm_xml_add(list, PCMK__XA_ST_TARGET, query->target);
for (lpc = devices; lpc != NULL; lpc = lpc->next) {
stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
const char *action = query->action;
if (!device) {
/* It is possible the device got unregistered while
* determining who can fence the target */
continue;
}
available_devices++;
dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID);
crm_xml_add(dev, PCMK_XA_ID, device->id);
crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace);
crm_xml_add(dev, PCMK_XA_AGENT, device->agent);
// Has had successful monitor, list, or status on this node
crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified);
crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags);
/* If the originating fencer wants to reboot the node, and we have a
* capable device that doesn't support "reboot", remap to "off" instead.
*/
if (!pcmk_is_set(device->flags, st_device_supports_reboot)
&& pcmk__str_eq(query->action, PCMK_ACTION_REBOOT,
pcmk__str_none)) {
crm_trace("%s doesn't support reboot, using values for off instead",
device->id);
action = PCMK_ACTION_OFF;
}
/* Add action-specific values if available */
add_action_specific_attributes(dev, action, device, query->target);
if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
/* A "reboot" *might* get remapped to "off" then "on", so after
* sending the "reboot"-specific values in the main element, we add
* sub-elements for "off" and "on" values.
*
* We short-circuited earlier if "reboot", "off" and "on" are all
* disallowed for the local host. However if only one or two are
* disallowed, we send back the results and mark which ones are
* disallowed. If "reboot" is disallowed, this might cause problems
* with older fencer versions, which won't check for it. Older
* versions will ignore "off" and "on", so they are not a problem.
*/
add_disallowed(dev, action, device, query->target,
pcmk_is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, PCMK_ACTION_OFF, device, query->target,
pcmk_is_set(query->call_options, st_opt_allow_suicide));
add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE);
}
/* A query without a target wants device parameters */
if (query->target == NULL) {
xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES);
g_hash_table_foreach(device->params, hash2field, attrs);
}
}
crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices);
if (query->target) {
crm_debug("Found %d matching device%s for target '%s'",
available_devices, pcmk__plural_s(available_devices),
query->target);
} else {
crm_debug("%d device%s installed",
available_devices, pcmk__plural_s(available_devices));
}
if (list != NULL) {
xmlNode *wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA);
crm_log_xml_trace(list, "Add query results");
xmlAddChild(wrapper, list);
}
stonith_send_reply(query->reply, query->call_options, query->remote_peer,
client);
done:
free_xml(query->reply);
free(query->remote_peer);
free(query->client_id);
free(query->target);
free(query->action);
free(query);
g_list_free_full(devices, free);
}
/*!
* \internal
* \brief Log the result of an asynchronous command
*
* \param[in] cmd Command the result is for
* \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] next Alternate device that will be tried if command failed
* \param[in] op_merged Whether this command was merged with an earlier one
*/
static void
log_async_result(const async_command_t *cmd,
const pcmk__action_result_t *result,
int pid, const char *next, bool op_merged)
{
int log_level = LOG_ERR;
int output_log_level = LOG_NEVER;
guint devices_remaining = g_list_length(cmd->next_device_iter);
GString *msg = g_string_sized_new(80); // Reasonable starting size
// Choose log levels appropriately if we have a result
if (pcmk__result_ok(result)) {
log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
pcmk__str_none)) {
output_log_level = LOG_DEBUG;
}
next = NULL;
} else {
log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
pcmk__str_none)) {
output_log_level = LOG_WARNING;
}
}
// Build the log message piece by piece
pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
if (pid != 0) {
g_string_append_printf(msg, "[%d] ", pid);
}
if (cmd->target != NULL) {
pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
}
if (cmd->device != NULL) {
pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
}
// Add exit status or execution status as appropriate
if (result->execution_status == PCMK_EXEC_DONE) {
g_string_append_printf(msg, "returned %d", result->exit_status);
} else {
pcmk__g_strcat(msg, "could not be executed: ",
pcmk_exec_status_str(result->execution_status), NULL);
}
// Add exit reason and next device if appropriate
if (result->exit_reason != NULL) {
pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
}
if (next != NULL) {
pcmk__g_strcat(msg, ", retrying with ", next, NULL);
}
if (devices_remaining > 0) {
g_string_append_printf(msg, " (%u device%s remaining)",
(unsigned int) devices_remaining,
pcmk__plural_s(devices_remaining));
}
g_string_append_printf(msg, " " CRM_XS " %scall %d from %s",
(op_merged? "merged " : ""), cmd->id,
cmd->client_name);
// Log the result
do_crm_log(log_level, "%s", msg->str);
g_string_free(msg, TRUE);
// Log the output (which may have multiple lines), if appropriate
if (output_log_level != LOG_NEVER) {
char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
crm_log_output(output_log_level, prefix, result->action_stdout);
free(prefix);
}
}
/*!
* \internal
* \brief Reply to requester after asynchronous command completion
*
* \param[in] cmd Command that completed
* \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] merged If true, command was merged with another, not executed
*/
static void
send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
int pid, bool merged)
{
xmlNode *reply = NULL;
pcmk__client_t *client = NULL;
CRM_CHECK((cmd != NULL) && (result != NULL), return);
log_async_result(cmd, result, pid, NULL, merged);
if (cmd->client != NULL) {
client = pcmk__find_client_by_id(cmd->client);
if ((client == NULL) && (cmd->origin == NULL)) {
crm_trace("Skipping reply to %s: no longer a client", cmd->client);
return;
}
}
reply = construct_async_reply(cmd, result);
if (merged) {
pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true);
}
if (!stand_alone && pcmk__is_fencing_action(cmd->action)
&& pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
/* The target was also the originator, so broadcast the result on its
* behalf (since it will be unable to).
*/
crm_trace("Broadcast '%s' result for %s (target was also originator)",
cmd->action, cmd->target);
crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
} else {
// Reply only to the originator
stonith_send_reply(reply, cmd->options, cmd->origin, client);
}
crm_log_xml_trace(reply, "Reply");
free_xml(reply);
if (stand_alone) {
/* Do notification with a clean data object */
xmlNode *notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_NOTIFY_FENCE);
stonith__xe_set_result(notify_data, result);
crm_xml_add(notify_data, PCMK__XA_ST_TARGET, cmd->target);
crm_xml_add(notify_data, PCMK__XA_ST_OP, cmd->op);
crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, "localhost");
crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, cmd->device);
crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, cmd->client);
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, result,
notify_data);
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
}
}
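/*!
 * \internal
 * \brief Remove a command from its device's list of pending operations
 *
 * \param[in,out] cmd  Command to cancel
 */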
static void
cancel_stonith_command(async_command_t * cmd)
{
stonith_device_t *device = cmd_device(cmd);
if (device) {
crm_trace("Cancel scheduled '%s' action using %s",
cmd->action, device->id);
device->pending_ops = g_list_remove(device->pending_ops, cmd);
}
}
/*!
* \internal
* \brief Cancel and reply to any duplicates of a just-completed operation
*
* Check whether any fencing operations are scheduled to do the same thing as
* one that just succeeded. If so, rather than performing the same operation
* twice, return the result of this operation for all matching pending commands.
*
* \param[in,out] cmd Fencing operation that just succeeded
* \param[in] result Result of \p cmd
* \param[in] pid If nonzero, process ID of agent invocation (for logs)
*
* \note Duplicate merging will do the right thing for either type of remapped
* reboot. If the executing fencer remapped an unsupported reboot to off,
* then cmd->action will be "reboot" and will be merged with any other
* reboot requests. If the originating fencer remapped a topology reboot
* to off then on, we will get here once with cmd->action "off" and once
* with "on", and they will be merged separately with similar requests.
*/
static void
reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
int pid)
{
GList *next = NULL;
for (GList *iter = cmd_list; iter != NULL; iter = next) {
async_command_t *cmd_other = iter->data;
next = iter->next; // We might delete this entry, so grab next now
if (cmd == cmd_other) {
continue;
}
/* A pending operation matches if:
* 1. The client connections are different.
* 2. The target is the same.
* 3. The fencing action is the same.
* 4. The device scheduled to execute the action is the same.
*/
if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
!pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) ||
!pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) ||
!pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
continue;
}
crm_notice("Merging fencing action '%s'%s%s originating from "
"client %s with identical fencing request from client %s",
cmd_other->action,
(cmd_other->target == NULL)? "" : " targeting ",
pcmk__s(cmd_other->target, ""), cmd_other->client_name,
cmd->client_name);
// Stop tracking the duplicate, send its result, and cancel it
cmd_list = g_list_remove_link(cmd_list, iter);
send_async_reply(cmd_other, result, pid, true);
cancel_stonith_command(cmd_other);
free_async_command(cmd_other);
g_list_free_1(iter);
}
}
/*!
* \internal
* \brief Return the next required device (if any) for an operation
*
* \param[in,out] cmd Fencing operation that just succeeded
*
* \return Next device required for action if any, otherwise NULL
*/
static stonith_device_t *
next_required_device(async_command_t *cmd)
{
for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) {
stonith_device_t *next_device = g_hash_table_lookup(device_list,
iter->data);
if (is_action_required(cmd->action, next_device)) {
/* This is only called for successful actions, so it's OK to skip
* non-required devices.
*/
cmd->next_device_iter = iter->next;
return next_device;
}
}
return NULL;
}
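/*!
 * \internal
 * \brief Handle completion of an agent action, continuing or replying as needed
 *
 * On success, mark the device verified (after list, monitor, or status
 * actions) and proceed to the next required device, if any. On failure, try
 * the next eligible device unless the failed action was required. Otherwise,
 * send the result to the requester and to any merged duplicate requests.
 *
 * \param[in]     pid        Process ID of action, if available
 * \param[in]     result     Result of action
 * \param[in,out] user_data  Command that completed (async_command_t *)
 */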
static void
st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
{
async_command_t *cmd = user_data;
stonith_device_t *device = NULL;
stonith_device_t *next_device = NULL;
CRM_CHECK(cmd != NULL, return);
device = cmd_device(cmd);
cmd->active_on = NULL;
/* The device is ready to do something else now */
if (device) {
if (!device->verified && pcmk__result_ok(result)
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST,
PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS,
NULL)) {
device->verified = TRUE;
}
mainloop_set_trigger(device->work);
}
if (pcmk__result_ok(result)) {
next_device = next_required_device(cmd);
} else if ((cmd->next_device_iter != NULL)
&& !is_action_required(cmd->action, device)) {
/* if this device didn't work out, see if there are any others we can try.
* if the failed device was 'required', we can't pick another device. */
next_device = g_hash_table_lookup(device_list,
cmd->next_device_iter->data);
cmd->next_device_iter = cmd->next_device_iter->next;
}
if (next_device == NULL) {
send_async_reply(cmd, result, pid, false);
if (pcmk__result_ok(result)) {
reply_to_duplicates(cmd, result, pid);
}
free_async_command(cmd);
} else { // This operation requires more fencing
log_async_result(cmd, result, pid, next_device->id, false);
schedule_stonith_command(cmd, next_device);
}
}
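/*!
 * \internal
 * \brief Compare two devices by priority (for sorting in descending order)
 *
 * \param[in] a  First device (stonith_device_t *)
 * \param[in] b  Second device (stonith_device_t *)
 *
 * \return Negative if \p a has the higher priority, positive if \p b does,
 *         otherwise 0
 */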
static gint
sort_device_priority(gconstpointer a, gconstpointer b)
{
const stonith_device_t *dev_a = a;
const stonith_device_t *dev_b = b;
if (dev_a->priority > dev_b->priority) {
return -1;
} else if (dev_a->priority < dev_b->priority) {
return 1;
}
return 0;
}
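/*!
 * \internal
 * \brief Schedule a fence action on the highest-priority capable device
 *
 * If the search found no capable device, reply to the requester with an
 * error instead.
 *
 * \param[in]     devices    List of IDs of capable devices
 * \param[in,out] user_data  Fencing command (async_command_t *)
 */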
static void
stonith_fence_get_devices_cb(GList * devices, void *user_data)
{
async_command_t *cmd = user_data;
stonith_device_t *device = NULL;
guint ndevices = g_list_length(devices);
crm_info("Found %d matching device%s for target '%s'",
ndevices, pcmk__plural_s(ndevices), cmd->target);
if (devices != NULL) {
/* Order based on priority */
devices = g_list_sort(devices, sort_device_priority);
device = g_hash_table_lookup(device_list, devices->data);
}
if (device == NULL) { // No device found
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"No device configured for target '%s'",
cmd->target);
send_async_reply(cmd, &result, 0, false);
pcmk__reset_result(&result);
free_async_command(cmd);
g_list_free_full(devices, free);
} else { // Device found, schedule it for fencing
cmd->device_list = devices;
cmd->next_device_iter = devices->next;
schedule_stonith_command(cmd, device);
}
}
/*!
* \internal
* \brief Execute a fence action via the local node
*
* \param[in] msg Fencing request
* \param[out] result Where to store result of fence action
*/
static void
fence_locally(xmlNode *msg, pcmk__action_result_t *result)
{
const char *device_id = NULL;
stonith_device_t *device = NULL;
async_command_t *cmd = NULL;
xmlNode *dev = NULL;
CRM_CHECK((msg != NULL) && (result != NULL), return);
dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR);
cmd = create_async_command(msg);
if (cmd == NULL) {
crm_log_xml_warn(msg, "invalid");
fenced_set_protocol_error(result);
return;
}
device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (device_id != NULL) {
device = g_hash_table_lookup(device_list, device_id);
if (device == NULL) {
crm_err("Requested device '%s' is not available", device_id);
pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
"Requested device '%s' not found", device_id);
return;
}
schedule_stonith_command(cmd, device);
} else {
const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET);
if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
int nodeid = 0;
crm_node_t *node = NULL;
pcmk__scan_min_int(host, &nodeid, 0);
node = pcmk__search_node_caches(nodeid, NULL,
pcmk__node_search_any
- |pcmk__node_search_known);
+ |pcmk__node_search_cluster_cib);
if (node != NULL) {
host = node->uname;
}
}
/* If we get to here, then self-fencing is implicitly allowed */
get_capable_devices(host, cmd->action, cmd->default_timeout,
TRUE, cmd, stonith_fence_get_devices_cb,
fenced_support_flag(cmd->action));
}
pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
}
/*!
* \internal
* \brief Build an XML reply for a fencing operation
*
* \param[in] request Request that reply is for
* \param[in] data If not NULL, add to reply as call data
* \param[in] result Full result of fencing operation
*
* \return Newly created XML reply
* \note The caller is responsible for freeing the result.
* \note This has some overlap with construct_async_reply(), but that copies
* values from an async_command_t, whereas this one copies them from the
* request.
*/
xmlNode *
fenced_construct_reply(const xmlNode *request, xmlNode *data,
const pcmk__action_result_t *result)
{
xmlNode *reply = NULL;
reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__);
crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
stonith__xe_set_result(reply, result);
if (request == NULL) {
/* Most likely, this is the result of a stonith operation that was
* initiated before we came up. Unfortunately that means we lack enough
* information to provide clients with a full result.
*
* @TODO Maybe synchronize this information at start-up?
*/
crm_warn("Missing request information for client notifications for "
"operation with result '%s' (initiated before we came up?)",
pcmk_exec_status_str(result->execution_status));
} else {
const char *name = NULL;
const char *value = NULL;
// Attributes to copy from request to reply
const char *names[] = {
PCMK__XA_ST_OP,
PCMK__XA_ST_CALLID,
PCMK__XA_ST_CLIENTID,
PCMK__XA_ST_CLIENTNAME,
PCMK__XA_ST_REMOTE_OP,
PCMK__XA_ST_CALLOPT,
};
for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
name = names[lpc];
value = crm_element_value(request, name);
crm_xml_add(reply, name, value);
}
if (data != NULL) {
xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA);
pcmk__xml_copy(wrapper, data);
}
}
return reply;
}
/*!
* \internal
* \brief Build an XML reply to an asynchronous fencing command
*
* \param[in] cmd Fencing command that reply is for
* \param[in] result Command result
*/
static xmlNode *
construct_async_reply(const async_command_t *cmd,
const pcmk__action_result_t *result)
{
xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__);
crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op);
crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device);
crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client);
crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name);
crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target);
crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op);
crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin);
crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id);
crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options);
stonith__xe_set_result(reply, result);
return reply;
}
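/*!
 * \internal
 * \brief Check whether a cluster peer can participate in fencing
 *
 * \param[in] peer  Peer to check
 *
 * \return TRUE if \p peer has a known name and is active in the cluster
 *         layer, otherwise FALSE
 */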
bool fencing_peer_active(crm_node_t *peer)
{
if (peer == NULL) {
return FALSE;
} else if (peer->uname == NULL) {
return FALSE;
} else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
return TRUE;
}
return FALSE;
}
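/*!
 * \internal
 * \brief Record the current time as a fencing operation's completion time
 *
 * \param[in,out] op  Fencing operation to update
 */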
void
set_fencing_completed(remote_fencing_op_t *op)
{
struct timespec tv;
qb_util_timespec_from_epoch_get(&tv);
op->completed = tv.tv_sec;
op->completed_nsec = tv.tv_nsec;
}
/*!
* \internal
* \brief Look for alternate node needed if local node shouldn't fence target
*
* \param[in] target Node that must be fenced
*
* \return Name of an alternate node that should fence \p target if any,
* or NULL otherwise
*/
static const char *
check_alternate_host(const char *target)
{
if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
GHashTableIter gIter;
crm_node_t *entry = NULL;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
if (fencing_peer_active(entry)
&& !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) {
crm_notice("Forwarding self-fencing request to %s",
entry->uname);
return entry->uname;
}
}
crm_warn("Will handle own fencing because no peer can");
}
return NULL;
}
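/*!
 * \internal
 * \brief Delete a relay operation that a newer operation replaces
 *
 * If the request carries the ID of a relay operation that we registered for a
 * target that is the local node, remove that relay operation from
 * stonith_remote_op_list (along with any registration of it as a duplicate of
 * another operation).
 *
 * \param[in] request  Request XML to check for a relay operation ID
 */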
static void
remove_relay_op(xmlNode * request)
{
xmlNode *dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request,
LOG_TRACE);
const char *relay_op_id = NULL;
const char *op_id = NULL;
const char *client_name = NULL;
const char *target = NULL;
remote_fencing_op_t *relay_op = NULL;
if (dev) {
target = crm_element_value(dev, PCMK__XA_ST_TARGET);
}
relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY);
op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME);
/* Delete RELAY operation. */
if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
if (relay_op) {
GHashTableIter iter;
remote_fencing_op_t *list_op = NULL;
g_hash_table_iter_init(&iter, stonith_remote_op_list);
/* If the operation to be deleted is registered as a duplicate, delete the registration. */
while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
GList *dup_iter = NULL;
if (list_op != relay_op) {
for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) {
remote_fencing_op_t *other = dup_iter->data;
if (other == relay_op) {
other->duplicates = g_list_remove(other->duplicates, relay_op);
break;
}
}
}
}
crm_debug("Deleting relay op %s ('%s'%s%s for %s), "
"replaced by op %s ('%s'%s%s for %s)",
relay_op->id, relay_op->action,
(relay_op->target == NULL)? "" : " targeting ",
pcmk__s(relay_op->target, ""),
relay_op->client_name, op_id, relay_op->action,
(target == NULL)? "" : " targeting ", pcmk__s(target, ""),
client_name);
g_hash_table_remove(stonith_remote_op_list, relay_op_id);
}
}
}
/*!
* \internal
* \brief Check whether an API request was sent by a privileged user
*
* API commands related to fencing configuration may be done only by privileged
* IPC users (i.e. root or hacluster), because all other users should go through
* the CIB to have ACLs applied. If no client was given, this is a peer request,
* which is always allowed.
*
* \param[in] c IPC client that sent request (or NULL if sent by CPG peer)
* \param[in] op Requested API operation (for logging only)
*
* \return true if sender is peer or privileged client, otherwise false
*/
static inline bool
is_privileged(const pcmk__client_t *c, const char *op)
{
if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) {
return true;
} else {
crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
pcmk__s(op, ""), pcmk__client_name(c));
return false;
}
}
// CRM_OP_REGISTER
static xmlNode *
handle_register_request(pcmk__request_t *request)
{
xmlNode *reply = pcmk__xe_create(NULL, "reply");
CRM_ASSERT(request->ipc_client != NULL);
crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER);
crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_request_flags(request, pcmk__request_reuse_options);
return reply;
}
// STONITH_OP_EXEC
static xmlNode *
handle_agent_request(pcmk__request_t *request)
{
execute_agent_action(request->xml, &request->result);
if (request->result.execution_status == PCMK_EXEC_PENDING) {
return NULL;
}
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_TIMEOUT_UPDATE
static xmlNode *
handle_update_timeout_request(pcmk__request_t *request)
{
const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID);
const char *client_id = crm_element_value(request->xml,
PCMK__XA_ST_CLIENTID);
int op_timeout = 0;
crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout);
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
// STONITH_OP_QUERY
static xmlNode *
handle_query_request(pcmk__request_t *request)
{
int timeout = 0;
xmlNode *dev = NULL;
const char *action = NULL;
const char *target = NULL;
const char *client_id = crm_element_value(request->xml,
PCMK__XA_ST_CLIENTID);
struct st_query_data *query = NULL;
if (request->peer != NULL) {
        // Record it for future notification
create_remote_stonith_op(client_id, request->xml, TRUE);
}
/* Delete the DC node RELAY operation. */
remove_relay_op(request->xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request->xml,
LOG_NEVER);
if (dev != NULL) {
const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
return NULL; // No query or reply necessary
}
target = crm_element_value(dev, PCMK__XA_ST_TARGET);
action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
}
crm_log_xml_trace(request->xml, "Query");
query = pcmk__assert_alloc(1, sizeof(struct st_query_data));
query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
query->remote_peer = pcmk__str_copy(request->peer);
query->client_id = pcmk__str_copy(client_id);
query->target = pcmk__str_copy(target);
query->action = pcmk__str_copy(action);
query->call_options = request->call_options;
crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout);
get_capable_devices(target, action, timeout,
pcmk_is_set(query->call_options, st_opt_allow_suicide),
query, stonith_query_capable_device_cb, st_device_supports_none);
return NULL;
}
// STONITH_OP_NOTIFY
static xmlNode *
handle_notify_request(pcmk__request_t *request)
{
const char *flag_name = NULL;
CRM_ASSERT(request->ipc_client != NULL);
flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE);
if (flag_name != NULL) {
crm_debug("Enabling %s callbacks for client %s",
flag_name, pcmk__request_origin(request));
pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name));
}
flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE);
if (flag_name != NULL) {
crm_debug("Disabling %s callbacks for client %s",
flag_name, pcmk__request_origin(request));
pcmk__clear_client_flags(request->ipc_client,
get_stonith_flag(flag_name));
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
pcmk__set_request_flags(request, pcmk__request_reuse_options);
return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL,
CRM_EX_OK);
}
// STONITH_OP_RELAY
static xmlNode *
handle_relay_request(pcmk__request_t *request)
{
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
LOG_TRACE);
crm_notice("Received forwarded fencing request from "
"%s %s to fence (%s) peer %s",
pcmk__request_origin_type(request),
pcmk__request_origin(request),
crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION),
crm_element_value(dev, PCMK__XA_ST_TARGET));
if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) {
fenced_set_protocol_error(&request->result);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
return NULL;
}
// STONITH_OP_FENCE
static xmlNode *
handle_fence_request(pcmk__request_t *request)
{
if ((request->peer != NULL) || stand_alone) {
fence_locally(request->xml, &request->result);
} else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) {
switch (fenced_handle_manual_confirmation(request->ipc_client,
request->xml)) {
case pcmk_rc_ok:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
break;
case EINPROGRESS:
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
break;
default:
fenced_set_protocol_error(&request->result);
break;
}
} else {
const char *alternate_host = NULL;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
LOG_TRACE);
const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET);
const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (request->ipc_client != NULL) {
int tolerance = 0;
crm_notice("Client %s wants to fence (%s) %s using %s",
pcmk__request_origin(request), action,
target, (device? device : "any device"));
crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance);
if (stonith_check_fence_tolerance(tolerance, target, action)) {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL);
return fenced_construct_reply(request->xml, NULL,
&request->result);
}
alternate_host = check_alternate_host(target);
} else {
crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
request->peer, action, target,
(device == NULL)? "(any)" : device);
}
if (alternate_host != NULL) {
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
+ crm_node_t *node = pcmk__get_node(0, alternate_host, NULL,
+ pcmk__node_search_cluster_member);
if (request->ipc_client->id == 0) {
client_id = crm_element_value(request->xml,
PCMK__XA_ST_CLIENTID);
} else {
client_id = request->ipc_client->id;
}
/* Create a duplicate fencing operation to relay with the client ID.
* When a query response is received, this operation should be
* deleted to avoid keeping the duplicate around.
*/
op = create_remote_stonith_op(client_id, request->xml, FALSE);
crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY);
crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID,
request->ipc_client->id);
crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id);
- send_cluster_message(pcmk__get_node(0, alternate_host, NULL,
- pcmk__node_search_cluster),
- crm_msg_stonith_ng, request->xml, FALSE);
+ send_cluster_message(node, crm_msg_stonith_ng, request->xml, FALSE);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
} else if (initiate_remote_stonith_op(request->ipc_client, request->xml,
FALSE) == NULL) {
fenced_set_protocol_error(&request->result);
} else {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
NULL);
}
}
if (request->result.execution_status == PCMK_EXEC_PENDING) {
return NULL;
}
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_FENCE_HISTORY
static xmlNode *
handle_history_request(pcmk__request_t *request)
{
xmlNode *reply = NULL;
xmlNode *data = NULL;
stonith_fence_history(request->xml, &data, request->peer,
request->call_options);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) {
/* When the local node broadcasts its history, it sets
* st_opt_discard_reply and doesn't need a reply.
*/
reply = fenced_construct_reply(request->xml, data, &request->result);
}
free_xml(data);
return reply;
}
// STONITH_OP_DEVICE_ADD
static xmlNode *
handle_device_add_request(pcmk__request_t *request)
{
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml,
LOG_ERR);
if (is_privileged(request->ipc_client, op)) {
int rc = stonith_device_register(dev, FALSE);
pcmk__set_result(&request->result,
((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
stonith__legacy2status(rc),
((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
} else {
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must register device via CIB");
}
fenced_send_config_notification(op, &request->result,
(dev == NULL)? NULL : pcmk__xe_id(dev));
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_DEVICE_DEL
static xmlNode *
handle_device_delete_request(pcmk__request_t *request)
{
xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml,
LOG_ERR);
const char *device_id = crm_element_value(dev, PCMK_XA_ID);
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
if (is_privileged(request->ipc_client, op)) {
stonith_device_remove(device_id, false);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must delete device via CIB");
}
fenced_send_config_notification(op, &request->result, device_id);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_ADD
static xmlNode *
handle_level_add_request(pcmk__request_t *request)
{
char *desc = NULL;
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
if (is_privileged(request->ipc_client, op)) {
fenced_register_level(request->xml, &desc, &request->result);
} else {
unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must add level via CIB");
}
fenced_send_config_notification(op, &request->result, desc);
free(desc);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// STONITH_OP_LEVEL_DEL
static xmlNode *
handle_level_delete_request(pcmk__request_t *request)
{
char *desc = NULL;
const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP);
if (is_privileged(request->ipc_client, op)) {
fenced_unregister_level(request->xml, &desc, &request->result);
} else {
unpack_level_request(request->xml, NULL, NULL, NULL, &desc);
pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
PCMK_EXEC_INVALID,
"Unprivileged users must delete level via CIB");
}
fenced_send_config_notification(op, &request->result, desc);
free(desc);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
// CRM_OP_RM_NODE_CACHE
static xmlNode *
handle_cache_request(pcmk__request_t *request)
{
int node_id = 0;
const char *name = NULL;
crm_element_value_int(request->xml, PCMK_XA_ID, &node_id);
name = crm_element_value(request->xml, PCMK_XA_UNAME);
pcmk__cluster_forget_cluster_node(node_id, name);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
static xmlNode *
handle_unknown_request(pcmk__request_t *request)
{
crm_err("Unknown IPC request %s from %s %s",
request->op, pcmk__request_origin_type(request),
pcmk__request_origin(request));
pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
"Unknown IPC request type '%s' (bug?)", request->op);
return fenced_construct_reply(request->xml, NULL, &request->result);
}
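/*!
 * \internal
 * \brief Build the table mapping fencer request types to handler functions
 *
 * handle_unknown_request() is registered as the default handler.
 */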
static void
fenced_register_handlers(void)
{
pcmk__server_command_t handlers[] = {
{ CRM_OP_REGISTER, handle_register_request },
{ STONITH_OP_EXEC, handle_agent_request },
{ STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request },
{ STONITH_OP_QUERY, handle_query_request },
{ STONITH_OP_NOTIFY, handle_notify_request },
{ STONITH_OP_RELAY, handle_relay_request },
{ STONITH_OP_FENCE, handle_fence_request },
{ STONITH_OP_FENCE_HISTORY, handle_history_request },
{ STONITH_OP_DEVICE_ADD, handle_device_add_request },
{ STONITH_OP_DEVICE_DEL, handle_device_delete_request },
{ STONITH_OP_LEVEL_ADD, handle_level_add_request },
{ STONITH_OP_LEVEL_DEL, handle_level_delete_request },
{ CRM_OP_RM_NODE_CACHE, handle_cache_request },
{ NULL, handle_unknown_request },
};
fenced_handlers = pcmk__register_handlers(handlers);
}
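/*!
 * \internal
 * \brief Free the fencer's request handler table, if any
 */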
void
fenced_unregister_handlers(void)
{
if (fenced_handlers != NULL) {
g_hash_table_destroy(fenced_handlers);
fenced_handlers = NULL;
}
}
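/*!
 * \internal
 * \brief Process an incoming fencer request, sending any reply needed
 *
 * \param[in,out] request  Request to process
 */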
static void
handle_request(pcmk__request_t *request)
{
xmlNode *reply = NULL;
const char *reason = NULL;
if (fenced_handlers == NULL) {
fenced_register_handlers();
}
reply = pcmk__process_request(request, fenced_handlers);
if (reply != NULL) {
if (pcmk_is_set(request->flags, pcmk__request_reuse_options)
&& (request->ipc_client != NULL)) {
/* Certain IPC-only commands must reuse the call options from the
* original request rather than the ones set by stonith_send_reply()
* -> do_local_reply().
*/
pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply,
request->ipc_flags);
request->ipc_client->request_id = 0;
} else {
stonith_send_reply(reply, request->call_options,
request->peer, request->ipc_client);
}
free_xml(reply);
}
reason = request->result.exit_reason;
crm_debug("Processed %s request from %s %s: %s%s%s%s",
request->op, pcmk__request_origin_type(request),
pcmk__request_origin(request),
pcmk_exec_status_str(request->result.execution_status),
(reason == NULL)? "" : " (",
(reason == NULL)? "" : reason,
(reason == NULL)? "" : ")");
}
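/*!
 * \internal
 * \brief Process a reply to one of our fencing queries or requests
 *
 * \param[in,out] client       IPC client that sent reply (NULL if peer did)
 * \param[in,out] request      Reply XML
 * \param[in]     remote_peer  If not NULL, CPG peer that sent reply
 */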
static void
handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
{
// Copy, because request might be freed before we want to log this
char *op = crm_element_value_copy(request, PCMK__XA_ST_OP);
if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
process_remote_stonith_query(request);
} else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE,
NULL)) {
fenced_process_fencing_reply(request);
} else {
crm_err("Ignoring unknown %s reply from %s %s",
pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"),
((client == NULL)? remote_peer : pcmk__client_name(client)));
crm_log_xml_warn(request, "UnknownOp");
free(op);
return;
}
crm_debug("Processed %s reply from %s %s",
op, ((client == NULL)? "peer" : "client"),
((client == NULL)? remote_peer : pcmk__client_name(client)));
free(op);
}
/*!
* \internal
* \brief Handle a message from an IPC client or CPG peer
*
* \param[in,out] client If not NULL, IPC client that sent message
* \param[in] id If from IPC client, IPC message ID
* \param[in] flags Message flags
* \param[in,out] message Message XML
* \param[in] remote_peer If not NULL, CPG peer that sent message
*/
void
stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
xmlNode *message, const char *remote_peer)
{
int call_options = st_opt_none;
bool is_reply = false;
CRM_CHECK(message != NULL, return);
if (get_xpath_object("//" PCMK__XE_ST_REPLY, message, LOG_NEVER) != NULL) {
is_reply = true;
}
crm_element_value_int(message, PCMK__XA_ST_CALLOPT, &call_options);
crm_debug("Processing %ssynchronous %s %s %u from %s %s",
pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
crm_element_value(message, PCMK__XA_ST_OP),
(is_reply? "reply" : "request"), id,
((client == NULL)? "peer" : "client"),
((client == NULL)? remote_peer : pcmk__client_name(client)));
if (pcmk_is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(client == NULL || client->request_id == id);
}
if (is_reply) {
handle_reply(client, message, remote_peer);
} else {
pcmk__request_t request = {
.ipc_client = client,
.ipc_id = id,
.ipc_flags = flags,
.peer = remote_peer,
.xml = message,
.call_options = call_options,
.result = PCMK__UNKNOWN_RESULT,
};
request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP);
CRM_CHECK(request.op != NULL, return);
if (pcmk_is_set(request.call_options, st_opt_sync_call)) {
pcmk__set_request_flags(&request, pcmk__request_sync);
}
handle_request(&request);
pcmk__reset_request(&request);
}
}
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index afa73c3f92..74d1cd7128 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -1,572 +1,572 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <pacemaker-fenced.h>
#define MAX_STONITH_HISTORY 500
/*!
* \internal
 * \brief Send a broadcast to all nodes to trigger history cleanup or
 *        synchronization
 *
 * \param[in] history   Optional history to attach
 * \param[in] callopts  Call options (cleanup is requested via a flag here)
 * \param[in] target    If not NULL, limit cleanup to this fence target
*/
static void
stonith_send_broadcast_history(xmlNode *history,
int callopts,
const char *target)
{
xmlNode *bcast = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);
xmlNode *wrapper = pcmk__xe_create(bcast, PCMK__XE_ST_CALLDATA);
xmlNode *call_data = pcmk__xe_create(wrapper, __func__);
crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_FENCE_HISTORY);
crm_xml_add_int(bcast, PCMK__XA_ST_CALLOPT, callopts);
pcmk__xml_copy(call_data, history);
if (target != NULL) {
crm_xml_add(call_data, PCMK__XA_ST_TARGET, target);
}
send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
free_xml(bcast);
}
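/*!
 * \internal
 * \brief Check whether a history entry should be removed (GHRFunc)
 *
 * \param[in] key        Operation ID (ignored)
 * \param[in] value      Fencing operation to check
 * \param[in] user_data  If not NULL, only match operations with this target
 *
 * \return TRUE if the operation has completed (successfully or not) and
 *         matches any target filter, otherwise FALSE (keep pending entries)
 */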
static gboolean
stonith_remove_history_entry (gpointer key,
gpointer value,
gpointer user_data)
{
remote_fencing_op_t *op = value;
const char *target = (const char *) user_data;
if ((op->state == st_failed) || (op->state == st_done)) {
if ((target) && (strcmp(op->target, target) != 0)) {
return FALSE;
}
return TRUE;
}
return FALSE; /* don't clean pending operations */
}
/*!
* \internal
* \brief Send out a cleanup broadcast or do a local history-cleanup
*
* \param[in] target Cleanup can be limited to certain fence-targets
* \param[in] broadcast Send out a cleanup broadcast
*/
static void
stonith_fence_history_cleanup(const char *target,
gboolean broadcast)
{
if (broadcast) {
stonith_send_broadcast_history(NULL,
st_opt_cleanup | st_opt_discard_reply,
target);
/* we'll do the local clean when we receive back our own broadcast */
} else if (stonith_remote_op_list) {
g_hash_table_foreach_remove(stonith_remote_op_list,
stonith_remove_history_entry,
(gpointer) target);
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
}
}
/* Keeping the length of fence-history within bounds
 * =================================================
 *
 * If fencing attempts are coming in rapidly, they can fill up the hash
 * table, eventually using a lot of memory and creating huge history-sync
 * messages. Before the history was synced across nodes, at least a cluster
 * node's reboot kept the history within bounds, though not reliably.
 *
 * stonith_remote_op_list isn't sorted by timestamp, so it would be
 * relatively expensive to delete (for example) the oldest entry whenever
 * the list grows past MAX_STONITH_HISTORY entries. It is more efficient to
 * purge MAX_STONITH_HISTORY/2 entries whenever the list grows beyond
 * MAX_STONITH_HISTORY (sort by age, then purge the MAX_STONITH_HISTORY/2
 * oldest). Doing that independently on each node might raise the
 * probability of large syncs occurring; alternatives such as broadcasting
 * a purge of MAX_STONITH_HISTORY/2 entries, or not syncing above a certain
 * threshold, come to mind. The simplest approach, though, is to purge the
 * full history throughout the cluster once MAX_STONITH_HISTORY is reached,
 * but that can discard the history in situations where having it would be
 * handy.
 */
/*!
* \internal
* \brief Compare two remote fencing operations by status and completion time
*
* A pending operation is ordered before a completed operation. If both
* operations have completed, then the more recently completed operation is
* ordered first. Two pending operations are considered equal.
*
* \param[in] a First \c remote_fencing_op_t to compare
* \param[in] b Second \c remote_fencing_op_t to compare
*
* \return Standard comparison result (a negative integer if \p a is lesser,
* 0 if the values are equal, and a positive integer if \p a is greater)
*/
static gint
cmp_op_by_completion(gconstpointer a, gconstpointer b)
{
const remote_fencing_op_t *op1 = a;
const remote_fencing_op_t *op2 = b;
bool op1_pending = stonith__op_state_pending(op1->state);
bool op2_pending = stonith__op_state_pending(op2->state);
if (op1_pending && op2_pending) {
return 0;
}
if (op1_pending) {
return -1;
}
if (op2_pending) {
return 1;
}
if (op1->completed > op2->completed) {
return -1;
}
if (op1->completed < op2->completed) {
return 1;
}
if (op1->completed_nsec > op2->completed_nsec) {
return -1;
}
if (op1->completed_nsec < op2->completed_nsec) {
return 1;
}
return 0;
}
/*!
* \internal
* \brief Remove a completed operation from \c stonith_remote_op_list
*
* \param[in] data \c remote_fencing_op_t to remove
* \param[in] user_data Ignored
*/
static void
remove_completed_remote_op(gpointer data, gpointer user_data)
{
const remote_fencing_op_t *op = data;
if (!stonith__op_state_pending(op->state)) {
g_hash_table_remove(stonith_remote_op_list, op->id);
}
}
/*!
* \internal
* \brief Do a local history-trim to MAX_STONITH_HISTORY / 2 entries
* once over MAX_STONITH_HISTORY
*/
void
stonith_fence_history_trim(void)
{
if (stonith_remote_op_list == NULL) {
return;
}
if (g_hash_table_size(stonith_remote_op_list) > MAX_STONITH_HISTORY) {
GList *ops = g_hash_table_get_values(stonith_remote_op_list);
crm_trace("More than %d entries in fencing history, purging oldest "
"completed operations", MAX_STONITH_HISTORY);
ops = g_list_sort(ops, cmp_op_by_completion);
// Always keep pending ops regardless of number of entries
g_list_foreach(g_list_nth(ops, MAX_STONITH_HISTORY / 2),
remove_completed_remote_op, NULL);
// No need for a notification after purging old data
g_list_free(ops);
}
}
/*!
* \internal
 * \brief Convert XML fence history to a hash table like stonith_remote_op_list
 *
 * \param[in] history  Fence history in XML
 *
 * \return Fence history as a hash table
*/
static GHashTable *
stonith_xml_history_to_list(const xmlNode *history)
{
xmlNode *xml_op = NULL;
GHashTable *rv = NULL;
init_stonith_remote_op_hash_table(&rv);
CRM_LOG_ASSERT(rv != NULL);
for (xml_op = pcmk__xe_first_child(history, NULL, NULL, NULL);
xml_op != NULL; xml_op = pcmk__xe_next(xml_op)) {
remote_fencing_op_t *op = NULL;
char *id = crm_element_value_copy(xml_op, PCMK__XA_ST_REMOTE_OP);
int state;
int exit_status = CRM_EX_OK;
int execution_status = PCMK_EXEC_DONE;
long long completed;
long long completed_nsec = 0L;
if (!id) {
crm_warn("Malformed fencing history received from peer");
continue;
}
crm_trace("Attaching op %s to hashtable", id);
op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t));
op->id = id;
op->target = crm_element_value_copy(xml_op, PCMK__XA_ST_TARGET);
op->action = crm_element_value_copy(xml_op, PCMK__XA_ST_DEVICE_ACTION);
op->originator = crm_element_value_copy(xml_op, PCMK__XA_ST_ORIGIN);
op->delegate = crm_element_value_copy(xml_op, PCMK__XA_ST_DELEGATE);
op->client_name = crm_element_value_copy(xml_op,
PCMK__XA_ST_CLIENTNAME);
crm_element_value_ll(xml_op, PCMK__XA_ST_DATE, &completed);
op->completed = (time_t) completed;
crm_element_value_ll(xml_op, PCMK__XA_ST_DATE_NSEC, &completed_nsec);
op->completed_nsec = completed_nsec;
crm_element_value_int(xml_op, PCMK__XA_ST_STATE, &state);
op->state = (enum op_state) state;
        /* @COMPAT We can't use stonith__xe_get_result() here because fencers
         * earlier than 2.1.3 didn't include results, which would cause it to
         * assume an error status. Instead, set an unknown status in that case.
         */
if ((crm_element_value_int(xml_op, PCMK__XA_RC_CODE, &exit_status) < 0)
|| (crm_element_value_int(xml_op, PCMK__XA_OP_STATUS,
&execution_status) < 0)) {
exit_status = CRM_EX_INDETERMINATE;
execution_status = PCMK_EXEC_UNKNOWN;
}
pcmk__set_result(&op->result, exit_status, execution_status,
crm_element_value(xml_op, PCMK_XA_EXIT_REASON));
pcmk__set_result_output(&op->result,
crm_element_value_copy(xml_op,
PCMK__XA_ST_OUTPUT),
NULL);
g_hash_table_replace(rv, id, op);
CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
}
return rv;
}
/*!
* \internal
 * \brief Craft the XML difference between the local fence history and a
 *        history received from a remote peer, and merge the remote history
 *        into the local one
 *
 * \param[in,out] remote_history  Fence history as a hash table (may be NULL)
 * \param[in]     add_id          Whether to include each operation's ID (not
 *                                needed when answering an API history request)
 * \param[in]     target          If not NULL, limit to this fence target
 *
 * \return The fence history as XML
*/
static xmlNode *
stonith_local_history_diff_and_merge(GHashTable *remote_history,
gboolean add_id, const char *target)
{
xmlNode *history = NULL;
GHashTableIter iter;
remote_fencing_op_t *op = NULL;
gboolean updated = FALSE;
int cnt = 0;
if (stonith_remote_op_list) {
char *id = NULL;
history = pcmk__xe_create(NULL, PCMK__XE_ST_HISTORY);
g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, (void **)&id, (void **)&op)) {
xmlNode *entry = NULL;
if (remote_history) {
remote_fencing_op_t *remote_op =
g_hash_table_lookup(remote_history, op->id);
if (remote_op) {
if (stonith__op_state_pending(op->state)
&& !stonith__op_state_pending(remote_op->state)) {
crm_debug("Updating outdated pending operation %.8s "
"(state=%s) according to the one (state=%s) from "
"remote peer history",
op->id, stonith_op_state_str(op->state),
stonith_op_state_str(remote_op->state));
g_hash_table_steal(remote_history, op->id);
op->id = remote_op->id;
remote_op->id = id;
g_hash_table_iter_replace(&iter, remote_op);
updated = TRUE;
continue; /* skip outdated entries */
} else if (!stonith__op_state_pending(op->state)
&& stonith__op_state_pending(remote_op->state)) {
crm_debug("Broadcasting operation %.8s (state=%s) to "
"update the outdated pending one "
"(state=%s) in remote peer history",
op->id, stonith_op_state_str(op->state),
stonith_op_state_str(remote_op->state));
g_hash_table_remove(remote_history, op->id);
} else {
g_hash_table_remove(remote_history, op->id);
continue; /* skip entries broadcasted already */
}
}
}
if (!pcmk__str_eq(target, op->target, pcmk__str_null_matches)) {
continue;
}
cnt++;
crm_trace("Attaching op %s", op->id);
entry = pcmk__xe_create(history, STONITH_OP_EXEC);
if (add_id) {
crm_xml_add(entry, PCMK__XA_ST_REMOTE_OP, op->id);
}
crm_xml_add(entry, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(entry, PCMK__XA_ST_DEVICE_ACTION, op->action);
crm_xml_add(entry, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(entry, PCMK__XA_ST_DELEGATE, op->delegate);
crm_xml_add(entry, PCMK__XA_ST_CLIENTNAME, op->client_name);
crm_xml_add_ll(entry, PCMK__XA_ST_DATE, op->completed);
crm_xml_add_ll(entry, PCMK__XA_ST_DATE_NSEC,
op->completed_nsec);
crm_xml_add_int(entry, PCMK__XA_ST_STATE, op->state);
stonith__xe_set_result(entry, &op->result);
}
}
if (remote_history) {
init_stonith_remote_op_hash_table(&stonith_remote_op_list);
updated |= g_hash_table_size(remote_history);
g_hash_table_iter_init(&iter, remote_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
if (stonith__op_state_pending(op->state) &&
pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
crm_warn("Failing pending operation %.8s originated by us but "
"known only from peer history", op->id);
op->state = st_failed;
set_fencing_completed(op);
/* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
* from setting a delegate
*/
pcmk__set_result(&op->result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
"Initiated by earlier fencer "
"process and presumed failed");
fenced_broadcast_op_result(op, false);
}
g_hash_table_iter_steal(&iter);
g_hash_table_replace(stonith_remote_op_list, op->id, op);
            /* We could trim the history here, but if we bailed out after
             * trimming, we might miss more recent entries still in the list.
             * Since we don't bail out, trimming once afterward is more
             * efficient, and the memory overhead is minimal because we are
             * just moving pointers from one hash table to another.
             */
}
g_hash_table_destroy(remote_history); /* remove what is left */
}
if (updated) {
stonith_fence_history_trim();
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
}
if (cnt == 0) {
free_xml(history);
return NULL;
} else {
return history;
}
}
/*!
* \internal
 * \brief Craft XML from the local fence history
 *
 * \param[in] add_id  Whether to include each operation's ID (not needed when
 *                    answering an API history request)
 * \param[in] target  If not NULL, limit to this fence target
 *
 * \return The fence history as XML
*/
static xmlNode *
stonith_local_history(gboolean add_id, const char *target)
{
return stonith_local_history_diff_and_merge(NULL, add_id, target);
}
/*!
* \internal
* \brief Handle fence-history messages (from API or coming in as broadcasts)
*
* \param[in,out] msg Request XML
* \param[out] output Where to set local history, if requested
* \param[in] remote_peer If broadcast, peer that sent it
* \param[in] options Call options from the request
*/
void
stonith_fence_history(xmlNode *msg, xmlNode **output,
const char *remote_peer, int options)
{
const char *target = NULL;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_NEVER);
xmlNode *out_history = NULL;
if (dev) {
target = crm_element_value(dev, PCMK__XA_ST_TARGET);
if (target && (options & st_opt_cs_nodeid)) {
int nodeid;
crm_node_t *node;
pcmk__scan_min_int(target, &nodeid, 0);
node = pcmk__search_node_caches(nodeid, NULL,
pcmk__node_search_any
- |pcmk__node_search_known);
+ |pcmk__node_search_cluster_cib);
if (node) {
target = node->uname;
}
}
}
if (options & st_opt_cleanup) {
const char *call_id = crm_element_value(msg, PCMK__XA_ST_CALLID);
crm_trace("Cleaning up operations on %s in %p", target,
stonith_remote_op_list);
stonith_fence_history_cleanup(target, (call_id != NULL));
} else if (options & st_opt_broadcast) {
        /* There is currently no clear indication of when a history sync is
         * done, so send a notification for anything that looks like a
         * history sync.
         */
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, NULL,
NULL);
if (crm_element_value(msg, PCMK__XA_ST_CALLID) != NULL) {
            /* This request came via the stonith API, so craft a broadcast
             * with this node's history, allowing every node to merge it and
             * broadcast whatever it has on top.
             */
out_history = stonith_local_history(TRUE, NULL);
crm_trace("Broadcasting history to peers");
stonith_send_broadcast_history(out_history,
st_opt_broadcast | st_opt_discard_reply,
NULL);
} else if (remote_peer &&
!pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) {
xmlNode *history = get_xpath_object("//" PCMK__XE_ST_HISTORY, msg,
LOG_NEVER);
            /* This is either a broadcast created directly upon a stonith API
             * request, or a diff sent in response to such a broadcast. In
             * both cases, it may or may not carry a history. If we received
             * differential data, merge it in and stop; otherwise, broadcast
             * what we have on top, marked as differential, and merge in the
             * received history afterwards.
             */
if (!history
|| !pcmk__xe_attr_is_true(history, PCMK__XA_ST_DIFFERENTIAL)) {
GHashTable *received_history = NULL;
if (history != NULL) {
received_history = stonith_xml_history_to_list(history);
}
out_history =
stonith_local_history_diff_and_merge(received_history, TRUE, NULL);
if (out_history) {
crm_trace("Broadcasting history-diff to peers");
pcmk__xe_set_bool_attr(out_history,
PCMK__XA_ST_DIFFERENTIAL, true);
stonith_send_broadcast_history(out_history,
st_opt_broadcast | st_opt_discard_reply,
NULL);
} else {
crm_trace("History-diff is empty - skip broadcast");
}
}
} else {
crm_trace("Skipping history-query-broadcast (%s%s)"
" we sent ourselves",
remote_peer?"remote-peer=":"local-ipc",
remote_peer?remote_peer:"");
}
} else {
/* plain history request */
crm_trace("Looking for operations on %s in %p", target,
stonith_remote_op_list);
*output = stonith_local_history(FALSE, target);
}
free_xml(out_history);
}
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 6cc854ae9b..673454679b 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1,2592 +1,2596 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#include <regex.h>
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/cluster/internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/util.h>
#include <pacemaker-fenced.h>
#define TIMEOUT_MULTIPLY_FACTOR 1.2
/* When one fencer queries its peers for devices able to handle a fencing
* request, each peer will reply with a list of such devices available to it.
* Each reply will be parsed into a peer_device_info_t, with each device's
* information kept in a device_properties_t.
*/
typedef struct device_properties_s {
/* Whether access to this device has been verified */
gboolean verified;
/* The remaining members are indexed by the operation's "phase" */
/* Whether this device has been executed in each phase */
gboolean executed[st_phase_max];
/* Whether this device is disallowed from executing in each phase */
gboolean disallowed[st_phase_max];
/* Action-specific timeout for each phase */
int custom_action_timeout[st_phase_max];
/* Action-specific maximum random delay for each phase */
int delay_max[st_phase_max];
/* Action-specific base delay for each phase */
int delay_base[st_phase_max];
/* Group of enum st_device_flags */
uint32_t device_support_flags;
} device_properties_t;
typedef struct {
/* Name of peer that sent this result */
char *host;
/* Only try peers for non-topology based operations once */
gboolean tried;
/* Number of entries in the devices table */
int ndevices;
/* Devices available to this host that are capable of fencing the target */
GHashTable *devices;
} peer_device_info_t;
GHashTable *stonith_remote_op_list = NULL;
extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
int call_options);
static void request_peer_fencing(remote_fencing_op_t *op,
peer_device_info_t *peer);
static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(const remote_fencing_op_t *op,
const peer_device_info_t *chosen_peer);
static gint
sort_strings(gconstpointer a, gconstpointer b)
{
return strcmp(a, b);
}
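/*!
 * \internal
 * \brief Free a peer's device query result (GDestroyNotify)
 *
 * \param[in,out] data  peer_device_info_t to free
 */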
static void
free_remote_query(gpointer data)
{
if (data != NULL) {
peer_device_info_t *peer = data;
g_hash_table_destroy(peer->devices);
free(peer->host);
free(peer);
}
}
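/*!
 * \internal
 * \brief Free the table of remote fencing operations, if any
 */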
void
free_stonith_remote_op_list(void)
{
if (stonith_remote_op_list != NULL) {
g_hash_table_destroy(stonith_remote_op_list);
stonith_remote_op_list = NULL;
}
}
struct peer_count_data {
const remote_fencing_op_t *op;
gboolean verified_only;
uint32_t support_action_only;
int count;
};
/*!
* \internal
* \brief Increment a counter if a device has not been executed yet
*
* \param[in] key Device ID (ignored)
* \param[in] value Device properties
* \param[in,out] user_data Peer count data
*/
static void
count_peer_device(gpointer key, gpointer value, gpointer user_data)
{
device_properties_t *props = (device_properties_t*)value;
struct peer_count_data *data = user_data;
if (!props->executed[data->op->phase]
&& (!data->verified_only || props->verified)
&& ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) {
++(data->count);
}
}
/*!
* \internal
* \brief Check the number of available devices in a peer's query results
*
* \param[in] op Operation that results are for
* \param[in] peer Peer to count
* \param[in] verified_only Whether to count only verified devices
 * \param[in] support_on_action_only  Whether to count only devices that support the action
*
* \return Number of devices available to peer that were not already executed
*/
static int
count_peer_devices(const remote_fencing_op_t *op,
const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only)
{
struct peer_count_data data;
data.op = op;
data.verified_only = verified_only;
data.support_action_only = support_on_action_only;
data.count = 0;
if (peer) {
g_hash_table_foreach(peer->devices, count_peer_device, &data);
}
return data.count;
}
/*!
* \internal
* \brief Search for a device in a query result
*
* \param[in] op Operation that result is for
* \param[in] peer Query result for a peer
* \param[in] device Device ID to search for
*
* \return Device properties if found, NULL otherwise
*/
static device_properties_t *
find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
const char *device, uint32_t support_action_only)
{
device_properties_t *props = g_hash_table_lookup(peer->devices, device);
if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) {
return NULL;
}
return (props && !props->executed[op->phase]
&& !props->disallowed[op->phase])? props : NULL;
}
/*!
* \internal
* \brief Find a device in a peer's device list and mark it as executed
*
* \param[in] op Operation that peer result is for
* \param[in,out] peer Peer with results to search
* \param[in] device ID of device to mark as done
* \param[in] verified_devices_only Only consider verified devices
*
* \return TRUE if device was found and marked, FALSE otherwise
*/
static gboolean
grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
const char *device, gboolean verified_devices_only)
{
device_properties_t *props = find_peer_device(op, peer, device,
fenced_support_flag(op->action));
if ((props == NULL) || (verified_devices_only && !props->verified)) {
return FALSE;
}
crm_trace("Removing %s from %s (%d remaining)",
device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none));
props->executed[op->phase] = TRUE;
return TRUE;
}
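/*!
 * \internal
 * \brief Stop all timers in effect for a remote fencing operation
 *
 * \param[in,out] op  Fencing operation whose timers should be stopped
 */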
static void
clear_remote_op_timers(remote_fencing_op_t * op)
{
if (op->query_timer) {
g_source_remove(op->query_timer);
op->query_timer = 0;
}
if (op->op_timer_total) {
g_source_remove(op->op_timer_total);
op->op_timer_total = 0;
}
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
op->op_timer_one = 0;
}
}
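/*!
 * \internal
 * \brief Free a remote fencing operation (GDestroyNotify)
 *
 * \param[in,out] data  remote_fencing_op_t to free
 */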
static void
free_remote_op(gpointer data)
{
remote_fencing_op_t *op = data;
crm_log_xml_debug(op->request, "Destroying");
clear_remote_op_timers(op);
free(op->id);
free(op->action);
free(op->delegate);
free(op->target);
free(op->client_id);
free(op->client_name);
free(op->originator);
if (op->query_results) {
g_list_free_full(op->query_results, free_remote_query);
}
if (op->request) {
free_xml(op->request);
op->request = NULL;
}
if (op->devices_list) {
g_list_free_full(op->devices_list, free);
op->devices_list = NULL;
}
g_list_free_full(op->automatic_list, free);
g_list_free(op->duplicates);
pcmk__reset_result(&op->result);
free(op);
}
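/*!
 * \internal
 * \brief Create a table for remote fencing operations, if not already created
 *
 * \param[in,out] table  Where to create the table (if currently NULL)
 */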
void
init_stonith_remote_op_hash_table(GHashTable **table)
{
if (*table == NULL) {
*table = pcmk__strkey_table(NULL, free_remote_op);
}
}
/*!
* \internal
* \brief Return an operation's originally requested action (before any remap)
*
* \param[in] op Operation to check
*
* \return Operation's original action
*/
static const char *
op_requested_action(const remote_fencing_op_t *op)
{
return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action);
}
/*!
* \internal
* \brief Remap a "reboot" operation to the "off" phase
*
* \param[in,out] op Operation to remap
*/
static void
op_phase_off(remote_fencing_op_t *op)
{
crm_info("Remapping multiple-device reboot targeting %s to 'off' "
CRM_XS " id=%.8s", op->target, op->id);
op->phase = st_phase_off;
/* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
* memory allocation at each phase.
*/
strcpy(op->action, PCMK_ACTION_OFF);
}
/*!
* \internal
* \brief Advance a remapped reboot operation to the "on" phase
*
* \param[in,out] op Operation to remap
*/
static void
op_phase_on(remote_fencing_op_t *op)
{
GList *iter = NULL;
crm_info("Remapped 'off' targeting %s complete, "
"remapping to 'on' for %s " CRM_XS " id=%.8s",
op->target, op->client_name, op->id);
op->phase = st_phase_on;
strcpy(op->action, PCMK_ACTION_ON);
/* Skip devices with automatic unfencing, because the cluster will handle it
* when the node rejoins.
*/
for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
GList *match = g_list_find_custom(op->devices_list, iter->data,
sort_strings);
if (match) {
op->devices_list = g_list_remove(op->devices_list, match->data);
}
}
g_list_free_full(op->automatic_list, free);
op->automatic_list = NULL;
/* Rewind device list pointer */
op->devices = op->devices_list;
}
/*!
* \internal
* \brief Reset a remapped reboot operation
*
* \param[in,out] op Operation to reset
*/
static void
undo_op_remap(remote_fencing_op_t *op)
{
if (op->phase > 0) {
crm_info("Undoing remap of reboot targeting %s for %s "
CRM_XS " id=%.8s", op->target, op->client_name, op->id);
op->phase = st_phase_requested;
strcpy(op->action, PCMK_ACTION_REBOOT);
}
}
/*!
* \internal
* \brief Create notification data XML for a fencing operation result
*
* \param[in] op Fencer operation that completed
*
* \return Newly created XML to add as notification data
* \note The caller is responsible for freeing the result.
*/
static xmlNode *
fencing_result2xml(const remote_fencing_op_t *op)
{
xmlNode *notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_NOTIFY_FENCE);
crm_xml_add_int(notify_data, PCMK_XA_STATE, op->state);
crm_xml_add(notify_data, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ACTION, op->action);
crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, op->delegate);
crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(notify_data, PCMK__XA_ST_CLIENTID, op->client_id);
crm_xml_add(notify_data, PCMK__XA_ST_CLIENTNAME, op->client_name);
return notify_data;
}
/*!
* \internal
* \brief Broadcast a fence result notification to all CPG peers
*
* \param[in] op Fencer operation that completed
* \param[in] op_merged Whether this operation is a duplicate of another
*/
void
fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
{
static int count = 0;
xmlNode *bcast = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
xmlNode *wrapper = NULL;
xmlNode *notify_data = NULL;
count++;
crm_trace("Broadcasting result to peers");
crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
crm_xml_add_int(bcast, PCMK_XA_COUNT, count);
if (op_merged) {
pcmk__xe_set_bool_attr(bcast, PCMK__XA_ST_OP_MERGED, true);
}
wrapper = pcmk__xe_create(bcast, PCMK__XE_ST_CALLDATA);
notify_data = fencing_result2xml(op);
stonith__xe_set_result(notify_data, &op->result);
xmlAddChild(wrapper, notify_data);
send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
free_xml(bcast);
return;
}
/*!
* \internal
* \brief Reply to a local request originator and notify all subscribed clients
*
* \param[in,out] op Fencer operation that completed
* \param[in,out] data Top-level XML to add notification to
*/
static void
handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
{
xmlNode *notify_data = NULL;
xmlNode *reply = NULL;
pcmk__client_t *client = NULL;
if (op->notify_sent == TRUE) {
/* nothing to do */
return;
}
/* Do notification with a clean data object */
crm_xml_add_int(data, PCMK_XA_STATE, op->state);
crm_xml_add(data, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(data, PCMK__XA_ST_OP, op->action);
reply = fenced_construct_reply(op->request, data, &op->result);
crm_xml_add(reply, PCMK__XA_ST_DELEGATE, op->delegate);
/* Send fencing OP reply to local client that initiated fencing */
client = pcmk__find_client_by_id(op->client_id);
if (client == NULL) {
crm_trace("Skipping reply to %s: no longer a client", op->client_id);
} else {
do_local_reply(reply, client, op->call_options);
}
    /* Broadcast to all local clients that the fencing operation happened */
notify_data = fencing_result2xml(op);
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, &op->result,
notify_data);
free_xml(notify_data);
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
    /* Mark this operation's notifications as already sent */
op->notify_sent = TRUE;
free_xml(reply);
}
/*!
* \internal
* \brief Finalize all duplicates of a given fencer operation
*
* \param[in,out] op Fencer operation that completed
* \param[in,out] data Top-level XML to add notification to
*/
static void
finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
{
for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *other = iter->data;
if (other->state == st_duplicate) {
other->state = op->state;
crm_debug("Performing duplicate notification for %s@%s: %s "
CRM_XS " id=%.8s",
other->client_name, other->originator,
pcmk_exec_status_str(op->result.execution_status),
other->id);
pcmk__copy_result(&op->result, &other->result);
finalize_op(other, data, true);
} else {
// Possible if (for example) it timed out already
crm_err("Skipping duplicate notification for %s@%s "
CRM_XS " state=%s id=%.8s",
other->client_name, other->originator,
stonith_op_state_str(other->state), other->id);
}
}
}
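/*!
 * \internal
 * \brief Get the node that executed an operation from its XML reply
 *
 * \param[in] xml  Reply XML to check
 *
 * \return Newly allocated copy of the reply's delegate if set, otherwise a
 *         copy of its source node
 * \note The caller is responsible for freeing the result.
 */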
static char *
delegate_from_xml(xmlNode *xml)
{
xmlNode *match = get_xpath_object("//@" PCMK__XA_ST_DELEGATE, xml,
LOG_NEVER);
if (match == NULL) {
return crm_element_value_copy(xml, PCMK__XA_SRC);
} else {
return crm_element_value_copy(match, PCMK__XA_ST_DELEGATE);
}
}
/*!
* \internal
* \brief Finalize a peer fencing operation
*
* Clean up after a fencing operation completes. This function has two code
* paths: the executioner uses it to broadcast the result to CPG peers, and then
* each peer (including the executioner) uses it to process that broadcast and
* notify its IPC clients of the result.
*
* \param[in,out] op Fencer operation that completed
* \param[in,out] data If not NULL, XML reply of last delegated operation
* \param[in] dup Whether this operation is a duplicate of another
* (in which case, do not broadcast the result)
*
* \note The operation result should be set before calling this function.
*/
static void
finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
{
int level = LOG_ERR;
const char *subt = NULL;
xmlNode *local_data = NULL;
gboolean op_merged = FALSE;
CRM_CHECK((op != NULL), return);
// This is a no-op if timers have already been cleared
clear_remote_op_timers(op);
if (op->notify_sent) {
// Most likely, this is a timed-out action that eventually completed
crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
"Result arrived too late " CRM_XS " id=%.8s",
op->action, (op->target? " targeting " : ""),
(op->target? op->target : ""),
(op->delegate? op->delegate : "unknown node"),
op->client_name, op->originator,
(op_merged? " (merged)" : ""),
op->id);
return;
}
set_fencing_completed(op);
undo_op_remap(op);
if (data == NULL) {
data = pcmk__xe_create(NULL, "remote-op");
local_data = data;
} else if (op->delegate == NULL) {
switch (op->result.execution_status) {
case PCMK_EXEC_NO_FENCE_DEVICE:
break;
case PCMK_EXEC_INVALID:
if (op->result.exit_status != CRM_EX_EXPIRED) {
op->delegate = delegate_from_xml(data);
}
break;
default:
op->delegate = delegate_from_xml(data);
break;
}
}
if (dup || (crm_element_value(data, PCMK__XA_ST_OP_MERGED) != NULL)) {
op_merged = true;
}
/* Tell everyone the operation is done. We will do the local
* notifications once we receive the broadcast back. */
subt = crm_element_value(data, PCMK__XA_SUBT);
if (!dup && !pcmk__str_eq(subt, PCMK__VALUE_BROADCAST, pcmk__str_none)) {
/* Defer notification until the bcast message arrives */
fenced_broadcast_op_result(op, op_merged);
free_xml(local_data);
return;
}
if (pcmk__result_ok(&op->result) || dup
|| !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
level = LOG_NOTICE;
}
do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
(op->target? op->target : ""),
(op->delegate? op->delegate : "unknown node"),
op->client_name, op->originator,
(op_merged? " (merged)" : ""),
crm_exit_str(op->result.exit_status),
pcmk_exec_status_str(op->result.execution_status),
((op->result.exit_reason == NULL)? "" : ": "),
((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
op->id);
handle_local_reply_and_notify(op, data);
if (!dup) {
finalize_op_duplicates(op, data);
}
/* Free non-essential parts of the record
* Keep the record around so we can query the history
*/
if (op->query_results) {
g_list_free_full(op->query_results, free_remote_query);
op->query_results = NULL;
}
if (op->request) {
free_xml(op->request);
op->request = NULL;
}
free_xml(local_data);
}
/*!
* \internal
* \brief Finalize a watchdog fencer op after the waiting time expires
*
* \param[in,out] userdata Fencer operation that completed
*
* \return G_SOURCE_REMOVE (which tells glib not to restart timer)
*/
static gboolean
remote_op_watchdog_done(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_one = 0;
crm_notice("Self-fencing (%s) by %s for %s assumed complete "
CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
op->state = st_done;
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
finalize_op(op, NULL, false);
return G_SOURCE_REMOVE;
}
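/*!
* \internal
* \brief Handle the expiration of a fencing operation's per-device timer
*
* Record a timeout result for the peer's attempt and try the next
* appropriate device, if any.
*
* \param[in,out] userdata Fencer operation whose timer expired
*
* \return G_SOURCE_REMOVE (which tells glib not to restart timer)
*/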
static gboolean
remote_op_timeout_one(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_one = 0;
crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
" id=%.8s", op->action, op->target, op->client_name, op->id);
pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
"Peer did not return fence result within timeout");
// The requested delay has been applied for the first device
if (op->client_delay > 0) {
op->client_delay = 0;
crm_trace("Try another device for '%s' action targeting %s "
"for client %s without delay " CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
}
// Try another device, if appropriate
request_peer_fencing(op, NULL);
return G_SOURCE_REMOVE;
}
/*!
* \internal
* \brief Finalize a remote fencer operation that timed out
*
* \param[in,out] op Fencer operation that timed out
* \param[in] reason Readable description of what step timed out
*/
static void
finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
{
crm_debug("Action '%s' targeting %s for client %s timed out "
CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
if (op->phase == st_phase_on) {
/* A remapped reboot operation timed out in the "on" phase, but the
* "off" phase completed successfully, so quit trying any further
* devices, and return success.
*/
op->state = st_done;
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
op->state = st_failed;
pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
}
finalize_op(op, NULL, false);
}
/*!
* \internal
* \brief Finalize a remote fencer operation that timed out
*
* \param[in,out] userdata Fencer operation that timed out
*
* \return G_SOURCE_REMOVE (which tells glib not to restart timer)
*/
static gboolean
remote_op_timeout(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
op->op_timer_total = 0;
if (op->state == st_done) {
crm_debug("Action '%s' targeting %s for client %s already completed "
CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
} else {
finalize_timed_out_op(userdata, "Fencing did not complete within a "
"total timeout based on the "
"configured timeout and retries for "
"any devices attempted");
}
return G_SOURCE_REMOVE;
}
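/*!
* \internal
* \brief Handle the expiration of a fencing operation's query timer
*
* If the operation is still in the query state, either proceed to fencing
* (when query results have arrived) or finalize the operation as timed out.
*
* \param[in,out] data Fencer operation whose query timer expired
*
* \return G_SOURCE_REMOVE (which tells glib not to restart timer)
*/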
static gboolean
remote_op_query_timeout(gpointer data)
{
remote_fencing_op_t *op = data;
op->query_timer = 0;
if (op->state == st_done) {
crm_debug("Operation %.8s targeting %s already completed",
op->id, op->target);
} else if (op->state == st_exec) {
crm_debug("Operation %.8s targeting %s already in progress",
op->id, op->target);
} else if (op->query_results) {
// Query succeeded, so attempt the actual fencing
crm_debug("Query %.8s targeting %s complete (state=%s)",
op->id, op->target, stonith_op_state_str(op->state));
request_peer_fencing(op, NULL);
} else {
crm_debug("Query %.8s targeting %s timed out (state=%s)",
op->id, op->target, stonith_op_state_str(op->state));
finalize_timed_out_op(op, "No capable peers replied to device query "
"within timeout");
}
return G_SOURCE_REMOVE;
}
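/*!
* \internal
* \brief Check whether a topology table entry has any levels configured
*
* \param[in] tp Topology table entry to check (may be NULL)
*
* \return TRUE if \p tp is NULL or has no configured levels, otherwise FALSE
*/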
static gboolean
topology_is_empty(stonith_topology_t *tp)
{
int i;
if (tp == NULL) {
return TRUE;
}
for (i = 0; i < ST_LEVEL_MAX; i++) {
if (tp->levels[i] != NULL) {
return FALSE;
}
}
return TRUE;
}
/*!
* \internal
* \brief Add a device to an operation's automatic unfencing list
*
* \param[in,out] op Operation to modify
* \param[in] device Device ID to add
*/
static void
add_required_device(remote_fencing_op_t *op, const char *device)
{
GList *match = g_list_find_custom(op->automatic_list, device,
sort_strings);
if (!match) {
op->automatic_list = g_list_prepend(op->automatic_list,
pcmk__str_copy(device));
}
}
/*!
* \internal
* \brief Remove a device from the automatic unfencing list
*
* \param[in,out] op Operation to modify
* \param[in] device Device ID to remove
*/
static void
remove_required_device(remote_fencing_op_t *op, const char *device)
{
GList *match = g_list_find_custom(op->automatic_list, device,
sort_strings);
if (match) {
op->automatic_list = g_list_remove(op->automatic_list, match->data);
}
}
/*!
* \internal
* \brief Set a fencing operation's device list (as a deep copy)
*
* \param[in,out] op Fencing operation to modify
* \param[in] devices List of device IDs to copy
*/
static void
set_op_device_list(remote_fencing_op_t * op, GList *devices)
{
GList *lpc = NULL;
if (op->devices_list) {
g_list_free_full(op->devices_list, free);
op->devices_list = NULL;
}
for (lpc = devices; lpc != NULL; lpc = lpc->next) {
const char *device = lpc->data;
op->devices_list = g_list_append(op->devices_list,
pcmk__str_copy(device));
}
op->devices = op->devices_list;
}
/*!
* \internal
* \brief Check whether a node matches a topology target
*
* \param[in] tp Topology table entry to check
* \param[in] node Name of node to check
*
* \return TRUE if node matches topology target
*/
static gboolean
topology_matches(const stonith_topology_t *tp, const char *node)
{
regex_t r_patt;
CRM_CHECK(node && tp && tp->target, return FALSE);
switch (tp->kind) {
case fenced_target_by_attribute:
/* This level targets by attribute, so tp->target is a NAME=VALUE pair
* of a permanent attribute applied to targeted nodes. The test below
* relies on the locally cached copy of the CIB, so if fencing needs to
* be done before the initial CIB is received or after a malformed CIB
* is received, then the topology cannot be used.
*/
if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
crm_notice("Matched %s with %s by attribute", node, tp->target);
return TRUE;
}
break;
case fenced_target_by_pattern:
/* This level targets node names matching a pattern, so tp->target
* (and tp->target_pattern) is a regular expression.
*/
if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
crm_info("Bad regex '%s' for fencing level", tp->target);
} else {
int status = regexec(&r_patt, node, 0, NULL, 0);
regfree(&r_patt);
if (status == 0) {
crm_notice("Matched %s with %s by name", node, tp->target);
return TRUE;
}
}
break;
case fenced_target_by_name:
crm_trace("Testing %s against %s", node, tp->target);
return pcmk__str_eq(tp->target, node, pcmk__str_casei);
default:
break;
}
crm_trace("No match for %s with %s", node, tp->target);
return FALSE;
}
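/*!
* \internal
* \brief Find the topology table entry matching a given node
*
* First try a direct lookup by node name, then check each entry for a match
* by attribute, pattern, or name.
*
* \param[in] host Name of node to find a topology entry for
*
* \return Topology table entry matching \p host if any, otherwise NULL
*/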
stonith_topology_t *
find_topology_for_host(const char *host)
{
GHashTableIter tIter;
stonith_topology_t *tp = g_hash_table_lookup(topology, host);
if(tp != NULL) {
crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
return tp;
}
g_hash_table_iter_init(&tIter, topology);
while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
if (topology_matches(tp, host)) {
crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
return tp;
}
}
crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
return NULL;
}
/*!
* \internal
* \brief Set fencing operation's device list to target's next topology level
*
* \param[in,out] op Remote fencing operation to modify
* \param[in] empty_ok If true, an operation without a target (i.e.
* queries) or a target without a topology will get a
* pcmk_rc_ok return value instead of ENODEV
*
* \return Standard Pacemaker return value
*/
static int
advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
{
stonith_topology_t *tp = NULL;
if (op->target) {
tp = find_topology_for_host(op->target);
}
if (topology_is_empty(tp)) {
return empty_ok? pcmk_rc_ok : ENODEV;
}
CRM_ASSERT(tp->levels != NULL);
stonith__set_call_options(op->call_options, op->id, st_opt_topology);
/* This is a new level, so undo any remapping left over from previous */
undo_op_remap(op);
do {
op->level++;
} while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
if (op->level < ST_LEVEL_MAX) {
crm_trace("Attempting fencing level %d targeting %s (%d devices) "
"for client %s@%s (id=%.8s)",
op->level, op->target, g_list_length(tp->levels[op->level]),
op->client_name, op->originator, op->id);
set_op_device_list(op, tp->levels[op->level]);
// The requested delay has been applied for the first fencing level
if ((op->level > 1) && (op->client_delay > 0)) {
op->client_delay = 0;
}
if ((g_list_next(op->devices_list) != NULL)
&& pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
/* A reboot has been requested for a topology level with multiple
* devices. Instead of rebooting the devices sequentially, we will
* turn them all off, then turn them all on again. (Think about
* switched power outlets for redundant power supplies.)
*/
op_phase_off(op);
}
return pcmk_rc_ok;
}
crm_info("All %sfencing options targeting %s for client %s@%s failed "
CRM_XS " id=%.8s",
(stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
op->target, op->client_name, op->originator, op->id);
return ENODEV;
}
/*!
* \internal
* \brief If fencing operation is a duplicate, merge it into the other one
*
* \param[in,out] op Fencing operation to check
*/
static void
merge_duplicates(remote_fencing_op_t *op)
{
GHashTableIter iter;
remote_fencing_op_t *other = NULL;
time_t now = time(NULL);
g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
const char *other_action = op_requested_action(other);
+ crm_node_t *node = NULL;
if (!strcmp(op->id, other->id)) {
continue; // Don't compare against self
}
if (other->state > st_exec) {
crm_trace("%.8s not duplicate of %.8s: not in progress",
op->id, other->id);
continue;
}
if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
op->id, other->id, op->target, other->target);
continue;
}
if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) {
crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
op->id, other->id, op->action, other_action);
continue;
}
if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
crm_trace("%.8s not duplicate of %.8s: same client %s",
op->id, other->id, op->client_name);
continue;
}
if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
crm_trace("%.8s not duplicate of %.8s: suicide for %s",
op->id, other->id, other->target);
continue;
}
- if (!fencing_peer_active(pcmk__get_node(0, other->originator, NULL,
- pcmk__node_search_cluster))) {
+
+ node = pcmk__get_node(0, other->originator, NULL,
+ pcmk__node_search_cluster_member);
+
+ if (!fencing_peer_active(node)) {
crm_notice("Failing action '%s' targeting %s originating from "
"client %s@%s: Originator is dead " CRM_XS " id=%.8s",
other->action, other->target, other->client_name,
other->originator, other->id);
crm_trace("%.8s not duplicate of %.8s: originator dead",
op->id, other->id);
other->state = st_failed;
continue;
}
if ((other->total_timeout > 0)
&& (now > (other->total_timeout + other->created))) {
crm_trace("%.8s not duplicate of %.8s: old (%lld vs. %lld + %ds)",
op->id, other->id, (long long)now, (long long)other->created,
other->total_timeout);
continue;
}
/* There is another in-flight request to fence the same host.
* Piggyback on that instead. If it fails, so do we.
*/
other->duplicates = g_list_append(other->duplicates, op);
if (other->total_timeout == 0) {
other->total_timeout = op->total_timeout =
TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
crm_trace("Best guess as to timeout used for %.8s: %ds",
other->id, other->total_timeout);
}
crm_notice("Merging fencing action '%s' targeting %s originating from "
"client %s with identical request from %s@%s "
CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
op->action, op->target, op->client_name,
other->client_name, other->originator,
op->id, other->id, other->total_timeout);
report_timeout_period(op, other->total_timeout);
op->state = st_duplicate;
}
}
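/*!
* \internal
* \brief Count peers in the cluster membership cache that can fence
*
* \return Number of cached peers for which fencing_peer_active() is true
*/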
static uint32_t
fencing_active_peers(void)
{
uint32_t count = 0;
crm_node_t *entry;
GHashTableIter gIter;
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
if(fencing_peer_active(entry)) {
count++;
}
}
return count;
}
/*!
* \internal
* \brief Process a manual confirmation of a pending fence action
*
* \param[in] client IPC client that sent confirmation
* \param[in,out] msg Request XML with manual confirmation
*
* \return Standard Pacemaker return code
*/
int
fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg)
{
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return EPROTO);
crm_notice("Received manual confirmation that %s has been fenced",
pcmk__s(crm_element_value(dev, PCMK__XA_ST_TARGET),
"unknown target"));
op = initiate_remote_stonith_op(client, msg, TRUE);
if (op == NULL) {
return EPROTO;
}
op->state = st_done;
set_fencing_completed(op);
op->delegate = pcmk__str_copy("a human");
// For the fencer's purposes, the fencing operation is done
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
finalize_op(op, msg, false);
/* For the requester's purposes, the operation is still pending. The
* actual result will be sent asynchronously via the operation's done_cb().
*/
return EINPROGRESS;
}
/*!
* \internal
* \brief Create a new remote stonith operation
*
* \param[in] client ID of local stonith client that initiated the operation
* \param[in] request The request from the client that started the operation
* \param[in] peer TRUE if this operation is owned by another stonith peer
* (an operation owned by one peer is stored on all peers,
* but only the owner executes it; all nodes get the results
* once the owner finishes execution)
*
* \return Newly created operation, or existing operation if one with the
* same ID already exists, or NULL on error
*/
void *
create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
{
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request,
LOG_NEVER);
int call_options = 0;
const char *operation = NULL;
init_stonith_remote_op_hash_table(&stonith_remote_op_list);
/* If this operation is owned by another node, check to make
* sure we haven't already created this operation. */
if (peer && dev) {
const char *op_id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
CRM_CHECK(op_id != NULL, return NULL);
op = g_hash_table_lookup(stonith_remote_op_list, op_id);
if (op) {
crm_debug("Reusing existing remote fencing op %.8s for %s",
op_id, ((client == NULL)? "unknown client" : client));
return op;
}
}
op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t));
crm_element_value_int(request, PCMK__XA_ST_TIMEOUT, &(op->base_timeout));
// Value -1 means disable any static/random fencing delays
crm_element_value_int(request, PCMK__XA_ST_DELAY, &(op->client_delay));
if (peer && dev) {
op->id = crm_element_value_copy(dev, PCMK__XA_ST_REMOTE_OP);
} else {
op->id = crm_generate_uuid();
}
g_hash_table_replace(stonith_remote_op_list, op->id, op);
op->state = st_query;
op->replies_expected = fencing_active_peers();
op->action = crm_element_value_copy(dev, PCMK__XA_ST_DEVICE_ACTION);
/* The node initiating the stonith operation. If an operation is relayed,
* this is the last node the operation lands on. When in standalone mode,
* origin is the ID of the client that originated the operation.
*
* It may also be the name of the function that created the operation.
*/
op->originator = crm_element_value_copy(dev, PCMK__XA_ST_ORIGIN);
if (op->originator == NULL) {
/* Local or relayed request */
op->originator = pcmk__str_copy(stonith_our_uname);
}
// Delegate may not be set
op->delegate = crm_element_value_copy(dev, PCMK__XA_ST_DELEGATE);
op->created = time(NULL);
CRM_LOG_ASSERT(client != NULL);
op->client_id = pcmk__str_copy(client);
/* For a RELAY operation, use the fencer's own name as the client name. */
operation = crm_element_value(request, PCMK__XA_ST_OP);
if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
} else {
op->client_name = crm_element_value_copy(request,
PCMK__XA_ST_CLIENTNAME);
}
op->target = crm_element_value_copy(dev, PCMK__XA_ST_TARGET);
// @TODO Figure out how to avoid copying XML here
op->request = pcmk__xml_copy(NULL, request);
crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options);
op->call_options = call_options;
crm_element_value_int(request, PCMK__XA_ST_CALLID, &(op->client_callid));
crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
"base timeout %ds, %u %s expected)",
(peer && dev)? "Recorded" : "Generated", op->id, op->action,
op->target, op->client_name, op->base_timeout,
op->replies_expected,
pcmk__plural_alt(op->replies_expected, "reply", "replies"));
if (op->call_options & st_opt_cs_nodeid) {
int nodeid;
crm_node_t *node;
pcmk__scan_min_int(op->target, &nodeid, 0);
node = pcmk__search_node_caches(nodeid, NULL,
pcmk__node_search_any
- |pcmk__node_search_known);
+ |pcmk__node_search_cluster_cib);
/* Ensure the conversion only happens once */
stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);
if (node && node->uname) {
pcmk__str_update(&(op->target), node->uname);
} else {
crm_warn("Could not expand nodeid '%s' into a host name", op->target);
}
}
/* Check whether this operation duplicates another in-flight operation */
merge_duplicates(op);
if (op->state != st_duplicate) {
/* kick history readers */
fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
}
/* safe to trim as long as that doesn't touch pending ops */
stonith_fence_history_trim();
return op;
}
/*!
* \internal
* \brief Create a peer fencing operation from a request, and initiate it
*
* \param[in] client IPC client that made request (NULL to get from request)
* \param[in] request Request XML
* \param[in] manual_ack Whether this is a manual action confirmation
*
* \return Newly created operation on success, otherwise NULL
*/
remote_fencing_op_t *
initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
gboolean manual_ack)
{
int query_timeout = 0;
xmlNode *query = NULL;
const char *client_id = NULL;
remote_fencing_op_t *op = NULL;
const char *relay_op_id = NULL;
const char *operation = NULL;
if (client) {
client_id = client->id;
} else {
client_id = crm_element_value(request, PCMK__XA_ST_CLIENTID);
}
CRM_LOG_ASSERT(client_id != NULL);
op = create_remote_stonith_op(client_id, request, FALSE);
op->owner = TRUE;
if (manual_ack) {
return op;
}
CRM_CHECK(op->action, return NULL);
if (advance_topology_level(op, true) != pcmk_rc_ok) {
op->state = st_failed;
}
switch (op->state) {
case st_failed:
// advance_topology_level() exhausted levels
pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"All topology levels failed");
crm_warn("Could not request peer fencing (%s) targeting %s "
CRM_XS " id=%.8s", op->action, op->target, op->id);
finalize_op(op, NULL, false);
return op;
case st_duplicate:
crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
CRM_XS " id=%.8s", op->action, op->target, op->id);
return op;
default:
crm_notice("Requesting peer fencing (%s) targeting %s "
CRM_XS " id=%.8s state=%s base_timeout=%ds",
op->action, op->target, op->id,
stonith_op_state_str(op->state), op->base_timeout);
}
query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
NULL, op->call_options);
crm_xml_add(query, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(query, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(query, PCMK__XA_ST_DEVICE_ACTION, op_requested_action(op));
crm_xml_add(query, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(query, PCMK__XA_ST_CLIENTID, op->client_id);
crm_xml_add(query, PCMK__XA_ST_CLIENTNAME, op->client_name);
crm_xml_add_int(query, PCMK__XA_ST_TIMEOUT, op->base_timeout);
/* For a RELAY operation, add the relay information to the query so that the original relayed operation can be deleted. */
operation = crm_element_value(request, PCMK__XA_ST_OP);
if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
if (relay_op_id) {
crm_xml_add(query, PCMK__XA_ST_REMOTE_OP_RELAY, relay_op_id);
}
}
send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
free_xml(query);
query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
return op;
}
enum find_best_peer_options {
/*! Skip checking the target peer for capable fencing devices */
FIND_PEER_SKIP_TARGET = 0x0001,
/*! Only check the target peer for capable fencing devices */
FIND_PEER_TARGET_ONLY = 0x0002,
/*! Skip peers and devices that are not verified */
FIND_PEER_VERIFIED_ONLY = 0x0004,
};
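/*!
* \internal
* \brief Check whether an operation and device qualify for watchdog fencing
*
* \param[in] op Fencing operation to check
* \param[in] device Device ID to check (NULL matches)
*
* \return true if watchdog fencing is enabled, \p device is absent or is the
* watchdog device, \p op is a fencing action, and the target does
* watchdog fencing, otherwise false
*/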
static bool
is_watchdog_fencing(const remote_fencing_op_t *op, const char *device)
{
return (stonith_watchdog_timeout_ms > 0
// Only an explicit mismatch rules out watchdog fencing.
&& pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches)
&& pcmk__is_fencing_action(op->action)
&& node_does_watchdog_fencing(op->target));
}
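/*!
* \internal
* \brief Search an operation's query results for a capable peer
*
* \param[in] device If not NULL, only consider peers with this device
* \param[in,out] op Fencing operation whose query results to search
* \param[in] options Group of enum find_best_peer_options flags
*
* \return First matching peer if any, otherwise NULL
*/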
static peer_device_info_t *
find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
{
GList *iter = NULL;
gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
if (!device && pcmk_is_set(op->call_options, st_opt_topology)) {
return NULL;
}
for (iter = op->query_results; iter != NULL; iter = iter->next) {
peer_device_info_t *peer = iter->data;
crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
peer->host, op->target, peer->ndevices,
pcmk__plural_s(peer->ndevices), peer->tried, options);
if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
continue;
}
if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
continue;
}
if (pcmk_is_set(op->call_options, st_opt_topology)) {
if (grab_peer_device(op, peer, device, verified_devices_only)) {
return peer;
}
} else if (!peer->tried
&& count_peer_devices(op, peer, verified_devices_only,
fenced_support_flag(op->action))) {
/* No topology: Use the current best peer */
crm_trace("Simple fencing");
return peer;
}
}
return NULL;
}
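/*!
* \internal
* \brief Choose the best peer for the next step of a fencing operation
*
* Prefer a verified peer other than the target, then any peer other than the
* target, and finally the target itself (except during the "on" phase of a
* remapped reboot), advancing through topology levels as needed.
*
* \param[in,out] op Fencing operation to choose a peer for
*
* \return Chosen peer if any, otherwise NULL
*/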
static peer_device_info_t *
stonith_choose_peer(remote_fencing_op_t * op)
{
const char *device = NULL;
peer_device_info_t *peer = NULL;
uint32_t active = fencing_active_peers();
do {
if (op->devices) {
device = op->devices->data;
crm_trace("Checking for someone to fence (%s) %s using %s",
op->action, op->target, device);
} else {
crm_trace("Checking for someone to fence (%s) %s",
op->action, op->target);
}
/* Best choice is a peer other than the target with verified access */
peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
if (peer) {
crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
return peer;
}
if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
return NULL;
}
/* If no other peer has verified access, next best is unverified access */
peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
if (peer) {
crm_trace("Found best unverified peer %s", peer->host);
return peer;
}
/* If no other peer can do it, last option is self-fencing
* (which is never allowed for the "on" phase of a remapped reboot)
*/
if (op->phase != st_phase_on) {
peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
if (peer) {
crm_trace("%s will fence itself", peer->host);
return peer;
}
}
/* Try the next fencing level if there is one (unless we're in the "on"
* phase of a remapped "reboot", because we ignore errors in that case)
*/
} while ((op->phase != st_phase_on)
&& pcmk_is_set(op->call_options, st_opt_topology)
&& (advance_topology_level(op, false) == pcmk_rc_ok));
/* With a simple watchdog fencing configuration without a topology,
* "device" is NULL here. Assume it should be done with watchdog fencing.
*/
if (is_watchdog_fencing(op, device)) {
crm_info("Couldn't contact watchdog-fencing target-node (%s)",
op->target);
/* check_watchdog_fencing_and_wait will log additional info */
} else {
crm_notice("Couldn't find anyone to fence (%s) %s using %s",
op->action, op->target, (device? device : "any device"));
}
return NULL;
}
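/*!
* \internal
* \brief Enforce a minimum timeout for watchdog fencing
*
* \param[in] specified_timeout Timeout that was configured (in seconds)
* \param[in] action_specific Whether timeout is action-specific (for logs)
* \param[in] op Fencing operation the timeout applies to
* \param[in] device Device ID if known, otherwise NULL
*
* \return \p specified_timeout, raised to the watchdog timeout if watchdog
* fencing applies and \p specified_timeout is shorter
*/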
static int
valid_fencing_timeout(int specified_timeout, bool action_specific,
const remote_fencing_op_t *op, const char *device)
{
int timeout = specified_timeout;
if (!is_watchdog_fencing(op, device)) {
return timeout;
}
timeout = (int) QB_MIN(QB_MAX(specified_timeout,
stonith_watchdog_timeout_ms / 1000), INT_MAX);
if (timeout > specified_timeout) {
if (action_specific) {
crm_warn("pcmk_%s_timeout %ds for %s is too short (must be >= "
PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds "
"instead",
op->action, specified_timeout, device? device : "watchdog",
timeout, timeout);
} else {
crm_warn("Fencing timeout %ds is too short (must be >= "
PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds "
"instead",
specified_timeout, timeout, timeout);
}
}
return timeout;
}
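/*!
* \internal
* \brief Get the timeout for one device in an operation's current phase
*
* \param[in] op Fencing operation
* \param[in] peer Peer that reported the device (may be NULL)
* \param[in] device Device ID (may be NULL)
* \param[in] with_delay Whether to add any configured fencing delay
*
* \return Timeout (in seconds) to use for \p device
*/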
static int
get_device_timeout(const remote_fencing_op_t *op,
const peer_device_info_t *peer, const char *device,
bool with_delay)
{
int timeout = op->base_timeout;
device_properties_t *props;
timeout = valid_fencing_timeout(op->base_timeout, false, op, device);
if (!peer || !device) {
return timeout;
}
props = g_hash_table_lookup(peer->devices, device);
if (!props) {
return timeout;
}
if (props->custom_action_timeout[op->phase]) {
timeout = valid_fencing_timeout(props->custom_action_timeout[op->phase],
true, op, device);
}
// op->client_delay < 0 means disable any static/random fencing delays
if (with_delay && (op->client_delay >= 0)) {
// delay_base is eventually limited by delay_max
timeout += (props->delay_max[op->phase] > 0 ?
props->delay_max[op->phase] : props->delay_base[op->phase]);
}
return timeout;
}
struct timeout_data {
const remote_fencing_op_t *op;
const peer_device_info_t *peer;
int total_timeout;
};
/*!
* \internal
* \brief Add timeout to a total if device has not been executed yet
*
* \param[in] key GHashTable key (device ID)
* \param[in] value GHashTable value (device properties)
* \param[in,out] user_data Timeout data
*/
static void
add_device_timeout(gpointer key, gpointer value, gpointer user_data)
{
const char *device_id = key;
device_properties_t *props = value;
struct timeout_data *timeout = user_data;
if (!props->executed[timeout->op->phase]
&& !props->disallowed[timeout->op->phase]) {
timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer,
device_id, true);
}
}
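/*!
* \internal
* \brief Calculate the total timeout of a peer's remaining devices
*
* \param[in] op Fencing operation
* \param[in] peer Peer whose device timeouts should be summed
*
* \return Sum of timeouts (in seconds) for the peer's devices that have not
* been executed or disallowed, or the operation's base timeout if
* that sum is zero
*/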
static int
get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
{
struct timeout_data timeout;
timeout.op = op;
timeout.peer = peer;
timeout.total_timeout = 0;
g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
}
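/*!
* \internal
* \brief Calculate a fencing operation's total timeout
*
* With topology, sum the timeouts of all devices at all levels (plus the
* watchdog timeout for watchdog devices that didn't reply); otherwise use
* the chosen peer's total timeout or the validated base timeout.
*
* \param[in] op Fencing operation
* \param[in] chosen_peer Peer chosen to execute the operation, if any
*
* \return Total timeout (in seconds) for \p op
*/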
static int
get_op_total_timeout(const remote_fencing_op_t *op,
const peer_device_info_t *chosen_peer)
{
long long total_timeout = 0;
stonith_topology_t *tp = find_topology_for_host(op->target);
if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
int i;
GList *device_list = NULL;
GList *iter = NULL;
GList *auto_list = NULL;
if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)
&& (op->automatic_list != NULL)) {
auto_list = g_list_copy(op->automatic_list);
}
/* Yep, this looks scary, nested loops all over the place.
* Here is what is going on.
* Loop1: Iterate through fencing levels.
* Loop2: If a fencing level has devices, loop through each device
* Loop3: For each device in a fencing level, see what peer owns it
* and what that peer has reported the timeout is for the device.
*/
for (i = 0; i < ST_LEVEL_MAX; i++) {
if (!tp->levels[i]) {
continue;
}
for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
bool found = false;
for (iter = op->query_results; iter != NULL; iter = iter->next) {
const peer_device_info_t *peer = iter->data;
if (auto_list) {
GList *match = g_list_find_custom(auto_list, device_list->data,
sort_strings);
if (match) {
auto_list = g_list_remove(auto_list, match->data);
}
}
if (find_peer_device(op, peer, device_list->data,
fenced_support_flag(op->action))) {
total_timeout += get_device_timeout(op, peer,
device_list->data,
true);
found = true;
break;
}
} /* End Loop3: match device with peer that owns device, find device's timeout period */
/* In the case of a watchdog device, add its timeout to the budget
* if we didn't get a reply for it
*/
if (!found && is_watchdog_fencing(op, device_list->data)) {
total_timeout += stonith_watchdog_timeout_ms / 1000;
}
} /* End Loop2: iterate through devices at a specific level */
} /*End Loop1: iterate through fencing levels */
// Add timeouts for any automatic unfencing devices not handled above
if (auto_list) {
for (iter = auto_list; iter != NULL; iter = iter->next) {
GList *iter2 = NULL;
for (iter2 = op->query_results; iter2 != NULL; iter2 = iter2->next) {
peer_device_info_t *peer = iter2->data;
if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
total_timeout += get_device_timeout(op, peer,
iter->data, true);
break;
}
}
}
}
g_list_free(auto_list);
} else if (chosen_peer) {
total_timeout = get_peer_timeout(op, chosen_peer);
} else {
total_timeout = valid_fencing_timeout(op->base_timeout, false, op,
NULL);
}
if (total_timeout <= 0) {
total_timeout = op->base_timeout;
}
/* Take any requested fencing delay into account to prevent it from eating
* up the total timeout.
*/
if (op->client_delay > 0) {
total_timeout += op->client_delay;
}
return (int) QB_MIN(total_timeout, INT_MAX);
}
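/*!
* \internal
* \brief Report a fencing operation's total timeout to the requesting client
*
* Send a timeout update directly if the client is connected locally,
* otherwise relay the update to the node the client is connected to, then
* repeat for all duplicates of the operation.
*
* \param[in,out] op Fencing operation to report timeout for
* \param[in] op_timeout Total timeout of operation (in seconds)
*/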
static void
report_timeout_period(remote_fencing_op_t * op, int op_timeout)
{
GList *iter = NULL;
xmlNode *update = NULL;
const char *client_node = NULL;
const char *client_id = NULL;
const char *call_id = NULL;
if (op->call_options & st_opt_sync_call) {
/* There is no reason to report the timeout for a synchronous call. It
* is impossible to use the reported timeout to do anything when the client
* is blocking for the response. This update is only important for
* async calls that require a callback to report the results in. */
return;
} else if (!op->request) {
return;
}
crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
client_node = crm_element_value(op->request, PCMK__XA_ST_CLIENTNODE);
call_id = crm_element_value(op->request, PCMK__XA_ST_CALLID);
client_id = crm_element_value(op->request, PCMK__XA_ST_CLIENTID);
if (!client_node || !call_id || !client_id) {
return;
}
if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) {
// Client is connected to this node, so send update directly to them
do_stonith_async_timeout_update(client_id, call_id, op_timeout);
return;
}
/* The client is connected to another node, relay this update to them */
update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
crm_xml_add(update, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(update, PCMK__XA_ST_CLIENTID, client_id);
crm_xml_add(update, PCMK__XA_ST_CALLID, call_id);
crm_xml_add_int(update, PCMK__XA_ST_TIMEOUT, op_timeout);
send_cluster_message(pcmk__get_node(0, client_node, NULL,
- pcmk__node_search_cluster),
+ pcmk__node_search_cluster_member),
crm_msg_stonith_ng, update, FALSE);
free_xml(update);
for (iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *dup = iter->data;
crm_trace("Reporting timeout for duplicate %.8s to client %s",
dup->id, dup->client_name);
report_timeout_period(iter->data, op_timeout);
}
}
/*!
* \internal
* \brief Advance an operation to the next device in its topology
*
* \param[in,out] op Fencer operation to advance
* \param[in] device ID of device that just completed
* \param[in,out] msg If not NULL, XML reply of last delegated operation
*/
static void
advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
xmlNode *msg)
{
/* Advance to the next device at this topology level, if any */
if (op->devices) {
op->devices = op->devices->next;
}
/* Handle automatic unfencing if an "on" action was requested */
if ((op->phase == st_phase_requested)
&& pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) {
/* If the device we just executed was required, it's not anymore */
remove_required_device(op, device);
/* If there are no more devices at this topology level, run through any
* remaining devices with automatic unfencing
*/
if (op->devices == NULL) {
op->devices = op->automatic_list;
}
}
if ((op->devices == NULL) && (op->phase == st_phase_off)) {
/* We're done with this level and with required devices, but we had
* remapped "reboot" to "off", so start over with "on". If any devices
* need to be turned back on, op->devices will be non-NULL after this.
*/
op_phase_on(op);
}
// This function is only called if the previous device succeeded
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
if (op->devices) {
/* Necessary devices remain, so execute the next one */
crm_trace("Next targeting %s on behalf of %s@%s",
op->target, op->client_name, op->originator);
// The requested delay has been applied for the first device
if (op->client_delay > 0) {
op->client_delay = 0;
}
request_peer_fencing(op, NULL);
} else {
/* We're done with all devices and phases, so finalize operation */
crm_trace("Marking complex fencing op targeting %s as complete",
op->target);
op->state = st_done;
finalize_op(op, msg, false);
}
}
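/*!
* \internal
* \brief Wait for a target to self-fence, if it does watchdog fencing
*
* \param[in,out] op Fencing operation whose target may self-fence
*
* \return TRUE if a timer was engaged to wait for the self-fencing,
* otherwise FALSE
*/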
static gboolean
check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
{
if (node_does_watchdog_fencing(op->target)) {
guint timeout_ms = QB_MIN(stonith_watchdog_timeout_ms, UINT_MAX);
crm_notice("Waiting %s for %s to self-fence (%s) for "
"client %s " CRM_XS " id=%.8s",
pcmk__readable_interval(timeout_ms), op->target, op->action,
op->client_name, op->id);
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
}
op->op_timer_one = g_timeout_add(timeout_ms, remote_op_watchdog_done,
op);
return TRUE;
} else {
crm_debug("Skipping fallback to watchdog-fencing as %s is "
"not in host-list", op->target);
}
return FALSE;
}
/*!
* \internal
* \brief Ask a peer to execute a fencing operation
*
* \param[in,out] op Fencing operation to be executed
* \param[in,out] peer If NULL or topology is in use, choose best peer to
* execute the fencing, otherwise use this peer
*/
static void
request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
{
const char *device = NULL;
int timeout;
CRM_CHECK(op != NULL, return);
crm_trace("Action %.8s targeting %s for %s is %s",
op->id, op->target, op->client_name,
stonith_op_state_str(op->state));
if ((op->phase == st_phase_on) && (op->devices != NULL)) {
/* We are in the "on" phase of a remapped topology reboot. If this
* device has pcmk_reboot_action="off", or doesn't support the "on"
* action, skip it.
*
* We can't check device properties at this point because we haven't
* chosen a peer for this stage yet. Instead, we check the local node's
* knowledge about the device. If different versions of the fence agent
* are installed on different nodes, there's a chance this could be
* mistaken, but the worst that could happen is we don't try turning the
* node back on when we should.
*/
device = op->devices->data;
if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF,
pcmk__str_none)) {
crm_info("Not turning %s back on using %s because the device is "
"configured to stay off (pcmk_reboot_action='off')",
op->target, device);
advance_topology_device_in_level(op, device, NULL);
return;
}
if (!fenced_device_supports_on(device)) {
crm_info("Not turning %s back on using %s because the agent "
"doesn't support 'on'", op->target, device);
advance_topology_device_in_level(op, device, NULL);
return;
}
}
timeout = op->base_timeout;
if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
peer = stonith_choose_peer(op);
}
if (!op->op_timer_total) {
op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
report_timeout_period(op, op->total_timeout);
crm_info("Total timeout set to %ds for peer's fencing targeting %s for %s"
CRM_XS "id=%.8s",
op->total_timeout, op->target, op->client_name, op->id);
}
if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
/* Ignore the caller's peer preference if topology is in use, because
* that peer might not have access to the required device. With
* topology, stonith_choose_peer() removes the device from further
* consideration, so the timeout must be calculated beforehand.
*
* @TODO Basing the total timeout on the caller's preferred peer (above)
* is less than ideal.
*/
peer = stonith_choose_peer(op);
device = op->devices->data;
/* Fencing timeout sent to peer takes no delay into account.
* The peer will add a dedicated timer for any delay upon
* schedule_stonith_command().
*/
timeout = get_device_timeout(op, peer, device, false);
}
if (peer) {
int timeout_one = 0;
xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
if (op->client_delay > 0) {
/* Take requested fencing delay into account to prevent it from
* eating up the timeout.
*/
timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay;
}
crm_xml_add(remote_op, PCMK__XA_ST_REMOTE_OP, op->id);
crm_xml_add(remote_op, PCMK__XA_ST_TARGET, op->target);
crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ACTION, op->action);
crm_xml_add(remote_op, PCMK__XA_ST_ORIGIN, op->originator);
crm_xml_add(remote_op, PCMK__XA_ST_CLIENTID, op->client_id);
crm_xml_add(remote_op, PCMK__XA_ST_CLIENTNAME, op->client_name);
crm_xml_add_int(remote_op, PCMK__XA_ST_TIMEOUT, timeout);
crm_xml_add_int(remote_op, PCMK__XA_ST_CALLOPT, op->call_options);
crm_xml_add_int(remote_op, PCMK__XA_ST_DELAY, op->client_delay);
if (device) {
timeout_one += TIMEOUT_MULTIPLY_FACTOR *
get_device_timeout(op, peer, device, true);
crm_notice("Requesting that %s perform '%s' action targeting %s "
"using %s " CRM_XS " for client %s (%ds)",
peer->host, op->action, op->target, device,
op->client_name, timeout_one);
crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ID, device);
} else {
timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
crm_notice("Requesting that %s perform '%s' action targeting %s "
CRM_XS " for client %s (%ds, %s)",
peer->host, op->action, op->target, op->client_name,
timeout_one,
pcmk__readable_interval(stonith_watchdog_timeout_ms));
}
op->state = st_exec;
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
op->op_timer_one = 0;
}
if (!is_watchdog_fencing(op, device)
|| !check_watchdog_fencing_and_wait(op)) {
/* Some thoughts about self-fencing cases reaching this point:
- The check in check_watchdog_fencing_and_wait shouldn't fail
if STONITH_WATCHDOG_ID is chosen as the fencing device, since
its presence implies watchdog-fencing is enabled anyway.
- If watchdog-fencing is disabled, either in general or for a
specific target (detected in check_watchdog_fencing_and_wait),
we can't expect a success answer for some other kind of
self-fencing, but a timeout is fine if the node doesn't come
back in between.
- Delicate is the case where we have watchdog-fencing enabled
for a node but the watchdog-fencing device isn't explicitly
chosen for suicide. Local pe-execution in sbd may detect the
node as unclean and lead to timely suicide. Otherwise the
selection of PCMK_OPT_STONITH_WATCHDOG_TIMEOUT at least is
questionable.
*/
/* If we get here, we're not waiting for a watchdog timeout, so
engage the timer with the timeout evaluated earlier */
op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
}
send_cluster_message(pcmk__get_node(0, peer->host, NULL,
- pcmk__node_search_cluster),
+ pcmk__node_search_cluster_member),
crm_msg_stonith_ng, remote_op, FALSE);
peer->tried = TRUE;
free_xml(remote_op);
return;
} else if (op->phase == st_phase_on) {
/* A remapped "on" cannot be executed, but the node was already
* turned off successfully, so ignore the error and continue.
*/
crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
"after successful 'off'", device, op->target);
advance_topology_device_in_level(op, device, NULL);
return;
} else if (op->owner == FALSE) {
crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
op->action, op->target, op->client_name);
} else if (op->query_timer == 0) {
/* We've exhausted all available peers */
crm_info("No remaining peers capable of fencing (%s) %s for client %s "
CRM_XS " state=%s", op->action, op->target, op->client_name,
stonith_op_state_str(op->state));
CRM_CHECK(op->state < st_done, return);
finalize_timed_out_op(op, "All nodes failed, or are unable, to "
"fence target");
} else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
/* if the operation never left the query state,
* but we have all the expected replies, then no devices
* are available to execute the fencing operation. */
if (is_watchdog_fencing(op, device)
&& check_watchdog_fencing_and_wait(op)) {
/* Consider a watchdog fencing operation targeting an offline node
* to be executing once we start waiting for the target to
* self-fence, so that when the query timer pops,
* remote_op_query_timeout() considers the fencing already in
* progress.
*/
op->state = st_exec;
return;
}
if (op->state == st_query) {
crm_info("No peers (out of %d) have devices capable of fencing "
"(%s) %s for client %s " CRM_XS " state=%s",
op->replies, op->action, op->target, op->client_name,
stonith_op_state_str(op->state));
pcmk__reset_result(&op->result);
pcmk__set_result(&op->result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL);
} else {
if (pcmk_is_set(op->call_options, st_opt_topology)) {
pcmk__reset_result(&op->result);
pcmk__set_result(&op->result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL);
}
/* ... else use existing result from previous failed attempt
* (topology is not in use, and no devices remain to be attempted).
* Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
* prevent finalize_op() from setting the correct delegate if
* needed.
*/
crm_info("No peers (out of %d) are capable of fencing (%s) %s "
"for client %s " CRM_XS " state=%s",
op->replies, op->action, op->target, op->client_name,
stonith_op_state_str(op->state));
}
op->state = st_failed;
finalize_op(op, NULL, false);
} else {
crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
"for client %s " CRM_XS " id=%.8s",
op->action, op->target, (device? " using " : ""),
(device? device : ""), op->client_name, op->id);
}
}
/*!
* \internal
* \brief Comparison function for sorting query results
*
* \param[in] a GList item to compare
* \param[in] b GList item to compare
*
* \return Per the glib documentation, "a negative integer if the first value
* comes before the second, 0 if they are equal, or a positive integer
* if the first value comes after the second."
*/
static gint
sort_peers(gconstpointer a, gconstpointer b)
{
const peer_device_info_t *peer_a = a;
const peer_device_info_t *peer_b = b;
return (peer_b->ndevices - peer_a->ndevices);
}
/*!
* \internal
* \brief Check whether all devices in a target's topology were found
*
* \param[in] op Fencing operation with topology to check
*
* \return TRUE if every device in the target's topology appears in the
* operation's query results, otherwise FALSE
*/
static gboolean
all_topology_devices_found(const remote_fencing_op_t *op)
{
GList *device = NULL;
GList *iter = NULL;
device_properties_t *match = NULL;
stonith_topology_t *tp = NULL;
gboolean skip_target = FALSE;
int i;
tp = find_topology_for_host(op->target);
if (!tp) {
return FALSE;
}
if (pcmk__is_fencing_action(op->action)) {
/* Don't count the devices on the target node if we are killing
* the target node. */
skip_target = TRUE;
}
for (i = 0; i < ST_LEVEL_MAX; i++) {
for (device = tp->levels[i]; device; device = device->next) {
match = NULL;
for (iter = op->query_results; iter && !match; iter = iter->next) {
peer_device_info_t *peer = iter->data;
if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
continue;
}
match = find_peer_device(op, peer, device->data, st_device_supports_none);
}
if (!match) {
return FALSE;
}
}
}
return TRUE;
}
/*!
* \internal
* \brief Parse action-specific device properties from XML
*
* \param[in] xml XML element containing the properties
* \param[in] peer Name of peer that sent XML (for logs)
* \param[in] device Device ID (for logs)
* \param[in] action Action the properties relate to (for logs)
* \param[in,out] op Fencing operation that properties are being parsed for
* \param[in] phase Phase the properties relate to
* \param[in,out] props Device properties to update
*/
static void
parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
const char *action, remote_fencing_op_t *op,
enum st_remap_phase phase, device_properties_t *props)
{
props->custom_action_timeout[phase] = 0;
crm_element_value_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
&props->custom_action_timeout[phase]);
if (props->custom_action_timeout[phase]) {
crm_trace("Peer %s with device %s returned %s action timeout %ds",
peer, device, action, props->custom_action_timeout[phase]);
}
props->delay_max[phase] = 0;
crm_element_value_int(xml, PCMK__XA_ST_DELAY_MAX, &props->delay_max[phase]);
if (props->delay_max[phase]) {
crm_trace("Peer %s with device %s returned maximum of random delay %ds for %s",
peer, device, props->delay_max[phase], action);
}
props->delay_base[phase] = 0;
crm_element_value_int(xml, PCMK__XA_ST_DELAY_BASE,
&props->delay_base[phase]);
if (props->delay_base[phase]) {
crm_trace("Peer %s with device %s returned base delay %ds for %s",
peer, device, props->delay_base[phase], action);
}
/* Handle devices with automatic unfencing */
if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
int required = 0;
crm_element_value_int(xml, PCMK__XA_ST_REQUIRED, &required);
if (required) {
crm_trace("Peer %s requires device %s to execute for action %s",
peer, device, action);
add_required_device(op, device);
}
}
/* If a reboot is remapped to off+on, it's possible that a node is allowed
* to perform one action but not another.
*/
if (pcmk__xe_attr_is_true(xml, PCMK__XA_ST_ACTION_DISALLOWED)) {
props->disallowed[phase] = TRUE;
crm_trace("Peer %s is disallowed from executing %s for device %s",
peer, action, device);
}
}
/*!
* \internal
* \brief Parse one device's properties from peer's XML query reply
*
* \param[in] xml XML node containing device properties
* \param[in,out] op Operation that query and reply relate to
* \param[in,out] peer Peer's device information
* \param[in] device ID of device being parsed
*/
static void
add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
peer_device_info_t *peer, const char *device)
{
xmlNode *child;
int verified = 0;
device_properties_t *props =
pcmk__assert_alloc(1, sizeof(device_properties_t));
int flags = st_device_supports_on; /* Assume older nodes that don't set the flag support the on action */
/* Add a new entry to this peer's devices list */
g_hash_table_insert(peer->devices, pcmk__str_copy(device), props);
/* Peers with verified (monitored) access will be preferred */
crm_element_value_int(xml, PCMK__XA_ST_MONITOR_VERIFIED, &verified);
if (verified) {
crm_trace("Peer %s has confirmed a verified device %s",
peer->host, device);
props->verified = TRUE;
}
crm_element_value_int(xml, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, &flags);
props->device_support_flags = flags;
/* Parse action-specific device properties */
parse_action_specific(xml, peer->host, device, op_requested_action(op),
op, st_phase_requested, props);
for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL;
child = pcmk__xe_next(child)) {
/* Replies for "reboot" operations will include the action-specific
* values for "off" and "on" in child elements, just in case the reboot
* winds up getting remapped.
*/
if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_OFF, pcmk__str_none)) {
parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF,
op, st_phase_off, props);
} else if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_ON,
pcmk__str_none)) {
parse_action_specific(child, peer->host, device, PCMK_ACTION_ON,
op, st_phase_on, props);
}
}
}
/*!
* \internal
* \brief Parse a peer's XML query reply and add it to operation's results
*
* \param[in,out] op Operation that query and reply relate to
* \param[in] host Name of peer that sent this reply
* \param[in] ndevices Number of devices expected in reply
* \param[in] xml XML node containing device list
*
* \return Newly allocated result structure with parsed reply
*/
static peer_device_info_t *
add_result(remote_fencing_op_t *op, const char *host, int ndevices,
const xmlNode *xml)
{
peer_device_info_t *peer = pcmk__assert_alloc(1,
sizeof(peer_device_info_t));
xmlNode *child;
peer->host = pcmk__str_copy(host);
peer->devices = pcmk__strkey_table(free, free);
/* Each child element describes one capable device available to the peer */
for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL;
child = pcmk__xe_next(child)) {
const char *device = pcmk__xe_id(child);
if (device) {
add_device_properties(child, op, peer, device);
}
}
peer->ndevices = g_hash_table_size(peer->devices);
CRM_CHECK(ndevices == peer->ndevices,
crm_err("Query claimed to have %d device%s but %d found",
ndevices, pcmk__plural_s(ndevices), peer->ndevices));
op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
return peer;
}
/*!
* \internal
* \brief Handle a peer's reply to our fencing query
*
* Parse a query result from XML and store it in the remote operation
* table, and when enough replies have been received, issue a fencing request.
*
* \param[in] msg XML reply received
*
* \return pcmk_ok on success, -errno on error
*
* \note See initiate_remote_stonith_op() for how the XML query was initially
* formed, and stonith_query() for how the peer formed its XML reply.
*/
int
process_remote_stonith_query(xmlNode *msg)
{
int ndevices = 0;
gboolean host_is_target = FALSE;
gboolean have_all_replies = FALSE;
const char *id = NULL;
const char *host = NULL;
remote_fencing_op_t *op = NULL;
peer_device_info_t *peer = NULL;
uint32_t replies_expected;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
CRM_CHECK(id != NULL, return -EPROTO);
dev = get_xpath_object("//@" PCMK__XA_ST_AVAILABLE_DEVICES, msg, LOG_ERR);
CRM_CHECK(dev != NULL, return -EPROTO);
crm_element_value_int(dev, PCMK__XA_ST_AVAILABLE_DEVICES, &ndevices);
op = g_hash_table_lookup(stonith_remote_op_list, id);
if (op == NULL) {
crm_debug("Received query reply for unknown or expired operation %s",
id);
return -EOPNOTSUPP;
}
replies_expected = fencing_active_peers();
if (op->replies_expected < replies_expected) {
replies_expected = op->replies_expected;
}
if ((++op->replies >= replies_expected) && (op->state == st_query)) {
have_all_replies = TRUE;
}
host = crm_element_value(msg, PCMK__XA_SRC);
host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);
crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
op->replies, replies_expected, host,
op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
if (ndevices > 0) {
peer = add_result(op, host, ndevices, dev);
}
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
if (pcmk_is_set(op->call_options, st_opt_topology)) {
/* If we start the fencing before all the topology results are in,
* it is possible fencing levels will be skipped because of the missing
* query results. */
if (op->state == st_query && all_topology_devices_found(op)) {
/* All the query results are in for the topology; start the fencing ops. */
crm_trace("All topology devices found");
request_peer_fencing(op, peer);
} else if (have_all_replies) {
crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
request_peer_fencing(op, NULL);
}
} else if (op->state == st_query) {
int nverified = count_peer_devices(op, peer, TRUE,
fenced_support_flag(op->action));
/* We have a result for a non-topology fencing op that looks promising,
* so go ahead and start fencing before the query timeout */
if ((peer != NULL) && !host_is_target && nverified) {
/* we have a verified device living on a peer that is not the target */
crm_trace("Found %d verified device%s",
nverified, pcmk__plural_s(nverified));
request_peer_fencing(op, peer);
} else if (have_all_replies) {
crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
request_peer_fencing(op, NULL);
} else {
crm_trace("Waiting for more peer results before launching fencing operation");
}
} else if ((peer != NULL) && (op->state == st_done)) {
crm_info("Discarding query result from %s (%d device%s): "
"Operation is %s", peer->host,
peer->ndevices, pcmk__plural_s(peer->ndevices),
stonith_op_state_str(op->state));
}
return pcmk_ok;
}
/*!
* \internal
* \brief Handle a peer's reply to a fencing request
*
* Parse a fencing reply from XML, and either finalize the operation
* or attempt another device as appropriate.
*
* \param[in] msg XML reply received
*/
void
fenced_process_fencing_reply(xmlNode *msg)
{
const char *id = NULL;
const char *device = NULL;
remote_fencing_op_t *op = NULL;
xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR);
pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
CRM_CHECK(dev != NULL, return);
id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
CRM_CHECK(id != NULL, return);
dev = stonith__find_xe_with_result(msg);
CRM_CHECK(dev != NULL, return);
stonith__xe_get_result(dev, &result);
device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
if (stonith_remote_op_list) {
op = g_hash_table_lookup(stonith_remote_op_list, id);
}
if ((op == NULL) && pcmk__result_ok(&result)) {
/* Record successful fencing operations */
const char *client_id = crm_element_value(dev, PCMK__XA_ST_CLIENTID);
op = create_remote_stonith_op(client_id, dev, TRUE);
}
if (op == NULL) {
/* Could be for an event that began before we started */
/* TODO: Record the op for later querying */
crm_info("Received peer result of unknown or expired operation %s", id);
pcmk__reset_result(&result);
return;
}
pcmk__reset_result(&op->result);
op->result = result; // The operation takes ownership of the result
if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
crm_err("Received outdated reply for device %s (instead of %s) to "
"fence (%s) %s. Operation already timed out at peer level.",
device, (const char *) op->devices->data, op->action, op->target);
return;
}
if (pcmk__str_eq(crm_element_value(msg, PCMK__XA_SUBT),
PCMK__VALUE_BROADCAST, pcmk__str_none)) {
if (pcmk__result_ok(&op->result)) {
op->state = st_done;
} else {
op->state = st_failed;
}
finalize_op(op, msg, false);
return;
} else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
/* If this isn't a remote-level broadcast, and we are not the
 * originator of the operation, we should not be receiving this message. */
crm_err("Received non-broadcast fencing result for operation %.8s "
"we do not own (device %s targeting %s)",
op->id, device, op->target);
return;
}
if (pcmk_is_set(op->call_options, st_opt_topology)) {
const char *device = NULL;
const char *reason = op->result.exit_reason;
/* We own the op, and it is complete. Broadcast the result to all nodes
 * and notify our local clients. */
if (op->state == st_done) {
finalize_op(op, msg, false);
return;
}
device = crm_element_value(msg, PCMK__XA_ST_DEVICE_ID);
if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
/* A remapped "on" failed, but the node was already turned off
* successfully, so ignore the error and continue.
*/
crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
"after successful 'off'",
device, pcmk_exec_status_str(op->result.execution_status),
(reason == NULL)? "" : ": ",
(reason == NULL)? "" : reason,
op->target);
pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
"%s%s%s%s",
op->action, op->target,
((device == NULL)? "" : " using "),
((device == NULL)? "" : device),
op->client_name,
op->originator,
pcmk_exec_status_str(op->result.execution_status),
(reason == NULL)? "" : " (",
(reason == NULL)? "" : reason,
(reason == NULL)? "" : ")");
}
if (pcmk__result_ok(&op->result)) {
/* An operation completed successfully. Try another device if
* necessary, otherwise mark the operation as done. */
advance_topology_device_in_level(op, device, msg);
return;
} else {
/* This device failed, time to try another topology level. If no other
* levels are available, mark this operation as failed and report results. */
if (advance_topology_level(op, false) != pcmk_rc_ok) {
op->state = st_failed;
finalize_op(op, msg, false);
return;
}
}
} else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
op->state = st_done;
finalize_op(op, msg, false);
return;
} else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
&& (op->devices == NULL)) {
/* If the operation timed out, don't bother retrying other peers. */
op->state = st_failed;
finalize_op(op, msg, false);
return;
} else {
/* fall-through and attempt other fencing action using another peer */
}
/* Retry on failure */
crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
op->target, op->originator, op->client_name,
pcmk_exec_status_str(op->result.execution_status));
request_peer_fencing(op, NULL);
}
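/* Illustrative summary (an editorial sketch, not upstream documentation) of
 * the dispatch above:
 *
 *   broadcast result             -> finalize_op()
 *   reply we do not own          -> discard with an error
 *   topology op                  -> advance device/level or finalize
 *   success with no devices left -> finalize as st_done
 *   timeout with no devices left -> finalize as st_failed
 *   anything else                -> retry via request_peer_fencing()
 */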
gboolean
stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
{
GHashTableIter iter;
time_t now = time(NULL);
remote_fencing_op_t *rop = NULL;
if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
action == NULL) {
return FALSE;
}
g_hash_table_iter_init(&iter, stonith_remote_op_list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
if (strcmp(rop->target, target) != 0) {
continue;
} else if (rop->state != st_done) {
continue;
/* We don't have to worry about remapped reboots here
* because if state is done, any remapping has been undone
*/
} else if (strcmp(rop->action, action) != 0) {
continue;
} else if ((rop->completed + tolerance) < now) {
continue;
}
crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
target, action, tolerance, rop->delegate, rop->originator);
return TRUE;
}
return FALSE;
}
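/* Illustrative sketch (the wrapper below is hypothetical; everything it calls
 * exists in this file): a caller can consult stonith_check_fence_tolerance()
 * before scheduling a new fencing action, to avoid re-fencing a target that a
 * matching, successful action already covered within the tolerance window.
 */
static gboolean
already_fenced_within_tolerance(const remote_fencing_op_t *op, int tolerance)
{
    // TRUE only if a matching completed operation is recent enough
    return stonith_check_fence_tolerance(tolerance, op->target, op->action);
}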
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index f458e07112..53ab2c5856 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1,680 +1,680 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> // PRIu32, PRIx32
#include <crm/crm.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/output_internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include <pacemaker-fenced.h>
#define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
char *stonith_our_uname = NULL;
long long stonith_watchdog_timeout_ms = 0;
GList *stonith_watchdog_targets = NULL;
static GMainLoop *mainloop = NULL;
gboolean stand_alone = FALSE;
gboolean stonith_shutdown_flag = FALSE;
static qb_ipcs_service_t *ipcs = NULL;
static pcmk__output_t *out = NULL;
pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
static struct {
bool no_cib_connect;
gchar **log_files;
} options;
crm_exit_t exit_code = CRM_EX_OK;
static void stonith_cleanup(void);
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
if (stonith_shutdown_flag) {
crm_info("Ignoring new client [%d] during shutdown",
pcmk__client_pid(c));
return -ECONNREFUSED;
}
if (pcmk__new_client(c, uid, gid) == NULL) {
return -ENOMEM;
}
return 0;
}
/* Exit code means? */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
int call_options = 0;
xmlNode *request = NULL;
pcmk__client_t *c = pcmk__find_client(qbc);
const char *op = NULL;
if (c == NULL) {
crm_info("Invalid client: %p", qbc);
return 0;
}
request = pcmk__client_data2xml(c, data, &id, &flags);
if (request == NULL) {
pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL);
return 0;
}
op = crm_element_value(request, PCMK__XA_CRM_TASK);
if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
crm_xml_add(request, PCMK__XA_ST_OP, op);
crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname);
send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
free_xml(request);
return 0;
}
if (c->name == NULL) {
const char *value = crm_element_value(request, PCMK__XA_ST_CLIENTNAME);
c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid);
}
crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options);
crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32
" from client %s", flags, call_options, id, pcmk__client_name(c));
if (pcmk_is_set(call_options, st_opt_sync_call)) {
CRM_ASSERT(flags & crm_ipc_client_response);
CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
c->request_id = id; /* Reply only to the last one */
}
crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname);
crm_log_xml_trace(request, "ipc-received");
stonith_command(c, id, flags, request, NULL);
free_xml(request);
return 0;
}
/* Error code means? */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
return 0;
}
crm_trace("Connection %p closed", c);
pcmk__free_client(client);
/* 0 means: yes, go ahead and destroy the connection */
return 0;
}
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
crm_trace("Connection %p destroyed", c);
st_ipc_closed(c);
}
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC);
const char *op = crm_element_value(msg, PCMK__XA_ST_OP);
if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) {
return;
}
crm_log_xml_trace(msg, "Peer[inbound]");
stonith_command(NULL, 0, 0, msg, remote_peer);
}
#if SUPPORT_COROSYNC
static void
stonith_peer_ais_callback(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
uint32_t kind = 0;
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
if(data == NULL) {
return;
}
if (kind == crm_class_cluster) {
xml = pcmk__xml_parse(data);
if (xml == NULL) {
crm_err("Invalid XML: '%.120s'", data);
free(data);
return;
}
crm_xml_add(xml, PCMK__XA_SRC, from);
stonith_peer_callback(xml, NULL);
}
free_xml(xml);
free(data);
return;
}
static void
stonith_peer_cs_destroy(gpointer user_data)
{
crm_crit("Lost connection to cluster layer, shutting down");
stonith_shutdown(0);
}
#endif
void
do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
int call_options)
{
/* send callback to originating child */
int local_rc = pcmk_rc_ok;
int rid = 0;
uint32_t ipc_flags = crm_ipc_server_event;
if (pcmk_is_set(call_options, st_opt_sync_call)) {
CRM_LOG_ASSERT(client->request_id);
rid = client->request_id;
client->request_id = 0;
ipc_flags = crm_ipc_flags_none;
}
local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags);
if (local_rc == pcmk_rc_ok) {
crm_trace("Sent response %d to client %s",
rid, pcmk__client_name(client));
} else {
crm_warn("%synchronous reply to client %s failed: %s",
(pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"),
pcmk__client_name(client), pcmk_rc_str(local_rc));
}
}
uint64_t
get_stonith_flag(const char *name)
{
if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) {
return st_callback_notify_fence;
} else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) {
return st_callback_device_add;
} else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) {
return st_callback_device_del;
} else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY,
pcmk__str_none)) {
return st_callback_notify_history;
} else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
pcmk__str_none)) {
return st_callback_notify_history_synced;
}
return st_callback_unknown;
}
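/* Illustrative sketch (hypothetical helper): get_stonith_flag() maps a
 * notification type string to its subscription bit, which can then be tested
 * against a client's flags exactly as stonith_notify_client() does below.
 */
static gboolean
client_subscribed_to(const pcmk__client_t *client, const char *type)
{
    return pcmk_is_set(client->flags, get_stonith_flag(type));
}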
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
const xmlNode *update_msg = user_data;
pcmk__client_t *client = value;
const char *type = NULL;
CRM_CHECK(client != NULL, return);
CRM_CHECK(update_msg != NULL, return);
type = crm_element_value(update_msg, PCMK__XA_SUBT);
CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
if (client->ipcs == NULL) {
crm_trace("Skipping client with NULL channel");
return;
}
if (pcmk_is_set(client->flags, get_stonith_flag(type))) {
int rc = pcmk__ipc_send_xml(client, 0, update_msg,
crm_ipc_server_event);
if (rc != pcmk_rc_ok) {
crm_warn("%s notification of client %s failed: %s "
CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
pcmk_rc_str(rc), client->id, rc);
} else {
crm_trace("Sent %s notification to client %s",
type, pcmk__client_name(client));
}
}
}
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
pcmk__client_t *client = NULL;
xmlNode *notify_data = NULL;
if (!timeout || !call_id || !client_id) {
return;
}
client = pcmk__find_client_by_id(client_id);
if (!client) {
return;
}
notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE);
crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE);
crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id);
crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout);
crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);
free_xml(notify_data);
}
/*!
* \internal
* \brief Notify relevant IPC clients of a fencing operation result
*
* \param[in] type Notification type
* \param[in] result Result of fencing operation (assume success if NULL)
* \param[in] data If not NULL, add to notification as call data
*/
void
fenced_send_notification(const char *type, const pcmk__action_result_t *result,
xmlNode *data)
{
/* TODO: Standardize the contents of data */
xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY);
CRM_LOG_ASSERT(type != NULL);
crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
crm_xml_add(update_msg, PCMK__XA_SUBT, type);
crm_xml_add(update_msg, PCMK__XA_ST_OP, type);
stonith__xe_set_result(update_msg, result);
if (data != NULL) {
xmlNode *wrapper = pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA);
pcmk__xml_copy(wrapper, data);
}
crm_trace("Notifying clients");
pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
free_xml(update_msg);
crm_trace("Notify complete");
}
/*!
* \internal
* \brief Send notifications for a configuration change to subscribed clients
*
* \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD,
* \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or
* \c STONITH_OP_LEVEL_DEL)
* \param[in] result Operation result
* \param[in] desc Description of what changed (either device ID or string
* representation of level
* (<tt><target>[<level_index>]</tt>))
*/
void
fenced_send_config_notification(const char *op,
const pcmk__action_result_t *result,
const char *desc)
{
xmlNode *notify_data = pcmk__xe_create(NULL, op);
crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, desc);
fenced_send_notification(op, result, notify_data);
free_xml(notify_data);
}
/*!
* \internal
* \brief Check whether a node does watchdog-fencing
*
* \param[in] node Name of node to check
*
* \return TRUE if \p node is in stonith_watchdog_targets, or if
* stonith_watchdog_targets is empty (meaning all nodes do
* watchdog-fencing)
*/
gboolean
node_does_watchdog_fencing(const char *node)
{
return ((stonith_watchdog_targets == NULL) ||
pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
}
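/* Illustrative sketch (hypothetical helper): a caller that wants to rely on
 * watchdog self-fencing for a target would typically combine this check with
 * the configured watchdog timeout declared near the top of this file.
 */
static gboolean
can_watchdog_fence(const char *node)
{
    return (stonith_watchdog_timeout_ms > 0)
           && node_does_watchdog_fencing(node);
}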
void
stonith_shutdown(int nsig)
{
crm_info("Terminating with %d clients", pcmk__ipc_client_count());
stonith_shutdown_flag = TRUE;
if (mainloop != NULL && g_main_loop_is_running(mainloop)) {
g_main_loop_quit(mainloop);
}
}
static void
stonith_cleanup(void)
{
fenced_cib_cleanup();
if (ipcs) {
qb_ipcs_destroy(ipcs);
}
- crm_peer_destroy();
+ pcmk__cluster_destroy_node_caches();
pcmk__client_cleanup();
free_stonith_remote_op_list();
free_topology_list();
free_device_list();
free_metadata_cache();
fenced_unregister_handlers();
free(stonith_our_uname);
stonith_our_uname = NULL;
}
static gboolean
stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data,
GError **error)
{
stand_alone = FALSE;
options.no_cib_connect = true;
return TRUE;
}
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
.connection_created = NULL,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*!
* \internal
* \brief Callback for peer status changes
*
* \param[in] type What changed
* \param[in] node What peer had the change
* \param[in] data Previous value of what changed
*/
static void
st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
if ((type != crm_status_processes)
&& !pcmk_is_set(node->flags, crm_remote_node)) {
/*
* This is a hack until we can send to a node ID and/or we fix node name
* lookups. These messages are ignored in stonith_peer_callback().
*/
xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);
crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE);
crm_debug("Broadcasting our uname because of node %u", node->id);
send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
free_xml(query);
}
}
/* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or
* crm_resource --list-options=fencing instead of querying daemon metadata.
*/
static int
fencer_metadata(void)
{
const char *name = "pacemaker-fenced";
const char *desc_short = N_("Instance attributes available for all "
"\"stonith\"-class resources");
const char *desc_long = N_("Instance attributes available for all "
"\"stonith\"-class resources and used by "
"Pacemaker's fence daemon, formerly known as "
"stonithd");
return pcmk__daemon_metadata(out, name, desc_short, desc_long,
pcmk__opt_fencing);
}
static GOptionEntry entries[] = {
{ "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
N_("Deprecated (will be removed in a future release)"), NULL },
{ "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL },
{ "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
&options.log_files, N_("Send logs to the additional named logfile"), NULL },
{ NULL }
};
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
GOptionContext *context = NULL;
context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
pcmk__add_main_args(context, entries);
return context;
}
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
pcmk_cluster_t *cluster = NULL;
crm_ipc_t *old_instance = NULL;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
GOptionContext *context = build_arg_context(args, &output_group);
crm_log_preinit(NULL, argc, argv);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
if ((g_strv_length(processed_args) >= 2)
&& pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
rc = fencer_metadata();
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Unable to display metadata: %s", pcmk_rc_str(rc));
}
goto done;
}
// Open additional log files
pcmk__add_logfiles(options.log_files, out);
crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE,
(args->verbosity > 0), argc, argv, FALSE);
crm_notice("Starting Pacemaker fencer");
old_instance = crm_ipc_new("stonith-ng", 0);
if (old_instance == NULL) {
/* crm_ipc_new() will have already logged an error message with
* crm_err()
*/
exit_code = CRM_EX_FATAL;
goto done;
}
if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
// IPC endpoint already up
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("pacemaker-fenced is already active, aborting startup");
goto done;
} else {
// Not up or not authentic, we'll proceed either way
crm_ipc_destroy(old_instance);
old_instance = NULL;
}
mainloop_add_signal(SIGTERM, stonith_shutdown);
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
rc = fenced_scheduler_init();
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error initializing scheduler data: %s", pcmk_rc_str(rc));
goto done;
}
cluster = pcmk_cluster_new();
if (!stand_alone) {
#if SUPPORT_COROSYNC
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy);
pcmk_cpg_set_deliver_fn(cluster, stonith_peer_ais_callback);
pcmk_cpg_set_confchg_fn(cluster, pcmk_cpg_membership);
}
#endif // SUPPORT_COROSYNC
crm_set_status_callback(&st_peer_update_callback);
if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
crm_crit("Cannot sign in to the cluster... terminating");
goto done;
}
pcmk__str_update(&stonith_our_uname, cluster->uname);
if (!options.no_cib_connect) {
setup_cib();
}
} else {
pcmk__str_update(&stonith_our_uname, "localhost");
crm_warn("Stand-alone mode is deprecated and will be removed "
"in a future release");
}
init_device_list();
init_topology_list();
pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
// Create the mainloop and run it...
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker fencer successfully started and accepting connections");
g_main_loop_run(mainloop);
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
g_strfreev(options.log_files);
stonith_cleanup();
pcmk_cluster_free(cluster);
fenced_scheduler_cleanup();
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index 57424ef835..2f92cfa89f 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,285 +1,282 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER__H
# define PCMK__CRM_CLUSTER__H
# include <stdint.h> // uint32_t, uint64_t
# include <glib.h> // gboolean, GHashTable
# include <libxml/tree.h> // xmlNode
# include <crm/common/xml.h>
# include <crm/common/util.h>
#ifdef __cplusplus
extern "C" {
#endif
# if SUPPORT_COROSYNC
# include <corosync/cpg.h>
# endif
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern gboolean crm_have_quorum;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_remote_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern unsigned long long crm_peer_seq;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_LOST "lost"
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_MEMBER "member"
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_join_phase {
/* @COMPAT: crm_join_nack_quiet can be replaced by crm_node_t:user_data
* at a compatibility break.
*/
//! Not allowed to join, but don't send a nack message
crm_join_nack_quiet = -2,
crm_join_nack = -1,
crm_join_none = 0,
crm_join_welcomed = 1,
crm_join_integrated = 2,
crm_join_finalized = 3,
crm_join_confirmed = 4,
};
//!@}
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_node_flags {
/* Node is not a cluster node and should not be considered for cluster
* membership
*/
crm_remote_node = (1U << 0),
// Node's cache entry is dirty
crm_node_dirty = (1U << 1),
};
//!@}
typedef struct crm_peer_node_s {
char *uname; // Node name as known to cluster
/* @COMPAT This is less than ideal since the value is not a valid XML ID
* (for Corosync, it's the string equivalent of the node's numeric node ID,
* but XML IDs can't start with a number) and the three elements should have
* different IDs.
*
* Ideally, we would use something like node-NODEID, node_state-NODEID, and
* transient_attributes-NODEID as the element IDs. Unfortunately changing it
* would be impractical due to backward compatibility; older nodes in a
* rolling upgrade will always write and expect the value in the old format.
*
* This is also named poorly, since the value is not a UUID, but at least
* that can be changed at an API compatibility break.
*/
/*! Value of the PCMK_XA_ID XML attribute to use with the node's
* PCMK_XE_NODE, PCMK_XE_NODE_STATE, and PCMK_XE_TRANSIENT_ATTRIBUTES
* XML elements in the CIB
*/
char *uuid;
char *state; // @TODO change to enum
uint64_t flags; // Bitmask of crm_node_flags
uint64_t last_seen; // Only needed by cluster nodes
uint32_t processes; // @TODO most not needed, merge into flags
/* @TODO When we can break public API compatibility, we can make the rest of
* these members separate structs and use void *cluster_data and
* void *user_data here instead, to abstract the cluster layer further.
*/
// Currently only needed by corosync stack
uint32_t id; // Node ID
time_t when_lost; // When CPG membership was last lost
// Only used by controller
enum crm_join_phase join;
char *expected;
time_t peer_lost;
char *conn_host;
time_t when_member; // Since when node has been a cluster member
time_t when_online; // Since when peer has been online in CPG
} crm_node_t;
-void crm_peer_init(void);
-void crm_peer_destroy(void);
-
// Implementation of pcmk_cluster_t
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
struct crm_cluster_s {
char *uuid;
char *uname;
uint32_t nodeid;
//! \deprecated Call pcmk_cluster_set_destroy_fn() to set this
void (*destroy) (gpointer);
# if SUPPORT_COROSYNC
/* @TODO When we can break public API compatibility, make these members a
* separate struct and use void *cluster_data here instead, to abstract the
* cluster layer further.
*/
struct cpg_name group;
/*!
* \deprecated Call pcmk_cpg_set_deliver_fn() and pcmk_cpg_set_confchg_fn()
* to set these
*/
cpg_callbacks_t cpg;
cpg_handle_t cpg_handle;
# endif
};
//!@}
//! Connection to a cluster layer
typedef struct crm_cluster_s pcmk_cluster_t;
int pcmk_cluster_connect(pcmk_cluster_t *cluster);
int pcmk_cluster_disconnect(pcmk_cluster_t *cluster);
pcmk_cluster_t *pcmk_cluster_new(void);
void pcmk_cluster_free(pcmk_cluster_t *cluster);
int pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer));
#if SUPPORT_COROSYNC
int pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn);
int pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn);
#endif // SUPPORT_COROSYNC
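/* Illustrative usage (an editorial sketch; my_destroy_cb is hypothetical and
 * error handling is elided):
 *
 *     pcmk_cluster_t *cluster = pcmk_cluster_new();
 *
 *     pcmk_cluster_set_destroy_fn(cluster, my_destroy_cb);
 *     if (pcmk_cluster_connect(cluster) == pcmk_rc_ok) {
 *         // ... exchange messages with other cluster nodes ...
 *         pcmk_cluster_disconnect(cluster);
 *     }
 *     pcmk_cluster_free(cluster);
 */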
enum crm_ais_msg_class {
crm_class_cluster = 0,
};
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_ais_msg_types {
crm_msg_none = 0,
crm_msg_ais = 1,
crm_msg_lrmd = 2,
crm_msg_cib = 3,
crm_msg_crmd = 4,
crm_msg_attrd = 5,
crm_msg_stonithd = 6,
crm_msg_te = 7,
crm_msg_pe = 8,
crm_msg_stonith_ng = 9,
};
//!@}
gboolean send_cluster_message(const crm_node_t *node,
enum crm_ais_msg_types service,
const xmlNode *data, gboolean ordered);
# if SUPPORT_COROSYNC
uint32_t get_local_nodeid(cpg_handle_t handle);
void pcmk_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries);
gboolean crm_is_corosync_peer_active(const crm_node_t * node);
gboolean send_cluster_text(enum crm_ais_msg_class msg_class, const char *data,
gboolean local, const crm_node_t *node,
enum crm_ais_msg_types dest);
char *pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *msg,
uint32_t *kind, const char **from);
# endif
const char *crm_peer_uuid(crm_node_t *node);
const char *crm_peer_uname(const char *uuid);
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
enum crm_status_type {
crm_status_uname,
crm_status_nstate,
crm_status_processes,
};
//!@}
enum crm_ais_msg_types text2msg_type(const char *text);
void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *));
void crm_set_autoreap(gboolean autoreap);
/*!
* \enum pcmk_cluster_layer
* \brief Types of cluster layer
*/
enum pcmk_cluster_layer {
pcmk_cluster_layer_unknown = 1, //!< Unknown cluster layer
pcmk_cluster_layer_invalid = 2, //!< Invalid cluster layer
pcmk_cluster_layer_corosync = 32, //!< Corosync Cluster Engine
};
enum pcmk_cluster_layer pcmk_get_cluster_layer(void);
const char *pcmk_cluster_layer_text(enum pcmk_cluster_layer layer);
const char *get_local_node_name(void);
char *get_node_name(uint32_t nodeid);
/*!
* \brief Get log-friendly string equivalent of a join phase
*
* \param[in] phase Join phase
*
* \return Log-friendly string equivalent of \p phase
*/
//! \deprecated Do not use (public access will be removed in a future release)
static inline const char *
crm_join_phase_str(enum crm_join_phase phase)
{
switch (phase) {
case crm_join_nack_quiet: return "nack_quiet";
case crm_join_nack: return "nack";
case crm_join_none: return "none";
case crm_join_welcomed: return "welcomed";
case crm_join_integrated: return "integrated";
case crm_join_finalized: return "finalized";
case crm_join_confirmed: return "confirmed";
default: return "invalid";
}
}
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/cluster/compat.h>
#endif
#ifdef __cplusplus
}
#endif
#endif
diff --git a/include/crm/cluster/compat.h b/include/crm/cluster/compat.h
index 7fac448ac8..133c8dc138 100644
--- a/include/crm/cluster/compat.h
+++ b/include/crm/cluster/compat.h
@@ -1,118 +1,124 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER_COMPAT__H
# define PCMK__CRM_CLUSTER_COMPAT__H
#include <inttypes.h> // uint32_t
#include <glib.h> // gboolean, guint
#include <libxml/tree.h> // xmlNode
#include <crm/cluster.h> // crm_node_t
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief Deprecated Pacemaker cluster API
* \ingroup cluster
* \deprecated Do not include this header directly. The cluster APIs in this
* header, and the header itself, will be removed in a future
* release.
*/
// \deprecated Do not use
enum crm_get_peer_flags {
CRM_GET_PEER_CLUSTER = 0x0001,
CRM_GET_PEER_REMOTE = 0x0002,
CRM_GET_PEER_ANY = CRM_GET_PEER_CLUSTER|CRM_GET_PEER_REMOTE,
};
// \deprecated Use \c pcmk_cluster_t instead
typedef pcmk_cluster_t crm_cluster_t;
// \deprecated Do not use Pacemaker for cluster node caching
crm_node_t *crm_get_peer(unsigned int id, const char *uname);
// \deprecated Do not use Pacemaker for cluster node caching
crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags);
// \deprecated Use stonith_api_kick() from libstonithd instead
int crm_terminate_member(int nodeid, const char *uname, void *unused);
// \deprecated Use stonith_api_kick() from libstonithd instead
int crm_terminate_member_no_mainloop(int nodeid, const char *uname,
int *connection);
// \deprecated Use crm_xml_add(xml, attr, crm_peer_uuid(node)) instead
void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node);
#if SUPPORT_COROSYNC
// \deprecated Do not use
gboolean cluster_connect_cpg(pcmk_cluster_t *cluster);
// \deprecated Do not use
void cluster_disconnect_cpg(pcmk_cluster_t *cluster);
#endif // SUPPORT_COROSYNC
// \deprecated Use \c pcmk_cluster_connect() instead
gboolean crm_cluster_connect(pcmk_cluster_t *cluster);
// \deprecated Use \c pcmk_cluster_disconnect() instead
void crm_cluster_disconnect(pcmk_cluster_t *cluster);
// \deprecated Do not use
int crm_remote_peer_cache_size(void);
// \deprecated Do not use
void crm_remote_peer_cache_refresh(xmlNode *cib);
// \deprecated Do not use
crm_node_t *crm_remote_peer_get(const char *node_name);
// \deprecated Do not use
void crm_remote_peer_cache_remove(const char *node_name);
// \deprecated Do not use
gboolean crm_is_peer_active(const crm_node_t *node);
// \deprecated Do not use
guint crm_active_peers(void);
// \deprecated Do not use
guint reap_crm_member(uint32_t id, const char *name);
//!@{
//! \deprecated Use <tt>enum pcmk_cluster_layer</tt> instead
enum cluster_type_e {
pcmk_cluster_unknown = pcmk_cluster_layer_unknown,
pcmk_cluster_invalid = pcmk_cluster_layer_invalid,
pcmk_cluster_corosync = pcmk_cluster_layer_corosync,
};
//!@}
// \deprecated Use \c pcmk_cluster_layer_text() instead
const char *name_for_cluster_type(enum cluster_type_e type);
// \deprecated Use \c pcmk_get_cluster_layer() instead
enum cluster_type_e get_cluster_type(void);
// \deprecated Use \c pcmk_get_cluster_layer() instead
gboolean is_corosync_cluster(void);
+// \deprecated Do not use
+void crm_peer_init(void);
+
+// \deprecated Do not use
+void crm_peer_destroy(void);
+
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_CLUSTER_COMPAT__H
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index 5484683bce..ffcb421ece 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -1,162 +1,175 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER_INTERNAL__H
# define PCMK__CRM_CLUSTER_INTERNAL__H
# include <stdint.h> // uint32_t, uint64_t
# include <glib.h> // gboolean
# include <crm/cluster.h>
/* *INDENT-OFF* */
enum crm_proc_flag {
crm_proc_none = 0x00000001,
// Cluster layers
crm_proc_cpg = 0x04000000,
// Daemons
crm_proc_execd = 0x00000010,
crm_proc_based = 0x00000100,
crm_proc_controld = 0x00000200,
crm_proc_attrd = 0x00001000,
crm_proc_schedulerd = 0x00010000,
crm_proc_fenced = 0x00100000,
};
/* *INDENT-ON* */
// Used with node cache search functions
enum pcmk__node_search_flags {
- pcmk__node_search_none = 0,
- pcmk__node_search_cluster = (1 << 0), // Search for cluster nodes
- pcmk__node_search_remote = (1 << 1), // Search for remote nodes
- pcmk__node_search_any = pcmk__node_search_cluster
- |pcmk__node_search_remote,
+ //! Does not affect search
+ pcmk__node_search_none = 0,
+
+ //! Search for cluster nodes from membership cache
+ pcmk__node_search_cluster_member = (1 << 0),
+
+ //! Search for remote nodes
+ pcmk__node_search_remote = (1 << 1),
+
+ //! Search for cluster member nodes and remote nodes
+ pcmk__node_search_any = pcmk__node_search_cluster_member
+ |pcmk__node_search_remote,
/* @COMPAT The values before this must stay the same until we can drop
* support for enum crm_get_peer_flags
*/
- pcmk__node_search_known = (1 << 2), // Search previously known nodes
+ //! Search for cluster nodes from CIB (as of last cache refresh)
+ pcmk__node_search_cluster_cib = (1 << 2),
};
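/* Illustrative usage (an editorial sketch): after this rename, a lookup that
 * should consider both cluster members and remote nodes combines the bits
 * explicitly, which is exactly what pcmk__node_search_any expands to:
 *
 *     crm_node_t *node = pcmk__get_node(0, node_name, NULL,
 *                                       pcmk__node_search_cluster_member
 *                                       |pcmk__node_search_remote);
 */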
/*!
* \internal
* \brief Return the process bit corresponding to the current cluster stack
*
* \return Process flag if detectable, otherwise 0
*/
static inline uint32_t
crm_get_cluster_proc(void)
{
switch (pcmk_get_cluster_layer()) {
case pcmk_cluster_layer_corosync:
return crm_proc_cpg;
default:
break;
}
return crm_proc_none;
}
/*!
* \internal
* \brief Get log-friendly string description of a Corosync return code
*
* \param[in] error Corosync return code
*
* \return Log-friendly string description corresponding to \p error
*/
static inline const char *
pcmk__cs_err_str(int error)
{
# if SUPPORT_COROSYNC
switch (error) {
case CS_OK: return "OK";
case CS_ERR_LIBRARY: return "Library error";
case CS_ERR_VERSION: return "Version error";
case CS_ERR_INIT: return "Initialization error";
case CS_ERR_TIMEOUT: return "Timeout";
case CS_ERR_TRY_AGAIN: return "Try again";
case CS_ERR_INVALID_PARAM: return "Invalid parameter";
case CS_ERR_NO_MEMORY: return "No memory";
case CS_ERR_BAD_HANDLE: return "Bad handle";
case CS_ERR_BUSY: return "Busy";
case CS_ERR_ACCESS: return "Access error";
case CS_ERR_NOT_EXIST: return "Doesn't exist";
case CS_ERR_NAME_TOO_LONG: return "Name too long";
case CS_ERR_EXIST: return "Exists";
case CS_ERR_NO_SPACE: return "No space";
case CS_ERR_INTERRUPT: return "Interrupt";
case CS_ERR_NAME_NOT_FOUND: return "Name not found";
case CS_ERR_NO_RESOURCES: return "No resources";
case CS_ERR_NOT_SUPPORTED: return "Not supported";
case CS_ERR_BAD_OPERATION: return "Bad operation";
case CS_ERR_FAILED_OPERATION: return "Failed operation";
case CS_ERR_MESSAGE_ERROR: return "Message error";
case CS_ERR_QUEUE_FULL: return "Queue full";
case CS_ERR_QUEUE_NOT_AVAILABLE: return "Queue not available";
case CS_ERR_BAD_FLAGS: return "Bad flags";
case CS_ERR_TOO_BIG: return "Too big";
case CS_ERR_NO_SECTIONS: return "No sections";
}
# endif
return "Corosync error";
}
# if SUPPORT_COROSYNC
#if 0
/* This is the new way to do it, but we still support all Corosync 2 versions,
* and this isn't always available. A better alternative here would be to check
* for support in the configure script and enable this conditionally.
*/
#define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP)
#else
#define pcmk__init_cmap(handle) cmap_initialize(handle)
#endif
char *pcmk__corosync_cluster_name(void);
bool pcmk__corosync_add_nodes(xmlNode *xml_parent);
# endif
crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer,
uint32_t flag, const char *status);
crm_node_t *pcmk__update_peer_state(const char *source, crm_node_t *node,
const char *state, uint64_t membership);
void pcmk__update_peer_expected(const char *source, crm_node_t *node,
const char *expected);
void pcmk__reap_unseen_nodes(uint64_t ring_id);
void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
gboolean),
void (*destroy) (gpointer));
+// Membership
+
+void pcmk__cluster_init_node_caches(void);
+void pcmk__cluster_destroy_node_caches(void);
+
bool pcmk__cluster_is_node_active(const crm_node_t *node);
unsigned int pcmk__cluster_num_active_nodes(void);
unsigned int pcmk__cluster_num_remote_nodes(void);
crm_node_t *pcmk__cluster_lookup_remote_node(const char *node_name);
void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name);
void pcmk__cluster_forget_remote_node(const char *node_name);
crm_node_t *pcmk__search_node_caches(unsigned int id, const char *uname,
uint32_t flags);
crm_node_t *pcmk__search_cluster_node_cache(unsigned int id, const char *uname,
const char *uuid);
void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id);
void pcmk__refresh_node_caches_from_cib(xmlNode *cib);
crm_node_t *pcmk__get_node(unsigned int id, const char *uname,
const char *uuid, uint32_t flags);
#endif // PCMK__CRM_CLUSTER_INTERNAL__H
diff --git a/include/crm/common/xml_internal.h b/include/crm/common/xml_internal.h
index cfe9ef49ff..b17bb1e89b 100644
--- a/include/crm/common/xml_internal.h
+++ b/include/crm/common/xml_internal.h
@@ -1,592 +1,594 @@
/*
* Copyright 2017-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__XML_INTERNAL__H
# define PCMK__XML_INTERNAL__H
/*
* Internal-only wrappers for and extensions to libxml2 (libxslt)
*/
# include <stdlib.h>
# include <stdint.h> // uint32_t
# include <stdio.h>
# include <string.h>
# include <crm/crm.h> /* transitively imports qblog.h */
# include <crm/common/output_internal.h>
# include <crm/common/xml_io_internal.h>
# include <crm/common/xml_names_internal.h> // PCMK__XE_PROMOTABLE_LEGACY
# include <libxml/relaxng.h>
/*!
* \brief Base for directing lib{xml2,xslt} log into standard libqb backend
*
* This macro implements the core of what can be needed for directing
* libxml2 or libxslt error messaging into standard, preconfigured
* libqb-backed log stream.
*
* It's a bit unfortunate that libxml2 (and more sparsely, also libxslt)
* emits a single message in chunks (the location is emitted separately from
* the message itself), so we have to take the effort to combine these
* chunks back into a single message. Whether to do this or not is driven
* with the \p dechunk toggle.
*
* The form of a macro was chosen for implicit deriving of __FILE__, etc.
* and also because static dechunking buffer should be differentiated per
* library (here we assume different functions referring to this macro
* will not ever be using both at once), preferably also per-library
* context of use to avoid clashes altogether.
*
* Note that we cannot use qb_logt, because callsite data have to be known
* at compile time, which is not always the case -- e.g., with xml_log
* (and unfortunately there's no clear explanation of why it fails to compile).
*
* Also note that there's no explicit guard against said libraries producing
* never-newline-terminated chunks (which would just keep consuming memory),
* as it's quite improbable. Termination of the program in between the
* same-message chunks will raise a flag with valgrind and the likes, though.
*
* And lastly, regarding how dechunking combines with other non-message
* parameters -- for \p priority, most important running specification
* wins (possibly elevated to LOG_ERR in case of nonconformance with the
* newline-termination "protocol"), \p dechunk is expected to always be
* on once it was at the start, and the rest (\p postemit and \p prefix)
* are picked directly from the last chunk entry finalizing the message
* (also reasonable to always have it the same with all related entries).
*
* \param[in] priority Syslog priority for the message to be logged
* \param[in] dechunk Whether to dechunk new-line terminated message
* \param[in] postemit Code to be executed once message is sent out
* \param[in] prefix How to prefix the message or NULL for raw passing
* \param[in] fmt Format string as with printf-like functions
* \param[in] ap Variable argument list to supplement \p fmt format string
*/
#define PCMK__XML_LOG_BASE(priority, dechunk, postemit, prefix, fmt, ap) \
do { \
if (!(dechunk) && (prefix) == NULL) { /* quick pass */ \
qb_log_from_external_source_va(__func__, __FILE__, (fmt), \
(priority), __LINE__, 0, (ap)); \
(void) (postemit); \
} else { \
int CXLB_len = 0; \
char *CXLB_buf = NULL; \
static int CXLB_buffer_len = 0; \
static char *CXLB_buffer = NULL; \
static uint8_t CXLB_priority = 0; \
\
CXLB_len = vasprintf(&CXLB_buf, (fmt), (ap)); \
\
if (CXLB_len <= 0 || CXLB_buf[CXLB_len - 1] == '\n' || !(dechunk)) { \
if (CXLB_len < 0) { \
CXLB_buf = (char *) "LOG CORRUPTION HAZARD"; /*we don't modify*/\
CXLB_priority = QB_MIN(CXLB_priority, LOG_ERR); \
} else if (CXLB_len > 0 /* && (dechunk) */ \
&& CXLB_buf[CXLB_len - 1] == '\n') { \
CXLB_buf[CXLB_len - 1] = '\0'; \
} \
if (CXLB_buffer) { \
qb_log_from_external_source(__func__, __FILE__, "%s%s%s", \
CXLB_priority, __LINE__, 0, \
(prefix) != NULL ? (prefix) : "", \
CXLB_buffer, CXLB_buf); \
free(CXLB_buffer); \
} else { \
qb_log_from_external_source(__func__, __FILE__, "%s%s", \
(priority), __LINE__, 0, \
(prefix) != NULL ? (prefix) : "", \
CXLB_buf); \
} \
if (CXLB_len < 0) { \
CXLB_buf = NULL; /* restore temporary override */ \
} \
CXLB_buffer = NULL; \
CXLB_buffer_len = 0; \
(void) (postemit); \
\
} else if (CXLB_buffer == NULL) { \
CXLB_buffer_len = CXLB_len; \
CXLB_buffer = CXLB_buf; \
CXLB_buf = NULL; \
CXLB_priority = (priority); /* remember as a running severest */ \
\
} else { \
CXLB_buffer = realloc(CXLB_buffer, 1 + CXLB_buffer_len + CXLB_len); \
memcpy(CXLB_buffer + CXLB_buffer_len, CXLB_buf, CXLB_len); \
CXLB_buffer_len += CXLB_len; \
CXLB_buffer[CXLB_buffer_len] = '\0'; \
CXLB_priority = QB_MIN(CXLB_priority, (priority)); /* severest? */ \
} \
free(CXLB_buf); \
} \
} while (0)
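/* Illustrative usage (an editorial sketch; xml_log_cb is hypothetical): a
 * libxml2-style error handler that forwards chunked messages into libqb:
 *
 *     static void
 *     xml_log_cb(void *ctx, const char *fmt, ...)
 *     {
 *         va_list ap;
 *
 *         va_start(ap, fmt);
 *         PCMK__XML_LOG_BASE(LOG_ERR, TRUE, 0, "libxml: ", fmt, ap);
 *         va_end(ap);
 *     }
 */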
/*
* \enum pcmk__xml_fmt_options
* \brief Bit flags to control format in XML logs and dumps
*/
enum pcmk__xml_fmt_options {
//! Exclude certain XML attributes (for calculating digests)
pcmk__xml_fmt_filtered = (1 << 0),
//! Include indentation and newlines
pcmk__xml_fmt_pretty = (1 << 1),
//! Include the opening tag of an XML element, and include XML comments
pcmk__xml_fmt_open = (1 << 3),
//! Include the children of an XML element
pcmk__xml_fmt_children = (1 << 4),
//! Include the closing tag of an XML element
pcmk__xml_fmt_close = (1 << 5),
// @COMPAT Can we start including text nodes unconditionally?
//! Include XML text nodes
pcmk__xml_fmt_text = (1 << 6),
// @COMPAT Remove when v1 patchsets are removed
//! Log a created XML subtree
pcmk__xml_fmt_diff_plus = (1 << 7),
// @COMPAT Remove when v1 patchsets are removed
//! Log a removed XML subtree
pcmk__xml_fmt_diff_minus = (1 << 8),
// @COMPAT Remove when v1 patchsets are removed
//! Log a minimal version of an XML diff (only showing the changes)
pcmk__xml_fmt_diff_short = (1 << 9),
};
int pcmk__xml_show(pcmk__output_t *out, const char *prefix, const xmlNode *data,
int depth, uint32_t options);
int pcmk__xml_show_changes(pcmk__output_t *out, const xmlNode *xml);
/* XML search strings for guest, remote and pacemaker_remote nodes */
/* search string to find CIB resources entries for cluster nodes */
#define PCMK__XP_MEMBER_NODE_CONFIG \
"//" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_NODES \
- "/" PCMK_XE_NODE "[not(@type) or @type='member']"
+ "/" PCMK_XE_NODE \
+ "[not(@" PCMK_XA_TYPE ") or @" PCMK_XA_TYPE "='" PCMK_VALUE_MEMBER "']"
/* search string to find CIB resources entries for guest nodes */
#define PCMK__XP_GUEST_NODE_CONFIG \
"//" PCMK_XE_CIB "//" PCMK_XE_CONFIGURATION "//" PCMK_XE_PRIMITIVE \
"//" PCMK_XE_META_ATTRIBUTES "//" PCMK_XE_NVPAIR \
- "[@name='" PCMK_META_REMOTE_NODE "']"
+ "[@" PCMK_XA_NAME "='" PCMK_META_REMOTE_NODE "']"
/* search string to find CIB resources entries for remote nodes */
#define PCMK__XP_REMOTE_NODE_CONFIG \
"//" PCMK_XE_CIB "//" PCMK_XE_CONFIGURATION "//" PCMK_XE_PRIMITIVE \
- "[@type='remote'][@provider='pacemaker']"
+ "[@" PCMK_XA_TYPE "='" PCMK_VALUE_REMOTE "']" \
+ "[@" PCMK_XA_PROVIDER "='pacemaker']"
/* search string to find CIB node status entries for pacemaker_remote nodes */
#define PCMK__XP_REMOTE_NODE_STATUS \
"//" PCMK_XE_CIB "//" PCMK_XE_STATUS "//" PCMK__XE_NODE_STATE \
- "[@" PCMK_XA_REMOTE_NODE "='true']"
+ "[@" PCMK_XA_REMOTE_NODE "='" PCMK_VALUE_TRUE "']"
/*!
* \internal
* \brief Serialize XML (using libxml) into provided descriptor
*
* \param[in] fd File descriptor to (piece-wise) write to
* \param[in] cur XML subtree to proceed
*
* \return a standard Pacemaker return code
*/
int pcmk__xml2fd(int fd, xmlNode *cur);
enum pcmk__xml_artefact_ns {
pcmk__xml_artefact_ns_legacy_rng = 1,
pcmk__xml_artefact_ns_legacy_xslt,
pcmk__xml_artefact_ns_base_rng,
pcmk__xml_artefact_ns_base_xslt,
};
void pcmk__strip_xml_text(xmlNode *xml);
const char *pcmk__xe_add_last_written(xmlNode *xe);
xmlNode *pcmk__xe_first_child(const xmlNode *parent, const char *node_name,
const char *attr_n, const char *attr_v);
void pcmk__xe_remove_attr(xmlNode *element, const char *name);
bool pcmk__xe_remove_attr_cb(xmlNode *xml, void *user_data);
void pcmk__xe_remove_matching_attrs(xmlNode *element,
bool (*match)(xmlAttrPtr, void *),
void *user_data);
int pcmk__xe_delete_match(xmlNode *xml, xmlNode *search);
int pcmk__xe_replace_match(xmlNode *xml, xmlNode *replace);
int pcmk__xe_update_match(xmlNode *xml, xmlNode *update, uint32_t flags);
GString *pcmk__element_xpath(const xmlNode *xml);
/*!
* \internal
* \enum pcmk__xml_escape_type
* \brief Indicators of which XML characters to escape
*
* XML allows the escaping of special characters by replacing them with entity
* references (for example, <tt>"&quot;"</tt>) or character references (for
* example, <tt>"&#13;"</tt>).
*
* The special characters <tt>'&'</tt> (except as the beginning of an entity
* reference) and <tt>'<'</tt> are not allowed in their literal forms in XML
* character data. Character data is non-markup text (for example, the content
* of a text node). <tt>'>'</tt> is allowed under most circumstances; we escape
* it for safety and symmetry.
*
* For more details, see the "Character Data and Markup" section of the XML
* spec, currently section 2.4:
* https://www.w3.org/TR/xml/#dt-markup
*
* Attribute values are handled specially.
* * If an attribute value is delimited by single quotes, then single quotes
* must be escaped within the value.
* * Similarly, if an attribute value is delimited by double quotes, then double
* quotes must be escaped within the value.
* * A conformant XML processor replaces a literal whitespace character (tab,
* newline, carriage return, space) in an attribute value with a space
* (\c '#x20') character. However, a reference to a whitespace character (for
* example, \c "&#x0A;" for \c '\n') does not get replaced.
* * For more details, see the "Attribute-Value Normalization" section of the
* XML spec, currently section 3.3.3. Note that the default attribute type
* is CDATA; we don't deal with NMTOKENS, etc.:
* https://www.w3.org/TR/xml/#AVNormalize
*
* Pacemaker always delimits attribute values with double quotes, so there's no
* need to escape single quotes.
*
* Newlines and tabs should be escaped in attribute values when XML is
* serialized to text, so that future parsing preserves them rather than
* normalizing them to spaces.
*
* We always escape carriage returns, so that they're not converted to spaces
* during attribute-value normalization and because displaying them as literals
* is messy.
*/
enum pcmk__xml_escape_type {
/*!
* For text nodes.
* * Escape \c '<', \c '>', and \c '&' using entity references.
* * Do not escape \c '\n' and \c '\t'.
* * Escape other non-printing characters using character references.
*/
pcmk__xml_escape_text,
/*!
* For attribute values.
* * Escape \c '<', \c '>', \c '&', and \c '"' using entity references.
* * Escape \c '\n', \c '\t', and other non-printing characters using
* character references.
*/
pcmk__xml_escape_attr,
/* @COMPAT Drop escaping of at least '\n' and '\t' for
* pcmk__xml_escape_attr_pretty when openstack-info, openstack-floating-ip,
* and openstack-virtual-ip resource agents no longer depend on it.
*
* At time of writing, openstack-info may set a multiline value for the
* openstack_ports node attribute. The other two agents query the value and
* require it to be on one line with no spaces.
*/
/*!
* For attribute values displayed in text output delimited by double quotes.
* * Escape \c '\n' as \c "\\n"
* * Escape \c '\r' as \c "\\r"
* * Escape \c '\t' as \c "\\t"
* * Escape \c '"' as \c "\\""
*/
pcmk__xml_escape_attr_pretty,
};
bool pcmk__xml_needs_escape(const char *text, enum pcmk__xml_escape_type type);
char *pcmk__xml_escape(const char *text, enum pcmk__xml_escape_type type);
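/* Illustrative usage (an editorial sketch; the output shown is indicative):
 *
 *     char *escaped = pcmk__xml_escape("a \"quoted\" value\n",
 *                                      pcmk__xml_escape_attr);
 *     // escaped => "a &quot;quoted&quot; value&#x0A;"
 *     free(escaped);
 *
 * pcmk__xml_needs_escape() can be checked first to skip the allocation when
 * the text contains nothing special.
 */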
/*!
* \internal
* \brief Get the root directory to scan XML artefacts of given kind for
*
* \param[in] ns governs the hierarchy nesting against the inherent root dir
*
* \return root directory to scan XML artefacts of given kind for
*/
char *
pcmk__xml_artefact_root(enum pcmk__xml_artefact_ns ns);
/*!
* \internal
* \brief Get the fully unwrapped path to particular XML artifact (RNG/XSLT)
*
* \param[in] ns denotes path forming details (parent dir, suffix)
* \param[in] filespec symbolic file specification to be combined with
* #artefact_ns to form the final path
* \return unwrapped path to particular XML artifact (RNG/XSLT)
*/
char *pcmk__xml_artefact_path(enum pcmk__xml_artefact_ns ns,
const char *filespec);
/*!
* \internal
* \brief Retrieve the value of the \c PCMK_XA_ID XML attribute
*
* \param[in] xml XML element to check
*
* \return Value of the \c PCMK_XA_ID attribute (may be \c NULL)
*/
static inline const char *
pcmk__xe_id(const xmlNode *xml)
{
return crm_element_value(xml, PCMK_XA_ID);
}
/*!
* \internal
* \brief Check whether an XML element is of a particular type
*
* \param[in] xml XML element to compare
* \param[in] name XML element name to compare
*
* \return \c true if \p xml is of type \p name, otherwise \c false
*/
static inline bool
pcmk__xe_is(const xmlNode *xml, const char *name)
{
return (xml != NULL) && (xml->name != NULL) && (name != NULL)
&& (strcmp((const char *) xml->name, name) == 0);
}
/*!
* \internal
* \brief Return first non-text child node of an XML node
*
* \param[in] parent XML node to check
*
* \return First non-text child node of \p parent (or NULL if none)
*/
static inline xmlNode *
pcmk__xml_first_child(const xmlNode *parent)
{
xmlNode *child = (parent? parent->children : NULL);
while (child && (child->type == XML_TEXT_NODE)) {
child = child->next;
}
return child;
}
/*!
* \internal
* \brief Return next non-text sibling node of an XML node
*
* \param[in] child XML node to check
*
* \return Next non-text sibling of \p child (or NULL if none)
*/
static inline xmlNode *
pcmk__xml_next(const xmlNode *child)
{
xmlNode *next = (child? child->next : NULL);
while (next && (next->type == XML_TEXT_NODE)) {
next = next->next;
}
return next;
}
/*!
* \internal
* \brief Return next non-text sibling element of an XML element
*
* \param[in] child XML element to check
*
* \return Next sibling element of \p child (or NULL if none)
*/
static inline xmlNode *
pcmk__xe_next(const xmlNode *child)
{
xmlNode *next = child? child->next : NULL;
while (next && (next->type != XML_ELEMENT_NODE)) {
next = next->next;
}
return next;
}
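/* Illustrative usage (an editorial sketch): the accessors above allow simple
 * iteration over children without libxml2 boilerplate:
 *
 *     for (xmlNode *child = pcmk__xml_first_child(parent); child != NULL;
 *          child = pcmk__xml_next(child)) {
 *         // visit each non-text child node
 *     }
 */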
xmlNode *pcmk__xe_create(xmlNode *parent, const char *name);
xmlNode *pcmk__xml_copy(xmlNode *parent, xmlNode *src);
xmlNode *pcmk__xe_next_same(const xmlNode *node);
void pcmk__xe_set_content(xmlNode *node, const char *format, ...)
G_GNUC_PRINTF(2, 3);
/*!
* \internal
* \enum pcmk__xa_flags
* \brief Flags for operations affecting XML attributes
*/
enum pcmk__xa_flags {
//! Flag has no effect
pcmk__xaf_none = 0U,
//! Don't overwrite existing values
pcmk__xaf_no_overwrite = (1U << 0),
/*!
* Treat values as score updates where possible (see
* \c pcmk__xe_set_score())
*/
pcmk__xaf_score_update = (1U << 1),
};
int pcmk__xe_copy_attrs(xmlNode *target, const xmlNode *src, uint32_t flags);
/*!
* \internal
* \brief Like pcmk__xe_set_props, but takes a va_list instead of
* arguments directly.
*
* \param[in,out] node XML to add attributes to
* \param[in] pairs NULL-terminated list of name/value pairs to add
*/
void
pcmk__xe_set_propv(xmlNodePtr node, va_list pairs);
/*!
* \internal
* \brief Add a NULL-terminated list of name/value pairs to the given
* XML node as properties.
*
* \param[in,out] node XML node to add properties to
* \param[in] ... NULL-terminated list of name/value pairs
*
* \note A NULL name terminates the arguments; a NULL value will be skipped.
*/
void
pcmk__xe_set_props(xmlNodePtr node, ...)
G_GNUC_NULL_TERMINATED;
/*!
* \internal
* \brief Get first attribute of an XML element
*
* \param[in] xe XML element to check
*
* \return First attribute of \p xe (or NULL if \p xe is NULL or has none)
*/
static inline xmlAttr *
pcmk__xe_first_attr(const xmlNode *xe)
{
return (xe == NULL)? NULL : xe->properties;
}
/*!
* \internal
* \brief Extract the ID attribute from an XML element
*
* \param[in] xpath String to search
* \param[in] node Node to get the ID for
*
* \return ID attribute of \p node in xpath string \p xpath
*/
char *
pcmk__xpath_node_id(const char *xpath, const char *node);
/*!
* \internal
* \brief Print an informational message if an xpath query returned multiple
* items with the same ID.
*
* \param[in,out] out The output object
* \param[in] search The xpath search result, most typically the result of
* calling cib->cmds->query().
* \param[in] name The name searched for
*/
void
pcmk__warn_multiple_name_matches(pcmk__output_t *out, xmlNode *search,
const char *name);
/* internal XML-related utilities */
enum xml_private_flags {
pcmk__xf_none = 0x0000,
pcmk__xf_dirty = 0x0001,
pcmk__xf_deleted = 0x0002,
pcmk__xf_created = 0x0004,
pcmk__xf_modified = 0x0008,
pcmk__xf_tracking = 0x0010,
pcmk__xf_processed = 0x0020,
pcmk__xf_skip = 0x0040,
pcmk__xf_moved = 0x0080,
pcmk__xf_acl_enabled = 0x0100,
pcmk__xf_acl_read = 0x0200,
pcmk__xf_acl_write = 0x0400,
pcmk__xf_acl_deny = 0x0800,
pcmk__xf_acl_create = 0x1000,
pcmk__xf_acl_denied = 0x2000,
pcmk__xf_lazy = 0x4000,
};
void pcmk__set_xml_doc_flag(xmlNode *xml, enum xml_private_flags flag);
/*!
* \internal
* \brief Iterate over child elements of \p xml
*
* This function iterates over the children of \p xml, performing the
* callback function \p handler on each node. If the callback returns
* a value other than pcmk_rc_ok, the iteration stops and the value is
* returned. It is therefore possible that not all children will be
* visited.
*
* \param[in,out] xml The starting XML node. Can be NULL.
* \param[in] child_element_name The name that the node must match in order
* for \p handler to be run. If NULL, all
* child elements will match.
* \param[in] handler The callback function.
* \param[in,out] userdata User data to pass to the callback function.
* Can be NULL.
*
* \return Standard Pacemaker return code
*/
int
pcmk__xe_foreach_child(xmlNode *xml, const char *child_element_name,
int (*handler)(xmlNode *xml, void *userdata),
void *userdata);
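/* Hypothetical sketch (callback and caller below are illustrative only): a
 * handler for pcmk__xe_foreach_child() that stops iteration at the first
 * child whose ID matches, by returning a value other than pcmk_rc_ok.
 */
static inline int
pcmk__example_find_id_cb(xmlNode *xml, void *userdata)
{
    const char *wanted = userdata;
    const char *id = crm_element_value(xml, PCMK_XA_ID);

    // Returning pcmk_rc_already halts iteration and is passed to the caller
    return pcmk__str_eq(id, wanted, pcmk__str_none)? pcmk_rc_already
                                                   : pcmk_rc_ok;
}
// Hypothetical caller:
// pcmk__xe_foreach_child(xml, NULL, pcmk__example_find_id_cb, (void *) "rsc1");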
bool pcmk__xml_tree_foreach(xmlNode *xml, bool (*fn)(xmlNode *, void *),
void *user_data);
static inline const char *
pcmk__xml_attr_value(const xmlAttr *attr)
{
return ((attr == NULL) || (attr->children == NULL))? NULL
: (const char *) attr->children->content;
}
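/* Hypothetical sketch (not part of this header): log every attribute of an
 * element by walking the libxml2 property list from pcmk__xe_first_attr()
 * and reading each value via pcmk__xml_attr_value().
 */
static inline void
pcmk__example_log_attrs(const xmlNode *xe)
{
    for (const xmlAttr *attr = pcmk__xe_first_attr(xe); attr != NULL;
         attr = attr->next) {
        crm_trace("%s=%s", (const char *) attr->name,
                  pcmk__s(pcmk__xml_attr_value(attr), "<unset>"));
    }
}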
// @COMPAT Remove when v1 patchsets are removed
xmlNode *pcmk__diff_v1_xml_object(xmlNode *left, xmlNode *right, bool suppress);
// @COMPAT Drop when PCMK__XE_PROMOTABLE_LEGACY is removed
static inline const char *
pcmk__map_element_name(const xmlNode *xml)
{
if (xml == NULL) {
return NULL;
} else if (pcmk__xe_is(xml, PCMK__XE_PROMOTABLE_LEGACY)) {
return PCMK_XE_CLONE;
} else {
return (const char *) xml->name;
}
}
#endif // PCMK__XML_INTERNAL__H
diff --git a/include/crm_internal.h b/include/crm_internal.h
index ecfc2b9fef..590f88cc7c 100644
--- a/include/crm_internal.h
+++ b/include/crm_internal.h
@@ -1,86 +1,87 @@
/*
* Copyright 2006-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef CRM_INTERNAL__H
# define CRM_INTERNAL__H
# ifndef PCMK__CONFIG_H
# define PCMK__CONFIG_H
# include <config.h>
# endif
# include <portability.h>
/* Our minimum glib dependency is 2.42. Define that as both the minimum and
* maximum glib APIs that are allowed (i.e. APIs that were already deprecated
* in 2.42, and APIs introduced after 2.42, cannot be used by Pacemaker code).
*/
#define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_42
#define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_42
# include <glib.h>
# include <stdbool.h>
# include <libxml/tree.h>
/* Public API headers can guard including deprecated API headers with this
* symbol, thus preventing internal code (which includes this header) from using
* deprecated APIs, while still allowing external code to use them by default.
*/
#define PCMK_ALLOW_DEPRECATED 0
# include <crm/lrmd.h>
+# include <crm/cluster/internal.h>
# include <crm/common/logging.h>
# include <crm/common/logging_internal.h>
# include <crm/common/ipc_internal.h>
# include <crm/common/options_internal.h>
# include <crm/common/output_internal.h>
# include <crm/common/scheduler_internal.h>
# include <crm/common/schemas_internal.h>
# include <crm/common/xml_internal.h>
# include <crm/common/xml_io_internal.h>
# include <crm/common/xml_names_internal.h>
# include <crm/common/internal.h>
# include <locale.h>
# include <gettext.h>
#define N_(String) (String)
#ifdef ENABLE_NLS
# define _(String) gettext(String)
#else
# define _(String) (String)
#endif
/*
* IPC service names that are only used internally
*/
# define PCMK__SERVER_BASED_RO "cib_ro"
# define PCMK__SERVER_BASED_RW "cib_rw"
# define PCMK__SERVER_BASED_SHM "cib_shm"
/*
* IPC commands that can be sent to Pacemaker daemons
*/
#define PCMK__ATTRD_CMD_PEER_REMOVE "peer-remove"
#define PCMK__ATTRD_CMD_UPDATE "update"
#define PCMK__ATTRD_CMD_UPDATE_BOTH "update-both"
#define PCMK__ATTRD_CMD_UPDATE_DELAY "update-delay"
#define PCMK__ATTRD_CMD_QUERY "query"
#define PCMK__ATTRD_CMD_REFRESH "refresh"
#define PCMK__ATTRD_CMD_FLUSH "flush"
#define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response"
#define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure"
#define PCMK__ATTRD_CMD_CONFIRM "confirm"
#define PCMK__CONTROLD_CMD_NODES "list-nodes"
#endif /* CRM_INTERNAL__H */
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index ef46bef022..9faad9b0cc 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -1,464 +1,464 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <dlfcn.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include "crmcluster_private.h"
CRM_TRACE_INIT_DATA(cluster);
/*!
* \brief Get (and set if needed) a node's UUID
*
* \param[in,out] peer Node to check
*
* \return Node UUID of \p peer, or NULL if unknown
*/
const char *
crm_peer_uuid(crm_node_t *peer)
{
char *uuid = NULL;
// Check simple cases first, to avoid any calls that might block
if (peer == NULL) {
return NULL;
}
if (peer->uuid != NULL) {
return peer->uuid;
}
switch (pcmk_get_cluster_layer()) {
case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
uuid = pcmk__corosync_uuid(peer);
#endif
break;
case pcmk_cluster_layer_unknown:
case pcmk_cluster_layer_invalid:
crm_err("Unsupported cluster layer");
break;
}
peer->uuid = uuid;
return peer->uuid;
}
/*!
* \internal
* \brief Connect to the cluster layer
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_connect(pcmk_cluster_t *cluster)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
crm_notice("Connecting to %s cluster layer", cluster_layer_s);
switch (cluster_layer) {
case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
return pcmk__corosync_connect(cluster);
#else
break;
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Failed to connect to unsupported cluster layer %s",
cluster_layer_s);
return EPROTONOSUPPORT;
}
/*!
* \brief Disconnect from the cluster layer
*
* \param[in,out] cluster Cluster object to disconnect
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_disconnect(pcmk_cluster_t *cluster)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
crm_info("Disconnecting from %s cluster layer", cluster_layer_s);
switch (cluster_layer) {
case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
- crm_peer_destroy();
+ pcmk__cluster_destroy_node_caches();
pcmk__corosync_disconnect(cluster);
return pcmk_rc_ok;
#else
break;
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Failed to disconnect from unsupported cluster layer %s",
cluster_layer_s);
return EPROTONOSUPPORT;
}
/*!
* \brief Allocate a new \p pcmk_cluster_t object
*
* \return A newly allocated \p pcmk_cluster_t object (guaranteed not \c NULL)
* \note The caller is responsible for freeing the return value using
* \p pcmk_cluster_free().
*/
pcmk_cluster_t *
pcmk_cluster_new(void)
{
return (pcmk_cluster_t *) pcmk__assert_alloc(1, sizeof(pcmk_cluster_t));
}
/*!
* \brief Free a \p pcmk_cluster_t object and its dynamically allocated members
*
* \param[in,out] cluster Cluster object to free
*/
void
pcmk_cluster_free(pcmk_cluster_t *cluster)
{
if (cluster == NULL) {
return;
}
free(cluster->uuid);
free(cluster->uname);
free(cluster);
}
/*!
* \brief Set the destroy function for a cluster object
*
* \param[in,out] cluster Cluster object
* \param[in] fn Destroy function to set
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer))
{
if (cluster == NULL) {
return EINVAL;
}
cluster->destroy = fn;
return pcmk_rc_ok;
}
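/* Hypothetical life-cycle sketch (not part of this file): allocate a
 * cluster object, connect to the detected cluster layer, then disconnect
 * and free it. Error handling beyond the connect check is elided.
 */
static void
example_cluster_lifecycle(void)
{
    pcmk_cluster_t *cluster = pcmk_cluster_new(); // never returns NULL

    if (pcmk_cluster_connect(cluster) == pcmk_rc_ok) {
        pcmk_cluster_disconnect(cluster);
    }
    pcmk_cluster_free(cluster);
}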
/*!
* \brief Send an XML message via the cluster messaging layer
*
* \param[in] node Cluster node to send message to
* \param[in] service Message type to use in message host info
* \param[in] data XML message to send
* \param[in] ordered Ignored for currently supported messaging layers
*
* \return TRUE on success, otherwise FALSE
*/
gboolean
send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service,
const xmlNode *data, gboolean ordered)
{
switch (pcmk_get_cluster_layer()) {
case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
return pcmk__cpg_send_xml(data, node, service);
#endif
break;
default:
break;
}
return FALSE;
}
/*!
* \brief Get the local node's name
*
* \return Local node's name
* \note This will fatally exit if local node name cannot be known.
*/
const char *
get_local_node_name(void)
{
static char *name = NULL;
if (name == NULL) {
name = get_node_name(0);
}
return name;
}
/*!
* \brief Get the node name corresponding to a cluster node ID
*
* \param[in] nodeid Node ID to check (or 0 for local node)
*
* \return Node name corresponding to \p nodeid
* \note This will fatally exit if \p nodeid is 0 and local node name cannot be
* known.
*/
char *
get_node_name(uint32_t nodeid)
{
char *name = NULL;
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
switch (cluster_layer) {
case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
name = pcmk__corosync_name(0, nodeid);
break;
#endif // SUPPORT_COROSYNC
default:
crm_err("Unknown cluster layer: %s (%d)",
cluster_layer_s, cluster_layer);
}
if ((name == NULL) && (nodeid == 0)) {
name = pcmk_hostname();
if (name == NULL) {
// @TODO Maybe let the caller decide what to do
crm_err("Could not obtain the local %s node name", cluster_layer_s);
crm_exit(CRM_EX_FATAL);
}
crm_notice("Defaulting to uname -n for the local %s node name",
cluster_layer_s);
}
if (name == NULL) {
crm_notice("Could not obtain a node name for %s node with "
PCMK_XA_ID " %u",
cluster_layer_s, nodeid);
}
return name;
}
/*!
* \brief Get the node name corresponding to a node UUID
*
* \param[in] uuid UUID of desired node
*
* \return name of desired node
*
* \note This relies on the remote peer cache being populated with all
* remote nodes in the cluster, so callers should maintain that cache.
*/
const char *
crm_peer_uname(const char *uuid)
{
GHashTableIter iter;
crm_node_t *node = NULL;
CRM_CHECK(uuid != NULL, return NULL);
/* remote nodes have the same uname and uuid */
if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
return uuid;
}
/* avoid blocking calls where possible */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
if (node->uname != NULL) {
return node->uname;
}
break;
}
}
node = NULL;
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
long long id;
if ((pcmk__scan_ll(uuid, &id, 0LL) != pcmk_rc_ok)
|| (id < 1LL) || (id > UINT32_MAX)) {
crm_err("Invalid Corosync node ID '%s'", uuid);
return NULL;
}
node = pcmk__search_node_caches((uint32_t) id, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
if (node != NULL) {
crm_info("Setting uuid for node %s[%u] to %s",
node->uname, node->id, uuid);
node->uuid = strdup(uuid);
return node->uname;
}
return NULL;
}
return NULL;
}
/*!
* \brief Get a log-friendly string equivalent of a cluster layer
*
* \param[in] layer Cluster layer
*
* \return Log-friendly string corresponding to \p layer
*/
const char *
pcmk_cluster_layer_text(enum pcmk_cluster_layer layer)
{
switch (layer) {
case pcmk_cluster_layer_corosync:
return "corosync";
case pcmk_cluster_layer_unknown:
return "unknown";
case pcmk_cluster_layer_invalid:
return "invalid";
default:
crm_err("Invalid cluster layer: %d", layer);
return "invalid";
}
}
/*!
* \brief Get and validate the local cluster layer
*
* If a cluster layer is not configured via the \c PCMK__ENV_CLUSTER_TYPE local
* option, this will try to detect an active cluster from among the supported
* cluster layers.
*
* \return Local cluster layer
*
* \note This will fatally exit if the configured cluster layer is invalid.
*/
enum pcmk_cluster_layer
pcmk_get_cluster_layer(void)
{
static enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown;
const char *cluster = NULL;
// Cluster layer is stable once set
if (cluster_layer != pcmk_cluster_layer_unknown) {
return cluster_layer;
}
cluster = pcmk__env_option(PCMK__ENV_CLUSTER_TYPE);
if (cluster != NULL) {
crm_info("Verifying configured cluster layer '%s'", cluster);
cluster_layer = pcmk_cluster_layer_invalid;
#if SUPPORT_COROSYNC
if (pcmk__str_eq(cluster, PCMK_VALUE_COROSYNC, pcmk__str_casei)) {
cluster_layer = pcmk_cluster_layer_corosync;
}
#endif // SUPPORT_COROSYNC
if (cluster_layer == pcmk_cluster_layer_invalid) {
crm_notice("This installation does not support the '%s' cluster "
"infrastructure: terminating",
cluster);
crm_exit(CRM_EX_FATAL);
}
crm_info("Assuming an active '%s' cluster", cluster);
} else {
// Nothing configured, so test supported cluster layers
#if SUPPORT_COROSYNC
crm_debug("Testing with Corosync");
if (pcmk__corosync_is_active()) {
cluster_layer = pcmk_cluster_layer_corosync;
}
#endif // SUPPORT_COROSYNC
if (cluster_layer == pcmk_cluster_layer_unknown) {
crm_notice("Could not determine the current cluster layer");
} else {
crm_info("Detected an active '%s' cluster",
pcmk_cluster_layer_text(cluster_layer));
}
}
return cluster_layer;
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/cluster/compat.h>
void
set_uuid(xmlNode *xml, const char *attr, crm_node_t *node)
{
crm_xml_add(xml, attr, crm_peer_uuid(node));
}
gboolean
crm_cluster_connect(pcmk_cluster_t *cluster)
{
return pcmk_cluster_connect(cluster) == pcmk_rc_ok;
}
void
crm_cluster_disconnect(pcmk_cluster_t *cluster)
{
pcmk_cluster_disconnect(cluster);
}
const char *
name_for_cluster_type(enum cluster_type_e type)
{
switch (type) {
case pcmk_cluster_corosync:
return "corosync";
case pcmk_cluster_unknown:
return "unknown";
case pcmk_cluster_invalid:
return "invalid";
}
crm_err("Invalid cluster type: %d", type);
return "invalid";
}
enum cluster_type_e
get_cluster_type(void)
{
return (enum cluster_type_e) pcmk_get_cluster_layer();
}
gboolean
is_corosync_cluster(void)
{
return pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync;
}
// LCOV_EXCL_STOP
// End deprecated API
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
index 4c7ba2aaf8..4dd3787501 100644
--- a/lib/cluster/corosync.c
+++ b/lib/cluster/corosync.c
@@ -1,812 +1,813 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <inttypes.h> // PRIu64
#include <stdbool.h>
#include <bzlib.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <sys/utsname.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/cfg.h>
#include <corosync/cmap.h>
#include <corosync/quorum.h>
#include <crm/common/xml.h>
#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
#include "crmcluster_private.h"
static quorum_handle_t pcmk_quorum_handle = 0;
static gboolean (*quorum_app_callback)(unsigned long long seq,
gboolean quorate) = NULL;
/*!
* \internal
* \brief Get the Corosync UUID associated with a Pacemaker node
*
* \param[in] node Pacemaker node
*
* \return Newly allocated string with node's Corosync UUID, or NULL if unknown
* \note It is the caller's responsibility to free the result with free().
*/
char *
pcmk__corosync_uuid(const crm_node_t *node)
{
if ((node != NULL)
&& (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)) {
if (node->id > 0) {
return crm_strdup_printf("%u", node->id);
} else {
crm_info("Node %s is not yet known by Corosync", node->uname);
}
}
return NULL;
}
static bool
node_name_is_valid(const char *key, const char *name)
{
int octet;
if (name == NULL) {
crm_trace("%s is empty", key);
return false;
} else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) {
crm_trace("%s contains an IPv4 address (%s), ignoring", key, name);
return false;
} else if (strstr(name, ":") != NULL) {
crm_trace("%s contains an IPv6 address (%s), ignoring", key, name);
return false;
}
crm_trace("'%s: %s' is valid", key, name);
return true;
}
/*!
* \internal
* \brief Get Corosync node name corresponding to a node ID
*
* \param[in] cmap_handle Connection to Corosync CMAP
* \param[in] nodeid Node ID to check
*
* \return Newly allocated string with name or (if no name) IP address
* associated with first address assigned to a Corosync node ID (or NULL
* if unknown)
* \note It is the caller's responsibility to free the result with free().
*/
char *
pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid)
{
// Originally based on corosync-quorumtool.c:node_name()
int lpc = 0;
cs_error_t rc = CS_OK;
int retries = 0;
char *name = NULL;
cmap_handle_t local_handle = 0;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
if (nodeid == 0) {
nodeid = get_local_nodeid(0);
}
if (cmap_handle == 0 && local_handle == 0) {
retries = 0;
crm_trace("Initializing CMAP connection");
do {
rc = pcmk__init_cmap(&local_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %s",
cs_strerror(rc));
local_handle = 0;
}
}
if (cmap_handle == 0) {
cmap_handle = local_handle;
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
}
while (name == NULL && cmap_handle != 0) {
uint32_t id = 0;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &id);
crm_trace("Checking %u vs %u from %s", nodeid, id, key);
free(key);
if (rc != CS_OK) {
break;
}
if (nodeid == id) {
crm_trace("Searching for node name for %u in nodelist.node.%d %s",
nodeid, lpc, pcmk__s(name, "<null>"));
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.name", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
free(key);
}
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
if (!node_name_is_valid(key, name)) {
free(name);
name = NULL;
}
free(key);
}
break;
}
lpc++;
}
bail:
if(local_handle) {
cmap_finalize(local_handle);
}
if (name == NULL) {
crm_info("Unable to get node name for nodeid %u", nodeid);
}
return name;
}
/*!
* \internal
* \brief Disconnect from Corosync cluster
*
* \param[in,out] cluster Cluster object to disconnect
*/
void
pcmk__corosync_disconnect(pcmk_cluster_t *cluster)
{
pcmk__cpg_disconnect(cluster);
if (pcmk_quorum_handle != 0) {
quorum_finalize(pcmk_quorum_handle);
pcmk_quorum_handle = 0;
}
crm_notice("Disconnected from Corosync");
}
/*!
* \internal
* \brief Dispatch function for quorum connection file descriptor
*
* \param[in] user_data Ignored
*
* \return 0 on success, -1 on error (per mainloop_io_t interface)
*/
static int
quorum_dispatch_cb(gpointer user_data)
{
int rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);
if (rc < 0) {
crm_err("Connection to the Quorum API failed: %d", rc);
quorum_finalize(pcmk_quorum_handle);
pcmk_quorum_handle = 0;
return -1;
}
return 0;
}
/*!
* \internal
* \brief Notification callback for Corosync quorum connection
*
* \param[in] handle Corosync quorum connection
* \param[in] quorate Whether cluster is quorate
* \param[in] ring_id Corosync ring ID
* \param[in] view_list_entries Number of entries in \p view_list
* \param[in] view_list Corosync node IDs in membership
*/
static void
quorum_notification_cb(quorum_handle_t handle, uint32_t quorate,
uint64_t ring_id, uint32_t view_list_entries,
uint32_t *view_list)
{
int i;
GHashTableIter iter;
crm_node_t *node = NULL;
static gboolean init_phase = TRUE;
if (quorate != crm_have_quorum) {
if (quorate) {
crm_notice("Quorum acquired " CRM_XS " membership=%" PRIu64 " members=%lu",
ring_id, (long unsigned int)view_list_entries);
} else {
crm_warn("Quorum lost " CRM_XS " membership=%" PRIu64 " members=%lu",
ring_id, (long unsigned int)view_list_entries);
}
crm_have_quorum = quorate;
} else {
crm_info("Quorum %s " CRM_XS " membership=%" PRIu64 " members=%lu",
(quorate? "retained" : "still lost"), ring_id,
(long unsigned int)view_list_entries);
}
if (view_list_entries == 0 && init_phase) {
crm_info("Corosync membership is still forming, ignoring");
return;
}
init_phase = FALSE;
/* Reset last_seen for all cached nodes so we can tell which ones aren't
* in the view list */
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
node->last_seen = 0;
}
/* Update the peer cache for each node in view list */
for (i = 0; i < view_list_entries; i++) {
uint32_t id = view_list[i];
crm_debug("Member[%d] %u ", i, id);
/* Get this node's peer cache entry (adding one if not already there) */
- node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster);
+ node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member);
if (node->uname == NULL) {
char *name = pcmk__corosync_name(0, id);
crm_info("Obtaining name for new node %u", id);
- node = pcmk__get_node(id, name, NULL, pcmk__node_search_cluster);
+ node = pcmk__get_node(id, name, NULL,
+ pcmk__node_search_cluster_member);
free(name);
}
/* Update the node state (including updating last_seen to ring_id) */
pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id);
}
/* Remove any peer cache entries we didn't update */
pcmk__reap_unseen_nodes(ring_id);
if (quorum_app_callback) {
quorum_app_callback(ring_id, quorate);
}
}
/*!
* \internal
* \brief Connect to Corosync quorum service
*
* \param[in] dispatch Connection dispatch callback
* \param[in] destroy Connection destroy callback
*/
void
pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
gboolean),
void (*destroy)(gpointer))
{
cs_error_t rc;
int fd = 0;
int quorate = 0;
uint32_t quorum_type = 0;
struct mainloop_fd_callbacks quorum_fd_callbacks;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
quorum_fd_callbacks.dispatch = quorum_dispatch_cb;
quorum_fd_callbacks.destroy = destroy;
crm_debug("Configuring Pacemaker to obtain quorum from Corosync");
{
#if 0
// New way but not supported by all Corosync 2 versions
quorum_model_v0_data_t quorum_model_data = {
.model = QUORUM_MODEL_V0,
.quorum_notify_fn = quorum_notification_cb,
};
rc = quorum_model_initialize(&pcmk_quorum_handle, QUORUM_MODEL_V0,
(quorum_model_data_t *) &quorum_model_data,
&quorum_type, NULL);
#else
quorum_callbacks_t quorum_callbacks = {
.quorum_notify_fn = quorum_notification_cb,
};
rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks,
&quorum_type);
#endif
}
if (rc != CS_OK) {
crm_err("Could not connect to the Quorum API: %s (%d)",
cs_strerror(rc), rc);
goto bail;
} else if (quorum_type != QUORUM_SET) {
crm_err("Corosync quorum is not configured");
goto bail;
}
rc = quorum_fd_get(pcmk_quorum_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the Quorum API connection: %s (%d)",
strerror(rc), rc);
goto bail;
}
/* Quorum provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("Quorum provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
rc = CS_ERR_ACCESS;
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of Quorum provider: %s (%d)",
strerror(-rv), -rv);
rc = CS_ERR_ACCESS;
goto bail;
}
rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
if (rc != CS_OK) {
crm_err("Could not obtain the current Quorum API state: %d", rc);
goto bail;
}
if (quorate) {
crm_notice("Quorum acquired");
} else {
crm_warn("No quorum");
}
quorum_app_callback = dispatch;
crm_have_quorum = quorate;
rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT);
if (rc != CS_OK) {
crm_err("Could not setup Quorum API notifications: %d", rc);
goto bail;
}
mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);
pcmk__corosync_add_nodes(NULL);
bail:
if (rc != CS_OK) {
quorum_finalize(pcmk_quorum_handle);
}
}
/*!
* \internal
* \brief Connect to Corosync cluster layer
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code
*/
int
pcmk__corosync_connect(pcmk_cluster_t *cluster)
{
crm_node_t *peer = NULL;
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
int rc = pcmk_rc_ok;
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
if (cluster_layer != pcmk_cluster_layer_corosync) {
crm_err("Invalid cluster layer: %s " CRM_XS " cluster_layer=%d",
cluster_layer_s, cluster_layer);
return EINVAL;
}
rc = pcmk__cpg_connect(cluster);
if (rc != pcmk_rc_ok) {
// Error message was logged by pcmk__cpg_connect()
return rc;
}
crm_info("Connection to %s established", cluster_layer_s);
cluster->nodeid = get_local_nodeid(0);
if (cluster->nodeid == 0) {
crm_err("Could not determine local node ID");
return ENXIO;
}
cluster->uname = get_node_name(0);
if (cluster->uname == NULL) {
crm_err("Could not determine local node name");
return ENXIO;
}
// Ensure local node always exists in peer cache
peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
cluster->uuid = pcmk__corosync_uuid(peer);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Check whether a Corosync cluster is active
*
* \return \c true if Corosync is found active, or \c false otherwise
*/
bool
pcmk__corosync_is_active(void)
{
cmap_handle_t handle;
int rc = pcmk__init_cmap(&handle);
if (rc == CS_OK) {
cmap_finalize(handle);
return true;
}
crm_info("Failed to initialize the cmap API: %s (%d)",
pcmk__cs_err_str(rc), rc);
return false;
}
/*!
* \brief Check whether a Corosync cluster peer is active
*
* \param[in] node Node to check
*
* \return TRUE if \p node is an active Corosync peer, otherwise FALSE
*/
gboolean
crm_is_corosync_peer_active(const crm_node_t *node)
{
if (node == NULL) {
crm_trace("Corosync peer inactive: NULL");
return FALSE;
} else if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
crm_trace("Corosync peer %s inactive: state=%s",
node->uname, node->state);
return FALSE;
} else if (!pcmk_is_set(node->processes, crm_proc_cpg)) {
crm_trace("Corosync peer %s inactive: processes=%.16x",
node->uname, node->processes);
return FALSE;
}
return TRUE;
}
/*!
* \internal
* \brief Load Corosync node list (via CMAP) into peer cache and optionally XML
*
* \param[in,out] xml_parent If not NULL, add <node> entry here for each node
*
* \return true if any nodes were found, false otherwise
*/
bool
pcmk__corosync_add_nodes(xmlNode *xml_parent)
{
int lpc = 0;
cs_error_t rc = CS_OK;
int retries = 0;
bool any = false;
cmap_handle_t cmap_handle;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
do {
rc = pcmk__init_cmap(&cmap_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc);
return false;
}
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
crm_trace("Initializing Corosync node list");
for (lpc = 0; TRUE; lpc++) {
uint32_t nodeid = 0;
char *name = NULL;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &nodeid);
free(key);
if (rc != CS_OK) {
break;
}
name = pcmk__corosync_name(cmap_handle, nodeid);
if (name != NULL) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node && node->uname && strcasecmp(node->uname, name) == 0) {
if (node->id && node->id != nodeid) {
crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id,
nodeid, name);
crm_exit(CRM_EX_FATAL);
}
}
}
}
if (nodeid > 0 || name != NULL) {
crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name);
- pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster);
+ pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster_member);
}
if (nodeid > 0 && name != NULL) {
any = true;
if (xml_parent) {
xmlNode *node = pcmk__xe_create(xml_parent, PCMK_XE_NODE);
crm_xml_set_id(node, "%u", nodeid);
crm_xml_add(node, PCMK_XA_UNAME, name);
}
}
free(name);
}
bail:
cmap_finalize(cmap_handle);
return any;
}
/*!
* \internal
* \brief Get cluster name from Corosync configuration (via CMAP)
*
* \return Newly allocated string with cluster name if configured, or NULL
*/
char *
pcmk__corosync_cluster_name(void)
{
cmap_handle_t handle;
char *cluster_name = NULL;
cs_error_t rc = CS_OK;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
rc = pcmk__init_cmap(&handle);
if (rc != CS_OK) {
crm_info("Failed to initialize the cmap API: %s (%d)",
cs_strerror(rc), rc);
return NULL;
}
rc = cmap_fd_get(handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name);
if (rc != CS_OK) {
crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc);
} else {
crm_debug("cmap totem.cluster_name = '%s'", cluster_name);
}
bail:
cmap_finalize(handle);
return cluster_name;
}
/*!
* \internal
* \brief Check (via CMAP) whether Corosync configuration has a node list
*
* \return true if Corosync has node list, otherwise false
*/
bool
pcmk__corosync_has_nodelist(void)
{
cs_error_t cs_rc = CS_OK;
int retries = 0;
cmap_handle_t cmap_handle;
cmap_iter_handle_t iter_handle;
char key_name[CMAP_KEYNAME_MAXLEN + 1];
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rc = pcmk_ok;
static bool got_result = false;
static bool result = false;
if (got_result) {
return result;
}
// Connect to CMAP
do {
cs_rc = pcmk__init_cmap(&cmap_handle);
if (cs_rc != CS_OK) {
retries++;
crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)",
cs_strerror(cs_rc), cs_rc, retries);
sleep(retries);
}
} while ((retries < 5) && (cs_rc != CS_OK));
if (cs_rc != CS_OK) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP connection failed (%s) " CRM_XS " rc=%d",
cs_strerror(cs_rc), cs_rc);
return false;
}
// Get CMAP connection file descriptor
cs_rc = cmap_fd_get(cmap_handle, &fd);
if (cs_rc != CS_OK) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP unusable (%s) " CRM_XS " rc=%d",
cs_strerror(cs_rc), cs_rc);
goto bail;
}
// Check whether CMAP connection is authentic (i.e. provided by root)
rc = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0,
&found_pid, &found_uid, &found_gid);
if (rc == 0) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP provider is inauthentic "
CRM_XS " pid=%lld uid=%lld gid=%lld",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rc < 0) {
crm_warn("Assuming Corosync does not have node list: "
"Could not verify CMAP authenticity (%s) " CRM_XS " rc=%d",
pcmk_strerror(rc), rc);
goto bail;
}
    // Check whether nodelist section is present
cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle);
if (cs_rc != CS_OK) {
crm_warn("Assuming Corosync does not have node list: "
"CMAP not readable (%s) " CRM_XS " rc=%d",
cs_strerror(cs_rc), cs_rc);
goto bail;
}
cs_rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL);
if (cs_rc == CS_OK) {
result = true;
}
cmap_iter_finalize(cmap_handle, iter_handle);
got_result = true;
crm_debug("Corosync %s node list", (result? "has" : "does not have"));
bail:
cmap_finalize(cmap_handle);
return result;
}
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
index a972d23dd6..0666e66efa 100644
--- a/lib/cluster/cpg.c
+++ b/lib/cluster/cpg.c
@@ -1,1159 +1,1160 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <bzlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <crm/common/ipc.h>
#include <crm/cluster/internal.h>
#include <crm/common/mainloop.h>
#include <sys/utsname.h>
#include <qb/qbipc_common.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/cpg.h>
#include <crm/common/xml.h>
#include <crm/common/ipc_internal.h> /* PCMK__SPECIAL_PID* */
#include "crmcluster_private.h"
/* @TODO Once we can update the public API to require pcmk_cluster_t* in more
* functions, we can ditch this in favor of cluster->cpg_handle.
*/
static cpg_handle_t pcmk_cpg_handle = 0;
// @TODO These could be moved to pcmk_cluster_t* at that time as well
static bool cpg_evicted = false;
static GList *cs_message_queue = NULL;
static int cs_message_timer = 0;
struct pcmk__cpg_host_s {
uint32_t id;
uint32_t pid;
gboolean local;
enum crm_ais_msg_types type;
uint32_t size;
char uname[MAX_NAME];
} __attribute__ ((packed));
typedef struct pcmk__cpg_host_s pcmk__cpg_host_t;
struct pcmk__cpg_msg_s {
struct qb_ipc_response_header header __attribute__ ((aligned(8)));
uint32_t id;
gboolean is_compressed;
pcmk__cpg_host_t host;
pcmk__cpg_host_t sender;
uint32_t size;
uint32_t compressed_size;
/* 584 bytes */
char data[0];
} __attribute__ ((packed));
typedef struct pcmk__cpg_msg_s pcmk__cpg_msg_t;
static void crm_cs_flush(gpointer data);
#define msg_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size)
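/* Run a Corosync API call (code) and retry it up to max additional times
 * while it returns CS_ERR_TRY_AGAIN or CS_ERR_QUEUE_FULL, sleeping one
 * second longer before each successive attempt (1s, 2s, ...). Any other
 * return code, success or failure, exits the loop immediately, with rc
 * holding the final result.
 */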
#define cs_repeat(rc, counter, max, code) do { \
rc = code; \
if ((rc == CS_ERR_TRY_AGAIN) || (rc == CS_ERR_QUEUE_FULL)) { \
counter++; \
crm_debug("Retrying operation after %ds", counter); \
sleep(counter); \
} else { \
break; \
} \
} while (counter < max)
/*!
* \brief Get the local Corosync node ID (via CPG)
*
* \param[in] handle CPG connection to use (or 0 to use new connection)
*
* \return Corosync ID of local node (or 0 if not known)
*/
uint32_t
get_local_nodeid(cpg_handle_t handle)
{
cs_error_t rc = CS_OK;
int retries = 0;
static uint32_t local_nodeid = 0;
cpg_handle_t local_handle = handle;
cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0};
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
if(local_nodeid != 0) {
return local_nodeid;
}
if(handle == 0) {
crm_trace("Creating connection");
cs_repeat(rc, retries, 5, cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *)&cpg_model_info, NULL));
if (rc != CS_OK) {
crm_err("Could not connect to the CPG API: %s (%d)",
cs_strerror(rc), rc);
return 0;
}
rc = cpg_fd_get(local_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CPG API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CPG provider run as root (in given user namespace, anyway)? */
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CPG provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CPG provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
}
if (rc == CS_OK) {
retries = 0;
crm_trace("Performing lookup");
cs_repeat(rc, retries, 5, cpg_local_get(local_handle, &local_nodeid));
}
if (rc != CS_OK) {
crm_err("Could not get local node id from the CPG API: %s (%d)",
pcmk__cs_err_str(rc), rc);
}
bail:
if(handle == 0) {
crm_trace("Closing connection");
cpg_finalize(local_handle);
}
crm_debug("Local nodeid is %u", local_nodeid);
return local_nodeid;
}
/*!
* \internal
* \brief Callback function for Corosync message queue timer
*
* \param[in] data CPG handle
*
* \return FALSE (to indicate to glib that timer should not be removed)
*/
static gboolean
crm_cs_flush_cb(gpointer data)
{
cs_message_timer = 0;
crm_cs_flush(data);
return FALSE;
}
// Send no more than this many CPG messages in one flush
#define CS_SEND_MAX 200
/*!
* \internal
* \brief Send messages in Corosync CPG message queue
*
* \param[in] data CPG handle
*/
static void
crm_cs_flush(gpointer data)
{
unsigned int sent = 0;
guint queue_len = 0;
cs_error_t rc = 0;
cpg_handle_t *handle = (cpg_handle_t *) data;
if (*handle == 0) {
crm_trace("Connection is dead");
return;
}
queue_len = g_list_length(cs_message_queue);
if (((queue_len % 1000) == 0) && (queue_len > 1)) {
crm_err("CPG queue has grown to %d", queue_len);
} else if (queue_len == CS_SEND_MAX) {
crm_warn("CPG queue has grown to %d", queue_len);
}
if (cs_message_timer != 0) {
/* There is already a timer, wait until it goes off */
crm_trace("Timer active %d", cs_message_timer);
return;
}
while ((cs_message_queue != NULL) && (sent < CS_SEND_MAX)) {
struct iovec *iov = cs_message_queue->data;
rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1);
if (rc != CS_OK) {
break;
}
sent++;
crm_trace("CPG message sent, size=%llu",
(unsigned long long) iov->iov_len);
cs_message_queue = g_list_remove(cs_message_queue, iov);
free(iov->iov_base);
free(iov);
}
queue_len -= sent;
do_crm_log((queue_len > 5)? LOG_INFO : LOG_TRACE,
"Sent %u CPG message%s (%d still queued): %s (rc=%d)",
sent, pcmk__plural_s(sent), queue_len, pcmk__cs_err_str(rc),
(int) rc);
if (cs_message_queue) {
uint32_t delay_ms = 100;
if (rc != CS_OK) {
/* Proportionally more if sending failed but cap at 1s */
delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len));
}
cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data);
}
}
/*!
* \internal
* \brief Dispatch function for CPG handle
*
* \param[in,out] user_data Cluster object
*
* \return 0 on success, -1 on error (per mainloop_io_t interface)
*/
static int
pcmk_cpg_dispatch(gpointer user_data)
{
cs_error_t rc = CS_OK;
pcmk_cluster_t *cluster = (pcmk_cluster_t *) user_data;
rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE);
if (rc != CS_OK) {
crm_err("Connection to the CPG API failed: %s (%d)",
pcmk__cs_err_str(rc), rc);
cpg_finalize(cluster->cpg_handle);
cluster->cpg_handle = 0;
return -1;
} else if (cpg_evicted) {
crm_err("Evicted from CPG membership");
return -1;
}
return 0;
}
static inline const char *
ais_dest(const pcmk__cpg_host_t *host)
{
if (host->local) {
return "local";
} else if (host->size > 0) {
return host->uname;
} else {
return "<all>";
}
}
static inline const char *
msg_type2text(enum crm_ais_msg_types type)
{
const char *text = "unknown";
switch (type) {
case crm_msg_none:
text = "unknown";
break;
case crm_msg_ais:
text = "ais";
break;
case crm_msg_cib:
text = "cib";
break;
case crm_msg_crmd:
text = "crmd";
break;
case crm_msg_pe:
text = "pengine";
break;
case crm_msg_te:
text = "tengine";
break;
case crm_msg_lrmd:
text = "lrmd";
break;
case crm_msg_attrd:
text = "attrd";
break;
case crm_msg_stonithd:
text = "stonithd";
break;
case crm_msg_stonith_ng:
text = "stonith-ng";
break;
}
return text;
}
/*!
* \internal
* \brief Check whether a Corosync CPG message is valid
*
* \param[in] msg Corosync CPG message to check
*
* \return true if \p msg is valid, otherwise false
*/
static bool
check_message_sanity(const pcmk__cpg_msg_t *msg)
{
int32_t payload_size = msg->header.size - sizeof(pcmk__cpg_msg_t);
if (payload_size < 1) {
crm_err("%sCPG message %d from %s invalid: "
"Claimed size of %d bytes is too small "
CRM_XS " from %s[%u] to %s@%s",
(msg->is_compressed? "Compressed " : ""),
msg->id, ais_dest(&(msg->sender)),
(int) msg->header.size,
msg_type2text(msg->sender.type), msg->sender.pid,
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return false;
}
if (msg->header.error != CS_OK) {
crm_err("%sCPG message %d from %s invalid: "
"Sender indicated error %d "
CRM_XS " from %s[%u] to %s@%s",
(msg->is_compressed? "Compressed " : ""),
msg->id, ais_dest(&(msg->sender)),
msg->header.error,
msg_type2text(msg->sender.type), msg->sender.pid,
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return false;
}
if (msg_data_len(msg) != payload_size) {
crm_err("%sCPG message %d from %s invalid: "
"Total size %d inconsistent with payload size %d "
CRM_XS " from %s[%u] to %s@%s",
(msg->is_compressed? "Compressed " : ""),
msg->id, ais_dest(&(msg->sender)),
(int) msg->header.size, (int) msg_data_len(msg),
msg_type2text(msg->sender.type), msg->sender.pid,
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return false;
}
if (!msg->is_compressed &&
/* msg->size != (strlen(msg->data) + 1) would be a stronger check,
* but checking the last byte or two should be quick
*/
(((msg->size > 1) && (msg->data[msg->size - 2] == '\0'))
|| (msg->data[msg->size - 1] != '\0'))) {
crm_err("CPG message %d from %s invalid: "
"Payload does not end at byte %llu "
CRM_XS " from %s[%u] to %s@%s",
msg->id, ais_dest(&(msg->sender)),
(unsigned long long) msg->size,
msg_type2text(msg->sender.type), msg->sender.pid,
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return false;
}
crm_trace("Verified %d-byte %sCPG message %d from %s[%u]@%s to %s@%s",
(int) msg->header.size, (msg->is_compressed? "compressed " : ""),
msg->id, msg_type2text(msg->sender.type), msg->sender.pid,
ais_dest(&(msg->sender)),
msg_type2text(msg->host.type), ais_dest(&(msg->host)));
return true;
}
/*!
* \brief Extract text data from a Corosync CPG message
*
* \param[in] handle CPG connection (to get local node ID if not known)
* \param[in] nodeid Corosync ID of node that sent message
* \param[in] pid Process ID of message sender (for logging only)
* \param[in,out] content CPG message
* \param[out] kind If not NULL, will be set to CPG header ID
* (which should be an enum crm_ais_msg_class value,
* currently always crm_class_cluster)
* \param[out] from If not NULL, will be set to sender uname
* (valid for the lifetime of \p content)
*
* \return Newly allocated string with message data
* \note It is the caller's responsibility to free the return value with free().
*/
char *
pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content,
uint32_t *kind, const char **from)
{
char *data = NULL;
pcmk__cpg_msg_t *msg = (pcmk__cpg_msg_t *) content;
if(handle) {
// Do filtering and field massaging
uint32_t local_nodeid = get_local_nodeid(handle);
const char *local_name = get_local_node_name();
if (msg->sender.id > 0 && msg->sender.id != nodeid) {
crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id);
return NULL;
} else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) {
/* Not for us */
crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid);
return NULL;
} else if (msg->host.size != 0 && !pcmk__str_eq(msg->host.uname, local_name, pcmk__str_casei)) {
/* Not for us */
crm_trace("Not for us: %s != %s", msg->host.uname, local_name);
return NULL;
}
msg->sender.id = nodeid;
if (msg->sender.size == 0) {
crm_node_t *peer = pcmk__get_node(nodeid, NULL, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
if (peer == NULL) {
crm_err("Peer with nodeid=%u is unknown", nodeid);
} else if (peer->uname == NULL) {
crm_err("No uname for peer with nodeid=%u", nodeid);
} else {
crm_notice("Fixing uname for peer with nodeid=%u", nodeid);
msg->sender.size = strlen(peer->uname);
memset(msg->sender.uname, 0, MAX_NAME);
memcpy(msg->sender.uname, peer->uname, msg->sender.size);
}
}
}
crm_trace("Got new%s message (size=%d, %d, %d)",
msg->is_compressed ? " compressed" : "",
msg_data_len(msg), msg->size, msg->compressed_size);
if (kind != NULL) {
*kind = msg->header.id;
}
if (from != NULL) {
*from = msg->sender.uname;
}
if (msg->is_compressed && msg->size > 0) {
int rc = BZ_OK;
char *uncompressed = NULL;
unsigned int new_size = msg->size + 1;
if (!check_message_sanity(msg)) {
goto badmsg;
}
crm_trace("Decompressing message data");
uncompressed = pcmk__assert_alloc(1, new_size);
rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0);
rc = pcmk__bzlib2rc(rc);
if (rc != pcmk_rc_ok) {
crm_err("Decompression failed: %s " CRM_XS " rc=%d", pcmk_rc_str(rc), rc);
free(uncompressed);
goto badmsg;
}
CRM_ASSERT(new_size == msg->size);
data = uncompressed;
} else if (!check_message_sanity(msg)) {
goto badmsg;
} else {
data = strdup(msg->data);
}
// Is this necessary?
pcmk__get_node(msg->sender.id, msg->sender.uname, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
crm_trace("Payload: %.200s", data);
return data;
badmsg:
crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):"
" min=%d, total=%d, size=%d, bz2_size=%d",
msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type),
ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
msg->sender.pid, (int)sizeof(pcmk__cpg_msg_t),
msg->header.size, msg->size, msg->compressed_size);
free(data);
return NULL;
}
/*!
* \internal
* \brief Compare cpg_address objects by node ID
*
* \param[in] first First cpg_address structure to compare
* \param[in] second Second cpg_address structure to compare
*
* \return Negative number if first's node ID is lower,
* positive number if first's node ID is greater,
* or 0 if both node IDs are equal
*/
static int
cmp_member_list_nodeid(const void *first, const void *second)
{
const struct cpg_address *const a = *((const struct cpg_address **) first),
*const b = *((const struct cpg_address **) second);
if (a->nodeid < b->nodeid) {
return -1;
} else if (a->nodeid > b->nodeid) {
return 1;
}
/* don't bother with "reason" nor "pid" */
return 0;
}
/*!
* \internal
* \brief Get a readable string equivalent of a cpg_reason_t value
*
* \param[in] reason CPG reason value
*
* \return Readable string suitable for logging
*/
static const char *
cpgreason2str(cpg_reason_t reason)
{
switch (reason) {
case CPG_REASON_JOIN: return " via cpg_join";
case CPG_REASON_LEAVE: return " via cpg_leave";
case CPG_REASON_NODEDOWN: return " via cluster exit";
case CPG_REASON_NODEUP: return " via cluster join";
case CPG_REASON_PROCDOWN: return " for unknown reason";
default: break;
}
return "";
}
/*!
* \internal
* \brief Get a log-friendly node name
*
* \param[in] peer Node to check
*
* \return Node's uname, or readable string if not known
*/
static inline const char *
peer_name(const crm_node_t *peer)
{
if (peer == NULL) {
return "unknown node";
} else if (peer->uname == NULL) {
return "peer node";
} else {
return peer->uname;
}
}
/*!
* \internal
* \brief Process a CPG peer's leaving the cluster
*
* \param[in] cpg_group_name CPG group name (for logging)
* \param[in] event_counter Event number (for logging)
* \param[in] local_nodeid Node ID of local node
* \param[in] cpg_peer CPG peer that left
* \param[in] sorted_member_list List of remaining members, qsort()-ed by ID
* \param[in] member_list_entries Number of entries in \p sorted_member_list
*/
static void
node_left(const char *cpg_group_name, int event_counter,
uint32_t local_nodeid, const struct cpg_address *cpg_peer,
const struct cpg_address **sorted_member_list,
size_t member_list_entries)
{
- crm_node_t *peer = pcmk__search_node_caches(cpg_peer->nodeid, NULL,
- pcmk__node_search_cluster);
+ crm_node_t *peer =
+ pcmk__search_node_caches(cpg_peer->nodeid, NULL,
+ pcmk__node_search_cluster_member);
const struct cpg_address **rival = NULL;
/* Most CPG-related Pacemaker code assumes that only one process on a node
* can be in the process group, but Corosync does not impose this
* limitation, and more than one can be a member in practice due to a
* daemon attempting to start while another instance is already running.
*
* Check for any such duplicate instances, because we don't want to process
* their leaving as if our actual peer left. If the peer that left still has
* an entry in sorted_member_list (with a different PID), we will ignore the
* leaving.
*
* @TODO Track CPG members' PIDs so we can tell exactly who left.
*/
if (peer != NULL) {
rival = bsearch(&cpg_peer, sorted_member_list, member_list_entries,
sizeof(const struct cpg_address *),
cmp_member_list_nodeid);
}
if (rival == NULL) {
crm_info("Group %s event %d: %s (node %u pid %u) left%s",
cpg_group_name, event_counter, peer_name(peer),
cpg_peer->nodeid, cpg_peer->pid,
cpgreason2str(cpg_peer->reason));
if (peer != NULL) {
crm_update_peer_proc(__func__, peer, crm_proc_cpg,
PCMK_VALUE_OFFLINE);
}
} else if (cpg_peer->nodeid == local_nodeid) {
crm_warn("Group %s event %d: duplicate local pid %u left%s",
cpg_group_name, event_counter,
cpg_peer->pid, cpgreason2str(cpg_peer->reason));
} else {
crm_warn("Group %s event %d: "
"%s (node %u) duplicate pid %u left%s (%u remains)",
cpg_group_name, event_counter, peer_name(peer),
cpg_peer->nodeid, cpg_peer->pid,
cpgreason2str(cpg_peer->reason), (*rival)->pid);
}
}
/*!
* \brief Handle a CPG configuration change event
*
* \param[in] handle CPG connection
* \param[in] cpg_name CPG group name
* \param[in] member_list List of current CPG members
* \param[in] member_list_entries Number of entries in \p member_list
* \param[in] left_list List of CPG members that left
* \param[in] left_list_entries Number of entries in \p left_list
* \param[in] joined_list List of CPG members that joined
* \param[in] joined_list_entries Number of entries in \p joined_list
*/
void
pcmk_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries)
{
int i;
gboolean found = FALSE;
static int counter = 0;
uint32_t local_nodeid = get_local_nodeid(handle);
const struct cpg_address **sorted;
sorted = pcmk__assert_alloc(member_list_entries,
sizeof(const struct cpg_address *));
for (size_t iter = 0; iter < member_list_entries; iter++) {
sorted[iter] = member_list + iter;
}
    /* so that cross-matching the multiply-subscribed nodes is then cheap */
qsort(sorted, member_list_entries, sizeof(const struct cpg_address *),
cmp_member_list_nodeid);
for (i = 0; i < left_list_entries; i++) {
node_left(groupName->value, counter, local_nodeid, &left_list[i],
sorted, member_list_entries);
}
free(sorted);
sorted = NULL;
for (i = 0; i < joined_list_entries; i++) {
crm_info("Group %s event %d: node %u pid %u joined%s",
groupName->value, counter, joined_list[i].nodeid,
joined_list[i].pid, cpgreason2str(joined_list[i].reason));
}
for (i = 0; i < member_list_entries; i++) {
crm_node_t *peer = pcmk__get_node(member_list[i].nodeid, NULL, NULL,
- pcmk__node_search_cluster);
+ pcmk__node_search_cluster_member);
if (member_list[i].nodeid == local_nodeid
&& member_list[i].pid != getpid()) {
// See the note in node_left()
crm_warn("Group %s event %d: detected duplicate local pid %u",
groupName->value, counter, member_list[i].pid);
continue;
}
crm_info("Group %s event %d: %s (node %u pid %u) is member",
groupName->value, counter, peer_name(peer),
member_list[i].nodeid, member_list[i].pid);
/* If the caller left auto-reaping enabled, this will also update the
* state to member.
*/
peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg,
PCMK_VALUE_ONLINE);
if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) {
/* The node is a CPG member, but we currently think it's not a
* cluster member. This is possible only if auto-reaping was
* disabled. The node may be joining, and we happened to get the CPG
* notification before the quorum notification; or the node may have
* just died, and we are processing its final messages; or a bug
* has affected the peer cache.
*/
time_t now = time(NULL);
if (peer->when_lost == 0) {
// Track when we first got into this contradictory state
peer->when_lost = now;
} else if (now > (peer->when_lost + 60)) {
// If it persists for more than a minute, update the state
crm_warn("Node %u is member of group %s but was believed offline",
member_list[i].nodeid, groupName->value);
pcmk__update_peer_state(__func__, peer, CRM_NODE_MEMBER, 0);
}
}
if (local_nodeid == member_list[i].nodeid) {
found = TRUE;
}
}
if (!found) {
crm_err("Local node was evicted from group %s", groupName->value);
cpg_evicted = true;
}
counter++;
}
/*!
* \brief Set the CPG deliver callback function for a cluster object
*
* \param[in,out] cluster Cluster object
* \param[in] fn Deliver callback function to set
*
* \return Standard Pacemaker return code
*/
int
pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn)
{
if (cluster == NULL) {
return EINVAL;
}
cluster->cpg.cpg_deliver_fn = fn;
return pcmk_rc_ok;
}
/*!
* \brief Set the CPG config change callback function for a cluster object
*
* \param[in,out] cluster Cluster object
* \param[in] fn Configuration change callback function to set
*
* \return Standard Pacemaker return code
*/
int
pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn)
{
if (cluster == NULL) {
return EINVAL;
}
cluster->cpg.cpg_confchg_fn = fn;
return pcmk_rc_ok;
}
/*!
* \brief Connect to Corosync CPG
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code
*/
int
pcmk__cpg_connect(pcmk_cluster_t *cluster)
{
cs_error_t rc;
int fd = -1;
int retries = 0;
uint32_t id = 0;
crm_node_t *peer = NULL;
cpg_handle_t handle = 0;
const char *message_name = pcmk__message_name(crm_system_name);
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
struct mainloop_fd_callbacks cpg_fd_callbacks = {
.dispatch = pcmk_cpg_dispatch,
.destroy = cluster->destroy,
};
cpg_model_v1_data_t cpg_model_info = {
.model = CPG_MODEL_V1,
.cpg_deliver_fn = cluster->cpg.cpg_deliver_fn,
.cpg_confchg_fn = cluster->cpg.cpg_confchg_fn,
.cpg_totem_confchg_fn = NULL,
.flags = 0,
};
cpg_evicted = false;
cluster->group.length = 0;
cluster->group.value[0] = 0;
/* group.value is char[128] */
strncpy(cluster->group.value, message_name, 127);
cluster->group.value[127] = 0;
cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value));
cs_repeat(rc, retries, 30, cpg_model_initialize(&handle, CPG_MODEL_V1, (cpg_model_data_t *)&cpg_model_info, NULL));
if (rc != CS_OK) {
crm_err("Could not connect to the CPG API: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
rc = cpg_fd_get(handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CPG API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
    /* Is the CPG provider running as root (in the given user namespace, anyway)? */
    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CPG provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
rc = CS_ERR_ACCESS;
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CPG provider: %s (%d)",
strerror(-rv), -rv);
rc = CS_ERR_ACCESS;
goto bail;
}
id = get_local_nodeid(handle);
if (id == 0) {
crm_err("Could not get local node id from the CPG API");
goto bail;
}
cluster->nodeid = id;
retries = 0;
cs_repeat(rc, retries, 30, cpg_join(handle, &cluster->group));
if (rc != CS_OK) {
crm_err("Could not join the CPG group '%s': %d", message_name, rc);
goto bail;
}
pcmk_cpg_handle = handle;
cluster->cpg_handle = handle;
mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks);
bail:
if (rc != CS_OK) {
cpg_finalize(handle);
// @TODO Map rc to more specific Pacemaker return code
return ENOTCONN;
}
- peer = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster);
+ peer = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member);
crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Disconnect from Corosync CPG
*
* \param[in,out] cluster Cluster object to disconnect
*/
void
pcmk__cpg_disconnect(pcmk_cluster_t *cluster)
{
pcmk_cpg_handle = 0;
if (cluster->cpg_handle != 0) {
crm_trace("Disconnecting CPG");
cpg_leave(cluster->cpg_handle, &cluster->group);
cpg_finalize(cluster->cpg_handle);
cluster->cpg_handle = 0;
} else {
crm_info("No CPG connection");
}
}
/*!
* \internal
* \brief Send an XML message via Corosync CPG
*
* \param[in] msg XML message to send
* \param[in] node Cluster node to send message to
* \param[in] dest Type of message to send
*
* \return TRUE on success, otherwise FALSE
*/
bool
pcmk__cpg_send_xml(const xmlNode *msg, const crm_node_t *node,
enum crm_ais_msg_types dest)
{
bool rc = true;
GString *data = g_string_sized_new(1024);
pcmk__xml_string(msg, 0, data, 0);
rc = send_cluster_text(crm_class_cluster, data->str, FALSE, node, dest);
g_string_free(data, TRUE);
return rc;
}
/*!
* \internal
* \brief Send string data via Corosync CPG
*
* \param[in] msg_class Message class (to set as CPG header ID)
* \param[in] data Data to send
* \param[in] local What to set as host "local" value (which is never used)
* \param[in] node Cluster node to send message to
* \param[in] dest Type of message to send
*
* \return TRUE on success, otherwise FALSE
*/
gboolean
send_cluster_text(enum crm_ais_msg_class msg_class, const char *data,
gboolean local, const crm_node_t *node,
enum crm_ais_msg_types dest)
{
static int msg_id = 0;
static int local_pid = 0;
static int local_name_len = 0;
static const char *local_name = NULL;
char *target = NULL;
struct iovec *iov;
pcmk__cpg_msg_t *msg = NULL;
enum crm_ais_msg_types sender = text2msg_type(crm_system_name);
switch (msg_class) {
case crm_class_cluster:
break;
default:
crm_err("Invalid message class: %d", msg_class);
return FALSE;
}
CRM_CHECK(dest != crm_msg_ais, return FALSE);
if (local_name == NULL) {
local_name = get_local_node_name();
}
if ((local_name_len == 0) && (local_name != NULL)) {
local_name_len = strlen(local_name);
}
if (data == NULL) {
data = "";
}
if (local_pid == 0) {
local_pid = getpid();
}
if (sender == crm_msg_none) {
sender = local_pid;
}
msg = pcmk__assert_alloc(1, sizeof(pcmk__cpg_msg_t));
msg_id++;
msg->id = msg_id;
msg->header.id = msg_class;
msg->header.error = CS_OK;
msg->host.type = dest;
msg->host.local = local;
if (node) {
if (node->uname) {
target = pcmk__str_copy(node->uname);
msg->host.size = strlen(node->uname);
memset(msg->host.uname, 0, MAX_NAME);
memcpy(msg->host.uname, node->uname, msg->host.size);
} else {
target = crm_strdup_printf("%u", node->id);
}
msg->host.id = node->id;
} else {
target = pcmk__str_copy("all");
}
msg->sender.id = 0;
msg->sender.type = sender;
msg->sender.pid = local_pid;
msg->sender.size = local_name_len;
memset(msg->sender.uname, 0, MAX_NAME);
if ((local_name != NULL) && (msg->sender.size != 0)) {
memcpy(msg->sender.uname, local_name, msg->sender.size);
}
msg->size = 1 + strlen(data);
msg->header.size = sizeof(pcmk__cpg_msg_t) + msg->size;
if (msg->size < CRM_BZ2_THRESHOLD) {
msg = pcmk__realloc(msg, msg->header.size);
memcpy(msg->data, data, msg->size);
} else {
char *compressed = NULL;
unsigned int new_size = 0;
if (pcmk__compress(data, (unsigned int) msg->size, 0, &compressed,
&new_size) == pcmk_rc_ok) {
msg->header.size = sizeof(pcmk__cpg_msg_t) + new_size;
msg = pcmk__realloc(msg, msg->header.size);
memcpy(msg->data, compressed, new_size);
msg->is_compressed = TRUE;
msg->compressed_size = new_size;
} else {
// cppcheck seems not to understand the abort logic in pcmk__realloc
// cppcheck-suppress memleak
msg = pcmk__realloc(msg, msg->header.size);
memcpy(msg->data, data, msg->size);
}
free(compressed);
}
iov = pcmk__assert_alloc(1, sizeof(struct iovec));
iov->iov_base = msg;
iov->iov_len = msg->header.size;
if (msg->compressed_size) {
crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes compressed payload): %.200s",
msg->id, target, (unsigned long long) iov->iov_len,
msg->compressed_size, data);
} else {
crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes payload): %.200s",
msg->id, target, (unsigned long long) iov->iov_len,
msg->size, data);
}
free(target);
cs_message_queue = g_list_append(cs_message_queue, iov);
crm_cs_flush(&pcmk_cpg_handle);
return TRUE;
}
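/* Worked sizing example for the logic above: for a 100-character payload,
 * msg->size is 101 (payload plus NUL) and msg->header.size is
 * sizeof(pcmk__cpg_msg_t) + 101. Payloads of CRM_BZ2_THRESHOLD bytes or more
 * are compressed with pcmk__compress(), and header.size then covers the
 * compressed length instead; if compression fails, the message falls back to
 * being sent uncompressed.
 */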
/*!
* \brief Get the message type equivalent of a string
*
* \param[in] text String of message type
*
* \return Message type equivalent of \p text
*/
enum crm_ais_msg_types
text2msg_type(const char *text)
{
int type = crm_msg_none;
CRM_CHECK(text != NULL, return type);
text = pcmk__message_name(text);
if (pcmk__str_eq(text, "ais", pcmk__str_casei)) {
type = crm_msg_ais;
} else if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_casei)) {
type = crm_msg_cib;
} else if (pcmk__strcase_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) {
type = crm_msg_crmd;
} else if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
type = crm_msg_te;
} else if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_casei)) {
type = crm_msg_pe;
} else if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_casei)) {
type = crm_msg_lrmd;
} else if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_casei)) {
type = crm_msg_stonithd;
} else if (pcmk__str_eq(text, "stonith-ng", pcmk__str_casei)) {
type = crm_msg_stonith_ng;
} else if (pcmk__str_eq(text, "attrd", pcmk__str_casei)) {
type = crm_msg_attrd;
} else {
/* This will normally be a transient client rather than
* a cluster daemon. Set the type to the pid of the client
*/
int scan_rc = sscanf(text, "%d", &type);
if (scan_rc != 1 || type <= crm_msg_stonith_ng) {
/* Ensure it's sane */
type = crm_msg_none;
}
}
return type;
}
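/* Informal examples of the mapping above:
 *
 *     text2msg_type(CRM_SYSTEM_CRMD)  => crm_msg_crmd
 *     text2msg_type("attrd")          => crm_msg_attrd
 *     text2msg_type("4321")           => 4321 (treated as a client PID)
 *     text2msg_type("bogus")          => crm_msg_none (failed sanity check)
 */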
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/cluster/compat.h>
/*!
* \brief Connect to Corosync CPG
*
* \param[in,out] cluster Cluster object
*
* \return TRUE on success, otherwise FALSE
*/
gboolean
cluster_connect_cpg(pcmk_cluster_t *cluster)
{
return pcmk__cpg_connect(cluster) == pcmk_rc_ok;
}
void
cluster_disconnect_cpg(pcmk_cluster_t *cluster)
{
pcmk__cpg_disconnect(cluster);
}
// LCOV_EXCL_STOP
// End deprecated API
diff --git a/lib/cluster/election.c b/lib/cluster/election.c
index 02bf024e2c..c2b8d1c1d3 100644
--- a/lib/cluster/election.c
+++ b/lib/cluster/election.c
@@ -1,723 +1,726 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/crm.h>
#define STORM_INTERVAL 2 /* in seconds */
struct election_s {
enum election_result state;
guint count; // How many times local node has voted
char *name; // Descriptive name for this election
char *uname; // Local node's name
GSourceFunc cb; // Function to call if election is won
GHashTable *voted; // Key = node name, value = how node voted
mainloop_timer_t *timeout; // When to abort if all votes not received
int election_wins; // Track wins, for storm detection
bool wrote_blackbox; // Write a storm blackbox at most once
time_t expires; // When storm detection period ends
time_t last_election_loss; // When dampening period ends
};
static void
election_complete(election_t *e)
{
e->state = election_won;
if (e->cb != NULL) {
e->cb(e);
}
election_reset(e);
}
static gboolean
election_timer_cb(gpointer user_data)
{
election_t *e = user_data;
crm_info("%s timed out, declaring local node as winner", e->name);
election_complete(e);
return FALSE;
}
/*!
* \brief Get current state of an election
*
* \param[in] e Election object
*
 * \return Current state of \p e
*/
enum election_result
election_state(const election_t *e)
{
return (e == NULL)? election_error : e->state;
}
/*!
* \brief Create a new election object
*
* Every node that wishes to participate in an election must create an election
* object. Typically, this should be done once, at start-up. A caller should
* only create a single election object.
*
* \param[in] name Label for election (for logging)
* \param[in] uname Local node's name
* \param[in] period_ms How long to wait for all peers to vote
* \param[in] cb Function to call if local node wins election
*
* \return Newly allocated election object on success, NULL on error
* \note The caller is responsible for freeing the returned value using
* election_fini().
*/
election_t *
election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
{
election_t *e = NULL;
static guint count = 0;
CRM_CHECK(uname != NULL, return NULL);
e = calloc(1, sizeof(election_t));
if (e == NULL) {
crm_perror(LOG_CRIT, "Cannot create election");
return NULL;
}
e->uname = strdup(uname);
if (e->uname == NULL) {
crm_perror(LOG_CRIT, "Cannot create election");
free(e);
return NULL;
}
e->name = name? crm_strdup_printf("election-%s", name)
: crm_strdup_printf("election-%u", count++);
e->cb = cb;
e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
election_timer_cb, e);
crm_trace("Created %s", e->name);
return e;
}
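/* Illustrative sketch of typical usage, with a hypothetical callback name
 * (the GSourceFunc signature matches the cb member above):
 *
 *     static gboolean
 *     i_won_cb(gpointer user_data)
 *     {
 *         // Local node won: take over the contested role here
 *         return FALSE;
 *     }
 *
 *     election_t *e = election_init("writer", get_local_node_name(),
 *                                   120000, i_won_cb);  // 2-minute timeout
 */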
/*!
* \brief Disregard any previous vote by specified peer
*
* This discards any recorded vote from a specified peer. Election users should
* call this whenever a voting peer becomes inactive.
*
* \param[in,out] e Election object
* \param[in] uname Name of peer to disregard
*/
void
election_remove(election_t *e, const char *uname)
{
if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
g_hash_table_remove(e->voted, uname);
}
}
/*!
* \brief Stop election timer and disregard all votes
*
* \param[in,out] e Election object
*/
void
election_reset(election_t *e)
{
if (e != NULL) {
crm_trace("Resetting election %s", e->name);
mainloop_timer_stop(e->timeout);
if (e->voted) {
crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
g_hash_table_destroy(e->voted);
e->voted = NULL;
}
}
}
/*!
* \brief Free an election object
*
* Free all memory associated with an election object, stopping its
* election timer (if running).
*
* \param[in,out] e Election object
*/
void
election_fini(election_t *e)
{
if (e != NULL) {
election_reset(e);
crm_trace("Destroying %s", e->name);
mainloop_timer_del(e->timeout);
free(e->uname);
free(e->name);
free(e);
}
}
static void
election_timeout_start(election_t *e)
{
if (e != NULL) {
mainloop_timer_start(e->timeout);
}
}
/*!
* \brief Stop an election's timer, if running
*
* \param[in,out] e Election object
*/
void
election_timeout_stop(election_t *e)
{
if (e != NULL) {
mainloop_timer_stop(e->timeout);
}
}
/*!
* \brief Change an election's timeout (restarting timer if running)
*
* \param[in,out] e Election object
* \param[in] period New timeout
*/
void
election_timeout_set_period(election_t *e, guint period)
{
if (e != NULL) {
mainloop_timer_set_period(e->timeout, period);
} else {
crm_err("No election defined");
}
}
static int
get_uptime(struct timeval *output)
{
static time_t expires = 0;
static struct rusage info;
time_t tm_now = time(NULL);
if (expires < tm_now) {
int rc = 0;
info.ru_utime.tv_sec = 0;
info.ru_utime.tv_usec = 0;
rc = getrusage(RUSAGE_SELF, &info);
output->tv_sec = 0;
output->tv_usec = 0;
if (rc < 0) {
crm_perror(LOG_ERR, "Could not calculate the current uptime");
expires = 0;
return -1;
}
crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
(long)info.ru_utime.tv_usec);
}
expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */
output->tv_sec = info.ru_utime.tv_sec;
output->tv_usec = info.ru_utime.tv_usec;
return 1;
}
static int
compare_age(struct timeval your_age)
{
struct timeval our_age;
get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */
if (our_age.tv_sec > your_age.tv_sec) {
crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
return 1;
} else if (our_age.tv_sec < your_age.tv_sec) {
crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
return -1;
} else if (our_age.tv_usec > your_age.tv_usec) {
crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
(long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
return 1;
} else if (our_age.tv_usec < your_age.tv_usec) {
crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
(long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
return -1;
}
return 0;
}
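/* Worked example: with a local uptime of {10, 500000} and a peer uptime of
 * {10, 250000}, the seconds tie and the microseconds favor the local node,
 * so compare_age() returns 1 (we win). A get_uptime() failure makes our age
 * {0, 0}, which loses to any peer with nonzero uptime.
 */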
/*!
* \brief Start a new election by offering local node's candidacy
*
* Broadcast a "vote" election message containing the local node's ID,
* (incremented) election counter, and uptime, and start the election timer.
*
* \param[in,out] e Election object
*
* \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
* all active peers do so, or if the election times out, the local node
* wins the election. (If we lose to any peer vote, we will stop the
* timer, so a timeout means we did not lose -- either some peer did not
* vote, or we did not call election_check() in time.)
*/
void
election_vote(election_t *e)
{
struct timeval age;
xmlNode *vote = NULL;
crm_node_t *our_node;
if (e == NULL) {
crm_trace("Election vote requested, but no election available");
return;
}
- our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster);
+ our_node = pcmk__get_node(0, e->uname, NULL,
+ pcmk__node_search_cluster_member);
if (!pcmk__cluster_is_node_active(our_node)) {
crm_trace("Cannot vote in %s yet: local node not connected to cluster",
e->name);
return;
}
election_reset(e);
e->state = election_in_progress;
vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
e->count++;
crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->uuid);
crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count);
// Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
get_uptime(&age);
crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
free_xml(vote);
crm_debug("Started %s round %d", e->name, e->count);
election_timeout_start(e);
return;
}
/*!
* \brief Check whether local node has won an election
*
* If all known peers have sent no-vote messages, stop the election timer, set
* the election state to won, and call any registered win callback.
*
* \param[in,out] e Election object
*
* \return TRUE if local node has won, FALSE otherwise
* \note If all known peers have sent no-vote messages, but the election owner
* does not call this function, the election will not be won (and the
* callback will not be called) until the election times out.
* \note This should be called when election_count_vote() returns
* \c election_in_progress.
*/
bool
election_check(election_t *e)
{
int voted_size = 0;
int num_members = 0;
if (e == NULL) {
crm_trace("Election check requested, but no election available");
return FALSE;
}
if (e->voted == NULL) {
crm_trace("%s check requested, but no votes received yet", e->name);
return FALSE;
}
voted_size = g_hash_table_size(e->voted);
num_members = pcmk__cluster_num_active_nodes();
    /* If there are more votes than members, it is better to wait for the
     * timeout and give the cluster time to stabilize.
     */
if (voted_size >= num_members) {
/* we won and everyone has voted */
election_timeout_stop(e);
if (voted_size > num_members) {
GHashTableIter gIter;
const crm_node_t *node;
char *key = NULL;
crm_warn("Received too many votes in %s", e->name);
g_hash_table_iter_init(&gIter, crm_peer_cache);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
if (pcmk__cluster_is_node_active(node)) {
crm_warn("* expected vote: %s", node->uname);
}
}
g_hash_table_iter_init(&gIter, e->voted);
while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
crm_warn("* actual vote: %s", key);
}
}
crm_info("%s won by local node", e->name);
election_complete(e);
return TRUE;
} else {
crm_debug("%s still waiting on %d of %d votes",
e->name, num_members - voted_size, num_members);
}
return FALSE;
}
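/* Illustrative message-handling flow, combining the two calls per the notes
 * above (a sketch, not a prescribed pattern):
 *
 *     switch (election_count_vote(e, message, can_win)) {
 *         case election_start:
 *             election_vote(e);   // Campaign again by voting down peers
 *             break;
 *         case election_in_progress:
 *             election_check(e);  // May declare the local node the winner
 *             break;
 *         default:
 *             break;              // Lost, or nothing further to do
 *     }
 */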
#define LOSS_DAMPEN 2 /* in seconds */
struct vote {
const char *op;
const char *from;
const char *version;
const char *election_owner;
int election_id;
struct timeval age;
};
/*!
* \brief Unpack an election message
*
* \param[in] e Election object (for logging only)
* \param[in] message Election message XML
* \param[out] vote Parsed fields from message
*
* \return TRUE if election message and election are valid, FALSE otherwise
* \note The parsed struct's pointer members are valid only for the lifetime of
* the message argument.
*/
static bool
parse_election_message(const election_t *e, const xmlNode *message,
struct vote *vote)
{
CRM_CHECK(message && vote, return FALSE);
vote->election_id = -1;
vote->age.tv_sec = -1;
vote->age.tv_usec = -1;
vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
vote->from = crm_element_value(message, PCMK__XA_SRC);
vote->version = crm_element_value(message, PCMK_XA_VERSION);
vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);
crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
|| (vote->election_owner == NULL) || (vote->election_id < 0)) {
crm_warn("Invalid %s message from %s in %s ",
(vote->op? vote->op : "election"),
(vote->from? vote->from : "unspecified node"),
(e? e->name : "election"));
return FALSE;
}
// Op-specific validation
if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
/* Only vote ops have uptime.
Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
*/
crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
crm_warn("Cannot count %s %s from %s because it is missing uptime",
(e? e->name : "election"), vote->op, vote->from);
return FALSE;
}
} else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
crm_info("Cannot process %s message from %s because %s is not a known election op",
(e? e->name : "election"), vote->from, vote->op);
return FALSE;
}
// Election validation
if (e == NULL) {
crm_info("Cannot count %s from %s because no election available",
vote->op, vote->from);
return FALSE;
}
/* If the membership cache is NULL, we REALLY shouldn't be voting --
* the question is how we managed to get here.
*/
if (crm_peer_cache == NULL) {
crm_info("Cannot count %s %s from %s because no peer information available",
e->name, vote->op, vote->from);
return FALSE;
}
return TRUE;
}
static void
record_vote(election_t *e, struct vote *vote)
{
CRM_ASSERT(e && vote && vote->from && vote->op);
if (e->voted == NULL) {
e->voted = pcmk__strkey_table(free, free);
}
pcmk__insert_dup(e->voted, vote->from, vote->op);
}
static void
send_no_vote(crm_node_t *peer, struct vote *vote)
{
// @TODO probably shouldn't hardcode CRM_SYSTEM_CRMD and crm_msg_crmd
xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
send_cluster_message(peer, crm_msg_crmd, novote, TRUE);
free_xml(novote);
}
/*!
* \brief Process an election message (vote or no-vote) from a peer
*
* \param[in,out] e Election object
* \param[in] message Election message XML from peer
* \param[in] can_win Whether local node is eligible to win
*
* \return Election state after new vote is considered
* \note If the peer message is a vote, and we prefer the peer to win, this will
* send a no-vote reply to the peer.
 * \note The situations "we lost to this vote" and "this is a late no-vote
* after we've already lost" both return election_lost. If a caller needs
* to distinguish them, it should save the current state before calling
* this function, and then compare the result.
*/
enum election_result
election_count_vote(election_t *e, const xmlNode *message, bool can_win)
{
int log_level = LOG_INFO;
gboolean done = FALSE;
gboolean we_lose = FALSE;
const char *reason = "unknown";
bool we_are_owner = FALSE;
crm_node_t *our_node = NULL, *your_node = NULL;
time_t tm_now = time(NULL);
struct vote vote;
CRM_CHECK(message != NULL, return election_error);
if (parse_election_message(e, message, &vote) == FALSE) {
return election_error;
}
- your_node = pcmk__get_node(0, vote.from, NULL, pcmk__node_search_cluster);
- our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster);
+ your_node = pcmk__get_node(0, vote.from, NULL,
+ pcmk__node_search_cluster_member);
+ our_node = pcmk__get_node(0, e->uname, NULL,
+ pcmk__node_search_cluster_member);
we_are_owner = (our_node != NULL)
&& pcmk__str_eq(our_node->uuid, vote.election_owner,
pcmk__str_none);
if (!can_win) {
reason = "Not eligible";
we_lose = TRUE;
} else if (!pcmk__cluster_is_node_active(our_node)) {
reason = "We are not part of the cluster";
log_level = LOG_ERR;
we_lose = TRUE;
} else if (we_are_owner && (vote.election_id != e->count)) {
log_level = LOG_TRACE;
reason = "Superseded";
done = TRUE;
} else if (!pcmk__cluster_is_node_active(your_node)) {
        /* Possibly the message was cached in the FSA queue while the peer was
         * still an active cluster member
         */
reason = "Peer is not part of our cluster";
log_level = LOG_WARNING;
done = TRUE;
} else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
|| pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
/* Receiving our own broadcast vote, or a no-vote from peer, is a vote
* for us to win
*/
if (!we_are_owner) {
crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
e->name, vote.election_id, vote.op, vote.from,
vote.election_owner);
return election_error;
}
if (e->state != election_in_progress) {
// Should only happen if we already lost
crm_debug("Not counting %s round %d %s from %s because no election in progress",
e->name, vote.election_id, vote.op, vote.from);
return e->state;
}
record_vote(e, &vote);
reason = "Recorded";
done = TRUE;
} else {
// A peer vote requires a comparison to determine which node is better
int age_result = compare_age(vote.age);
int version_result = compare_version(vote.version, CRM_FEATURE_SET);
if (version_result < 0) {
reason = "Version";
we_lose = TRUE;
} else if (version_result > 0) {
reason = "Version";
} else if (age_result < 0) {
reason = "Uptime";
we_lose = TRUE;
} else if (age_result > 0) {
reason = "Uptime";
} else if (strcasecmp(e->uname, vote.from) > 0) {
reason = "Host name";
we_lose = TRUE;
} else {
reason = "Host name";
}
}
if (e->expires < tm_now) {
e->election_wins = 0;
e->expires = tm_now + STORM_INTERVAL;
} else if (done == FALSE && we_lose == FALSE) {
int peers = 1 + g_hash_table_size(crm_peer_cache);
        /* If every node has to vote down every other node, that's N*(N-1)
         * total elections. Allow some leeway before _really_ complaining.
         */
e->election_wins++;
if (e->election_wins > (peers * peers)) {
crm_warn("%s election storm detected: %d wins in %d seconds",
e->name, e->election_wins, STORM_INTERVAL);
e->election_wins = 0;
e->expires = tm_now + STORM_INTERVAL;
if (e->wrote_blackbox == FALSE) {
/* It's questionable whether a black box (from every node in the
* cluster) would be truly helpful in diagnosing an election
* storm. It's also highly doubtful a production environment
* would get multiple election storms from distinct causes, so
* saving one blackbox per process lifetime should be
* sufficient. Alternatives would be to save a timestamp of the
* last blackbox write instead of a boolean, and write a new one
             * if some amount of time has passed; or to save a storm count and
             * write a blackbox on every Nth occurrence.
*/
crm_write_blackbox(0, NULL);
e->wrote_blackbox = TRUE;
}
}
}
if (done) {
do_crm_log(log_level + 1,
"Processed %s round %d %s (current round %d) from %s (%s)",
e->name, vote.election_id, vote.op, e->count, vote.from,
reason);
return e->state;
} else if (we_lose == FALSE) {
/* We track the time of the last election loss to implement an election
* dampening period, reducing the likelihood of an election storm. If
* this node has lost within the dampening period, don't start a new
* election, even if we win against a peer's vote -- the peer we lost to
* should win again.
*
* @TODO This has a problem case: if an election winner immediately
* leaves the cluster, and a new election is immediately called, all
* nodes could lose, with no new winner elected. The ideal solution
* would be to tie the election structure with the peer caches, which
* would allow us to clear the dampening when the previous winner
* leaves (and would allow other improvements as well).
*/
if ((e->last_election_loss == 0)
|| ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
e->name, vote.election_id, vote.election_owner, vote.op,
vote.from, reason);
e->last_election_loss = 0;
election_timeout_stop(e);
/* Start a new election by voting down this, and other, peers */
e->state = election_start;
return e->state;
} else {
char *loss_time = ctime(&e->last_election_loss);
if (loss_time) {
// Show only HH:MM:SS
loss_time += 11;
loss_time[8] = '\0';
}
crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
e->name, vote.election_id, vote.election_owner, vote.from,
LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
}
}
e->last_election_loss = tm_now;
do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
e->name, vote.election_id, vote.election_owner, vote.op,
vote.from, reason);
election_reset(e);
send_no_vote(your_node, &vote);
e->state = election_lost;
return e->state;
}
/*!
* \brief Reset any election dampening currently in effect
*
* \param[in,out] e Election object to clear
*/
void
election_clear_dampening(election_t *e)
{
e->last_election_loss = 0;
}
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
index e68c8a4c77..fb5eeeb611 100644
--- a/lib/cluster/membership.c
+++ b/lib/cluster/membership.c
@@ -1,1541 +1,1567 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <inttypes.h> // PRIu32
#include <sys/param.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <glib.h>
#include <crm/common/ipc.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include "crmcluster_private.h"
/* The peer cache remembers cluster nodes that have been seen.
* This is managed mostly automatically by libcluster, based on
* cluster membership events.
*
* Because cluster nodes can have conflicting names or UUIDs,
* the hash table key is a uniquely generated ID.
+ *
+ * @COMPAT When this is internal, rename to cluster_node_member_cache and make
+ * static.
*/
GHashTable *crm_peer_cache = NULL;
/*
* The remote peer cache tracks pacemaker_remote nodes. While the
* value has the same type as the peer cache's, it is tracked separately for
* three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
* so the name (which is also the UUID) is used as the hash table key; there
* is no equivalent of membership events, so management is not automatic; and
* most users of the peer cache need to exclude pacemaker_remote nodes.
*
* That said, using a single cache would be more logical and less error-prone,
* so it would be a good idea to merge them one day.
*
* libcluster provides two avenues for populating the cache:
* pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node()
* directly manage it, while refresh_remote_nodes() populates it via the CIB.
*/
GHashTable *crm_remote_peer_cache = NULL;
/*
- * The known node cache tracks cluster and remote nodes that have been seen in
+ * The CIB cluster node cache tracks cluster nodes that have been seen in
* the CIB. It is useful mainly when a caller needs to know about a node that
* may no longer be in the membership, but doesn't want to add the node to the
* main peer cache tables.
*/
-static GHashTable *known_node_cache = NULL;
+static GHashTable *cluster_node_cib_cache = NULL;
unsigned long long crm_peer_seq = 0;
gboolean crm_have_quorum = FALSE;
static gboolean crm_autoreap = TRUE;
// Flag setting and clearing for crm_node_t:flags
#define set_peer_flags(peer, flags_to_set) do { \
(peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Peer", (peer)->uname, \
(peer)->flags, (flags_to_set), \
#flags_to_set); \
} while (0)
#define clear_peer_flags(peer, flags_to_clear) do { \
(peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Peer", (peer)->uname, \
(peer)->flags, (flags_to_clear), \
#flags_to_clear); \
} while (0)
static void update_peer_uname(crm_node_t *node, const char *uname);
-static crm_node_t *find_known_node(const char *id, const char *uname);
+static crm_node_t *find_cib_cluster_node(const char *id, const char *uname);
/*!
* \internal
* \brief Get the number of Pacemaker Remote nodes that have been seen
*
* \return Number of cached Pacemaker Remote nodes
*/
unsigned int
pcmk__cluster_num_remote_nodes(void)
{
if (crm_remote_peer_cache == NULL) {
return 0U;
}
return g_hash_table_size(crm_remote_peer_cache);
}
/*!
* \internal
* \brief Get a remote node cache entry, creating it if necessary
*
* \param[in] node_name Name of remote node
*
* \return Cache entry for node on success, or \c NULL (and set \c errno)
* otherwise
*
* \note When creating a new entry, this will leave the node state undetermined.
* The caller should also call \c pcmk__update_peer_state() if the state
* is known.
* \note Because this can add and remove cache entries, callers should not
* assume any previously obtained cache entry pointers remain valid.
*/
crm_node_t *
pcmk__cluster_lookup_remote_node(const char *node_name)
{
crm_node_t *node;
char *node_name_copy = NULL;
if (node_name == NULL) {
errno = EINVAL;
return NULL;
}
/* It's theoretically possible that the node was added to the cluster peer
* cache before it was known to be a Pacemaker Remote node. Remove that
* entry unless it has a node ID, which means the name actually is
* associated with a cluster node. (@TODO return an error in that case?)
*/
- node = pcmk__search_node_caches(0, node_name, pcmk__node_search_cluster);
+ node = pcmk__search_node_caches(0, node_name,
+ pcmk__node_search_cluster_member);
if ((node != NULL) && (node->uuid == NULL)) {
/* node_name could be a pointer into the cache entry being removed, so
* reassign it to a copy before the original gets freed
*/
node_name_copy = strdup(node_name);
if (node_name_copy == NULL) {
errno = ENOMEM;
return NULL;
}
node_name = node_name_copy;
pcmk__cluster_forget_cluster_node(0, node_name);
}
/* Return existing cache entry if one exists */
node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
if (node) {
free(node_name_copy);
return node;
}
/* Allocate a new entry */
node = calloc(1, sizeof(crm_node_t));
if (node == NULL) {
free(node_name_copy);
return NULL;
}
/* Populate the essential information */
set_peer_flags(node, crm_remote_node);
node->uuid = strdup(node_name);
if (node->uuid == NULL) {
free(node);
errno = ENOMEM;
free(node_name_copy);
return NULL;
}
/* Add the new entry to the cache */
g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
crm_trace("added %s to remote cache", node_name);
/* Update the entry's uname, ensuring peer status callbacks are called */
update_peer_uname(node, node_name);
free(node_name_copy);
return node;
}
/*!
* \internal
* \brief Remove a node from the Pacemaker Remote node cache
*
* \param[in] node_name Name of node to remove from cache
*
* \note The caller must be careful not to use \p node_name after calling this
* function if it might be a pointer into the cache entry being removed.
*/
void
pcmk__cluster_forget_remote_node(const char *node_name)
{
/* Do a lookup first, because node_name could be a pointer within the entry
* being removed -- we can't log it *after* removing it.
*/
if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) {
crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
g_hash_table_remove(crm_remote_peer_cache, node_name);
}
}
/*!
* \internal
* \brief Return node status based on a CIB status entry
*
* \param[in] node_state XML of node state
*
* \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is false in
* \c PCMK__XE_NODE_STATE, \c CRM_NODE_MEMBER otherwise
* \note Unlike most boolean XML attributes, this one defaults to true, for
* backward compatibility with older controllers that don't set it.
*/
static const char *
remote_state_from_cib(const xmlNode *node_state)
{
bool status = false;
if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM,
&status) == pcmk_rc_ok) && !status) {
return CRM_NODE_LOST;
} else {
return CRM_NODE_MEMBER;
}
}
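/* Informal example: given a status entry like
 *
 *     <node_state id="remote1" in_ccm="false" ... />
 *
 * this returns CRM_NODE_LOST, while a missing or "true" PCMK__XA_IN_CCM
 * attribute yields CRM_NODE_MEMBER (the backward-compatible default noted
 * above).
 */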
/* user data for looping through remote node xpath searches */
struct refresh_data {
const char *field; /* XML attribute to check for node name */
gboolean has_state; /* whether to update node state based on XML */
};
/*!
* \internal
* \brief Process one pacemaker_remote node xpath search result
*
* \param[in] result XML search result
* \param[in] user_data what to look for in the XML
*/
static void
remote_cache_refresh_helper(xmlNode *result, void *user_data)
{
const struct refresh_data *data = user_data;
const char *remote = crm_element_value(result, data->field);
const char *state = NULL;
crm_node_t *node;
CRM_CHECK(remote != NULL, return);
/* Determine node's state, if the result has it */
if (data->has_state) {
state = remote_state_from_cib(result);
}
/* Check whether cache already has entry for node */
node = g_hash_table_lookup(crm_remote_peer_cache, remote);
if (node == NULL) {
/* Node is not in cache, so add a new entry for it */
node = pcmk__cluster_lookup_remote_node(remote);
CRM_ASSERT(node);
if (state) {
pcmk__update_peer_state(__func__, node, state, 0);
}
} else if (pcmk_is_set(node->flags, crm_node_dirty)) {
/* Node is in cache and hasn't been updated already, so mark it clean */
clear_peer_flags(node, crm_node_dirty);
if (state) {
pcmk__update_peer_state(__func__, node, state, 0);
}
}
}
static void
mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
set_peer_flags((crm_node_t *) value, crm_node_dirty);
}
static gboolean
is_dirty(gpointer key, gpointer value, gpointer user_data)
{
return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
}
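/* mark_dirty() and is_dirty() support a simple mark-and-sweep refresh:
 * refresh_remote_nodes() below first flags every cached entry as dirty,
 * clears the flag on each entry found in the CIB, and finally sweeps out
 * whatever is still flagged.
 */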
/*!
* \internal
* \brief Repopulate the remote node cache based on CIB XML
*
* \param[in] cib CIB XML to parse
*/
static void
refresh_remote_nodes(xmlNode *cib)
{
struct refresh_data data;
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
/* First, we mark all existing cache entries as dirty,
* so that later we can remove any that weren't in the CIB.
* We don't empty the cache, because we need to detect changes in state.
*/
g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
/* Look for guest nodes and remote nodes in the status section */
data.field = PCMK_XA_ID;
data.has_state = TRUE;
crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS,
remote_cache_refresh_helper, &data);
/* Look for guest nodes and remote nodes in the configuration section,
* because they may have just been added and not have a status entry yet.
* In that case, the cached node state will be left NULL, so that the
* peer status callback isn't called until we're sure the node started
* successfully.
*/
data.field = PCMK_XA_VALUE;
data.has_state = FALSE;
crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG,
remote_cache_refresh_helper, &data);
data.field = PCMK_XA_ID;
data.has_state = FALSE;
crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG,
remote_cache_refresh_helper, &data);
/* Remove all old cache entries that weren't seen in the CIB */
g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
}
/*!
* \internal
* \brief Check whether a node is an active cluster node
*
* Remote nodes are never considered active. This guarantees that they can never
* become DC.
*
* \param[in] node Node to check
*
* \return \c true if the node is an active cluster node, or \c false otherwise
*/
bool
pcmk__cluster_is_node_active(const crm_node_t *node)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) {
return false;
}
switch (cluster_layer) {
case pcmk_cluster_layer_corosync:
#if SUPPORT_COROSYNC
return crm_is_corosync_peer_active(node);
#else
break;
#endif // SUPPORT_COROSYNC
default:
break;
}
crm_err("Unhandled cluster layer: %s",
pcmk_cluster_layer_text(cluster_layer));
return false;
}
/*!
* \internal
* \brief Check if a node's entry should be removed from the cluster node cache
*
* A node should be removed from the cache if it's inactive and matches another
* \c crm_node_t (the search object). The node is considered a mismatch if any
* of the following are true:
* * The search object is \c NULL.
* * The search object has an ID set and the cached node's ID does not match it.
* * The search object does not have an ID set, and the cached node's name does
* not match the search node's name. (If both names are \c NULL, it's a
* match.)
*
* Otherwise, the node is considered a match.
*
* Note that if the search object has both an ID and a name set, the name is
* ignored for matching purposes.
*
* \param[in] key Ignored
* \param[in] value \c crm_node_t object from cluster node cache
* \param[in] user_data \c crm_node_t object to match against (search object)
*
* \return \c TRUE if the node entry should be removed from \c crm_peer_cache,
* or \c FALSE otherwise
*/
static gboolean
should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data)
{
crm_node_t *node = value;
crm_node_t *search = user_data;
if (search == NULL) {
return FALSE;
}
if ((search->id != 0) && (node->id != search->id)) {
return FALSE;
}
if ((search->id == 0)
&& !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
// @TODO Consider name even if ID is set?
return FALSE;
}
if (pcmk__cluster_is_node_active(value)) {
return FALSE;
}
crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership "
"cache",
pcmk__s(node->uname, "(unknown)"), node->id);
return TRUE;
}
/*!
* \internal
* \brief Remove one or more inactive nodes from the cluster node cache
*
* All inactive nodes matching \p id and \p node_name as described in
* \c should_forget_cluster_node documentation are removed from the cache.
*
* If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed
* from the cache regardless of ID and name. This differs from clearing the
* cache, in that entries for active nodes are preserved.
*
* \param[in] id ID of node to remove from cache (0 to ignore)
* \param[in] node_name Name of node to remove from cache (ignored if \p id is
* nonzero)
*
* \note \p node_name is not modified directly, but it will be freed if it's a
* pointer into a cache entry that is removed.
*/
void
pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name)
{
crm_node_t search = { 0, };
char *criterion = NULL; // For logging
guint matches = 0;
if (crm_peer_cache == NULL) {
crm_trace("Membership cache not initialized, ignoring removal request");
return;
}
search.id = id;
search.uname = pcmk__str_copy(node_name); // May log after original freed
if (id > 0) {
criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id);
} else if (node_name != NULL) {
criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name);
}
matches = g_hash_table_foreach_remove(crm_peer_cache,
should_forget_cluster_node, &search);
if (matches > 0) {
if (criterion != NULL) {
crm_notice("Removed %u inactive node%s with %s from the membership "
"cache",
matches, pcmk__plural_s(matches), criterion);
} else {
crm_notice("Removed all (%u) inactive cluster nodes from the "
"membership cache",
matches);
}
} else {
crm_info("No inactive cluster nodes%s%s to remove from the membership "
"cache",
((criterion != NULL)? " with" : ""), pcmk__s(criterion, ""));
}
free(search.uname);
free(criterion);
}
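/* Informal usage examples, following the matching rules documented above:
 *
 *     pcmk__cluster_forget_cluster_node(3, NULL);      // Inactive node ID 3
 *     pcmk__cluster_forget_cluster_node(0, "node-a");  // Inactive "node-a"
 *     pcmk__cluster_forget_cluster_node(0, NULL);      // All inactive nodes
 */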
static void
count_peer(gpointer key, gpointer value, gpointer user_data)
{
unsigned int *count = user_data;
crm_node_t *node = value;
if (pcmk__cluster_is_node_active(node)) {
*count = *count + 1;
}
}
/*!
* \internal
* \brief Get the number of active cluster nodes that have been seen
*
* Remote nodes are never considered active. This guarantees that they can never
* become DC.
*
* \return Number of active nodes in the cluster node cache
*/
unsigned int
pcmk__cluster_num_active_nodes(void)
{
unsigned int count = 0;
if (crm_peer_cache != NULL) {
g_hash_table_foreach(crm_peer_cache, count_peer, &count);
}
return count;
}
static void
destroy_crm_node(gpointer data)
{
crm_node_t *node = data;
crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
free(node->uname);
free(node->state);
free(node->uuid);
free(node->expected);
free(node->conn_host);
free(node);
}
+/*!
+ * \internal
+ * \brief Initialize node caches
+ */
void
-crm_peer_init(void)
+pcmk__cluster_init_node_caches(void)
{
if (crm_peer_cache == NULL) {
crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
}
if (crm_remote_peer_cache == NULL) {
crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
}
- if (known_node_cache == NULL) {
- known_node_cache = pcmk__strikey_table(free, destroy_crm_node);
+ if (cluster_node_cib_cache == NULL) {
+ cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node);
}
}
+/*!
+ * \internal
+ * \brief Destroy node caches
+ */
void
-crm_peer_destroy(void)
+pcmk__cluster_destroy_node_caches(void)
{
if (crm_peer_cache != NULL) {
- crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
+ crm_trace("Destroying peer cache with %d members",
+ g_hash_table_size(crm_peer_cache));
g_hash_table_destroy(crm_peer_cache);
crm_peer_cache = NULL;
}
if (crm_remote_peer_cache != NULL) {
crm_trace("Destroying remote peer cache with %d members",
pcmk__cluster_num_remote_nodes());
g_hash_table_destroy(crm_remote_peer_cache);
crm_remote_peer_cache = NULL;
}
- if (known_node_cache != NULL) {
- crm_trace("Destroying known node cache with %d members",
- g_hash_table_size(known_node_cache));
- g_hash_table_destroy(known_node_cache);
- known_node_cache = NULL;
+ if (cluster_node_cib_cache != NULL) {
+ crm_trace("Destroying configured cluster node cache with %d members",
+ g_hash_table_size(cluster_node_cib_cache));
+ g_hash_table_destroy(cluster_node_cib_cache);
+ cluster_node_cib_cache = NULL;
}
-
}
static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
const void *) = NULL;
/*!
* \brief Set a client function that will be called after peer status changes
*
* \param[in] dispatch Pointer to function to use as callback
*
* \note Previously, client callbacks were responsible for peer cache
* management. This is no longer the case, and client callbacks should do
* only client-specific handling. Callbacks MUST NOT add or remove entries
* in the peer caches.
*/
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
peer_status_callback = dispatch;
}
/*!
* \brief Tell the library whether to automatically reap lost nodes
*
* If TRUE (the default), calling crm_update_peer_proc() will also update the
* peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and pcmk__update_peer_state()
* will reap peers whose state changes to anything other than CRM_NODE_MEMBER.
* Callers should leave this enabled unless they plan to manage the cache
* separately on their own.
*
* \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable
*/
void
crm_set_autoreap(gboolean autoreap)
{
crm_autoreap = autoreap;
}
static void
dump_peer_hash(int level, const char *caller)
{
GHashTableIter iter;
const char *id = NULL;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
}
}
static gboolean
hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
return value == user_data;
}
/*!
* \internal
* \brief Search caches for a node (cluster or Pacemaker Remote)
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] flags Group of enum pcmk__node_search_flags
*
* \return Node cache entry if found, otherwise NULL
*/
crm_node_t *
pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
{
crm_node_t *node = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) {
node = g_hash_table_lookup(crm_remote_peer_cache, uname);
}
- if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster)) {
+ if ((node == NULL)
+ && pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
+
node = pcmk__search_cluster_node_cache(id, uname, NULL);
}
- if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_known)) {
+ if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) {
char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id);
- node = find_known_node(id_str, uname);
+ node = find_cib_cluster_node(id_str, uname);
free(id_str);
}
return node;
}
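/* Informal example: to consult every cache, a caller can combine the flags:
 *
 *     node = pcmk__search_node_caches(0, "node-a",
 *                                     pcmk__node_search_remote
 *                                     |pcmk__node_search_cluster_member
 *                                     |pcmk__node_search_cluster_cib);
 *
 * Remote entries match by name only, and the CIB-based cache is consulted
 * last, per the ordering above.
 */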
/*!
* \internal
* \brief Purge a node from cache (both cluster and Pacemaker Remote)
*
* \param[in] node_name If not NULL, purge only nodes with this name
* \param[in] node_id If not 0, purge cluster nodes only if they have this ID
*
* \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
* If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
* nodes that match \p node_name will be purged, and cluster nodes that
* match both \p node_name and \p node_id will be purged.
* \note The caller must be careful not to use \p node_name after calling this
* function if it might be a pointer into a cache entry being removed.
*/
void
pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
{
char *node_name_copy = NULL;
if ((node_name == NULL) && (node_id == 0U)) {
return;
}
// Purge from Pacemaker Remote node cache
if ((node_name != NULL)
&& (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) {
/* node_name could be a pointer into the cache entry being purged,
* so reassign it to a copy before the original gets freed
*/
node_name_copy = pcmk__str_copy(node_name);
node_name = node_name_copy;
crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
g_hash_table_remove(crm_remote_peer_cache, node_name);
}
pcmk__cluster_forget_cluster_node(node_id, node_name);
free(node_name_copy);
}
/*!
* \internal
* \brief Search cluster node cache
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
* node ID to search for
*
* \return Cluster node cache entry if found, otherwise NULL
*/
crm_node_t *
pcmk__search_cluster_node_cache(unsigned int id, const char *uname,
const char *uuid)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
if (uname != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id > 0) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if(node->id == id) {
crm_trace("ID match: %u = %p", node->id, node);
by_id = node;
break;
}
}
} else if (uuid != NULL) {
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) {
crm_trace("UUID match: %s = %p", node->uuid, node);
by_id = node;
break;
}
}
}
node = by_id; /* Good default */
if(by_id == by_name) {
/* Nothing to do if they match (both NULL counts) */
crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
} else if(by_id == NULL && by_name) {
crm_trace("Only one: %p for %u/%s", by_name, id, uname);
if(id && by_name->id) {
dump_peer_hash(LOG_WARNING, __func__);
crm_crit("Node %u and %u share the same name '%s'",
id, by_name->id, uname);
node = NULL; /* Create a new one */
} else {
node = by_name;
}
} else if(by_name == NULL && by_id) {
crm_trace("Only one: %p for %u/%s", by_id, id, uname);
if(uname && by_id->uname) {
dump_peer_hash(LOG_WARNING, __func__);
crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
uname, by_id->uname, id, uname);
}
} else if(uname && by_id->uname) {
if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
} else {
crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
dump_peer_hash(LOG_INFO, __func__);
crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
TRUE);
}
} else if(id && by_name->id) {
crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
} else {
/* Simple merge */
/* Only corosync-based clusters use node IDs. The functions that call
* pcmk__update_peer_state() and crm_update_peer_proc() only know
* nodeid, so 'by_id' is authoritative when merging.
*/
dump_peer_hash(LOG_DEBUG, __func__);
crm_info("Merging %p into %p", by_name, by_id);
g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
}
return node;
}
#if SUPPORT_COROSYNC
static guint
remove_conflicting_peer(crm_node_t *node)
{
int matches = 0;
GHashTableIter iter;
crm_node_t *existing_node = NULL;
if (node->id == 0 || node->uname == NULL) {
return 0;
}
if (!pcmk__corosync_has_nodelist()) {
return 0;
}
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
if (existing_node->id > 0
&& existing_node->id != node->id
&& existing_node->uname != NULL
&& strcasecmp(existing_node->uname, node->uname) == 0) {
if (pcmk__cluster_is_node_active(existing_node)) {
continue;
}
crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
existing_node->id, existing_node->uname, node->id);
g_hash_table_iter_remove(&iter);
matches++;
}
}
return matches;
}
#endif
/*!
* \brief Get a cluster node cache entry
*
* \param[in] id If not 0, cluster node ID to search for
* \param[in] uname If not NULL, node name to search for
* \param[in] uuid If not NULL while id is 0, node UUID instead of cluster
* node ID to search for
* \param[in] flags Group of enum pcmk__node_search_flags
*
* \return (Possibly newly created) cluster node cache entry
*/
/* coverity[-alloc] Memory is referenced in one or both hashtables */
crm_node_t *
pcmk__get_node(unsigned int id, const char *uname, const char *uuid,
uint32_t flags)
{
crm_node_t *node = NULL;
char *uname_lookup = NULL;
CRM_ASSERT(id > 0 || uname != NULL);
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
// Check the Pacemaker Remote node cache first
if (pcmk_is_set(flags, pcmk__node_search_remote)) {
node = g_hash_table_lookup(crm_remote_peer_cache, uname);
if (node != NULL) {
return node;
}
}
- if (!pcmk_is_set(flags, pcmk__node_search_cluster)) {
+ if (!pcmk_is_set(flags, pcmk__node_search_cluster_member)) {
return NULL;
}
node = pcmk__search_cluster_node_cache(id, uname, uuid);
    /* If uname wasn't provided, and the cache search did not turn up a uname
     * based on id, look up the node name using the id in the cluster
     * membership. */
if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
uname_lookup = get_node_name(id);
}
if (uname_lookup) {
uname = uname_lookup;
crm_trace("Inferred a name of '%s' for node %u", uname, id);
/* try to turn up the node one more time now that we know the uname. */
if (node == NULL) {
node = pcmk__search_cluster_node_cache(id, uname, uuid);
}
}
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = pcmk__assert_alloc(1, sizeof(crm_node_t));
crm_info("Created entry %s/%p for node %s/%u (%d total)",
uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
g_hash_table_replace(crm_peer_cache, uniqueid, node);
}
if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
crm_info("Node %u is now known as %s", id, uname);
}
if(id > 0 && node->id == 0) {
node->id = id;
}
if (uname && (node->uname == NULL)) {
update_peer_uname(node, uname);
}
if(node->uuid == NULL) {
if (uuid == NULL) {
uuid = crm_peer_uuid(node);
}
if (uuid) {
crm_info("Node %u has uuid %s", id, uuid);
} else {
crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
}
}
free(uname_lookup);
return node;
}
/*!
* \internal
* \brief Update a node's uname
*
* \param[in,out] node Node object to update
* \param[in] uname New name to set
*
* \note This function should not be called within a peer cache iteration,
* because in some cases it can remove conflicting cache entries,
* which would invalidate the iterator.
*/
static void
update_peer_uname(crm_node_t *node, const char *uname)
{
CRM_CHECK(uname != NULL,
crm_err("Bug: can't update node name without name"); return);
CRM_CHECK(node != NULL,
crm_err("Bug: can't update node name to %s without node", uname);
return);
if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
crm_debug("Node uname '%s' did not change", uname);
return;
}
for (const char *c = uname; *c; ++c) {
if ((*c >= 'A') && (*c <= 'Z')) {
crm_warn("Node names with capitals are discouraged, consider changing '%s'",
uname);
break;
}
}
pcmk__str_update(&node->uname, uname);
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_uname, node, NULL);
}
#if SUPPORT_COROSYNC
if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)
&& !pcmk_is_set(node->flags, crm_remote_node)) {
remove_conflicting_peer(node);
}
#endif
}
/*!
* \internal
* \brief Get log-friendly string equivalent of a process flag
*
* \param[in] proc Process flag
*
* \return Log-friendly string equivalent of \p proc
*/
static inline const char *
proc2text(enum crm_proc_flag proc)
{
const char *text = "unknown";
switch (proc) {
case crm_proc_none:
text = "none";
break;
case crm_proc_based:
text = "pacemaker-based";
break;
case crm_proc_controld:
text = "pacemaker-controld";
break;
case crm_proc_schedulerd:
text = "pacemaker-schedulerd";
break;
case crm_proc_execd:
text = "pacemaker-execd";
break;
case crm_proc_attrd:
text = "pacemaker-attrd";
break;
case crm_proc_fenced:
text = "pacemaker-fenced";
break;
case crm_proc_cpg:
text = "corosync-cpg";
break;
}
return text;
}
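/* For example, crm_info("Lost %s", proc2text(crm_proc_cpg)) logs
 * "Lost corosync-cpg" instead of a raw flag value.
 */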
/*!
* \internal
* \brief Update a node's process information (and potentially state)
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] flag Bitmask of new process information
* \param[in] status node status (online, offline, etc.)
*
* \return NULL if any node was reaped from peer caches, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function should not be
* called within a cache iteration if reaping is possible, otherwise
* reaping could invalidate the iterator.
*/
crm_node_t *
crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
{
uint32_t last = 0;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
source, proc2text(flag), status);
return NULL);
/* Pacemaker doesn't spawn processes on remote nodes */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return node;
}
last = node->processes;
if (status == NULL) {
node->processes = flag;
if (node->processes != last) {
changed = TRUE;
}
} else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
if ((node->processes & flag) != flag) {
node->processes = pcmk__set_flags_as(__func__, __LINE__,
LOG_TRACE, "Peer process",
node->uname, node->processes,
flag, "processes");
changed = TRUE;
}
} else if (node->processes & flag) {
node->processes = pcmk__clear_flags_as(__func__, __LINE__,
LOG_TRACE, "Peer process",
node->uname, node->processes,
flag, "processes");
changed = TRUE;
}
if (changed) {
if (status == NULL && flag <= crm_proc_none) {
crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
node->id);
} else {
crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
proc2text(flag), status);
}
if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
node->when_online = time(NULL);
} else {
node->when_online = 0;
}
/* Call the client callback first, then update the peer state,
* in case the node will be reaped
*/
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_processes, node, &last);
}
/* The client callback shouldn't touch the peer caches,
* but as a safety net, bail if the peer cache was destroyed.
*/
if (crm_peer_cache == NULL) {
return NULL;
}
if (crm_autoreap) {
const char *peer_state = NULL;
if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
peer_state = CRM_NODE_MEMBER;
} else {
peer_state = CRM_NODE_LOST;
}
node = pcmk__update_peer_state(__func__, node, peer_state, 0);
}
} else {
crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
proc2text(flag), status);
}
return node;
}
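/* Illustrative sketch, not part of the patch: because a reaped node is freed,
 * callers must re-assign from the return value and bail on NULL. The wrapper
 * name is hypothetical.
 */
static void
example_mark_cpg_online(crm_node_t *peer)
{
    peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg,
                                PCMK_VALUE_ONLINE);
    if (peer == NULL) {
        return; // peer was reaped from the cache; the pointer is now invalid
    }
    crm_trace("Peer %s is still cached", peer->uname);
}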
/*!
* \internal
* \brief Update a cluster node cache entry's expected join state
*
* \param[in] source Caller's function name (for logging)
* \param[in,out] node Node to update
* \param[in] expected Node's new join state
*/
void
pcmk__update_peer_expected(const char *source, crm_node_t *node,
const char *expected)
{
char *last = NULL;
gboolean changed = FALSE;
CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
return);
/* Remote nodes don't participate in joins */
if (pcmk_is_set(node->flags, crm_remote_node)) {
return;
}
last = node->expected;
if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
node->expected = strdup(expected);
changed = TRUE;
}
if (changed) {
crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
expected, last);
free(last);
} else {
crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
node->id, expected);
}
}
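/* Illustrative sketch, not part of the patch: the controller typically
 * records the join phase it expects for a peer, e.g. (assuming the
 * join-state string constants from the wider project headers):
 *
 *     pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_MEMBER);
 */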
/*!
* \internal
* \brief Update a node's state and membership information
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] state Node's new state
* \param[in] membership Node's new membership ID
* \param[in,out] iter If not NULL, pointer to node's peer cache iterator
*
* \return NULL if any node was reaped, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function may be called from
* within a peer cache iteration if the iterator is supplied.
*/
static crm_node_t *
update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
uint64_t membership, GHashTableIter *iter)
{
gboolean is_member;
CRM_CHECK(node != NULL,
crm_err("Could not set state for unknown host to %s"
CRM_XS " source=%s", state, source);
return NULL);
is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
if (is_member) {
node->when_lost = 0;
if (membership) {
node->last_seen = membership;
}
}
if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
char *last = node->state;
if (is_member) {
node->when_member = time(NULL);
} else {
node->when_member = 0;
}
node->state = strdup(state);
crm_notice("Node %s state is now %s " CRM_XS
" nodeid=%u previous=%s source=%s", node->uname, state,
node->id, (last? last : "unknown"), source);
if (peer_status_callback != NULL) {
peer_status_callback(crm_status_nstate, node, last);
}
free(last);
if (crm_autoreap && !is_member
&& !pcmk_is_set(node->flags, crm_remote_node)) {
/* We only autoreap from the peer cache, not the remote peer cache,
* because the latter should be managed only by
* refresh_remote_nodes().
*/
            if (iter) {
crm_notice("Purged 1 peer with " PCMK_XA_ID
"=%u and/or uname=%s from the membership cache",
node->id, node->uname);
g_hash_table_iter_remove(iter);
} else {
pcmk__cluster_forget_cluster_node(node->id, node->uname);
}
node = NULL;
}
} else {
crm_trace("Node %s state is unchanged (%s) " CRM_XS
" nodeid=%u source=%s", node->uname, state, node->id, source);
}
return node;
}
/*!
* \brief Update a node's state and membership information
*
* \param[in] source Caller's function name (for log messages)
* \param[in,out] node Node object to update
* \param[in] state Node's new state
* \param[in] membership Node's new membership ID
*
* \return NULL if any node was reaped, value of node otherwise
*
* \note If this function returns NULL, the supplied node object was likely
* freed and should not be used again. This function should not be
* called within a cache iteration if reaping is possible,
* otherwise reaping could invalidate the iterator.
*/
crm_node_t *
pcmk__update_peer_state(const char *source, crm_node_t *node,
const char *state, uint64_t membership)
{
return update_peer_state_iter(source, node, state, membership, NULL);
}
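/* Illustrative sketch, not part of the patch: outside of a cache iteration,
 * callers use this wrapper and honor the NULL-on-reap contract. The function
 * name is hypothetical.
 */
static void
example_node_lost(crm_node_t *node, uint64_t ring_id)
{
    node = pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, ring_id);
    if (node == NULL) {
        crm_trace("Node was autoreaped and must not be dereferenced");
    }
}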
/*!
* \internal
* \brief Reap all nodes from cache whose membership information does not match
*
* \param[in] membership Membership ID of nodes to keep
*/
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_trace("Reaping unseen nodes...");
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
if (node->last_seen != membership) {
if (node->state) {
/*
* Calling update_peer_state_iter() allows us to
* remove the node from crm_peer_cache without
* invalidating our iterator
*/
update_peer_state_iter(__func__, node, CRM_NODE_LOST,
membership, &iter);
} else {
crm_info("State of node %s[%u] is still unknown",
node->uname, node->id);
}
}
}
}
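/* Illustrative sketch, not part of the patch: a membership event handler
 * would first mark every peer listed in the event as seen under the new ring
 * ID, then sweep the rest. Names and parameters here are hypothetical.
 */
static void
example_handle_membership(uint64_t ring_id, const uint32_t *ids, size_t n_ids)
{
    for (size_t i = 0; i < n_ids; i++) {
        crm_node_t *peer = pcmk__get_node(ids[i], NULL, NULL,
                                          pcmk__node_search_cluster_member);
        // Updates last_seen to ring_id, so the sweep below keeps this peer
        pcmk__update_peer_state(__func__, peer, CRM_NODE_MEMBER, ring_id);
    }
    pcmk__reap_unseen_nodes(ring_id); // everyone else is marked lost
}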
static crm_node_t *
-find_known_node(const char *id, const char *uname)
+find_cib_cluster_node(const char *id, const char *uname)
{
GHashTableIter iter;
crm_node_t *node = NULL;
crm_node_t *by_id = NULL;
crm_node_t *by_name = NULL;
if (uname) {
- g_hash_table_iter_init(&iter, known_node_cache);
+ g_hash_table_iter_init(&iter, cluster_node_cib_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (node->uname && strcasecmp(node->uname, uname) == 0) {
crm_trace("Name match: %s = %p", node->uname, node);
by_name = node;
break;
}
}
}
if (id) {
- g_hash_table_iter_init(&iter, known_node_cache);
+ g_hash_table_iter_init(&iter, cluster_node_cib_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (strcasecmp(node->uuid, id) == 0) {
                crm_trace("ID match: %s = %p", id, node);
by_id = node;
break;
}
}
}
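    /* Reconcile the two probes. The branches below implement:
     * - by_id == by_name (both may be NULL): consistent, keep by_id
     * - only by_name found: mismatch if an id was also given, else by_name
     * - only by_id found: mismatch if a uname was also given, else by_id
     * - both found but different: keep whichever entry also carries the
     *   other search key (duplicate unames or ids in the CIB), else NULL
     */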
node = by_id; /* Good default */
if (by_id == by_name) {
        /* Nothing to do if they match (both being NULL counts as a match) */
crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
} else if (by_id == NULL && by_name) {
crm_trace("Only one: %p for %s/%s", by_name, id, uname);
if (id) {
node = NULL;
} else {
node = by_name;
}
} else if (by_name == NULL && by_id) {
crm_trace("Only one: %p for %s/%s", by_id, id, uname);
if (uname) {
node = NULL;
}
} else if (uname && by_id->uname
&& pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
/* Multiple nodes have the same uname in the CIB.
* Return by_id. */
} else if (id && by_name->uuid
&& pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
/* Multiple nodes have the same id in the CIB.
* Return by_name. */
node = by_name;
} else {
node = NULL;
}
if (node == NULL) {
crm_debug("Couldn't find node%s%s%s%s",
id? " " : "",
id? id : "",
uname? " with name " : "",
uname? uname : "");
}
return node;
}
static void
-known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data)
+cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data)
{
const char *id = crm_element_value(xml_node, PCMK_XA_ID);
const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
crm_node_t * node = NULL;
    CRM_CHECK(id != NULL && uname != NULL, return);
- node = find_known_node(id, uname);
+ node = find_cib_cluster_node(id, uname);
if (node == NULL) {
char *uniqueid = crm_generate_uuid();
node = pcmk__assert_alloc(1, sizeof(crm_node_t));
node->uname = pcmk__str_copy(uname);
node->uuid = pcmk__str_copy(id);
- g_hash_table_replace(known_node_cache, uniqueid, node);
+ g_hash_table_replace(cluster_node_cib_cache, uniqueid, node);
} else if (pcmk_is_set(node->flags, crm_node_dirty)) {
pcmk__str_update(&node->uname, uname);
/* Node is in cache and hasn't been updated already, so mark it clean */
clear_peer_flags(node, crm_node_dirty);
}
}
static void
-refresh_known_node_cache(xmlNode *cib)
+refresh_cluster_node_cib_cache(xmlNode *cib)
{
- crm_peer_init();
+ pcmk__cluster_init_node_caches();
- g_hash_table_foreach(known_node_cache, mark_dirty, NULL);
+ g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL);
crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
- known_node_cache_refresh_helper, NULL);
+ cluster_node_cib_cache_refresh_helper, NULL);
- /* Remove all old cache entries that weren't seen in the CIB */
- g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL);
+ // Remove all old cache entries that weren't seen in the CIB
+ g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL);
}
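/* Illustrative sketch, not part of the patch: mark_dirty() and is_dirty()
 * are defined earlier in this file; callbacks of roughly this shape fit the
 * g_hash_table_foreach()/g_hash_table_foreach_remove() mark-and-sweep above
 * (these bodies are assumptions, shown for clarity).
 */
static void
example_mark_dirty(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *node = value;
    node->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Peer",
                                     node->uname, node->flags, crm_node_dirty,
                                     "dirty");
}
static gboolean
example_is_dirty(gpointer key, gpointer value, gpointer user_data)
{
    // Returning TRUE tells g_hash_table_foreach_remove() to drop the entry
    return pcmk_is_set(((const crm_node_t *) value)->flags, crm_node_dirty);
}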
void
pcmk__refresh_node_caches_from_cib(xmlNode *cib)
{
refresh_remote_nodes(cib);
- refresh_known_node_cache(cib);
+ refresh_cluster_node_cib_cache(cib);
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/cluster/compat.h>
int
crm_terminate_member(int nodeid, const char *uname, void *unused)
{
return stonith_api_kick(nodeid, uname, 120, TRUE);
}
int
crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
{
return stonith_api_kick(nodeid, uname, 120, TRUE);
}
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
- return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster);
+ return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster_member);
}
crm_node_t *
crm_get_peer_full(unsigned int id, const char *uname, int flags)
{
return pcmk__get_node(id, uname, NULL, flags);
}
int
crm_remote_peer_cache_size(void)
{
unsigned int count = pcmk__cluster_num_remote_nodes();
return QB_MIN(count, INT_MAX);
}
void
crm_remote_peer_cache_refresh(xmlNode *cib)
{
refresh_remote_nodes(cib);
}
crm_node_t *
crm_remote_peer_get(const char *node_name)
{
return pcmk__cluster_lookup_remote_node(node_name);
}
void
crm_remote_peer_cache_remove(const char *node_name)
{
pcmk__cluster_forget_remote_node(node_name);
}
gboolean
crm_is_peer_active(const crm_node_t * node)
{
return pcmk__cluster_is_node_active(node);
}
guint
crm_active_peers(void)
{
return pcmk__cluster_num_active_nodes();
}
guint
reap_crm_member(uint32_t id, const char *name)
{
int matches = 0;
crm_node_t search = { 0, };
if (crm_peer_cache == NULL) {
crm_trace("Membership cache not initialized, ignoring purge request");
return 0;
}
search.id = id;
search.uname = pcmk__str_copy(name);
matches = g_hash_table_foreach_remove(crm_peer_cache,
should_forget_cluster_node, &search);
    if (matches) {
crm_notice("Purged %d peer%s with " PCMK_XA_ID
"=%u%s%s from the membership cache",
matches, pcmk__plural_s(matches), search.id,
(search.uname? " and/or uname=" : ""),
(search.uname? search.uname : ""));
} else {
crm_info("No peers with " PCMK_XA_ID
"=%u%s%s to purge from the membership cache",
search.id, (search.uname? " and/or uname=" : ""),
(search.uname? search.uname : ""));
}
free(search.uname);
return matches;
}
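/* Illustrative sketch, not part of the patch: new code should bypass this
 * deprecated wrapper and purge a cluster node with the internal replacement
 * used elsewhere in this file:
 *
 *     pcmk__cluster_forget_cluster_node(id, name);
 */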
+void
+crm_peer_init(void)
+{
+ pcmk__cluster_init_node_caches();
+}
+
+void
+crm_peer_destroy(void)
+{
+ pcmk__cluster_destroy_node_caches();
+}
+
// LCOV_EXCL_STOP
// End deprecated API
