Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F1842493
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
205 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index b4d8a2ad38..c5d2713ea0 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -1,685 +1,685 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h>
#include <stdlib.h>
#include <glib.h>
#include <crm/cib/internal.h> // cib__*
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h> // pcmk__get_node()
#include "pacemaker-attrd.h"
// Call ID of the most recent CIB write whose callback reported success
static int last_cib_op_done = 0;
// Forward declaration (attrd_cib_callback() calls this before its definition)
static void write_attribute(attribute_t *a, bool ignore_delay);
/*!
 * \internal
 * \brief Handle a dropped connection to the CIB manager
 *
 * Signs off the connection, then either logs the disconnect (if we are
 * already shutting down) or initiates a shutdown with a disconnect status.
 *
 * \param[in,out] user_data  CIB connection (\c cib_t *)
 */
static void
attrd_cib_destroy_cb(gpointer user_data)
{
    cib_t *conn = user_data;

    conn->cmds->signoff(conn);

    if (!attrd_shutting_down(false)) {
        // @TODO This should trigger a reconnect, not a shutdown
        crm_crit("Lost connection to the CIB manager, shutting down");
        attrd_exit_status = CRM_EX_DISCONNECT;
        attrd_shutdown(0);
        return;
    }

    crm_info("Disconnected from the CIB manager");
}
/*!
 * \internal
 * \brief React to a CIB diff notification
 *
 * Re-reads the configuration when the alerts section changed, and (if we are
 * the writer) rewrites all attributes when a client that triggers refreshes
 * touched the nodes or status section.
 *
 * \param[in] event  Name of the notification event (used for logging)
 * \param[in] msg    Notification XML
 */
static void
attrd_cib_updated_cb(const char *event, xmlNode *msg)
{
    const xmlNode *diff = NULL;
    const char *origin = NULL;
    bool status_touched = false;

    if (attrd_shutting_down(true)) {
        crm_debug("Ignoring CIB change during shutdown");
        return;
    }

    if (cib__get_notify_patchset(msg, &diff) != pcmk_rc_ok) {
        return;
    }

    if (cib__element_in_patchset(diff, PCMK_XE_ALERTS)) {
        mainloop_set_trigger(attrd_config_read);
    }

    status_touched = cib__element_in_patchset(diff, PCMK_XE_STATUS);
    origin = crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME);

    /* Nothing to write if the change came from a source that keeps the CIB
     * consistent with our attributes table, or if we're not the writer.
     */
    if (!cib__client_triggers_refresh(origin) || !attrd_election_won()) {
        return;
    }

    if (!status_touched && !cib__element_in_patchset(diff, PCMK_XE_NODES)) {
        return;
    }

    /* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS section.
     * Write transient attributes to ensure they're up-to-date in the CIB.
     */
    if (origin == NULL) {
        origin = crm_element_value(msg, PCMK__XA_CIB_CLIENTID);
    }
    crm_notice("Updating all attributes after %s event triggered by %s",
               event, pcmk__s(origin, "(unidentified client)"));
    attrd_write_attributes(attrd_write_all);
}
/*!
 * \internal
 * \brief Create the CIB connection, sign on, and register callbacks
 *
 * Sign-on is retried until it succeeds or the attempt counter reaches
 * \p max_retry, sleeping before each retry for as many seconds as attempts
 * have been made so far.
 *
 * \param[in] max_retry  Attempt count at which to stop retrying sign-on
 *
 * \return \c pcmk_ok on success, or \c -ENOTCONN on any failure
 */
int
attrd_cib_connect(int max_retry)
{
    static int tries = 0;   // persists across calls

    int rc = -ENOTCONN;

    the_cib = cib_new();
    if (the_cib == NULL) {
        return -ENOTCONN;
    }

    for (;;) {
        if (tries > 0) {
            sleep(tries);
        }
        tries++;

        crm_debug("Connection attempt %d to the CIB manager", tries);
        rc = the_cib->cmds->signon(the_cib, PCMK__VALUE_ATTRD, cib_command);

        if ((rc == pcmk_ok) || (tries >= max_retry)) {
            break;
        }
    }

    if (rc != pcmk_ok) {
        crm_err("Connection to the CIB manager failed: %s " QB_XS " rc=%d",
                pcmk_strerror(rc), rc);
        goto cleanup;
    }
    crm_debug("Connected to the CIB manager after %d attempts", tries);

    rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb);
    if (rc != pcmk_ok) {
        crm_err("Could not set disconnection callback");
        goto cleanup;
    }

    rc = the_cib->cmds->add_notify_callback(the_cib,
                                            PCMK__VALUE_CIB_DIFF_NOTIFY,
                                            attrd_cib_updated_cb);
    if (rc != pcmk_ok) {
        crm_err("Could not set CIB notification callback");
        goto cleanup;
    }

    return pcmk_ok;

cleanup:
    cib__clean_up_connection(&the_cib);
    return -ENOTCONN;
}
/*!
 * \internal
 * \brief Tear down the CIB connection and the configuration-read trigger
 *
 * Does nothing (beyond the CRM_CHECK report) if there is no CIB connection.
 */
void
attrd_cib_disconnect(void)
{
    CRM_CHECK(the_cib != NULL, return);

    // Stop receiving diff notifications before dropping the connection
    the_cib->cmds->del_notify_callback(the_cib,
                                       PCMK__VALUE_CIB_DIFF_NOTIFY,
                                       attrd_cib_updated_cb);
    cib__clean_up_connection(&the_cib);

    mainloop_destroy_trigger(attrd_config_read);
}
/*!
 * \internal
 * \brief Log the outcome of a request to erase transient attributes
 *
 * \param[in] msg        Ignored
 * \param[in] call_id    Ignored
 * \param[in] rc         Result of the CIB operation (legacy return code)
 * \param[in] output     Ignored
 * \param[in] user_data  Node name as a string, or NULL if unknown
 */
static void
attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output,
               void *user_data)
{
    const char *target = pcmk__s((const char *) user_data, "a node");

    if (rc != pcmk_ok) {
        crm_err("Unable to clear transient node attributes for %s from CIB: %s",
                target, pcmk_strerror(rc));
        return;
    }
    crm_info("Cleared transient node attributes for %s from CIB", target);
}
// XPath (printf-style, takes a node name) matching a node's transient attributes
#define XPATH_TRANSIENT "//" PCMK__XE_NODE_STATE    \
                        "[@" PCMK_XA_UNAME "='%s']" \
                        "/" PCMK__XE_TRANSIENT_ATTRIBUTES

/*!
 * \internal
 * \brief Ask the CIB manager to remove all transient attributes of a node
 *
 * The result is logged by attrd_erase_cb() once the request completes.
 *
 * \param[in] node  Name of node whose attributes should be cleared
 */
void
attrd_cib_erase_transient_attrs(const char *node)
{
    char *query = NULL;
    int request_id = 0;

    CRM_CHECK(node != NULL, return);

    query = crm_strdup_printf(XPATH_TRANSIENT, node);
    crm_debug("Clearing transient node attributes for %s from CIB using %s",
              node, query);

    request_id = the_cib->cmds->remove(the_cib, query, NULL, cib_xpath);
    free(query);

    // The callback receives its own copy of the node name, released via free()
    the_cib->cmds->register_callback_full(the_cib, request_id, 120, FALSE,
                                          pcmk__str_copy(node),
                                          "attrd_erase_cb", attrd_erase_cb,
                                          free);
}
/*!
* \internal
* \brief Prepare the CIB after cluster is connected
*
* Requests removal of the local node's transient attributes from the CIB,
* then creates the attrd_config_read trigger (with attrd_read_options as its
* callback) and sets it so the CIB is read at start-up.
*/
void
attrd_cib_init(void)
{
/* We have no attribute values in memory, so wipe the CIB to match. This is
* normally done by the DC's controller when this node leaves the cluster, but
* this handles the case where the node restarted so quickly that the
* cluster layer didn't notice.
*
* \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED),
* ideally we'd skip this and sync our attributes from the writer.
* However, currently we reject any values for us that the writer has, in
* attrd_peer_update().
*/
// Diff hunk: local node name now comes from attrd_cluster->priv->node_name
- attrd_cib_erase_transient_attrs(attrd_cluster->uname);
+ attrd_cib_erase_transient_attrs(attrd_cluster->priv->node_name);
// Set a trigger for reading the CIB (for the alerts section)
attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL);
// Always read the CIB at start-up
mainloop_set_trigger(attrd_config_read);
}
/*!
 * \internal
 * \brief Timer callback: write (or start an election for) an attribute
 *
 * \param[in,out] data  Attribute whose dampening timer expired
 *
 * \return Always FALSE
 */
static gboolean
attribute_timer_cb(gpointer data)
{
    attribute_t *attr = data;

    crm_trace("Dampen interval expired for %s", attr->id);
    attrd_write_or_elect_attribute(attr);

    return FALSE;
}
/*!
 * \internal
 * \brief Process the result of a CIB write for an attribute
 *
 * Logs the per-node outcome, clears each value's requested value on success,
 * and on failure flags the attribute as changed so the write is retried
 * (after a delay) if we are still the writer.
 *
 * \param[in] msg        Ignored
 * \param[in] call_id    CIB call ID (a negative value is treated as an error
 *                       code when \p rc reports success)
 * \param[in] rc         Result of the CIB operation (legacy return code)
 * \param[in] output     Ignored
 * \param[in] user_data  Name of the attribute that was written
 */
static void
attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data)
{
    int log_level = LOG_ERR;
    GHashTableIter value_iter;
    const char *node_name = NULL;
    attribute_value_t *value = NULL;
    char *attr_name = user_data;
    attribute_t *attr = g_hash_table_lookup(attributes, attr_name);

    if (attr == NULL) {
        crm_info("Attribute %s no longer exists", attr_name);
        return;
    }

    attr->update = 0;

    if ((call_id < 0) && (rc == pcmk_ok)) {
        rc = call_id;
    }

    if (rc == pcmk_ok) {
        log_level = LOG_INFO;
        last_cib_op_done = call_id;
        if ((attr->timer != NULL) && (attr->timeout_ms == 0)) {
            // Remove temporary dampening for failed writes
            mainloop_timer_del(attr->timer);
            attr->timer = NULL;
        }

    } else if ((rc == -pcmk_err_diff_failed)    // Attr changed during CIB sync
               || (rc == -ETIME)                // Attr changed during DC election
               || (rc == -ENXIO)) {             /* Attr changed while CIB syncs a
                                                 * newer config from a node that
                                                 * just came up
                                                 */
        log_level = LOG_WARNING;
    }

    do_crm_log(log_level, "CIB update %d result for %s: %s " QB_XS " rc=%d",
               call_id, attr->id, pcmk_strerror(rc), rc);

    g_hash_table_iter_init(&value_iter, attr->values);
    while (g_hash_table_iter_next(&value_iter, (gpointer *) &node_name,
                                  (gpointer *) &value)) {
        if (rc != pcmk_ok) {
            do_crm_log(log_level, "* Could not write %s[%s]=%s",
                       attr->id, node_name,
                       pcmk__s(value->requested, "(unset)"));
            /* Reattempt write below if we are still the writer */
            attrd_set_attr_flags(attr, attrd_attr_changed);
            continue;
        }
        crm_info("* Wrote %s[%s]=%s",
                 attr->id, node_name, pcmk__s(value->requested, "(unset)"));
        pcmk__str_update(&(value->requested), NULL);
    }

    if (!pcmk_is_set(attr->flags, attrd_attr_changed)
        || !attrd_election_won()) {
        return;
    }

    if (rc == pcmk_ok) {
        /* We deferred a write of a new update because this update was in
         * progress. Write out the new value without additional delay.
         */
        crm_debug("Pending update for %s can be written now", attr->id);
        write_attribute(attr, false);
        return;
    }

    /* We're re-attempting a write because the original failed; delay the next
     * attempt so we don't potentially flood the CIB manager and logs with a
     * zillion attempts per second.
     *
     * @TODO We could elect a new writer instead. However, we'd have to
     * somehow downgrade our vote, and we'd still need something like this
     * if all peers similarly fail to write this attribute (which may
     * indicate a corrupted attribute entry rather than a CIB issue).
     */
    if (attr->timer == NULL) {
        /* Set a temporary dampening of 2 seconds (timer will continue to
         * exist until the attribute's dampening gets set or the write
         * succeeds).
         */
        attr->timer = attrd_add_timer(attr->id, 2000, attr);
        mainloop_timer_start(attr->timer);
        return;
    }

    // Attribute has a dampening value, so use that as delay
    if (!mainloop_timer_running(attr->timer)) {
        crm_trace("Delayed re-attempted write for %s by %s",
                  attr_name, pcmk__readable_interval(attr->timeout_ms));
        mainloop_timer_start(attr->timer);
    }
}
/*!
* \internal
* \brief Add a set-attribute update request to the current CIB transaction
*
* \param[in] attr Attribute to update
* \param[in] attr_id ID of attribute to update
* \param[in] node_id ID of node for which to update attribute value
* \param[in] set_id ID of attribute set
* \param[in] value New value for attribute
*
* \return Standard Pacemaker return code
*/
static int
add_set_attr_update(const attribute_t *attr, const char *attr_id,
const char *node_id, const char *set_id, const char *value)
{
xmlNode *update = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
xmlNode *child = update;
int rc = ENOMEM;
crm_xml_add(child, PCMK_XA_ID, node_id);
child = pcmk__xe_create(child, PCMK__XE_TRANSIENT_ATTRIBUTES);
crm_xml_add(child, PCMK_XA_ID, node_id);
child = pcmk__xe_create(child, attr->set_type);
crm_xml_add(child, PCMK_XA_ID, set_id);
child = pcmk__xe_create(child, PCMK_XE_NVPAIR);
crm_xml_add(child, PCMK_XA_ID, attr_id);
crm_xml_add(child, PCMK_XA_NAME, attr->id);
crm_xml_add(child, PCMK_XA_VALUE, value);
rc = the_cib->cmds->modify(the_cib, PCMK_XE_STATUS, update,
cib_can_create|cib_transaction);
rc = pcmk_legacy2rc(rc);
pcmk__xml_free(update);
return rc;
}
/*!
* \internal
* \brief Add an unset-attribute update request to the current CIB transaction
*
* \param[in] attr Attribute to update
* \param[in] attr_id ID of attribute to update
* \param[in] node_id ID of node for which to update attribute value
* \param[in] set_id ID of attribute set
*
* \return Standard Pacemaker return code
*/
static int
add_unset_attr_update(const attribute_t *attr, const char *attr_id,
const char *node_id, const char *set_id)
{
char *xpath = crm_strdup_printf("/" PCMK_XE_CIB
"/" PCMK_XE_STATUS
"/" PCMK__XE_NODE_STATE
"[@" PCMK_XA_ID "='%s']"
"/" PCMK__XE_TRANSIENT_ATTRIBUTES
"[@" PCMK_XA_ID "='%s']"
"/%s[@" PCMK_XA_ID "='%s']"
"/" PCMK_XE_NVPAIR
"[@" PCMK_XA_ID "='%s' "
"and @" PCMK_XA_NAME "='%s']",
node_id, node_id, attr->set_type, set_id,
attr_id, attr->id);
int rc = the_cib->cmds->remove(the_cib, xpath, NULL,
cib_xpath|cib_transaction);
free(xpath);
return pcmk_legacy2rc(rc);
}
/*!
 * \internal
 * \brief Queue an attribute update in the current CIB transaction
 *
 * A NULL \p value queues a removal; any other value queues a set.
 *
 * \param[in] attr     Attribute to update
 * \param[in] value    New value for attribute (NULL to unset)
 * \param[in] node_id  ID of node for which to update attribute value
 *
 * \return Standard Pacemaker return code
 */
static int
add_attr_update(const attribute_t *attr, const char *value, const char *node_id)
{
    char *set_id = attrd_set_id(attr, node_id);
    char *nvpair_id = attrd_nvpair_id(attr, node_id);
    int rc = (value == NULL)?
             add_unset_attr_update(attr, nvpair_id, node_id, set_id)
             : add_set_attr_update(attr, nvpair_id, node_id, set_id, value);

    free(nvpair_id);
    free(set_id);
    return rc;
}
/*!
 * \internal
 * \brief Send an attribute alert for every value in a table
 *
 * \param[in] a  Attribute that the values belong to
 * \param[in] t  Table of attribute_value_t entries to send alerts for
 */
static void
send_alert_attributes_value(attribute_t *a, GHashTable *t)
{
    int rc = 0;
    attribute_value_t *at = NULL;
    GHashTableIter vIter;

    g_hash_table_iter_init(&vIter, t);

    while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) {
        rc = attrd_send_attribute_alert(at->nodename, at->nodeid,
                                        a->id, at->current);
        // Node IDs are logged with PRIu32 elsewhere in this file; match that
        crm_trace("Sent alerts for %s[%s]=%s: nodeid=%" PRIu32 " rc=%d",
                  a->id, at->nodename, at->current, at->nodeid, rc);
    }
}
/*!
 * \internal
 * \brief Store a copy of a value's node ID, node name and current value
 *
 * \param[in,out] t  Table to add the copy to (keyed by the copy's node name)
 * \param[in]     v  Attribute value to copy from
 */
static void
set_alert_attribute_value(GHashTable *t, attribute_value_t *v)
{
    attribute_value_t *snapshot = pcmk__assert_alloc(1,
                                                     sizeof(attribute_value_t));

    snapshot->nodename = pcmk__str_copy(v->nodename);
    snapshot->nodeid = v->nodeid;
    snapshot->current = pcmk__str_copy(v->current);

    g_hash_table_replace(t, snapshot->nodename, snapshot);
}
/*!
 * \internal
 * \brief Create a timer that invokes attribute_timer_cb() for an attribute
 *
 * \param[in]     id          Timer name
 * \param[in]     timeout_ms  Timer period in milliseconds
 * \param[in,out] attr        Attribute passed to the timer callback
 *
 * \return Newly created timer (as returned by mainloop_timer_add())
 */
mainloop_timer_t *
attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr)
{
    mainloop_timer_t *timer = mainloop_timer_add(id, timeout_ms, FALSE,
                                                 attribute_timer_cb, attr);

    return timer;
}
/*!
* \internal
* \brief Write an attribute's values to the CIB if appropriate
*
* For attributes that go to the CIB, all peer values are added to a single
* CIB transaction that is committed at the end; private attributes (and
* stand-alone mode) are only counted and logged.
*
* \param[in,out] a Attribute to write
* \param[in] ignore_delay If true, write attribute now regardless of any
* configured delay
*/
static void
write_attribute(attribute_t *a, bool ignore_delay)
{
int private_updates = 0, cib_updates = 0;
attribute_value_t *v = NULL;
GHashTableIter iter;
GHashTable *alert_attribute_value = NULL;
int rc = pcmk_ok;
if (a == NULL) {
return;
}
/* If this attribute will be written to the CIB ... */
if (!stand_alone && !pcmk_is_set(a->flags, attrd_attr_is_private)) {
/* Defer the write if now's not a good time */
// A pending update older than the last completed CIB op is treated as lost
if (a->update && (a->update < last_cib_op_done)) {
crm_info("Write out of '%s' continuing: update %d considered lost",
a->id, a->update);
a->update = 0; // Don't log this message again
} else if (a->update) {
crm_info("Write out of '%s' delayed: update %d in progress",
a->id, a->update);
goto done;
} else if (mainloop_timer_running(a->timer)) {
if (ignore_delay) {
mainloop_timer_stop(a->timer);
crm_debug("Overriding '%s' write delay", a->id);
} else {
crm_info("Delaying write of '%s'", a->id);
goto done;
}
}
// Initiate a transaction for all the peer value updates
CRM_CHECK(the_cib != NULL, goto done);
the_cib->cmds->set_user(the_cib, a->user);
rc = the_cib->cmds->init_transaction(the_cib);
if (rc != pcmk_ok) {
crm_err("Failed to write %s (set %s): Could not initiate "
"CIB transaction",
a->id, pcmk__s(a->set_id, "unspecified"));
goto done;
}
}
/* Attribute will be written shortly, so clear changed flag and force
* write flag, and initialize UUID missing flag to false.
*/
attrd_clear_attr_flags(a, attrd_attr_changed|attrd_attr_uuid_missing|attrd_attr_force_write);
/* Make the table for the attribute trap */
alert_attribute_value = pcmk__strikey_table(NULL,
attrd_free_attribute_value);
/* Iterate over each peer value of this attribute */
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
const char *uuid = NULL;
if (pcmk_is_set(v->flags, attrd_value_remote)) {
/* If this is a Pacemaker Remote node, the node's UUID is the same
* as its name, which we already have.
*/
uuid = v->nodename;
} else {
// This will create a cluster node cache entry if none exists
pcmk__node_status_t *peer = pcmk__get_node(v->nodeid, v->nodename,
NULL,
pcmk__node_search_any);
uuid = peer->xml_id;
// Remember peer's node ID if we're just now learning it
if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) {
crm_trace("Learned ID %" PRIu32 " for node %s",
peer->cluster_layer_id, v->nodename);
v->nodeid = peer->cluster_layer_id;
}
}
/* If this is a private attribute, no update needs to be sent */
if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
private_updates++;
continue;
}
// Defer write if this is a cluster node that's never been seen
if (uuid == NULL) {
attrd_set_attr_flags(a, attrd_attr_uuid_missing);
crm_notice("Cannot update %s[%s]='%s' now because node's UUID is "
"unknown (will retry if learned)",
a->id, v->nodename, v->current);
continue;
}
// Update this value as part of the CIB transaction we're building
rc = add_attr_update(a, v->current, uuid);
if (rc != pcmk_rc_ok) {
crm_err("Failed to update %s[%s]='%s': %s "
QB_XS " node uuid=%s id=%" PRIu32,
a->id, v->nodename, v->current, pcmk_rc_str(rc),
uuid, v->nodeid);
continue;
}
crm_debug("Writing %s[%s]=%s (node-state-id=%s node-id=%" PRIu32 ")",
a->id, v->nodename, pcmk__s(v->current, "(unset)"),
uuid, v->nodeid);
cib_updates++;
/* Preservation of the attribute to transmit alert */
set_alert_attribute_value(alert_attribute_value, v);
// Save this value so we can log it when write completes
pcmk__str_update(&(v->requested), v->current);
}
if (private_updates) {
crm_info("Processed %d private change%s for %s (set %s)",
private_updates, pcmk__plural_s(private_updates),
a->id, pcmk__s(a->set_id, "unspecified"));
}
if (cib_updates > 0) {
// Copy of the attribute name handed to the callback (with free as destructor)
char *id = pcmk__str_copy(a->id);
// Commit transaction
a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none);
crm_info("Sent CIB request %d with %d change%s for %s (set %s)",
a->update, cib_updates, pcmk__plural_s(cib_updates),
a->id, pcmk__s(a->set_id, "unspecified"));
// Alerts are sent only if the callback registration call returns nonzero
if (the_cib->cmds->register_callback_full(the_cib, a->update,
CIB_OP_TIMEOUT_S, FALSE, id,
"attrd_cib_callback",
attrd_cib_callback, free)) {
// Transmit alert of the attribute
send_alert_attributes_value(a, alert_attribute_value);
}
}
done:
// Discard transaction (if any)
if (the_cib != NULL) {
the_cib->cmds->end_transaction(the_cib, false, cib_none);
the_cib->cmds->set_user(the_cib, NULL);
}
if (alert_attribute_value != NULL) {
g_hash_table_destroy(alert_attribute_value);
}
}
/*!
 * \internal
 * \brief Write out all attributes, or only those that need writing
 *
 * Attributes flagged as force-write, or (unless writing all) flagged as
 * missing a node UUID, are marked changed first so they get another attempt.
 *
 * \param[in] options  Group of enum attrd_write_options
 */
void
attrd_write_attributes(uint32_t options)
{
    const bool write_all = pcmk_is_set(options, attrd_write_all);
    GHashTableIter attr_iter;
    attribute_t *attr = NULL;

    crm_debug("Writing out %s attributes", write_all? "all" : "changed");

    g_hash_table_iter_init(&attr_iter, attributes);
    while (g_hash_table_iter_next(&attr_iter, NULL, (gpointer *) &attr)) {

        /* Retry attributes whose peer ID may have been learned since the last
         * attempt, and honor any pending forced write.
         */
        if ((!write_all && pcmk_is_set(attr->flags, attrd_attr_uuid_missing))
            || pcmk_is_set(attr->flags, attrd_attr_force_write)) {
            attrd_set_attr_flags(attr, attrd_attr_changed);
        }

        if (!write_all && !pcmk_is_set(attr->flags, attrd_attr_changed)) {
            crm_trace("Skipping unchanged attribute %s", attr->id);
            continue;
        }

        // Always ignore delay when forced write flag is set
        write_attribute(attr,
                        pcmk_is_set(options, attrd_write_no_delay)
                        || pcmk_is_set(attr->flags, attrd_attr_force_write));
    }
}
/*!
 * \internal
 * \brief Write an attribute if we are the writer, otherwise maybe elect one
 *
 * \param[in,out] a  Attribute to write
 */
void
attrd_write_or_elect_attribute(attribute_t *a)
{
    if (!attrd_election_won()) {
        attrd_start_election_if_needed();
        return;
    }
    write_attribute(a, false);
}
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 49240ed067..bd39e7621f 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -1,611 +1,612 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
/*!
 * \internal
 * \brief Build a confirmation message for a given call ID
 *
 * \param[in] callid  Call ID of the message being confirmed
 *
 * \return Newly created confirmation XML (caller frees it, as
 *         attrd_peer_message() does with pcmk__xml_free())
 */
static xmlNode *
attrd_confirmation(int callid)
{
    // The element is named after this function
    xmlNode *confirm = pcmk__xe_create(NULL, __func__);

    crm_xml_add(confirm, PCMK__XA_T, PCMK__VALUE_ATTRD);
    crm_xml_add(confirm, PCMK__XA_SRC, pcmk__cluster_local_node_name());
    crm_xml_add(confirm, PCMK_XA_TASK, PCMK__ATTRD_CMD_CONFIRM);
    crm_xml_add_int(confirm, PCMK__XA_CALL_ID, callid);

    return confirm;
}
/*!
 * \internal
 * \brief Process a message received from a cluster peer
 *
 * Election operations are always handled. Any other message is ignored while
 * shutting down; otherwise it is handled as a request and, if the sender
 * asked for one, a confirmation is sent back.
 *
 * \param[in,out] peer  Peer that sent the message
 * \param[in,out] xml   Message XML
 */
static void
attrd_peer_message(pcmk__node_status_t *peer, xmlNode *xml)
{
    if (crm_element_value(xml, PCMK__XA_CRM_TASK) != NULL) {
        attrd_handle_election_op(peer, xml);
        return;
    }

    /* If we're shutting down, we want to continue responding to election ops
     * as long as we're a cluster member (because our vote may be needed).
     * Ignore all other messages.
     */
    if (attrd_shutting_down(false)) {
        return;
    }

    pcmk__request_t request = {
        .ipc_client     = NULL,
        .ipc_id         = 0,
        .ipc_flags      = 0,
        .peer           = peer->name,
        .xml            = xml,
        .call_options   = 0,
        .result         = PCMK__UNKNOWN_RESULT,
    };
    bool confirm_wanted = false;

    request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);
    CRM_CHECK(request.op != NULL, return);

    attrd_handle_request(&request);

    /* Having finished handling the request, check to see if the originating
     * peer requested confirmation. If so, send that confirmation back now.
     */
    confirm_wanted = pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)
                     && !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM,
                                      pcmk__str_none);
    if (confirm_wanted) {
        int confirmed_id = 0;
        xmlNode *confirmation = NULL;

        /* Add the confirmation ID for the message we are confirming to the
         * response so the originating peer knows what they're a confirmation
         * for.
         */
        crm_element_value_int(xml, PCMK__XA_CALL_ID, &confirmed_id);
        confirmation = attrd_confirmation(confirmed_id);

        /* And then send the confirmation back to the originating peer. This
         * ends up right back in this same function (attrd_peer_message) on
         * the peer where it will have to do something with a
         * PCMK__XA_CONFIRM type message.
         */
        crm_debug("Sending %s a confirmation", peer->name);
        attrd_send_message(peer, confirmation, false);
        pcmk__xml_free(confirmation);
    }

    pcmk__reset_request(&request);
}
/*!
 * \internal
 * \brief Handle a message delivered from the Corosync process group
 *
 * \param[in] handle     CPG handle
 * \param[in] groupName  Ignored
 * \param[in] nodeid     ID of the sending node
 * \param[in] pid        Process ID of the sender
 * \param[in] msg        Raw message
 * \param[in] msg_len    Ignored
 */
static void
attrd_cpg_dispatch(cpg_handle_t handle,
                   const struct cpg_name *groupName,
                   uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    const char *sender = NULL;
    xmlNode *parsed = NULL;
    char *payload = pcmk__cpg_message_data(handle, nodeid, pid, msg, &sender);

    if (payload == NULL) {
        return;
    }

    parsed = pcmk__xml_parse(payload);
    if (parsed != NULL) {
        pcmk__node_status_t *peer =
            pcmk__get_node(nodeid, sender, NULL,
                           pcmk__node_search_cluster_member);

        attrd_peer_message(peer, parsed);
    } else {
        crm_err("Bad message received from %s[%u]: '%.120s'",
                sender, nodeid, payload);
    }

    pcmk__xml_free(parsed);
    free(payload);
}
/*!
 * \internal
 * \brief Handle loss of the Corosync process group connection
 *
 * \param[in] unused  Ignored
 */
static void
attrd_cpg_destroy(gpointer unused)
{
    if (!attrd_shutting_down(false)) {
        crm_crit("Lost connection to Corosync process group, shutting down");
        attrd_exit_status = CRM_EX_DISCONNECT;
        attrd_shutdown(0);
        return;
    }
    crm_info("Disconnected from Corosync process group");
}
/*!
 * \internal
 * \brief Send all peers an update request for one attribute value
 *
 * \param[in] a  Attribute to broadcast
 * \param[in] v  Attribute value to broadcast
 */
void
attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v)
{
    xmlNode *update = pcmk__xe_create(NULL, PCMK_XE_OP);

    crm_xml_add(update, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
    attrd_add_value_xml(update, a, v, false);

    // A NULL destination is used for the broadcast
    attrd_send_message(NULL, update, false);

    pcmk__xml_free(update);
}
// Printable form of a node state string that may be NULL
#define state_text(state) pcmk__s((state), "in unknown state")

/*!
 * \internal
 * \brief React to a change in a node's name, processes, or state
 *
 * \param[in]     kind  What changed
 * \param[in,out] peer  Node that changed
 * \param[in]     data  Previous value (used as the old state string for
 *                      state changes)
 */
static void
attrd_peer_change_cb(enum pcmk__node_update kind, pcmk__node_status_t *peer,
                     const void *data)
{
    const bool is_remote = pcmk_is_set(peer->flags, pcmk__node_status_remote);
    const char *node_type = is_remote? "Remote" : "Cluster";
    bool gone = false;

    if (kind == pcmk__node_update_name) {
        crm_debug("%s node %s is now %s",
                  node_type, peer->name, state_text(peer->state));

    } else if (kind == pcmk__node_update_processes) {
        gone = !pcmk_is_set(peer->processes, crm_get_cluster_proc());
        crm_debug("Node %s is %s a peer",
                  peer->name, (gone? "no longer" : "now"));

    } else if (kind == pcmk__node_update_state) {
        crm_debug("%s node %s is now %s (was %s)",
                  node_type, peer->name, state_text(peer->state),
                  state_text(data));

        if (!pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
            // Remove all attribute values associated with lost nodes
            attrd_peer_remove(peer->name, false, "loss");
            gone = true;

        } else if (attrd_election_won() && !is_remote) {
            /* If we're the writer, send new peers a list of all attributes
             * (unless it's a remote node, which doesn't run its own attrd)
             */
            attrd_peer_sync(peer);
        }
    }

    // Remove votes from cluster nodes that leave, in case election in progress
    if (!gone || is_remote) {
        return;
    }
    attrd_remove_voter(peer);
    attrd_remove_peer_protocol_ver(peer->name);
    attrd_do_not_expect_from_peer(peer->name);
}
/*!
 * \internal
 * \brief Look up a peer by its newly learned node ID and rewrite changes
 *
 * \param[in] v     Attribute value whose node ID was just learned
 * \param[in] host  Name of the node the value belongs to
 */
static void
record_peer_nodeid(attribute_value_t *v, const char *host)
{
    pcmk__node_status_t *node = NULL;

    node = pcmk__get_node(v->nodeid, host, NULL,
                          pcmk__node_search_cluster_member);
    crm_trace("Learned %s has node id %s", node->name, node->xml_id);

    if (!attrd_election_won()) {
        return;
    }
    attrd_write_attributes(attrd_write_changed);
}
// Printable current value of an attribute value entry ("(unset)" if NULL)
#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")
// Printable peer name ("all peers" for a NULL peer)
#define readable_peer(p) \
(((p) == NULL)? "all peers" : pcmk__s((p)->name, "unknown peer"))
/*!
* \internal
* \brief Apply a peer's update of one attribute for one host
*
* \param[in,out] a Attribute being updated
* \param[in] peer Peer that sent the update
* \param[in] xml Update XML
* \param[in] attr Attribute name (used for logging and shutdown detection)
* \param[in] value New value (NULL is logged as unset)
* \param[in] host Name of node the value is for
* \param[in] filter If true, a changed value for the local node is not
* accepted; the local value is broadcast instead
*/
static void
update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
const xmlNode *xml, const char *attr, const char *value,
const char *host, bool filter)
{
int is_remote = 0;
bool changed = false;
attribute_value_t *v = NULL;
// Create entry for value if not already existing
v = g_hash_table_lookup(a->values, host);
if (v == NULL) {
v = pcmk__assert_alloc(1, sizeof(attribute_value_t));
v->nodename = pcmk__str_copy(host);
g_hash_table_replace(a->values, v->nodename, v);
}
// If value is for a Pacemaker Remote node, remember that
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
if (is_remote) {
attrd_set_value_flags(v, attrd_value_remote);
CRM_ASSERT(pcmk__cluster_lookup_remote_node(host) != NULL);
}
// Check whether the value changed
changed = !pcmk__str_eq(v->current, value, pcmk__str_casei);
// Diff hunks below: attrd_cluster->uname becomes attrd_cluster->priv->node_name
- if (changed && filter && pcmk__str_eq(host, attrd_cluster->uname,
- pcmk__str_casei)) {
+ if (changed && filter
+ && pcmk__str_eq(host, attrd_cluster->priv->node_name,
+ pcmk__str_casei)) {
/* Broadcast the local value for an attribute that differs from the
* value provided in a peer's attribute synchronization response. This
* ensures a node's values for itself take precedence and all peers are
* kept in sync.
*/
- v = g_hash_table_lookup(a->values, attrd_cluster->uname);
+ v = g_hash_table_lookup(a->values, attrd_cluster->priv->node_name);
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
attr, host, readable_value(v), value, peer->name);
attrd_broadcast_value(a, v);
} else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
QB_XS " from %s with %s write delay",
attr, host, a->set_type ? " in " : "",
pcmk__s(a->set_type, ""), readable_value(v),
pcmk__s(value, "(unset)"), peer->name,
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
pcmk__str_update(&v->current, value);
attrd_set_attr_flags(a, attrd_attr_changed);
// Track a shutdown request when the local node's shutdown attribute changes
- if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)
+ if (pcmk__str_eq(host, attrd_cluster->priv->node_name, pcmk__str_casei)
&& pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) {
if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) {
attrd_set_requesting_shutdown();
} else {
attrd_clear_requesting_shutdown();
}
}
// Write out new value or start dampening timer
if (a->timeout_ms && a->timer) {
crm_trace("Delaying write of %s %s for dampening",
attr, pcmk__readable_interval(a->timeout_ms));
mainloop_timer_start(a->timer);
} else {
attrd_write_or_elect_attribute(a);
}
} else {
int is_force_write = 0;
crm_element_value_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE,
&is_force_write);
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Save forced writing and set change flag. */
/* The actual attribute is written by Writer after election. */
crm_trace("%s[%s] from %s is unchanged (%s), forcing write",
attr, host, peer->name, pcmk__s(value, "unset"));
attrd_set_attr_flags(a, attrd_attr_force_write);
} else {
crm_trace("%s[%s] from %s is unchanged (%s)",
attr, host, peer->name, pcmk__s(value, "unset"));
}
}
// This allows us to later detect local values that peer doesn't know about
attrd_set_value_flags(v, attrd_value_from_peer);
/* If this is a cluster node whose node ID we are learning, remember it */
if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote)
&& (crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID,
(int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
record_peer_nodeid(v, host);
}
}
/*!
 * \internal
 * \brief Apply a single attribute update received from a peer
 *
 * If the update names no host, it is applied to every host that already has
 * a value for the attribute.
 *
 * \param[in]     peer    Peer that sent the update
 * \param[in,out] xml     Update XML
 * \param[in]     filter  Passed through to update_attr_on_host()
 */
static void
attrd_peer_update_one(const pcmk__node_status_t *peer, xmlNode *xml,
                      bool filter)
{
    const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
    const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
    const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
    attribute_t *entry = NULL;

    if (attr == NULL) {
        crm_warn("Could not update attribute: peer did not specify name");
        return;
    }

    entry = attrd_populate_attribute(xml, attr);
    if (entry == NULL) {
        return;
    }

    if (host != NULL) {
        // Update attribute value for the given host
        update_attr_on_host(entry, peer, xml, attr, value, host, filter);

    } else {
        // If no host was specified, update all hosts
        GHashTableIter host_iter;

        crm_debug("Setting %s for all hosts to %s", attr, value);
        pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_HOST_ID);

        g_hash_table_iter_init(&host_iter, entry->values);
        while (g_hash_table_iter_next(&host_iter, (gpointer *) &host, NULL)) {
            update_attr_on_host(entry, peer, xml, attr, value, host, filter);
        }
    }

    /* If this is a message from some attrd instance broadcasting its protocol
     * version, check to see if it's a new minimum version.
     */
    if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) {
        attrd_update_minimum_protocol_ver(peer->name, value);
    }
}
/*!
* \internal
* \brief Broadcast local node values that no peer has reported
*
* Collects every value for the local node that lacks the
* attrd_value_from_peer flag into one sync-response message and, if any were
* found, sends it with a NULL destination.
*/
static void
broadcast_unseen_local_values(void)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
// Created lazily, only once a local-only value is found
xmlNode *sync = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
// Diff hunk: attrd_cluster->uname becomes attrd_cluster->priv->node_name
if (!pcmk_is_set(v->flags, attrd_value_from_peer)
- && pcmk__str_eq(v->nodename, attrd_cluster->uname,
+ && pcmk__str_eq(v->nodename, attrd_cluster->priv->node_name,
pcmk__str_casei)) {
crm_trace("* %s[%s]='%s' is local-only",
a->id, v->nodename, readable_value(v));
if (sync == NULL) {
sync = pcmk__xe_create(NULL, __func__);
crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
}
attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer);
}
}
}
if (sync != NULL) {
crm_debug("Broadcasting local-only values");
attrd_send_message(NULL, sync, false);
pcmk__xml_free(sync);
}
}
/*!
 * \internal
 * \brief Create the cluster object, register callbacks, and connect
 *
 * \return \c pcmk_ok on success, otherwise a legacy Pacemaker return code
 */
int
attrd_cluster_connect(void)
{
    int legacy_rc = pcmk_ok;

    attrd_cluster = pcmk_cluster_new();

    pcmk_cluster_set_destroy_fn(attrd_cluster, attrd_cpg_destroy);
    pcmk_cpg_set_deliver_fn(attrd_cluster, attrd_cpg_dispatch);
    pcmk_cpg_set_confchg_fn(attrd_cluster, pcmk__cpg_confchg_cb);

    pcmk__cluster_set_status_callback(&attrd_peer_change_cb);

    legacy_rc = pcmk_rc2legacy(pcmk_cluster_connect(attrd_cluster));
    if (legacy_rc == pcmk_ok) {
        return pcmk_ok;
    }

    crm_err("Cluster connection failed");
    return legacy_rc;
}
/*!
 * \internal
 * \brief Clear failure attributes matching a peer's clear-failure request
 *
 * Builds a regular expression from the requested resource, operation and
 * interval, converts the request into an update with no value (a deletion),
 * and applies it to every known attribute whose name matches.
 *
 * \param[in,out] request  Request to process (its XML is modified in place)
 */
void
attrd_peer_clear_failure(pcmk__request_t *request)
{
    xmlNode *xml = request->xml;
    const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
    const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
    const char *op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
    const char *interval_spec = crm_element_value(xml,
                                                  PCMK__XA_ATTR_CLEAR_INTERVAL);
    guint interval_ms = 0U;
    char *attr = NULL;
    GHashTableIter iter;
    regex_t regex;

    pcmk__node_status_t *peer =
        pcmk__get_node(0, request->peer, NULL,
                       pcmk__node_search_cluster_member);

    pcmk_parse_interval_spec(interval_spec, &interval_ms);

    // "&regex" had been garbled into a registered-trademark sign plus "ex"
    if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) {
        crm_info("Ignoring invalid request to clear failures for %s",
                 pcmk__s(rsc, "all resources"));
        return;
    }

    crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);

    /* Make sure value is not set, so we delete */
    pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);

    g_hash_table_iter_init(&iter, attributes);
    while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) {
        if (regexec(&regex, attr, 0, NULL, 0) == 0) {
            crm_trace("Matched %s when clearing %s",
                      attr, pcmk__s(rsc, "all resources"));
            crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr);
            attrd_peer_update(peer, xml, host, false);
        }
    }
    regfree(&regex);
}
/*!
 * \internal
 * \brief Apply the attribute values contained in a peer's sync response
 *
 * \param[in]     peer      Peer that sent the sync response
 * \param[in]     peer_won  Whether the peer is the attribute writer
 * \param[in,out] xml       Sync response XML (one child per attribute update)
 */
void
attrd_peer_sync_response(const pcmk__node_status_t *peer, bool peer_won,
                         xmlNode *xml)
{
    xmlNode *update = NULL;

    crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
             peer->name);

    if (peer_won) {
        /* Start with every value's "seen" flag cleared, so that afterward we
         * can tell which local values the writer did not mention.
         */
        attrd_clear_value_seen();
    }

    // Each child element of the response is a separate attribute update
    update = pcmk__xe_first_child(xml, NULL, NULL, NULL);
    while (update != NULL) {
        const char *host = crm_element_value(update, PCMK__XA_ATTR_HOST);

        attrd_peer_update(peer, update, host, true);
        update = pcmk__xe_next(update);
    }

    if (peer_won) {
        /* Anything still unseen is unknown to the writer, so broadcast those
         * values to all peers.
         */
        broadcast_unseen_local_values();
    }
}
/*!
 * \internal
 * \brief Drop every attribute value held for a node, optionally uncaching it
 *
 * \param[in] host     Name of node whose values should be removed
 * \param[in] uncache  If true, also purge the node from the peer caches
 * \param[in] source   Requester of the removal (used only in log messages)
 */
void
attrd_peer_remove(const char *host, bool uncache, const char *source)
{
    GHashTableIter attr_iter;
    attribute_t *attr = NULL;

    CRM_CHECK(host != NULL, return);

    crm_notice("Removing all %s attributes for node %s "
               QB_XS " %s reaping node from cache",
               host, source, (uncache? "and" : "without"));

    g_hash_table_iter_init(&attr_iter, attributes);
    while (g_hash_table_iter_next(&attr_iter, NULL, (gpointer *) &attr)) {
        // Values are keyed by node name; skip attributes with none for host
        if (!g_hash_table_remove(attr->values, host)) {
            continue;
        }
        crm_debug("Removed %s[%s] for peer %s", attr->id, host, source);
    }

    if (uncache) {
        pcmk__purge_node_from_cache(host, 0);
    }
}
/*!
 * \internal
 * \brief Send every known attribute value to one peer or to all peers
 *
 * \param[in] peer  Peer to send sync to (if NULL, broadcast to all peers)
 */
void
attrd_peer_sync(pcmk__node_status_t *peer)
{
    GHashTableIter attr_iter;
    attribute_t *attr = NULL;
    xmlNode *msg = pcmk__xe_create(NULL, __func__);

    crm_xml_add(msg, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);

    g_hash_table_iter_init(&attr_iter, attributes);
    while (g_hash_table_iter_next(&attr_iter, NULL, (gpointer *) &attr)) {
        GHashTableIter value_iter;
        attribute_value_t *value = NULL;

        // Add each node's value of this attribute to the message
        g_hash_table_iter_init(&value_iter, attr->values);
        while (g_hash_table_iter_next(&value_iter, NULL,
                                      (gpointer *) &value)) {
            crm_debug("Syncing %s[%s]='%s' to %s",
                      attr->id, value->nodename, readable_value(value),
                      readable_peer(peer));
            attrd_add_value_xml(msg, attr, value, false);
        }
    }

    crm_debug("Syncing values to %s", readable_peer(peer));
    attrd_send_message(peer, msg, false);
    pcmk__xml_free(msg);
}
/*!
 * \internal
 * \brief Apply an attribute update (single or multi-operation) from a peer
 *
 * \param[in]     peer    Peer that sent the update
 * \param[in,out] xml     Update XML; if it has children, each PCMK_XE_OP child
 *                        is processed as its own update
 * \param[in]     host    Not used by this function
 * \param[in]     filter  Passed through to attrd_peer_update_one()
 */
void
attrd_peer_update(const pcmk__node_status_t *peer, xmlNode *xml,
                  const char *host, bool filter)
{
    bool saw_sync_point = false;

    CRM_CHECK((peer != NULL) && (xml != NULL), return);

    if (xml->children == NULL) {
        // Plain single-attribute update
        attrd_peer_update_one(peer, xml, filter);
        if (attrd_request_has_sync_point(xml)) {
            saw_sync_point = true;
        }

    } else {
        xmlNode *op = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL);

        while (op != NULL) {
            // Children inherit any parent attributes they don't set themselves
            pcmk__xe_copy_attrs(op, xml, pcmk__xaf_no_overwrite);
            attrd_peer_update_one(peer, op, filter);

            if (attrd_request_has_sync_point(op)) {
                saw_sync_point = true;
            }
            op = pcmk__xe_next_same(op);
        }
    }

    /* A client that asked to wait for a sync point is acknowledged now that
     * the update has been applied locally.
     */
    if (saw_sync_point) {
        crm_trace("Hit local sync point for attribute update");
        attrd_ack_waitlist_clients(attrd_sync_point_local, xml);
    }
}
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index 3fa531fcf7..058f6d3230 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -1,185 +1,185 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/cluster.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
// Name of the node currently recorded as attribute writer (NULL when unset)
static char *peer_writer = NULL;
// Election object for choosing the writer (created by attrd_election_init())
static election_t *writer = NULL;
/*!
 * \internal
 * \brief Election callback run after the local node wins the writer election
 *
 * \param[in] user_data  Ignored
 *
 * \return G_SOURCE_REMOVE
 */
static gboolean
attrd_election_cb(gpointer user_data)
{
    // Record the local node as the writer first
    attrd_declare_winner();

    // Bring all peers up to date with our view (NULL means every peer)
    attrd_peer_sync(NULL);

    /* As the winner, write the values of all attributes, as we know them, to
     * the CIB.
     */
    attrd_write_attributes(attrd_write_all);

    return G_SOURCE_REMOVE;
}
/*!
 * \internal
 * \brief Create the election object used to choose the attribute writer
 *
 * The election is created with the local node's name and attrd_election_cb()
 * as its callback.
 */
void
attrd_election_init(void)
{
// NOTE(review): 120000 is presumably the election timeout in milliseconds --
// confirm against election_init()
- writer = election_init(PCMK__VALUE_ATTRD, attrd_cluster->uname, 120000,
- attrd_election_cb);
+ writer = election_init(PCMK__VALUE_ATTRD, attrd_cluster->priv->node_name,
+ 120000, attrd_election_cb);
}
/*!
 * \internal
 * \brief Tear down the writer election object
 */
void
attrd_election_fini(void)
{
    election_fini(writer);
}
/*!
 * \internal
 * \brief Vote in a new writer election unless one is unnecessary
 *
 * No election is started if a writer is already recorded, an election is
 * already in progress, or this daemon is shutting down.
 */
void
attrd_start_election_if_needed(void)
{
    if (peer_writer != NULL) {
        return;     // A writer is already known
    }
    if (election_state(writer) == election_in_progress) {
        return;     // Already electing
    }
    if (attrd_shutting_down(false)) {
        return;
    }

    crm_info("Starting an election to determine the writer");
    election_vote(writer);
}
/*!
 * \internal
 * \brief Check whether the local node has won the writer election
 *
 * \return true if the election state is election_won, otherwise false
 */
bool
attrd_election_won(void)
{
    const bool won = (election_state(writer) == election_won);

    return won;
}
/*!
 * \internal
 * \brief Process an election message (vote) received from a peer
 *
 * \param[in]     peer  Peer that sent the message
 * \param[in,out] xml   Election message (the peer's name is added as source)
 */
void
attrd_handle_election_op(const pcmk__node_status_t *peer, xmlNode *xml)
{
    // Remember the state before counting, to interpret a "lost" result below
    const enum election_result before = election_state(writer);
    enum election_result outcome = 0;

    crm_xml_add(xml, PCMK__XA_SRC, peer->name);

    // Don't become writer if we're shutting down
    outcome = election_count_vote(writer, xml, !attrd_shutting_down(false));

    if (outcome == election_start) {
        crm_debug("Unsetting writer (was %s) and starting new election",
                  peer_writer? peer_writer : "unset");
        free(peer_writer);
        peer_writer = NULL;
        election_vote(writer);

    } else if (outcome == election_lost) {
        /* The election API does not distinguish "we just lost to this peer"
         * from "we had already lost, and this vote is being discarded".
         *
         * In the former case, tentatively record this peer as writer (another
         * peer may still win, which attrd_check_for_new_writer() will tell
         * us), so that attrd_start_election_if_needed() doesn't start a new
         * election. The test below approximates that case.
         */
        if ((peer_writer == NULL) || (before != election_lost)) {
            pcmk__str_update(&peer_writer, peer->name);
            crm_debug("Election lost, presuming %s is writer for now",
                      peer_writer);
        }

    } else if (outcome == election_in_progress) {
        election_check(writer);

    } else {
        crm_info("Ignoring election op from %s due to error", peer->name);
    }
}
/*!
 * \internal
 * \brief Check whether a peer's message claims that the peer is the writer
 *
 * If the peer reports an election state of election_won, either start a new
 * election (when the local node also believes it won and the peer is not the
 * local node) or record the peer as the new writer (when it differs from the
 * currently recorded one).
 *
 * \param[in] peer  Peer that sent the message
 * \param[in] xml   Message containing the peer's writer state
 *
 * \return true if the peer reported election_won, otherwise false
 */
bool
attrd_check_for_new_writer(const pcmk__node_status_t *peer, const xmlNode *xml)
{
int peer_state = 0;
// peer_state keeps its initial value of 0 if the attribute can't be read
crm_element_value_int(xml, PCMK__XA_ATTR_WRITER, &peer_state);
if (peer_state == election_won) {
// Two different nodes both claiming victory: settle it with a new election
if ((election_state(writer) == election_won)
- && !pcmk__str_eq(peer->name, attrd_cluster->uname,
+ && !pcmk__str_eq(peer->name, attrd_cluster->priv->node_name,
pcmk__str_casei)) {
crm_notice("Detected another attribute writer (%s), starting new "
"election",
peer->name);
election_vote(writer);
} else if (!pcmk__str_eq(peer->name, peer_writer, pcmk__str_casei)) {
crm_notice("Recorded new attribute writer: %s (was %s)",
peer->name, pcmk__s(peer_writer, "unset"));
pcmk__str_update(&peer_writer, peer->name);
}
}
return (peer_state == election_won);
}
/*!
 * \internal
 * \brief Record the local node as the attribute writer
 */
void
attrd_declare_winner(void)
{
// Log the previous writer before it is replaced below
crm_notice("Recorded local node as attribute writer (was %s)",
(peer_writer? peer_writer : "unset"));
- pcmk__str_update(&peer_writer, attrd_cluster->uname);
+ pcmk__str_update(&peer_writer, attrd_cluster->priv->node_name);
}
void
attrd_remove_voter(const pcmk__node_status_t *peer)
{
election_remove(writer, peer->name);
if ((peer_writer != NULL)
&& pcmk__str_eq(peer->name, peer_writer, pcmk__str_casei)) {
free(peer_writer);
peer_writer = NULL;
crm_notice("Lost attribute writer %s", peer->name);
/* Clear any election dampening in effect. Otherwise, if the lost writer
* had just won, the election could fizzle out with no new writer.
*/
election_clear_dampening(writer);
/* If the writer received attribute updates during its shutdown, it will
* not have written them to the CIB. Ensure we get a new writer so they
* are written out. This means that every node that sees the writer
* leave will start a new election, but that's better than losing
* attributes.
*/
attrd_start_election_if_needed();
/* If an election is in progress, we need to call election_check(), in case
* this lost peer is the only one that hasn't voted, otherwise the election
* would be pending until it's timed out.
*/
} else if (election_state(writer) == election_in_progress) {
crm_debug("Checking election status upon loss of voter %s", peer->name);
election_check(writer);
}
}
/*!
 * \internal
 * \brief Add the local election state to XML as the writer attribute
 *
 * \param[in,out] xml  XML to add the attribute to
 */
void
attrd_xml_add_writer(xmlNode *xml)
{
    const int state = election_state(writer);

    crm_xml_add_int(xml, PCMK__XA_ATTR_WRITER, state);
}
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
index 1769a5e4cf..9b74944f60 100644
--- a/daemons/attrd/attrd_ipc.c
+++ b/daemons/attrd/attrd_ipc.c
@@ -1,627 +1,628 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/acl_internal.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/util.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
// IPC server for client connections (set up by attrd_init_ipc(), destroyed by attrd_ipc_fini())
static qb_ipcs_service_t *ipcs = NULL;
/*!
 * \internal
 * \brief Build the XML reply to a client query
 *
 * The reply always carries the message type, subtype, and protocol version.
 * The attribute name and per-node values are added only if the requested
 * attribute exists.
 *
 * \param[in] attr Name of requested attribute
 * \param[in] host Name of requested host (or NULL for all hosts)
 *
 * \return New XML reply
 * \note Caller is responsible for freeing the resulting XML
 */
static xmlNode *build_query_reply(const char *attr, const char *host)
{
xmlNode *reply = pcmk__xe_create(NULL, __func__);
attribute_t *a;
crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(reply, PCMK__XA_SUBT, PCMK__ATTRD_CMD_QUERY);
crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);
/* If desired attribute exists, add its value(s) to the reply */
a = g_hash_table_lookup(attributes, attr);
if (a) {
attribute_value_t *v;
xmlNode *host_value;
crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr);
/* Allow caller to use "localhost" to refer to local node */
if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) {
- host = attrd_cluster->uname;
+ host = attrd_cluster->priv->node_name;
crm_trace("Mapped localhost to %s", host);
}
/* If a specific node was requested, add its value */
if (host) {
// A node element is added even if the host has no value for this
// attribute; the value passed is then NULL
v = g_hash_table_lookup(a->values, host);
host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
pcmk__xe_add_node(host_value, host, 0);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE,
(v? v->current : NULL));
/* Otherwise, add all nodes' values */
} else {
GHashTableIter iter;
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
pcmk__xe_add_node(host_value, v->nodename, 0);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current);
}
}
}
return reply;
}
/*!
 * \internal
 * \brief Handle a client's request to clear resource failure attributes
 *
 * If the minimum protocol version among peers is at least 2, the request is
 * relayed to all peers unchanged. Otherwise it is converted into a
 * regex-based update with no value and handled by attrd_client_update().
 *
 * \param[in,out] request  Client request
 *
 * \return NULL when relayed, otherwise the result of attrd_client_update()
 */
xmlNode *
attrd_client_clear_failure(pcmk__request_t *request)
{
    xmlNode *xml = request->xml;
    const char *rsc = NULL;
    const char *op = NULL;
    const char *interval_spec = NULL;

    if (minimum_protocol_version >= 2) {
        /* Propagate to all peers (including ourselves).
         * This ends up at attrd_peer_message().
         */
        attrd_send_message(NULL, xml, false);
        pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
        return NULL;
    }

    rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
    op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
    interval_spec = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_INTERVAL);

    // From here on, the request is treated as an update
    crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);

    // Choose the regular expression that selects the attributes to clear
    if (rsc == NULL) {
        crm_xml_add(xml, PCMK__XA_ATTR_REGEX, ATTRD_RE_CLEAR_ALL);

    } else {
        char *pattern = NULL;

        if (op != NULL) {
            guint interval_ms = 0U;

            pcmk_parse_interval_spec(interval_spec, &interval_ms);
            pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
                                        rsc, op, interval_ms);
        } else {
            pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
        }

        crm_xml_add(xml, PCMK__XA_ATTR_REGEX, pattern);
        free(pattern);
    }

    // With neither name nor value set, the update deletes via the regex
    pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_NAME);
    pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);

    return attrd_client_update(request);
}
/*!
 * \internal
 * \brief Handle a client's request to remove all values for a peer
 *
 * The peer may be given by name or, if no name is given, by node ID. The
 * client is acknowledged immediately; if a host name could be determined, the
 * request is relayed to all peers.
 *
 * \param[in,out] request  Client request
 *
 * \return NULL (no reply beyond the acknowledgement)
 */
xmlNode *
attrd_client_peer_remove(pcmk__request_t *request)
{
    xmlNode *xml = request->xml;

    // Host and ID are not used in combination, rather host has precedence
    const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);

    /* Owns the name when it has to be obtained from the cluster layer. This
     * must be the only declaration of host_alloc in this function: a second
     * one in an inner scope would shadow it, and the free() below would then
     * never release the allocated name.
     */
    char *host_alloc = NULL;

    attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);

    if (host == NULL) {
        int nodeid = 0;

        crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID, &nodeid);
        if (nodeid > 0) {
            pcmk__node_status_t *node = NULL;

            node = pcmk__search_node_caches(nodeid, NULL,
                                            pcmk__node_search_cluster_member);
            if ((node != NULL) && (node->name != NULL)) {
                // Use cached name if available
                host = node->name;
            } else {
                // Otherwise ask cluster layer
                host_alloc = pcmk__cluster_node_name(nodeid);
                host = host_alloc;
            }
            pcmk__xe_add_node(xml, host, 0);
        }
    }

    if (host) {
        crm_info("Client %s is requesting all values for %s be removed",
                 pcmk__client_name(request->ipc_client), host);
        attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
    } else {
        crm_info("Ignoring request by client %s to remove all peer values without specifying peer",
                 pcmk__client_name(request->ipc_client));
    }

    // host may point into host_alloc, so free only after the last use of host
    free(host_alloc);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}
/*!
 * \internal
 * \brief Handle a client's query for an attribute's value(s)
 *
 * \param[in,out] request  Client request (its result is set here)
 *
 * \return Newly created reply XML on success, or NULL on error
 */
xmlNode *
attrd_client_query(pcmk__request_t *request)
{
    xmlNode *query = request->xml;
    xmlNode *reply = NULL;
    const char *attr = NULL;
    const char *host = NULL;

    crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client));

    // A query is meaningless without an attribute name
    attr = crm_element_value(query, PCMK__XA_ATTR_NAME);
    if (attr == NULL) {
        pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
                            "Ignoring malformed query from %s (no attribute name given)",
                            pcmk__client_name(request->ipc_client));
        return NULL;
    }

    host = crm_element_value(query, PCMK__XA_ATTR_HOST);
    reply = build_query_reply(attr, host);
    if (reply == NULL) {
        pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
                            "Could not respond to query from %s: could not create XML reply",
                            pcmk__client_name(request->ipc_client));
        return NULL;
    }

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    request->ipc_client->request_id = 0;
    return reply;
}
/*!
 * \internal
 * \brief Handle a client's request to write out all attributes immediately
 *
 * \param[in,out] request  Client request
 *
 * \return NULL (the client only receives an acknowledgement)
 */
xmlNode *
attrd_client_refresh(pcmk__request_t *request)
{
    crm_info("Updating all attributes");

    // Acknowledge first, then write everything without any write delay
    attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
    attrd_write_attributes(attrd_write_all|attrd_write_no_delay);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}
/*!
 * \internal
 * \brief Default an update's host to the local node if none was specified
 *
 * \param[in,out] xml  Update XML; the local node's name and ID are added if
 *                     the host attribute is missing
 */
static void
handle_missing_host(xmlNode *xml)
{
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
if (host == NULL) {
crm_trace("Inferring host");
- pcmk__xe_add_node(xml, attrd_cluster->uname, attrd_cluster->nodeid);
+ pcmk__xe_add_node(xml, attrd_cluster->priv->node_name,
+ attrd_cluster->nodeid);
}
}
/*!
 * \internal
 * \brief Expand a regex-based update into one child update per matching name
 *
 * If no attribute name is given but a regular expression is, a PCMK_XE_OP
 * child is added to \p xml for every known attribute whose name matches.
 *
 * \param[in,out] xml    Update message
 * \param[in]     attr   Attribute name from the message (may be NULL)
 * \param[in]     value  Attribute value from the message (used for logging)
 * \param[in]     regex  Regular expression from the message (may be NULL)
 *
 * \return pcmk_rc_ok if a name was given or at least one attribute matched,
 *         EINVAL if the regex is invalid, pcmk_rc_op_unsatisfied if nothing
 *         matched, or pcmk_rc_bad_nvpair if neither name nor regex was given
 */
static int
expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex)
{
    GHashTableIter attr_iter;
    regex_t compiled;
    const char *name = NULL;
    bool any_match = false;

    if (attr != NULL) {
        return pcmk_rc_ok;          // Named update, nothing to expand
    }
    if (regex == NULL) {
        return pcmk_rc_bad_nvpair;  // Neither name nor regex
    }

    crm_debug("Setting %s to %s", regex, value);
    if (regcomp(&compiled, regex, REG_EXTENDED|REG_NOSUB) != 0) {
        return EINVAL;
    }

    // The attribute table's keys are the names tested against the regex
    g_hash_table_iter_init(&attr_iter, attributes);
    while (g_hash_table_iter_next(&attr_iter, (gpointer *) &name, NULL)) {
        xmlNode *child = NULL;

        if (regexec(&compiled, name, 0, NULL, 0) != 0) {
            continue;
        }

        child = pcmk__xe_create(xml, PCMK_XE_OP);
        crm_trace("Matched %s with %s", name, regex);
        any_match = true;

        /* The child gets the parent's attributes, except that the regex is
         * replaced by the concrete attribute name.
         */
        pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
        pcmk__xe_remove_attr(child, PCMK__XA_ATTR_REGEX);
        crm_xml_add(child, PCMK__XA_ATTR_NAME, name);
    }
    regfree(&compiled);

    /* A valid regex that matches nothing is not an error, but the caller
     * should not continue with any regex-related processing.
     */
    return any_match? pcmk_rc_ok : pcmk_rc_op_unsatisfied;
}
/*!
 * \internal
 * \brief Expand any regex in a client update and record errors in the result
 *
 * \param[in,out] request  Client update request
 *
 * \return Return code of expand_regexes()
 */
static int
handle_regexes(pcmk__request_t *request)
{
    xmlNode *xml = request->xml;
    const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
    const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
    const char *regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX);
    const int rc = expand_regexes(xml, attr, value, regex);

    switch (rc) {
        case EINVAL:
            pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
                                "Bad regex '%s' for update from client %s", regex,
                                pcmk__client_name(request->ipc_client));
            break;

        case pcmk_rc_bad_nvpair:
            crm_err("Update request did not specify attribute or regular expression");
            pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
                                "Client %s update request did not specify attribute or regular expression",
                                pcmk__client_name(request->ipc_client));
            break;

        default:
            // Other codes (including success) need no result set here
            break;
    }
    return rc;
}
/*!
 * \internal
 * \brief Replace an expandable value in an update with its expanded integer
 *
 * \param[in,out] value  Where the update's value is stored; re-pointed at the
 *                       new value in \p xml if an expansion takes place
 * \param[in,out] xml    Update XML
 * \param[in]     op     Requested operation
 * \param[in]     attr   Name of attribute being updated
 *
 * \return EINVAL if \p op is a delay-only update for an attribute that does
 *         not exist, otherwise pcmk_rc_ok
 */
static int
handle_value_expansion(const char **value, xmlNode *xml, const char *op,
                       const char *attr)
{
    attribute_t *a = g_hash_table_lookup(attributes, attr);
    const char *current = NULL;
    int expanded = 0;

    if ((a == NULL)
        && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
        return EINVAL;
    }

    if ((*value == NULL) || !attrd_value_needs_expansion(*value)) {
        return pcmk_rc_ok;
    }

    // Expansion is relative to the host's current value, if it has one
    if (a != NULL) {
        const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
        attribute_value_t *v = g_hash_table_lookup(a->values, host);

        if (v != NULL) {
            current = v->current;
        }
    }

    expanded = attrd_expand_value(*value, current);
    crm_info("Expanded %s=%s to %d", attr, *value, expanded);
    crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, expanded);

    /* Replacing the value frees the previous memory, so re-query it */
    *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Broadcast a client's update, requesting confirmations if needed
 *
 * \param[in,out] request  Client request the update belongs to
 * \param[in]     xml      Update message to send to all peers
 */
static void
send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml)
{
    const bool cluster_sync_point = pcmk__str_eq(attrd_request_sync_point(xml),
                                                 PCMK__VALUE_CLUSTER,
                                                 pcmk__str_none);

    if (!cluster_sync_point) {
        /* The client waits on the local sync point or on none at all. For the
         * local sync point, the ACK is sent in attrd_peer_update. Otherwise
         * it was sent in handle_update_request right before this function
         * was called.
         */
        attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
        return;
    }

    /* The client waits on the cluster-wide sync point, so the ACK is withheld
     * until this attrd has broadcast the update and received confirmation
     * back from all peers.
     */
    attrd_expect_confirmations(request, attrd_cluster_sync_point_update);
    attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */
}
/*!
 * \internal
 * \brief Process one child of a multi-update message as its own update
 *
 * \param[in,out] child  Child update element
 * \param[in,out] data   Client request (pcmk__request_t *) the child is from
 *
 * \return pcmk_rc_ok always
 * \note This leaves request->xml pointing at \p child.
 */
static int
send_child_update(xmlNode *child, void *data)
{
    pcmk__request_t *request = data;

    /* attrd_client_update() sets the request's result, so it is not set
     * again here.
     */
    request->xml = child;
    attrd_client_update(request);

    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Handle a client's attribute update request
 *
 * A message with children is a multi-update: it is either broadcast as a
 * whole (after defaulting the host and expanding the value of each child) or
 * split into one call of this function per child, depending on the minimum
 * protocol version. A message with a regex is first expanded into children
 * and then handled by calling this function again. Otherwise, the single
 * update is broadcast to the cluster.
 *
 * \param[in,out] request  Client request (its result is set here)
 *
 * \return NULL always (acknowledgements are sent elsewhere)
 */
xmlNode *
attrd_client_update(pcmk__request_t *request)
{
xmlNode *xml = NULL;
const char *attr, *value, *regex;
CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL);
xml = request->xml;
/* If the message has children, that means it is a message from a newer
* client that supports sending multiple operations at a time. There are
* two ways we can handle that.
*/
if (xml->children != NULL) {
if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
/* First, if all peers support a certain protocol version, we can
* just broadcast the big message and they'll handle it. However,
* we also need to apply all the transformations in this function
* to the children since they don't happen anywhere else.
*/
for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL,
NULL);
child != NULL; child = pcmk__xe_next_same(child)) {
attr = crm_element_value(child, PCMK__XA_ATTR_NAME);
value = crm_element_value(child, PCMK__XA_ATTR_VALUE);
handle_missing_host(child);
// On failure nothing is broadcast, though children handled so far
// remain modified
if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
}
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
/* Save the original xml node pointer so it can be restored after iterating
* over all the children.
*/
xmlNode *orig_xml = request->xml;
/* Second, if they do not support that protocol version, split it
* up into individual messages and call attrd_client_update on
* each one.
*/
pcmk__xe_foreach_child(xml, PCMK_XE_OP, send_child_update, request);
request->xml = orig_xml;
}
return NULL;
}
attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX);
if (handle_regexes(request) != pcmk_rc_ok) {
/* Error handling was already dealt with in handle_regexes, so just return. */
return NULL;
} else if (regex) {
/* Recursively call attrd_client_update on the new message with regexes
* expanded. If supported by the attribute daemon, this means that all
* matches can also be handled atomically.
*/
// The expanded message now has children, so the recursive call takes the
// multi-update branch above
return attrd_client_update(request);
}
handle_missing_host(xml);
if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
crm_debug("Broadcasting %s[%s]=%s%s",
attr, crm_element_value(xml, PCMK__XA_ATTR_HOST),
value, (attrd_election_won()? " (writer)" : ""));
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
/*!
 * \internal
 * \brief Decide whether to accept a new client IPC connection
 *
 * \param[in,out] c    New connection
 * \param[in]     uid  Client user id
 * \param[in]     gid  Client group id
 *
 * \return pcmk_ok on success, -ECONNREFUSED if shutting down, or -ENOMEM if
 *         the client object could not be created
 */
static int32_t
attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    crm_trace("New client connection %p", c);

    if (!attrd_shutting_down(false)) {
        return (pcmk__new_client(c, uid, gid) != NULL)? pcmk_ok : -ENOMEM;
    }

    crm_info("Ignoring new connection from pid %d during shutdown",
             pcmk__client_pid(c));
    return -ECONNREFUSED;
}
/*!
 * \internal
 * \brief Clean up after a client IPC connection is closed
 *
 * \param[in] c  Connection that was closed
 *
 * \return FALSE (i.e. do not re-run this callback)
 */
static int32_t
attrd_ipc_closed(qb_ipcs_connection_t *c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client != NULL) {
        crm_trace("Cleaning up closed client connection %p", c);

        // The client can no longer be acknowledged at a sync point ...
        attrd_remove_client_from_waitlist(client);

        // ... nor is anyone left to wait for peer confirmations on its behalf
        attrd_do_not_wait_for_client(client);

        pcmk__free_client(client);
        return FALSE;
    }

    crm_trace("Ignoring request to clean up unknown connection %p", c);
    return FALSE;
}
/*!
 * \internal
 * \brief Clean up after a client IPC connection is destroyed
 *
 * \param[in,out] c  Connection being destroyed
 *
 * \note A destroyed connection is treated exactly like a closed one. This
 *       wrapper exists only because the two callbacks have different return
 *       types.
 */
static void
attrd_ipc_destroy(qb_ipcs_connection_t *c)
{
    crm_trace("Destroying client connection %p", c);
    (void) attrd_ipc_closed(c);
}
/*!
 * \internal
 * \brief Handle a message received from an IPC client
 *
 * Parse the IPC data into XML, wrap it in a request object, and hand it to
 * attrd_handle_request(). Unparseable data is answered with a
 * CRM_EX_PROTOCOL acknowledgement.
 *
 * \param[in,out] c     Connection the message arrived on
 * \param[in]     data  Raw IPC data
 * \param[in]     size  Size of \p data (unused)
 *
 * \return 0 always
 */
static int32_t
attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    pcmk__client_t *client = pcmk__find_client(c);
    xmlNode *xml = NULL;

    // Sanity-check, and parse XML from IPC data
    CRM_CHECK((c != NULL) && (client != NULL), return 0);
    if (data == NULL) {
        crm_debug("No IPC data from PID %d", pcmk__client_pid(c));
        return 0;
    }

    xml = pcmk__client_data2xml(client, data, &id, &flags);

    if (xml == NULL) {
        crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c));
        pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
                           CRM_EX_PROTOCOL);
        return 0;

    } else {
        pcmk__request_t request = {
            .ipc_client     = client,
            .ipc_id         = id,
            .ipc_flags      = flags,
            .peer           = NULL,
            .xml            = xml,
            .call_options   = 0,
            .result         = PCMK__UNKNOWN_RESULT,
        };

        CRM_ASSERT(client->user != NULL);
        pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user);

        request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);

        /* A message without a task can't be handled, but the parsed XML must
         * still be freed, so jump to the common cleanup instead of returning
         * directly.
         */
        CRM_CHECK(request.op != NULL, goto done);

        attrd_handle_request(&request);
        pcmk__reset_request(&request);
    }

done:
    pcmk__xml_free(xml);
    return 0;
}
// IPC server callbacks, passed to pcmk__serve_attrd_ipc() in attrd_init_ipc()
static struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = attrd_ipc_accept,
.connection_created = NULL,
.msg_process = attrd_ipc_dispatch,
.connection_closed = attrd_ipc_closed,
.connection_destroyed = attrd_ipc_destroy
};
/*!
 * \internal
 * \brief Shut down attrd IPC communication and release related resources
 */
void
attrd_ipc_fini(void)
{
    // Drop connected clients before destroying the server they belong to
    if (ipcs != NULL) {
        pcmk__drop_all_clients(ipcs);
        qb_ipcs_destroy(ipcs);
        ipcs = NULL;
    }

    attrd_unregister_handlers();
    pcmk__client_cleanup();
}
/*!
 * \internal
 * \brief Start serving attrd IPC using this file's connection callbacks
 */
void
attrd_init_ipc(void)
{
    pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks);
}
diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c
index ce69a138c3..6ef12af424 100644
--- a/daemons/attrd/attrd_messages.c
+++ b/daemons/attrd/attrd_messages.c
@@ -1,345 +1,346 @@
/*
* Copyright 2022-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <crm/common/messages_internal.h>
#include <crm/cluster/internal.h> // pcmk__get_node()
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
// Lowest attrd protocol version in use (-1 until set); compared against
// feature thresholds such as ATTRD_SUPPORTS_MULTI_MESSAGE()
int minimum_protocol_version = -1;
// Table of request handlers, as returned by pcmk__register_handlers()
static GHashTable *attrd_handlers = NULL;
/*!
 * \internal
 * \brief Check whether an XML attribute is the sync point attribute
 *
 * \param[in] attr  XML attribute to check
 * \param[in] data  Ignored
 *
 * \return true if the attribute's name is PCMK__XA_ATTR_SYNC_POINT
 */
static bool
is_sync_point_attr(xmlAttrPtr attr, void *data)
{
    const char *name = (const char *) attr->name;

    return pcmk__str_eq(name, PCMK__XA_ATTR_SYNC_POINT, pcmk__str_none);
}
/*!
 * \internal
 * \brief Strip the sync point attribute from an element and its op children
 *
 * \param[in,out] xml   Element to strip
 * \param[in]     data  Ignored
 *
 * \return pcmk_rc_ok always
 */
static int
remove_sync_point_attribute(xmlNode *xml, void *data)
{
    // This element first ...
    pcmk__xe_remove_matching_attrs(xml, is_sync_point_attr, NULL);

    // ... then, recursively, each of its PCMK_XE_OP children
    pcmk__xe_foreach_child(xml, PCMK_XE_OP, remove_sync_point_attribute, NULL);

    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Drop a sync point that cannot be honored for a multi-update request
 *
 * A sync point on a multi-update IPC message won't work if some attrd is too
 * old to support multi-update messages. Rather than pretend to support that,
 * strip the sync point here so the client is ACKed immediately.
 *
 * \param[in,out] request  Client request to check
 */
static void
remove_unsupported_sync_points(pcmk__request_t *request)
{
    if (request->xml->children == NULL) {
        return;     // Not a multi-update message
    }
    if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
        return;     // Every node can handle it
    }
    if (!attrd_request_has_sync_point(request->xml)) {
        return;     // Nothing to strip
    }

    crm_warn("Ignoring sync point in request from %s because not all nodes support it",
             pcmk__request_origin(request));
    remove_sync_point_attribute(request->xml, NULL);
}
/*!
 * \internal
 * \brief Reject a request whose type is unknown or not allowed from its origin
 *
 * \param[in,out] request  Request to reject (its result is set to a protocol
 *                         error)
 *
 * \return NULL always
 */
static xmlNode *
handle_unknown_request(pcmk__request_t *request)
{
    const char *origin_type = pcmk__request_origin_type(request);
    const char *origin = pcmk__request_origin(request);

    crm_err("Unknown IPC request %s from %s %s",
            request->op, origin_type, origin);

    pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
                        "Unknown request type '%s' (bug?)", request->op);
    return NULL;
}
/*!
 * \internal
 * \brief Handle a clear-failure request from a client or a peer
 *
 * \param[in,out] request  Request to handle
 *
 * \return NULL for a peer request, otherwise the result of
 *         attrd_client_clear_failure()
 */
static xmlNode *
handle_clear_failure_request(pcmk__request_t *request)
{
    if (request->peer == NULL) {
        // Request came from a local client
        remove_unsupported_sync_points(request);

        if (attrd_request_has_sync_point(request->xml)) {
            /* The client wants to wait for a sync point, so put it on the
             * wait list. It gets no ACK, and thus stays blocked, until that
             * sync point is hit.
             */
            attrd_add_client_to_waitlist(request);

        } else {
            /* No sync point was requested, so the client is acknowledged
             * right away.
             */
            attrd_send_ack(request->ipc_client, request->ipc_id,
                           request->ipc_flags);
        }
        return attrd_client_clear_failure(request);
    }

    /* It is not currently possible to receive this as a peer command,
     * but will be, if we one day enable propagating this operation.
     */
    attrd_peer_clear_failure(request);
    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}
/*!
 * \internal
 * \brief Handle a confirmation message from a peer
 *
 * Confirmations are valid only from peers; one arriving from anywhere else is
 * rejected as an unknown request.
 *
 * \param[in,out] request  Request to handle (its result is set to a protocol
 *                         error if the message has no usable call ID, or to
 *                         success once the confirmation is processed)
 *
 * \return NULL always
 */
static xmlNode *
handle_confirm_request(pcmk__request_t *request)
{
    if (request->peer != NULL) {
        int callid;

        crm_debug("Received confirmation from %s", request->peer);

        if (crm_element_value_int(request->xml, PCMK__XA_CALL_ID,
                                  &callid) == -1) {
            // Keep this error as the final result; do not overwrite it
            pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
                             "Could not get callid from XML");
        } else {
            attrd_handle_confirmation(callid, request->peer);
            pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
        }
        return NULL;

    } else {
        return handle_unknown_request(request);
    }
}
/*!
 * \internal
 * \brief Handle a query request (accepted only from local clients)
 *
 * \param[in,out] request  Request to handle
 *
 * \return Reply from attrd_client_query() for a client request, otherwise
 *         the result of handle_unknown_request()
 */
static xmlNode *
handle_query_request(pcmk__request_t *request)
{
    if (request->peer == NULL) {
        return attrd_client_query(request);
    }
    return handle_unknown_request(request);
}
/*!
 * \internal
 * \brief Handle a peer-removal request from a client or a peer
 *
 * \param[in,out] request  Request to handle
 *
 * \return NULL for a peer request, otherwise the result of
 *         attrd_client_peer_remove()
 */
static xmlNode *
handle_remove_request(pcmk__request_t *request)
{
    const char *host = NULL;
    bool reap = false;

    if (request->peer == NULL) {
        return attrd_client_peer_remove(request);
    }

    host = crm_element_value(request->xml, PCMK__XA_ATTR_HOST);

    // A message without a usable reap flag means "reap" (backward compatibility)
    if (pcmk__xe_get_bool_attr(request->xml, PCMK__XA_REAP,
                               &reap) != pcmk_rc_ok) {
        reap = true;
    }

    attrd_peer_remove(host, reap, request->peer);
    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}
/*!
 * \internal
 * \brief Handle a refresh request (accepted only from local clients)
 *
 * \param[in,out] request  Request to handle
 *
 * \return Result of attrd_client_refresh() for a client request, otherwise
 *         the result of handle_unknown_request()
 */
static xmlNode *
handle_refresh_request(pcmk__request_t *request)
{
    if (request->peer == NULL) {
        return attrd_client_refresh(request);
    }
    return handle_unknown_request(request);
}
/*!
 * \internal
 * \brief Handle a sync response (accepted only from peers)
 *
 * A sync response from an IPC client is rejected as an unknown request. One
 * from a peer is first checked for a writer claim, and then, unless the peer
 * is the local node, its attribute values are applied.
 *
 * \param[in,out] request  Request to handle
 *
 * \return NULL always
 */
static xmlNode *
handle_sync_response_request(pcmk__request_t *request)
{
if (request->ipc_client != NULL) {
return handle_unknown_request(request);
} else {
if (request->peer != NULL) {
pcmk__node_status_t *peer =
pcmk__get_node(0, request->peer, NULL,
pcmk__node_search_cluster_member);
bool peer_won = attrd_check_for_new_writer(peer, request->xml);
// Our own sync responses need no processing (names compared
// case-insensitively)
- if (!pcmk__str_eq(peer->name, attrd_cluster->uname,
+ if (!pcmk__str_eq(peer->name, attrd_cluster->priv->node_name,
pcmk__str_casei)) {
attrd_peer_sync_response(peer, peer_won, request->xml);
}
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
}
/*!
 * \internal
 * \brief Handle an attribute update request
 *
 * Updates from a cluster peer are applied with attrd_peer_update() and marked
 * as successfully completed. Updates from any other origin are acknowledged
 * (now, or later when a sync point is reached) and then passed on to
 * attrd_client_update().
 *
 * \param request  Request to process
 *
 * \return NULL for peer requests, otherwise the result of
 *         attrd_client_update()
 */
static xmlNode *
handle_update_request(pcmk__request_t *request)
{
    const char *host = NULL;
    pcmk__node_status_t *sender = NULL;

    if (request->peer == NULL) {
        remove_unsupported_sync_points(request);

        if (!attrd_request_has_sync_point(request->xml)) {
            /* No sync point was requested, so acknowledge right away.
             *
             * attrd_client_update() can be called recursively, which makes
             * it hard to pick a place there to send the ACK. Sending it from
             * here ensures the client only ever receives one.
             *
             * NOTE(review): this passes request->flags, whereas
             * attrd_handle_request() uses request->ipc_flags when replying
             * to the same client -- confirm which field is intended.
             */
            attrd_send_ack(request->ipc_client, request->ipc_id,
                           request->flags|crm_ipc_client_response);

        } else {
            /* The client named a sync point it wants to wait for, so put it
             * on the wait list. Clients on that list get no ACK until their
             * sync point is hit, and stay stalled until they receive a
             * response.
             */
            attrd_add_client_to_waitlist(request);
        }

        return attrd_client_update(request);
    }

    // Update relayed by a cluster peer
    host = crm_element_value(request->xml, PCMK__XA_ATTR_HOST);
    sender = pcmk__get_node(0, request->peer, NULL,
                            pcmk__node_search_cluster_member);

    attrd_peer_update(sender, request->xml, host, false);
    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}
/*!
 * \internal
 * \brief Build the table that maps attribute manager commands to handlers
 *
 * The value returned by pcmk__register_handlers() is stored in the
 * attrd_handlers global.
 */
static void
attrd_register_handlers(void)
{
pcmk__server_command_t handlers[] = {
{ PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request },
{ PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request },
{ PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request },
{ PCMK__ATTRD_CMD_QUERY, handle_query_request },
{ PCMK__ATTRD_CMD_REFRESH, handle_refresh_request },
{ PCMK__ATTRD_CMD_SYNC_RESPONSE, handle_sync_response_request },
// All three update variants share a single handler
{ PCMK__ATTRD_CMD_UPDATE, handle_update_request },
{ PCMK__ATTRD_CMD_UPDATE_DELAY, handle_update_request },
{ PCMK__ATTRD_CMD_UPDATE_BOTH, handle_update_request },
// NULL command name paired with the unknown-request handler; presumably
// this entry also terminates the table -- confirm against
// pcmk__register_handlers()
{ NULL, handle_unknown_request },
};
attrd_handlers = pcmk__register_handlers(handlers);
}
/*!
 * \internal
 * \brief Destroy the command handler table, if one has been created
 *
 * Afterward attrd_handlers is NULL, so calling this again is harmless.
 */
void
attrd_unregister_handlers(void)
{
    if (attrd_handlers == NULL) {
        return;
    }

    g_hash_table_destroy(attrd_handlers);
    attrd_handlers = NULL;
}
/*!
 * \internal
 * \brief Process a request, send any reply, and log the outcome
 *
 * The handler table is created on first use. A reply is sent only when the
 * request came from an IPC client. The outcome is logged at warning level if
 * the result is not OK and at debug level otherwise, and the request is reset
 * before returning.
 *
 * \param request  Request to process
 */
void
attrd_handle_request(pcmk__request_t *request)
{
    xmlNode *response = NULL;
    char *summary = NULL;
    const char *exit_reason = NULL;
    const char *open_paren = "";
    const char *close_paren = "";

    if (attrd_handlers == NULL) {
        attrd_register_handlers();
    }

    response = pcmk__process_request(request, attrd_handlers);

    if (response != NULL) {
        crm_log_xml_trace(response, "Reply");

        if (request->ipc_client == NULL) {
            crm_err("Not sending CPG reply to client");
        } else {
            pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, response,
                               request->ipc_flags);
        }

        pcmk__xml_free(response);
    }

    // The exit reason, when there is one, is appended in parentheses
    exit_reason = request->result.exit_reason;
    if (exit_reason != NULL) {
        open_paren = " (";
        close_paren = ")";
    }

    summary = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s",
                                request->op,
                                pcmk__request_origin_type(request),
                                pcmk__request_origin(request),
                                pcmk_exec_status_str(request->result.execution_status),
                                open_paren, pcmk__s(exit_reason, ""),
                                close_paren);

    if (pcmk__result_ok(&request->result)) {
        crm_debug("%s", summary);
    } else {
        crm_warn("%s", summary);
    }

    free(summary);
    pcmk__reset_request(request);
}
/*!
\internal
\brief Broadcast private attribute for local node with protocol version

Builds an update request that sets the CRM_ATTR_PROTOCOL attribute to
ATTRD_PROTOCOL_VERSION for the local node, marks it private, and hands it to
attrd_send_message() with a NULL destination and no confirmation requested.
*/
void
attrd_broadcast_protocol(void)
{
xmlNode *attrd_op = pcmk__xe_create(NULL, __func__);
crm_xml_add(attrd_op, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(attrd_op, PCMK__XA_SRC, crm_system_name);
crm_xml_add(attrd_op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL);
crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION);
crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1);
// Identify the local node (name and ID) in the request
- pcmk__xe_add_node(attrd_op, attrd_cluster->uname, attrd_cluster->nodeid);
+ pcmk__xe_add_node(attrd_op, attrd_cluster->priv->node_name,
+ attrd_cluster->nodeid);
crm_debug("Broadcasting attrd protocol version %s for node %s",
- ATTRD_PROTOCOL_VERSION, attrd_cluster->uname);
+ ATTRD_PROTOCOL_VERSION, attrd_cluster->priv->node_name);
attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */
// The request XML is owned here and released after sending
pcmk__xml_free(attrd_op);
}
/*!
 * \internal
 * \brief Stamp an attribute manager message and send it over the cluster layer
 *
 * The message type, protocol version, and writer information are added to
 * \p data before it is sent.
 *
 * \param node     Destination node (NULL is described below as "all")
 * \param data     Message XML (modified in place)
 * \param confirm  Whether to ask the destination to confirm the message
 *
 * \return Return value of pcmk__cluster_send_message()
 */
gboolean
attrd_send_message(pcmk__node_status_t *node, xmlNode *data, bool confirm)
{
    const char *task = crm_element_value(data, PCMK_XA_TASK);
    const bool is_confirmation = pcmk__str_eq(task, PCMK__ATTRD_CMD_CONFIRM,
                                              pcmk__str_none);

    crm_xml_add(data, PCMK__XA_T, PCMK__VALUE_ATTRD);
    crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);

    /* Ask the destination peer (which could be every peer, if node is NULL)
     * to confirm that the message has been received and acted upon. A
     * confirmation message itself never carries this attribute.
     */
    if (!is_confirmation) {
        pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm);
    }

    attrd_xml_add_writer(data);
    return pcmk__cluster_send_message(node, pcmk__cluster_msg_attrd, data);
}
diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c
index 4af5264acd..3297dfa342 100644
--- a/daemons/based/based_messages.c
+++ b/daemons/based/based_messages.c
@@ -1,523 +1,522 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <glib.h>
#include <libxml/tree.h>
#include <crm/crm.h>
#include <crm/cib/internal.h>
#include <crm/common/xml.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <pacemaker-based.h>
/* Maximum number of diffs to ignore while waiting for a resync */
#define MAX_DIFF_RETRY 5
// Whether this instance is currently the primary ("R/W mode" in the log
// messages of cib_process_readwrite(), which toggles it)
bool based_is_primary = false;
// The CIB that cib_process_ping() digests and sync_our_cib() sends to peers
xmlNode *the_cib = NULL;
/*!
 * \internal
 * \brief Process a shutdown request or shutdown acknowledgement from a peer
 *
 * A message without PCMK__XA_CIB_ISREPLYTO is a peer announcing its own
 * shutdown and is only logged. A message with it acknowledges a shutdown
 * request of ours: if we are in fact shutting down, the CIB manager is
 * terminated; otherwise the message is rejected.
 *
 * \param op            Unused
 * \param options       Unused
 * \param section       Unused
 * \param req           Request XML
 * \param input         Unused
 * \param existing_cib  Unused
 * \param result_cib    Unused
 * \param answer        Always set to NULL
 *
 * \return pcmk_ok, or -EINVAL for an acknowledgement we did not ask for
 */
int
cib_process_shutdown_req(const char *op, int options, const char *section,
                         xmlNode *req, xmlNode *input, xmlNode *existing_cib,
                         xmlNode **result_cib, xmlNode **answer)
{
    const char *peer_name = crm_element_value(req, PCMK__XA_SRC);
    const char *reply_to = crm_element_value(req, PCMK__XA_CIB_ISREPLYTO);

    *answer = NULL;

    if (reply_to == NULL) {
        crm_info("Peer %s is requesting to shut down", peer_name);
        return pcmk_ok;
    }

    if (!cib_shutdown_flag) {
        crm_err("Peer %s mistakenly thinks we wanted to shut down", peer_name);
        return -EINVAL;
    }

    crm_info("Peer %s has acknowledged our shutdown request", peer_name);
    terminate_cib(__func__, 0);
    return pcmk_ok;
}
// @COMPAT: Remove when PCMK__CIB_REQUEST_NOOP is removed
/*!
 * \internal
 * \brief Process a request that requires no action
 *
 * Only \p op and \p answer are used; \p answer is set to NULL.
 *
 * \return Always pcmk_ok
 */
int
cib_process_noop(const char *op, int options, const char *section,
                 xmlNode *req, xmlNode *input, xmlNode *existing_cib,
                 xmlNode **result_cib, xmlNode **answer)
{
    *answer = NULL;
    crm_trace("Processing \"%s\" event", op);
    return pcmk_ok;
}
/*!
 * \internal
 * \brief Query or change whether this instance is the primary
 *
 * PCMK__CIB_REQUEST_IS_PRIMARY only reports the current state.
 * PCMK__CIB_REQUEST_PRIMARY switches to R/W mode, and any other operation
 * switches to R/O mode.
 *
 * \param op  Operation name (the only argument that is used)
 *
 * \return -EPERM if asked whether we are primary and we are not, otherwise
 *         pcmk_ok
 */
int
cib_process_readwrite(const char *op, int options, const char *section,
                      xmlNode *req, xmlNode *input, xmlNode *existing_cib,
                      xmlNode **result_cib, xmlNode **answer)
{
    crm_trace("Processing \"%s\" event", op);

    if (pcmk__str_eq(op, PCMK__CIB_REQUEST_IS_PRIMARY, pcmk__str_none)) {
        return based_is_primary? pcmk_ok : -EPERM;
    }

    if (!pcmk__str_eq(op, PCMK__CIB_REQUEST_PRIMARY, pcmk__str_none)) {
        // Anything else demotes us, if we are currently the primary
        if (based_is_primary) {
            crm_info("We are now in R/O mode");
            based_is_primary = false;
        }

    } else if (based_is_primary) {
        crm_debug("We are still in R/W mode");

    } else {
        crm_info("We are now in R/W mode");
        based_is_primary = true;
    }

    return pcmk_ok;
}
/* Set to 1 when a sync is requested, incremented when a diff is ignored,
* reset to 0 when a sync is received
*/
static int sync_in_progress = 0;
/*!
 * \internal
 * \brief Ask a peer (or all peers) to send us a full copy of the CIB
 *
 * Marks a sync as in progress and sends a PCMK__CIB_REQUEST_SYNC_TO_ONE
 * message over the cluster layer.
 *
 * \param host  Name of the peer to ask (NULL leaves the destination node
 *              unset; the log message describes that case as "all peers")
 */
void
send_sync_request(const char *host)
{
xmlNode *sync_me = pcmk__xe_create(NULL, "sync-me");
pcmk__node_status_t *peer = NULL;
crm_info("Requesting re-sync from %s", (host? host : "all peers"));
// Diffs are ignored by cib_server_process_diff() while this is nonzero
sync_in_progress = 1;
crm_xml_add(sync_me, PCMK__XA_T, PCMK__VALUE_CIB);
crm_xml_add(sync_me, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_SYNC_TO_ONE);
// Record the local node name ("localhost" when stand-alone) as the delegator
- crm_xml_add(sync_me, PCMK__XA_CIB_DELEGATED_FROM,
- stand_alone? "localhost" : crm_cluster->uname);
+ crm_xml_add(sync_me, PCMK__XA_CIB_DELEGATED_FROM, OUR_NODENAME);
if (host != NULL) {
peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster_member);
}
// The return value of the send is not checked here
pcmk__cluster_send_message(peer, pcmk__cluster_msg_based, sync_me);
pcmk__xml_free(sync_me);
}
/*!
 * \internal
 * \brief Answer a ping with a digest of our current CIB
 *
 * Creates a PCMK__XE_PING_RESPONSE element in \p *answer carrying the feature
 * set, a digest of the_cib, and the ping ID copied from the request. A
 * PCMK__XE_CIB_CALLDATA child is added; when the_cib is not NULL it receives
 * either a full copy of the_cib or an element with only the_cib's name and
 * attributes.
 *
 * \param op            Operation name (used for logging only)
 * \param options       Unused
 * \param section       Unused
 * \param req           Ping request XML
 * \param input         Unused
 * \param existing_cib  CIB whose version attributes are logged
 * \param result_cib    Unused
 * \param answer        Where to store the newly created response
 *
 * \return Always pcmk_ok
 */
int
cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
{
const char *host = crm_element_value(req, PCMK__XA_SRC);
const char *seq = crm_element_value(req, PCMK__XA_CIB_PING_ID);
char *digest = pcmk__digest_xml(the_cib, true);
xmlNode *wrapper = NULL;
crm_trace("Processing \"%s\" event %s from %s", op, seq, host);
*answer = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE);
crm_xml_add(*answer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_xml_add(*answer, PCMK__XA_DIGEST, digest);
crm_xml_add(*answer, PCMK__XA_CIB_PING_ID, seq);
wrapper = pcmk__xe_create(*answer, PCMK__XE_CIB_CALLDATA);
if (the_cib != NULL) {
// Presumably the first block runs when trace logging is enabled and the
// second otherwise -- confirm against the pcmk__if_tracing() definition
pcmk__if_tracing(
{
/* Append additional detail so the receiver can log the
* differences
*/
pcmk__xml_copy(wrapper, the_cib);
},
{
// Always include at least the version details
const char *name = (const char *) the_cib->name;
xmlNode *shallow = pcmk__xe_create(wrapper, name);
pcmk__xe_copy_attrs(shallow, the_cib, pcmk__xaf_none);
}
);
}
// The digest above is computed from the_cib, while the version numbers
// logged here are read from existing_cib
crm_info("Reporting our current digest to %s: %s for %s.%s.%s",
host, digest,
crm_element_value(existing_cib, PCMK_XA_ADMIN_EPOCH),
crm_element_value(existing_cib, PCMK_XA_EPOCH),
crm_element_value(existing_cib, PCMK_XA_NUM_UPDATES));
free(digest);
return pcmk_ok;
}
/*!
 * \internal
 * \brief Process a request to sync our CIB to all peers
 *
 * Only \p req is used; everything else is ignored.
 *
 * \return Return value of sync_our_cib()
 */
int
cib_process_sync(const char *op, int options, const char *section,
                 xmlNode *req, xmlNode *input, xmlNode *existing_cib,
                 xmlNode **result_cib, xmlNode **answer)
{
    const gboolean to_all_peers = TRUE;

    return sync_our_cib(req, to_all_peers);
}
/*!
 * \internal
 * \brief Process a CIB schema upgrade request
 *
 * A request that carries PCMK__XA_CIB_SCHEMA_MAX is applied locally via
 * cib_process_upgrade(). A request without it is first checked against a
 * scratch copy of the CIB: if the upgrade yields a later schema, the request
 * is re-sent to all nodes with PCMK__XA_CIB_SCHEMA_MAX set; otherwise the
 * originating peer, if it can still be found, is sent the failure code.
 *
 * \param op            Operation name
 * \param options       Call options (passed through to cib_process_upgrade())
 * \param section       CIB section (passed through)
 * \param req           Request XML
 * \param input         Request input (passed through)
 * \param existing_cib  Current CIB
 * \param result_cib    Passed through to cib_process_upgrade()
 * \param answer        Set to NULL here; also passed through
 *
 * \return Legacy return code (pcmk_ok on success, negative on failure)
 */
int
cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode * req, xmlNode * input,
xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer)
{
int rc = pcmk_ok;
*answer = NULL;
if (crm_element_value(req, PCMK__XA_CIB_SCHEMA_MAX) != NULL) {
/* The originator of an upgrade request sends it to the DC, without
* PCMK__XA_CIB_SCHEMA_MAX. If an upgrade is needed, the DC
* re-broadcasts the request with PCMK__XA_CIB_SCHEMA_MAX, and each node
* performs the upgrade (and notifies its local clients) here.
*/
return cib_process_upgrade(
op, options, section, req, input, existing_cib, result_cib, answer);
} else {
// Try the upgrade on a scratch copy so the live CIB is left untouched
xmlNode *scratch = pcmk__xml_copy(NULL, existing_cib);
const char *host = crm_element_value(req, PCMK__XA_SRC);
const char *original_schema = NULL;
const char *new_schema = NULL;
const char *client_id = crm_element_value(req, PCMK__XA_CIB_CLIENTID);
const char *call_opts = crm_element_value(req, PCMK__XA_CIB_CALLOPT);
const char *call_id = crm_element_value(req, PCMK__XA_CIB_CALLID);
crm_trace("Processing \"%s\" event", op);
original_schema = crm_element_value(existing_cib,
PCMK_XA_VALIDATE_WITH);
rc = pcmk__update_schema(&scratch, NULL, true, true);
rc = pcmk_rc2legacy(rc);
new_schema = crm_element_value(scratch, PCMK_XA_VALIDATE_WITH);
if (pcmk__cmp_schemas_by_name(new_schema, original_schema) > 0) {
// The scratch copy ended up on a later schema: tell every node to
// upgrade. rc is reset to pcmk_ok whatever the update call returned.
xmlNode *up = pcmk__xe_create(NULL, __func__);
rc = pcmk_ok;
crm_notice("Upgrade request from %s verified", host);
crm_xml_add(up, PCMK__XA_T, PCMK__VALUE_CIB);
crm_xml_add(up, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_UPGRADE);
crm_xml_add(up, PCMK__XA_CIB_SCHEMA_MAX, new_schema);
crm_xml_add(up, PCMK__XA_CIB_DELEGATED_FROM, host);
crm_xml_add(up, PCMK__XA_CIB_CLIENTID, client_id);
crm_xml_add(up, PCMK__XA_CIB_CALLOPT, call_opts);
crm_xml_add(up, PCMK__XA_CIB_CALLID, call_id);
pcmk__cluster_send_message(NULL, pcmk__cluster_msg_based, up);
pcmk__xml_free(up);
} else if(rc == pcmk_ok) {
// The update succeeded but the schema is not later than the original
rc = -pcmk_err_schema_unchanged;
}
if (rc != pcmk_ok) {
// Notify originating peer so it can notify its local clients
pcmk__node_status_t *origin = NULL;
origin = pcmk__search_node_caches(0, host,
pcmk__node_search_cluster_member);
crm_info("Rejecting upgrade request from %s: %s "
QB_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc,
(origin? origin->name : "lost"));
if (origin) {
xmlNode *up = pcmk__xe_create(NULL, __func__);
crm_xml_add(up, PCMK__XA_T, PCMK__VALUE_CIB);
crm_xml_add(up, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_UPGRADE);
crm_xml_add(up, PCMK__XA_CIB_DELEGATED_FROM, host);
crm_xml_add(up, PCMK__XA_CIB_ISREPLYTO, host);
crm_xml_add(up, PCMK__XA_CIB_CLIENTID, client_id);
crm_xml_add(up, PCMK__XA_CIB_CALLOPT, call_opts);
crm_xml_add(up, PCMK__XA_CIB_CALLID, call_id);
crm_xml_add_int(up, PCMK__XA_CIB_UPGRADE_RC, rc);
if (!pcmk__cluster_send_message(origin, pcmk__cluster_msg_based,
up)) {
crm_warn("Could not send CIB upgrade result to %s", host);
}
pcmk__xml_free(up);
}
}
pcmk__xml_free(scratch);
}
return rc;
}
/*!
 * \internal
 * \brief Process a request to sync our CIB to the requesting peer only
 *
 * Only \p req is used; everything else is ignored.
 *
 * \return Return value of sync_our_cib()
 */
int
cib_process_sync_one(const char *op, int options, const char *section,
                     xmlNode *req, xmlNode *input, xmlNode *existing_cib,
                     xmlNode **result_cib, xmlNode **answer)
{
    const gboolean to_all_peers = FALSE;

    return sync_our_cib(req, to_all_peers);
}
/*!
 * \internal
 * \brief Apply a CIB diff, unless we are waiting for a full resync
 *
 * While a sync is in progress and we are not the primary, the diff is not
 * applied and the ignored-diff counter is incremented; once the counter
 * exceeds MAX_DIFF_RETRY it is cleared so diffs are tried again. If applying
 * the diff asks for a resync, a secondary requests a full sync from its
 * peers, while the primary reports a failed diff instead.
 *
 * \param op            Operation name (passed to cib_process_diff())
 * \param options       Call options
 * \param section       CIB section (passed through)
 * \param req           Request XML (passed through)
 * \param input         Diff to apply
 * \param existing_cib  Current CIB (passed through)
 * \param result_cib    Resulting CIB; freed and set to NULL when a resync is
 *                      requested
 * \param answer        Passed through to cib_process_diff()
 *
 * \return Legacy return code (pcmk_ok on success, negative on failure)
 */
int
cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req,
xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
xmlNode ** answer)
{
int rc = pcmk_ok;
if (sync_in_progress > MAX_DIFF_RETRY) {
/* Don't ignore diffs forever; the last request may have been lost.
* If the diff fails, we'll ask for another full resync.
*/
sync_in_progress = 0;
}
// The primary instance should never ignore a diff
if (sync_in_progress && !based_is_primary) {
int diff_add_updates = 0;
int diff_add_epoch = 0;
int diff_add_admin_epoch = 0;
int diff_del_updates = 0;
int diff_del_epoch = 0;
int diff_del_admin_epoch = 0;
// The version details are extracted only for the log message below
cib_diff_version_details(input,
&diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates,
&diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates);
sync_in_progress++;
crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)",
diff_del_admin_epoch, diff_del_epoch, diff_del_updates,
diff_add_admin_epoch, diff_add_epoch, diff_add_updates);
return -pcmk_err_diff_resync;
}
rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer);
crm_trace("result: %s (%d), %s", pcmk_strerror(rc), rc,
(based_is_primary? "primary": "secondary"));
if ((rc == -pcmk_err_diff_resync) && !based_is_primary) {
// Secondary: discard the result and ask for a full copy of the CIB
pcmk__xml_free(*result_cib);
*result_cib = NULL;
send_sync_request(NULL);
} else if (rc == -pcmk_err_diff_resync) {
// Primary: report a failed diff rather than requesting a resync
rc = -pcmk_err_diff_failed;
if (options & cib_force_diff) {
crm_warn("Not requesting full refresh in R/W mode");
}
}
return rc;
}
/*!
 * \internal
 * \brief Process a replace request, clearing any pending sync on success
 *
 * All arguments are passed unchanged to cib_process_replace(). If that
 * succeeds and \p input is a PCMK_XE_CIB element, the sync-in-progress
 * counter is reset to 0.
 *
 * \return Return value of cib_process_replace()
 */
int
cib_process_replace_svr(const char *op, int options, const char *section,
                        xmlNode *req, xmlNode *input, xmlNode *existing_cib,
                        xmlNode **result_cib, xmlNode **answer)
{
    const int rc = cib_process_replace(op, options, section, req, input,
                                       existing_cib, result_cib, answer);

    if (rc != pcmk_ok) {
        return rc;
    }

    if (pcmk__xe_is(input, PCMK_XE_CIB)) {
        sync_in_progress = 0;
    }
    return rc;
}
// @COMPAT: Remove when PCMK__CIB_REQUEST_ABS_DELETE is removed
/*!
 * \internal
 * \brief Reject an absolute-delete request
 *
 * None of the arguments are used.
 *
 * \return Always -EINVAL
 */
int
cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req,
xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib,
xmlNode ** answer)
{
return -EINVAL;
}
/*!
 * \internal
 * \brief Create a PCMK__XE_COPY element holding selected attributes of a
 *        message
 *
 * Only the attributes listed below are copied, and only when they are set in
 * \p msg. Child elements are not copied.
 *
 * \param msg  Message to copy attributes from
 *
 * \return Newly created element
 */
static xmlNode *
cib_msg_copy(xmlNode *msg)
{
    static const char *copied_attrs[] = {
        PCMK__XA_T,
        PCMK__XA_CIB_CLIENTID,
        PCMK__XA_CIB_CALLOPT,
        PCMK__XA_CIB_CALLID,
        PCMK__XA_CIB_OP,
        PCMK__XA_CIB_ISREPLYTO,
        PCMK__XA_CIB_SECTION,
        PCMK__XA_CIB_HOST,
        PCMK__XA_CIB_RC,
        PCMK__XA_CIB_DELEGATED_FROM,
        PCMK__XA_CIB_UPDATE,
        PCMK__XA_CIB_CLIENTNAME,
        PCMK__XA_CIB_USER,
        PCMK__XA_CIB_NOTIFY_TYPE,
        PCMK__XA_CIB_NOTIFY_ACTIVATE,
    };

    xmlNode *result = pcmk__xe_create(NULL, PCMK__XE_COPY);
    const char **attr = copied_attrs;
    const char **end = copied_attrs + PCMK__NELEM(copied_attrs);

    for (; attr != end; attr++) {
        const char *value = crm_element_value(msg, *attr);

        if (value == NULL) {
            continue;
        }
        crm_xml_add(result, *attr, value);
    }
    return result;
}
/*!
 * \internal
 * \brief Send our CIB to the requesting peer or to all peers
 *
 * Builds a PCMK__CIB_REQUEST_REPLACE message from selected attributes of the
 * request plus a digest and a full copy of the_cib, and sends it over the
 * cluster layer.
 *
 * \param request  Sync request XML
 * \param all      If true, send without a specific destination node;
 *                 otherwise send to the node named by the request's
 *                 PCMK__XA_SRC attribute
 *
 * \return pcmk_ok on success, -EINVAL if the_cib is NULL or no single
 *         destination can be determined, or -ENOTCONN if the send failed
 */
int
sync_our_cib(xmlNode * request, gboolean all)
{
int result = pcmk_ok;
char *digest = NULL;
const char *host = crm_element_value(request, PCMK__XA_SRC);
const char *op = crm_element_value(request, PCMK__XA_CIB_OP);
pcmk__node_status_t *peer = NULL;
xmlNode *replace_request = NULL;
xmlNode *wrapper = NULL;
CRM_CHECK(the_cib != NULL, return -EINVAL);
CRM_CHECK(all || (host != NULL), return -EINVAL);
crm_debug("Syncing CIB to %s", all ? "all peers" : host);
replace_request = cib_msg_copy(request);
if (host != NULL) {
crm_xml_add(replace_request, PCMK__XA_CIB_ISREPLYTO, host);
}
if (all) {
// A message for everyone must not carry a host attribute
pcmk__xe_remove_attr(replace_request, PCMK__XA_CIB_HOST);
}
crm_xml_add(replace_request, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_REPLACE);
// @TODO Keep for tracing, or drop?
crm_xml_add(replace_request, PCMK__XA_ORIGINAL_CIB_OP, op);
pcmk__xe_set_bool_attr(replace_request, PCMK__XA_CIB_UPDATE, true);
crm_xml_add(replace_request, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
digest = pcmk__digest_xml(the_cib, true);
crm_xml_add(replace_request, PCMK__XA_DIGEST, digest);
wrapper = pcmk__xe_create(replace_request, PCMK__XE_CIB_CALLDATA);
pcmk__xml_copy(wrapper, the_cib);
// peer stays NULL when syncing to all
if (!all) {
peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster_member);
}
if (!pcmk__cluster_send_message(peer, pcmk__cluster_msg_based,
replace_request)) {
result = -ENOTCONN;
}
pcmk__xml_free(replace_request);
free(digest);
return result;
}
/*!
 * \internal
 * \brief Commit a CIB transaction on behalf of a client or peer
 *
 * On success, the caller activates \p *result_cib locally, triggers a replace
 * notification if appropriate, and syncs \p *result_cib to all nodes. On
 * failure, the caller frees \p *result_cib.
 *
 * \param op            Unused
 * \param options       Unused
 * \param section       Unused
 * \param req           Request XML (source of the client ID and origin)
 * \param input         Transaction to commit
 * \param existing_cib  Unused
 * \param result_cib    Passed to based_commit_transaction()
 * \param answer        Unused
 *
 * \return Legacy return code
 */
int
cib_process_commit_transaction(const char *op, int options, const char *section,
                               xmlNode *req, xmlNode *input,
                               xmlNode *existing_cib, xmlNode **result_cib,
                               xmlNode **answer)
{
    const char *client_id = crm_element_value(req, PCMK__XA_CIB_CLIENTID);
    const char *origin = crm_element_value(req, PCMK__XA_SRC);
    pcmk__client_t *requester = pcmk__find_client_by_id(client_id);
    const int rc = based_commit_transaction(input, requester, origin,
                                            result_cib);

    if (rc != pcmk_rc_ok) {
        char *who = based_transaction_source_str(requester, origin);

        crm_err("Could not commit transaction for %s: %s",
                who, pcmk_rc_str(rc));
        free(who);
    }
    return pcmk_rc2legacy(rc);
}
/*!
 * \internal
 * \brief Reply with all schema files later than a client-supplied version
 *
 * \p *answer is always set to a new PCMK__XA_SCHEMAS element, even when an
 * error is returned.
 *
 * \param op            Unused
 * \param options       Unused
 * \param section       Unused
 * \param req           Request XML; the first child of its call data must
 *                      carry a PCMK_XA_VERSION attribute
 * \param input         Unused
 * \param existing_cib  Unused
 * \param result_cib    Unused
 * \param answer        Where to store the reply
 *
 * \return pcmk_ok on success, or -EPROTO if the request has no data or no
 *         version
 */
int
cib_process_schemas(const char *op, int options, const char *section,
                    xmlNode *req, xmlNode *input, xmlNode *existing_cib,
                    xmlNode **result_cib, xmlNode **answer)
{
    xmlNode *calldata = NULL;
    xmlNode *payload = NULL;
    const char *client_version = NULL;

    *answer = pcmk__xe_create(NULL, PCMK__XA_SCHEMAS);

    calldata = pcmk__xe_first_child(req, PCMK__XE_CIB_CALLDATA, NULL, NULL);
    payload = pcmk__xe_first_child(calldata, NULL, NULL, NULL);
    if (payload == NULL) {
        crm_warn("No data specified in request");
        return -EPROTO;
    }

    client_version = crm_element_value(payload, PCMK_XA_VERSION);
    if (client_version == NULL) {
        crm_warn("No version specified in request");
        return -EPROTO;
    }

    /* If the client asked for everything after the latest schema we know
     * about, it is already fully up-to-date, and the properly formatted reply
     * simply contains no schemas.
     */
    if (!pcmk__str_eq(client_version, pcmk__highest_schema_name(),
                      pcmk__str_none)) {
        GList *newer = pcmk__schema_files_later_than(client_version);
        GList *included = NULL;
        GList *item = newer;

        while (item != NULL) {
            pcmk__build_schema_xml_node(*answer, item->data, &included);
            item = item->next;
        }

        g_list_free_full(newer, free);
        g_list_free_full(included, free);
    }
    return pcmk_ok;
}
diff --git a/daemons/based/pacemaker-based.h b/daemons/based/pacemaker-based.h
index 127d125d31..6de1dfb227 100644
--- a/daemons/based/pacemaker-based.h
+++ b/daemons/based/pacemaker-based.h
@@ -1,138 +1,138 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PACEMAKER_BASED__H
# define PACEMAKER_BASED__H
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <glib.h>
#include <errno.h>
#include <fcntl.h>
#include <glib.h>
#include <libxml/tree.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include "based_transaction.h"
#include <gnutls/gnutls.h>
// Name used for the local node: "localhost" when running stand-alone,
// otherwise the node name held by the cluster object
-#define OUR_NODENAME (stand_alone? "localhost" : crm_cluster->uname)
+#define OUR_NODENAME (stand_alone? "localhost" : crm_cluster->priv->node_name)
// CIB-specific client flags (each value is a distinct bit in a 64-bit group;
// bit 2 and bits 5 through 11 are not assigned here)
enum cib_client_flags {
// Notifications
cib_notify_pre = (UINT64_C(1) << 0),
cib_notify_post = (UINT64_C(1) << 1),
cib_notify_confirm = (UINT64_C(1) << 3),
cib_notify_diff = (UINT64_C(1) << 4),
// Whether client is another cluster daemon
cib_is_daemon = (UINT64_C(1) << 12),
};
extern bool based_is_primary;
extern GHashTable *config_hash;
extern xmlNode *the_cib;
extern crm_trigger_t *cib_writer;
extern gboolean cib_writes_enabled;
extern GMainLoop *mainloop;
extern pcmk_cluster_t *crm_cluster;
extern gboolean stand_alone;
extern gboolean cib_shutdown_flag;
extern gchar *cib_root;
extern int cib_status;
extern struct qb_ipcs_service_handlers ipc_ro_callbacks;
extern struct qb_ipcs_service_handlers ipc_rw_callbacks;
extern qb_ipcs_service_t *ipcs_ro;
extern qb_ipcs_service_t *ipcs_rw;
extern qb_ipcs_service_t *ipcs_shm;
void cib_peer_callback(xmlNode *msg, void *private_data);
void cib_common_callback_worker(uint32_t id, uint32_t flags,
xmlNode *op_request, pcmk__client_t *cib_client,
gboolean privileged);
int cib_process_request(xmlNode *request, gboolean privileged,
const pcmk__client_t *cib_client);
void cib_shutdown(int nsig);
void terminate_cib(const char *caller, int fast);
gboolean uninitializeCib(void);
xmlNode *readCibXmlFile(const char *dir, const char *file,
gboolean discard_status);
int activateCibXml(xmlNode *doc, gboolean to_disk, const char *op);
int cib_process_shutdown_req(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input,
xmlNode *existing_cib, xmlNode **result_cib,
xmlNode **answer);
int cib_process_noop(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
int cib_process_ping(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
int cib_process_readwrite(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
int cib_process_replace_svr(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
int cib_server_process_diff(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
int cib_process_sync(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
int cib_process_sync_one(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
int cib_process_delete_absolute(const char *op, int options,
const char *section, xmlNode *req,
xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
int cib_process_upgrade_server(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input,
xmlNode *existing_cib, xmlNode **result_cib,
xmlNode **answer);
int cib_process_commit_transaction(const char *op, int options,
const char *section, xmlNode *req,
xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
int cib_process_schemas(const char *op, int options, const char *section,
xmlNode *req, xmlNode *input, xmlNode *existing_cib,
xmlNode **result_cib, xmlNode **answer);
void send_sync_request(const char *host);
int sync_our_cib(xmlNode *request, gboolean all);
cib__op_fn_t based_get_op_function(const cib__operation_t *operation);
void cib_diff_notify(const char *op, int result, const char *call_id,
const char *client_id, const char *client_name,
const char *origin, xmlNode *update, xmlNode *diff);
/*!
 * \internal
 * \brief Look up an option in the config_hash table
 *
 * \param opt  Key to look up
 *
 * \return Result of the hash table lookup for \p opt
 */
static inline const char *
cib_config_lookup(const char *opt)
{
    const char *value = g_hash_table_lookup(config_hash, opt);

    return value;
}
#endif // PACEMAKER_BASED__H
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index d10d849195..daca127ff2 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,697 +1,697 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static qb_ipcs_service_t *ipcs = NULL;
static crm_trigger_t *config_read_trigger = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(pcmk_cluster_t *cluster);
#endif
static void crm_shutdown(int nsig);
static gboolean crm_read_options(gpointer user_data);
/* A_HA_CONNECT */
/*!
 * \internal
 * \brief Connect to or disconnect from the cluster layer
 *
 * The cluster object is created on the first call and reused afterward. On a
 * successful connection the local node name and UUID are recorded in
 * controld_globals and the election is initialized; on failure an FSA error
 * is registered.
 *
 * \param action         Action bits (A_HA_CONNECT and/or A_HA_DISCONNECT)
 * \param cause          Unused
 * \param cur_state      Unused
 * \param current_input  Unused
 * \param msg_data       Not referenced directly in this body
 */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static pcmk_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = pcmk_cluster_new();
}
if (action & A_HA_DISCONNECT) {
pcmk_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
pcmk__cluster_set_status_callback(&peer_update_callback);
pcmk__cluster_set_autoreap(false);
// registered stays FALSE unless Corosync support is compiled in and
// Corosync is the detected cluster layer
#if SUPPORT_COROSYNC
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
registered = crm_connect_corosync(cluster);
}
#endif // SUPPORT_COROSYNC
if (registered) {
pcmk__node_status_t *node =
- pcmk__get_node(cluster->nodeid, cluster->uname, NULL,
+ pcmk__get_node(cluster->nodeid, cluster->priv->node_name, NULL,
pcmk__node_search_cluster_member);
// NOTE(review): our_nodename receives the cluster object's own string
// (no copy is made here), while crmd_exit() calls free() on
// our_nodename -- confirm who owns this string
- controld_election_init(cluster->uname);
- controld_globals.our_nodename = cluster->uname;
+ controld_election_init(cluster->priv->node_name);
+ controld_globals.our_nodename = cluster->priv->node_name;
free(controld_globals.our_uuid);
controld_globals.our_uuid =
pcmk__str_copy(pcmk__cluster_node_uuid(node));
if (controld_globals.our_uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
}
if (!registered) {
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __func__);
controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
// Any action bit other than connect/disconnect is a caller error
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
/* A_SHUTDOWN */
/*!
 * \internal
 * \brief Set the shutdown flag and disconnect from the fencer
 *
 * None of the arguments are used.
 */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
controld_set_fsa_input_flags(R_SHUTDOWN);
controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
/*!
 * \internal
 * \brief Set the shutdown flag and send a shutdown request to all peers
 *
 * An FSA error is registered if the request cannot be sent.
 *
 * \param action         Unused
 * \param cause          Unused
 * \param cur_state      Unused
 * \param current_input  Unused
 * \param msg_data       Not referenced directly in this body
 */
void
do_shutdown_req(long long action,
                enum crmd_fsa_cause cause,
                enum crmd_fsa_state cur_state,
                enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
    xmlNode *request = NULL;
    const char *dc_name = pcmk__s(controld_globals.dc_name, "not set");

    controld_set_fsa_input_flags(R_SHUTDOWN);
    //controld_set_fsa_input_flags(R_STAYDOWN);

    crm_info("Sending shutdown request to all peers (DC is %s)", dc_name);

    request = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD,
                             CRM_SYSTEM_CRMD, NULL);

    if (!pcmk__cluster_send_message(NULL, pcmk__cluster_msg_controld,
                                    request)) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
    pcmk__xml_free(request);
}
/*!
 * \internal
 * \brief Finish the logger output and exit, without further cleanup
 *
 * The exit code is remapped to CRM_EX_FATAL when R_STAYDOWN is set, or to
 * CRM_EX_ERROR when a clean exit was requested while R_IN_RECOVERY is set.
 *
 * \param exit_code  Requested exit code
 */
void
crmd_fast_exit(crm_exit_t exit_code)
{
    const bool stay_down = pcmk_is_set(controld_globals.fsa_input_register,
                                       R_STAYDOWN);

    if (stay_down) {
        crm_warn("Inhibiting respawn " QB_XS " remapping exit code %d to %d",
                 exit_code, CRM_EX_FATAL);
        exit_code = CRM_EX_FATAL;

    } else if (exit_code == CRM_EX_OK) {
        if (pcmk_is_set(controld_globals.fsa_input_register, R_IN_RECOVERY)) {
            crm_err("Could not recover from internal error");
            exit_code = CRM_EX_ERROR;
        }
    }

    // Flush and release the logger output object, if there is one
    if (controld_globals.logger_out != NULL) {
        controld_globals.logger_out->finish(controld_globals.logger_out,
                                            exit_code, true, NULL);
        pcmk__output_free(controld_globals.logger_out);
        controld_globals.logger_out = NULL;
    }

    crm_exit(exit_code);
}
/*!
 * \internal
 * \brief Tear down the controller in preparation for exiting
 *
 * With a nonzero exit code (or when no main loop exists) this ends in
 * crmd_fast_exit(). With CRM_EX_OK it releases connections, timers, triggers,
 * caches and global strings, drains and quits the main loop, and returns.
 * A second call while a teardown is already under way either returns
 * immediately (CRM_EX_OK) or goes to crmd_fast_exit() (any other code).
 *
 * \param exit_code  Requested exit status
 *
 * \return \p exit_code, when this function returns at all
 */
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
GMainLoop *mloop = controld_globals.mainloop;
// Remembers across calls that a teardown has already begun
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
controld_shutdown_schedulerd_ipc();
controld_disconnect_fencer(TRUE);
// A clean exit needs a main loop to drain; without one, treat it as an error
if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
iter = iter->next) {
fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(controld_globals.fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
controld_clear_fsa_input_flags(R_MEMBERSHIP);
// The queued inputs were deleted above; this frees the list itself
g_list_free(controld_globals.fsa_message_queue);
controld_globals.fsa_message_queue = NULL;
controld_free_node_pending_timers();
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
controld_disconnect_cib_manager();
verify_stopped(controld_globals.fsa_state, LOG_WARNING);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_destroy_all();
mainloop_destroy_trigger(config_read_trigger);
config_read_trigger = NULL;
controld_destroy_fsa_trigger();
controld_destroy_transition_trigger();
pcmk__client_cleanup();
pcmk__cluster_destroy_node_caches();
controld_free_fsa_timers();
te_cleanup_stonith_history_sync(NULL, TRUE);
controld_free_sched_timer();
// Release global strings, clearing each pointer after it is freed
free(controld_globals.our_nodename);
controld_globals.our_nodename = NULL;
free(controld_globals.our_uuid);
controld_globals.our_uuid = NULL;
free(controld_globals.dc_name);
controld_globals.dc_name = NULL;
free(controld_globals.dc_version);
controld_globals.dc_version = NULL;
free(controld_globals.cluster_name);
controld_globals.cluster_name = NULL;
free(controld_globals.te_uuid);
controld_globals.te_uuid = NULL;
free_max_generation();
controld_destroy_failed_sync_table();
controld_destroy_outside_events_table();
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
/* Don't re-enter this block */
controld_globals.mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
// Dispatch pending events, giving up after 10 iterations
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
// The CIB connection was only disconnected earlier; free it now
cib_delete(controld_globals.cib_conn);
controld_globals.cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
/*!
 * \internal
 * \brief FSA action handler that exits the controller
 *
 * The exit status is CRM_EX_ERROR when A_EXIT_1 is set in \p action, and
 * CRM_EX_OK otherwise.
 *
 * \param[in] action         FSA action flags being performed
 * \param[in] cause          Not used here
 * \param[in] cur_state      Current FSA state (passed to verify_stopped())
 * \param[in] current_input  Not used here
 * \param[in] msg_data       Not used here
 */
void
do_exit(long long action,
        enum crmd_fsa_cause cause,
        enum crmd_fsa_state cur_state,
        enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
    crm_exit_t exit_code = CRM_EX_OK;

    if (pcmk_is_set(action, A_EXIT_1)) {
        crm_err("Exiting now due to errors");
        exit_code = CRM_EX_ERROR;
    }

    verify_stopped(cur_state, LOG_ERR);
    crmd_exit(exit_code);
}
/*!
 * \internal
 * \brief Signal handler that deliberately does nothing
 *
 * do_startup() registers this for SIGPIPE.
 *
 * \param[in] nsig  Signal number (ignored)
 */
static void
sigpipe_ignore(int nsig)
{
    (void) nsig;
}
/* A_STARTUP */
/*!
 * \internal
 * \brief FSA action handler that performs initial controller setup
 *
 * Registers the SIGTERM and SIGPIPE handlers, creates the mainloop triggers,
 * allocates the CIB connection object, initializes the local executor state,
 * and creates the FSA timers. A timer initialization failure is reported to
 * the FSA as I_ERROR.
 *
 * \param[in] action         Not used here
 * \param[in] cause          Not used here
 * \param[in] cur_state      Not used here
 * \param[in] current_input  Not used here
 * \param[in] msg_data       FSA data for this action
 */
void
do_startup(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state,
           enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
    crm_debug("Registering Signal Handlers");
    mainloop_add_signal(SIGTERM, crm_shutdown);
    mainloop_add_signal(SIGPIPE, sigpipe_ignore);

    config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
                                               crm_read_options, NULL);

    controld_init_fsa_trigger();
    controld_init_transition_trigger();

    crm_debug("Creating CIB manager and executor objects");
    controld_globals.cib_conn = cib_new();

    lrm_state_init_local();

    if (!controld_init_fsa_timers()) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
}
/*!
 * \internal
 * \brief libqb callback invoked when a new IPC client connects
 *
 * \param[in] c    New client connection
 * \param[in] uid  User ID passed through to pcmk__new_client()
 * \param[in] gid  Group ID passed through to pcmk__new_client()
 *
 * \return libqb error code (0 on success, -errno on error)
 */
static int32_t
accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    crm_trace("Accepting new IPC client connection");

    return (pcmk__new_client(c, uid, gid) != NULL)? 0 : -ENOMEM;
}
/*!
 * \internal
 * \brief libqb callback invoked when an IPC client sends a message
 *
 * Parses the message, acknowledges it, stamps it with the client's ACL user
 * and ID, and routes it into the FSA if the client is authorized.
 *
 * \param[in] c     Connection the message arrived on
 * \param[in] data  Raw message data
 * \param[in] size  Not used here
 *
 * \return libqb error code (0 on success, -errno on error)
 */
static int32_t
dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    xmlNode *msg = NULL;
    pcmk__client_t *client = pcmk__find_client(c);

    /* Without a client object there is nobody to acknowledge, and the code
     * below dereferences client, so bail out early (the fencer's IPC
     * dispatcher handles an unknown connection the same way).
     */
    if (client == NULL) {
        crm_info("Invalid client: %p", c);
        return 0;
    }

    msg = pcmk__client_data2xml(client, data, &id, &flags);
    if (msg == NULL) {
        pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
                           CRM_EX_PROTOCOL);
        return 0;
    }
    pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
                       CRM_EX_INDETERMINATE);

    CRM_ASSERT(client->user != NULL);
    pcmk__update_acl_user(msg, PCMK__XA_CRM_USER, client->user);

    crm_xml_add(msg, PCMK__XA_CRM_SYS_FROM, client->id);
    if (controld_authorize_ipc_message(msg, client, NULL)) {
        crm_trace("Processing IPC message from client %s",
                  pcmk__client_name(client));
        route_message(C_IPC_MESSAGE, msg);
    }

    // Triggered whether or not the message was routed
    controld_trigger_fsa();
    pcmk__xml_free(msg);
    return 0;
}
/*!
 * \internal
 * \brief libqb callback invoked when an IPC client connection is closed
 *
 * Frees the client object (and its user data) if one is still registered for
 * the connection, then triggers the FSA.
 *
 * \param[in] c  Connection that was closed
 *
 * \return Always 0
 */
static int32_t
ipc_client_disconnected(qb_ipcs_connection_t *c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client == NULL) {
        return 0;
    }

    crm_trace("Disconnecting %sregistered client %s (%p/%p)",
              (client->userdata? "" : "un"), pcmk__client_name(client),
              c, client);
    free(client->userdata);
    pcmk__free_client(client);
    controld_trigger_fsa();
    return 0;
}
/*!
 * \internal
 * \brief libqb callback invoked when an IPC connection is destroyed
 *
 * Performs the same cleanup as ipc_client_disconnected().
 *
 * \param[in] c  Connection being destroyed
 */
static void
ipc_connection_destroyed(qb_ipcs_connection_t *c)
{
    crm_trace("Connection %p", c);
    (void) ipc_client_disconnected(c);
}
/* A_STOP */
/*!
 * \internal
 * \brief FSA action handler that closes the IPC server and requests
 *        termination
 *
 * \param[in] action         Not used here
 * \param[in] cause          Not used here
 * \param[in] cur_state      Not used here
 * \param[in] current_input  Not used here
 * \param[in] msg_data       Not used here
 */
void
do_stop(long long action,
        enum crmd_fsa_cause cause,
        enum crmd_fsa_state cur_state,
        enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
    crm_trace("Closing IPC server");
    mainloop_del_ipc_server(ipcs);
    ipcs = NULL;

    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
/*!
 * \internal
 * \brief FSA action handler that finishes controller startup
 *
 * Does nothing unless the FSA is in S_STARTING. Stalls the FSA while any of
 * the R_MEMBERSHIP, R_LRM_CONNECTED, R_CIB_CONNECTED, R_READ_CONFIG, or
 * R_PEER_DATA flags is still unset. Once all are set, it starts the IPC
 * server, sets R_ST_REQUIRED, calls controld_timer_fencer_connect(), clears
 * R_STARTING, and registers I_PENDING.
 *
 * \param[in] action         FSA action flags being performed
 * \param[in] cause          Not used here
 * \param[in] cur_state      Current FSA state
 * \param[in] current_input  Not used here
 * \param[in] msg_data       FSA data for this action
 */
void
do_started(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state,
           enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
    static struct qb_ipcs_service_handlers crmd_callbacks = {
        .connection_accept = accept_controller_client,
        .connection_created = NULL,
        .msg_process = dispatch_controller_ipc,
        .connection_closed = ipc_client_disconnected,
        .connection_destroyed = ipc_connection_destroyed
    };

    if (cur_state != S_STARTING) {
        crm_err("Start cancelled... %s", fsa_state2string(cur_state));
        return;
    }

    // Prerequisites are checked in order; the first unmet one stalls the FSA

    if (!pcmk_is_set(controld_globals.fsa_input_register, R_MEMBERSHIP)) {
        crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
        crmd_fsa_stall(TRUE);
        return;
    }

    if (!pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
        crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
        crmd_fsa_stall(TRUE);
        return;
    }

    if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
        crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
        crmd_fsa_stall(TRUE);
        return;
    }

    if (!pcmk_is_set(controld_globals.fsa_input_register, R_READ_CONFIG)) {
        crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
        crmd_fsa_stall(TRUE);
        return;
    }

    if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
        crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
        crmd_fsa_stall(TRUE);
        return;
    }

    crm_debug("Init server comms");
    ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
    if (ipcs != NULL) {
        crm_notice("Pacemaker controller successfully started and accepting connections");
    } else {
        crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }

    // The remaining steps run even if the IPC server could not be created
    controld_set_fsa_input_flags(R_ST_REQUIRED);
    controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));

    controld_clear_fsa_input_flags(R_STARTING);
    register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
/*!
 * \internal
 * \brief FSA action handler that reacts to errors by requesting termination
 *
 * Sets R_IN_RECOVERY and registers I_TERMINATE.
 *
 * \param[in] action         Not used here
 * \param[in] cause          Not used here
 * \param[in] cur_state      Not used here
 * \param[in] current_input  Not used here
 * \param[in] msg_data       Not used here
 */
void
do_recover(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state,
           enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
    controld_set_fsa_input_flags(R_IN_RECOVERY);

    crm_warn("Fast-tracking shutdown in response to errors");

    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/*!
 * \internal
 * \brief CIB query callback that applies cluster options to the controller
 *
 * crm_read_options() registers this for its query of the PCMK_XE_CRM_CONFIG
 * and PCMK_XE_ALERTS elements. On success it unpacks and validates the cluster
 * options, updates controller globals and subcomponents from them, unpacks
 * alerts, sets R_READ_CONFIG, and triggers the FSA. On failure it registers
 * I_ERROR with the FSA.
 *
 * \param[in] msg        Not used here
 * \param[in] call_id    CIB call ID (used only for logging)
 * \param[in] rc         Result of the query (pcmk_ok on success)
 * \param[in] output     Query result
 * \param[in] user_data  Not used here
 */
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
/* NOTE(review): msg_data looks unused, but register_fsa_error() is
 * presumably a macro that references a local of this name -- confirm
 * before removing it
 */
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
controld_set_fsa_input_flags(R_STAYDOWN);
}
goto bail;
}
// The result may be the crm_config element itself or a parent of it
crmconfig = output;
if ((crmconfig != NULL) && !pcmk__xe_is(crmconfig, PCMK_XE_CRM_CONFIG)) {
crmconfig = pcmk__xe_first_child(crmconfig, PCMK_XE_CRM_CONFIG, NULL,
NULL);
}
if (!crmconfig) {
// NOTE(review): see the note on msg_data in the error branch above
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " PCMK_XE_CRM_CONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = pcmk__strkey_table(free, free);
pe_unpack_nvpairs(crmconfig, crmconfig, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, FALSE, now,
NULL);
// Validate all options, and use defaults if not already present in hash
pcmk__validate_cluster_options(config_hash);
/* Validate the watchdog timeout in the context of the local node
* environment. If invalid, the controller will exit with a fatal error.
*
* We do this via a wrapper in the controller, so that we call
* pcmk__valid_stonith_watchdog_timeout() only if watchdog fencing is
* enabled for the local node. Otherwise, we may exit unnecessarily.
*
* A validator function in libcrmcommon can't act as such a wrapper, because
* it doesn't have a stonith API connection or the local node name.
*/
value = g_hash_table_lookup(config_hash, PCMK_OPT_STONITH_WATCHDOG_TIMEOUT);
controld_verify_stonith_watchdog_timeout(value);
value = g_hash_table_lookup(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)
&& (pcmk__locate_sbd() != 0)) {
controld_set_global_flags(controld_no_quorum_suicide);
}
value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK);
if (crm_is_true(value)) {
controld_set_global_flags(controld_shutdown_lock_enabled);
} else {
controld_clear_global_flags(controld_shutdown_lock_enabled);
}
/* NOTE(review): the divisions by 1000 below presumably convert a parsed
 * millisecond interval to seconds -- confirm against
 * pcmk_parse_interval_spec()
 */
value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
pcmk_parse_interval_spec(value, &controld_globals.shutdown_lock_limit);
controld_globals.shutdown_lock_limit /= 1000;
value = g_hash_table_lookup(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
pcmk_parse_interval_spec(value, &controld_globals.node_pending_timeout);
controld_globals.node_pending_timeout /= 1000;
value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME);
pcmk__str_update(&(controld_globals.cluster_name), value);
// Let subcomponents initialize their own static variables
controld_configure_election(config_hash);
controld_configure_fencing(config_hash);
controld_configure_fsa_timers(config_hash);
controld_configure_throttle(config_hash);
alerts = pcmk__xe_first_child(output, PCMK_XE_ALERTS, NULL, NULL);
crmd_unpack_alerts(alerts);
controld_set_fsa_input_flags(R_READ_CONFIG);
controld_trigger_fsa();
g_hash_table_destroy(config_hash);
// Both error paths jump here before config_hash is allocated
bail:
crm_time_free(now);
}
/*!
 * \internal
 * \brief Request an asynchronous re-read of the cluster configuration
 *
 * Does nothing if the configuration trigger does not currently exist.
 *
 * \param[in] fn    Name of the calling function (for logging)
 * \param[in] line  Line number of the call (for logging)
 */
void
controld_trigger_config_as(const char *fn, int line)
{
    if (config_read_trigger == NULL) {
        return;
    }

    crm_trace("%s:%d - Triggered config processing", fn, line);
    mainloop_set_trigger(config_read_trigger);
}
/*!
 * \internal
 * \brief Mainloop trigger callback that queries the CIB for configuration
 *
 * Issues an XPath query for the PCMK_XE_CRM_CONFIG and PCMK_XE_ALERTS
 * elements and registers config_query_callback() to handle the result.
 *
 * \param[in] user_data  Not used here
 *
 * \return Always TRUE
 */
gboolean
crm_read_options(gpointer user_data)
{
    cib_t *cib = controld_globals.cib_conn;
    int query_id = 0;

    query_id = cib->cmds->query(cib,
                                "//" PCMK_XE_CRM_CONFIG
                                " | //" PCMK_XE_ALERTS,
                                NULL, cib_xpath);

    fsa_register_cib_callback(query_id, NULL, config_query_callback);
    crm_trace("Querying the CIB... call %d", query_id);
    return TRUE;
}
/* A_READCONFIG */
/*!
 * \internal
 * \brief FSA action handler that initializes throttling and triggers a
 *        configuration read
 *
 * \param[in] action         Not used here
 * \param[in] cause          Not used here
 * \param[in] cur_state      Not used here
 * \param[in] current_input  Not used here
 * \param[in] msg_data       Not used here
 */
void
do_read_config(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
    throttle_init();

    controld_trigger_config();
}
/*!
 * \internal
 * \brief Signal handler that begins, or escalates, a controller shutdown
 *
 * If the mainloop is not running, the controller exits immediately. If a
 * shutdown has already been requested (R_SHUTDOWN is set), I_ERROR is
 * registered ahead of other inputs. Otherwise R_SHUTDOWN is set, I_SHUTDOWN is
 * registered, and the shutdown countdown is started.
 *
 * \param[in] nsig  Signal number (not used here)
 */
static void
crm_shutdown(int nsig)
{
    guint default_period_ms = 0;
    const char *escalation = NULL;

    if ((controld_globals.mainloop == NULL)
        || !g_main_loop_is_running(controld_globals.mainloop)) {

        crmd_exit(CRM_EX_OK);
        return;
    }

    if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
        crm_err("Escalating shutdown");
        register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
        return;
    }

    controld_set_fsa_input_flags(R_SHUTDOWN);
    register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);

    /* If shutdown timer doesn't have a period set, use the default
     *
     * @TODO: Evaluate whether this is still necessary. As long as
     * config_query_callback() has been run at least once, it doesn't look like
     * anything could have changed the timer period since then.
     */
    escalation = pcmk__cluster_option(NULL, PCMK_OPT_SHUTDOWN_ESCALATION);
    pcmk_parse_interval_spec(escalation, &default_period_ms);
    controld_shutdown_start_countdown(default_period_ms);
}
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 323fbd5440..db974ef769 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1,678 +1,678 @@
/*
* Copyright 2009-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/utsname.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h> // PRIu32, PRIx32
#include <crm/crm.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/output_internal.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/mainloop.h>
#include <crm/cib/internal.h>
#include <pacemaker-fenced.h>
// One-line program description passed to pcmk__new_common_args() in main()
#define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
// Local node name; main() sets it from the cluster connection or "localhost"
char *stonith_our_uname = NULL;
long long stonith_watchdog_timeout_ms = 0;
// If NULL, node_does_watchdog_fencing() treats every node as a target
GList *stonith_watchdog_targets = NULL;
// Created in main(); stonith_shutdown() quits it if it is running
static GMainLoop *mainloop = NULL;
// Set by --stand-alone; when TRUE, main() skips the cluster connection
gboolean stand_alone = FALSE;
// Set by stonith_shutdown(); st_ipc_accept() refuses new clients once TRUE
gboolean stonith_shutdown_flag = FALSE;
// IPC server set up in main() and destroyed by stonith_cleanup()
static qb_ipcs_service_t *ipcs = NULL;
static pcmk__output_t *out = NULL;
// Output formats registered in main() via pcmk__register_formats()
pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
// Command-line option values (see entries[])
static struct {
// Set by --stand-alone-w-cpg; when true, main() does not call setup_cib()
bool no_cib_connect;
// Values of --logfile
gchar **log_files;
} options;
// Exit status that main() passes to crm_exit()
crm_exit_t exit_code = CRM_EX_OK;
static void stonith_cleanup(void);
/*!
 * \internal
 * \brief libqb callback invoked when a new IPC client connects to the fencer
 *
 * \param[in] c    New client connection
 * \param[in] uid  User ID passed through to pcmk__new_client()
 * \param[in] gid  Group ID passed through to pcmk__new_client()
 *
 * \return 0 on success, -ECONNREFUSED if the fencer is shutting down, or
 *         -ENOMEM if a client object could not be created
 */
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    if (stonith_shutdown_flag) {
        crm_info("Ignoring new client [%d] during shutdown",
                 pcmk__client_pid(c));
        return -ECONNREFUSED;
    }

    return (pcmk__new_client(c, uid, gid) != NULL)? 0 : -ENOMEM;
}
/*!
 * \internal
 * \brief libqb callback invoked when an IPC client sends the fencer a message
 *
 * A CRM_OP_RM_NODE_CACHE request is relayed to the cluster as a fencer
 * message. Any other request is stamped with the client's identity and handed
 * to stonith_command().
 *
 * \param[in] qbc   Connection the message arrived on
 * \param[in] data  Raw message data
 * \param[in] size  Not used here
 *
 * \return Always 0
 *
 * \note NOTE(review): how libqb interprets this return value is not visible
 *       here (the original comment asked "Exit code means?") -- confirm
 *       against the libqb msg_process handler documentation.
 */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    int call_options = 0;
    const char *op = NULL;
    xmlNode *request = NULL;
    pcmk__client_t *c = pcmk__find_client(qbc);

    if (c == NULL) {
        crm_info("Invalid client: %p", qbc);
        return 0;
    }

    request = pcmk__client_data2xml(c, data, &id, &flags);
    if (request == NULL) {
        pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL);
        return 0;
    }

    op = crm_element_value(request, PCMK__XA_CRM_TASK);

    if (pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
        // Relay node cache removal requests to the cluster
        crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
        crm_xml_add(request, PCMK__XA_ST_OP, op);
        crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
        crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
        crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname);

        pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, request);

    } else {
        if (c->name == NULL) {
            const char *value = crm_element_value(request,
                                                  PCMK__XA_ST_CLIENTNAME);

            c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"),
                                        c->pid);
        }

        crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options);
        crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32
                  " from client %s", flags, call_options, id, pcmk__client_name(c));

        if (pcmk_is_set(call_options, st_opt_sync_call)) {
            CRM_ASSERT(flags & crm_ipc_client_response);
            CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */
            c->request_id = id; /* Reply only to the last one */
        }

        crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
        crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
        crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname);

        crm_log_xml_trace(request, "ipc-received");
        stonith_command(c, id, flags, request, NULL);
    }

    pcmk__xml_free(request);
    return 0;
}
/*!
 * \internal
 * \brief libqb callback invoked when a fencer IPC client connection is closed
 *
 * \param[in] c  Connection that was closed
 *
 * \return Always 0 (meaning: yes, go ahead and destroy the connection)
 */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client != NULL) {
        crm_trace("Connection %p closed", c);
        pcmk__free_client(client);
    }
    return 0;
}
/*!
 * \internal
 * \brief libqb callback invoked when a fencer IPC connection is destroyed
 *
 * Performs the same cleanup as st_ipc_closed().
 *
 * \param[in] c  Connection being destroyed
 */
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p destroyed", c);
    (void) st_ipc_closed(c);
}
/*!
 * \internal
 * \brief Process a fencer message received from a cluster peer
 *
 * STONITH_OP_POKE messages are dropped. Everything else is handed to
 * stonith_command() with the sending peer's name.
 *
 * \param[in] msg           Message XML
 * \param[in] private_data  Not used here
 */
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
    const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC);
    const char *op = crm_element_value(msg, PCMK__XA_ST_OP);

    if (!pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) {
        crm_log_xml_trace(msg, "Peer[inbound]");
        stonith_command(NULL, 0, 0, msg, remote_peer);
    }
}
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief CPG delivery callback for fencer messages
 *
 * Extracts the message text, parses it as XML, records the sender in
 * PCMK__XA_SRC, and passes it to stonith_peer_callback().
 *
 * \param[in] handle     CPG handle the message arrived on
 * \param[in] groupName  Not used here
 * \param[in] nodeid     Node ID passed to pcmk__cpg_message_data()
 * \param[in] pid        Process ID passed to pcmk__cpg_message_data()
 * \param[in] msg        Raw message
 * \param[in] msg_len    Not used here
 */
static void
stonith_peer_ais_callback(cpg_handle_t handle,
                          const struct cpg_name *groupName,
                          uint32_t nodeid, uint32_t pid, void *msg,
                          size_t msg_len)
{
    const char *from = NULL;
    xmlNode *xml = NULL;
    char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);

    if (data == NULL) {
        return;
    }

    xml = pcmk__xml_parse(data);
    if (xml != NULL) {
        crm_xml_add(xml, PCMK__XA_SRC, from);
        stonith_peer_callback(xml, NULL);
        pcmk__xml_free(xml);
    } else {
        crm_err("Invalid XML: '%.120s'", data);
    }

    free(data);
}
/*!
 * \internal
 * \brief Callback invoked when the cluster layer connection is lost
 *
 * \param[in] user_data  Not used here
 */
static void
stonith_peer_cs_destroy(gpointer user_data)
{
    crm_crit("Lost connection to cluster layer, shutting down");

    stonith_shutdown(0);
}
#endif
/*!
 * \internal
 * \brief Send a reply to the local IPC client that originated a request
 *
 * For synchronous calls (st_opt_sync_call set in \p call_options), the reply
 * uses the client's pending request ID, which is then reset. Otherwise the
 * reply is sent as a server event.
 *
 * \param[in]     notify_src    XML to send
 * \param[in,out] client        Client to send to
 * \param[in]     call_options  Call options of the original request
 */
void
do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
               int call_options)
{
    const bool sync_call = pcmk_is_set(call_options, st_opt_sync_call);
    uint32_t ipc_flags = crm_ipc_server_event;
    int rid = 0;
    int local_rc = pcmk_rc_ok;

    if (sync_call) {
        CRM_LOG_ASSERT(client->request_id);
        rid = client->request_id;
        client->request_id = 0;
        ipc_flags = crm_ipc_flags_none;
    }

    /* send callback to originating child */
    local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags);
    if (local_rc != pcmk_rc_ok) {
        crm_warn("%synchronous reply to client %s failed: %s",
                 (sync_call? "S" : "As"),
                 pcmk__client_name(client), pcmk_rc_str(local_rc));
        return;
    }

    crm_trace("Sent response %d to client %s",
              rid, pcmk__client_name(client));
}
/*!
 * \internal
 * \brief Map a notification type name to its client callback flag
 *
 * \param[in] name  Notification type name
 *
 * \return Matching st_callback_* flag, or st_callback_unknown if \p name is
 *         not recognized
 */
uint64_t
get_stonith_flag(const char *name)
{
    if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) {
        return st_callback_notify_fence;
    }

    // Device add/delete names are compared case-insensitively
    if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) {
        return st_callback_device_add;
    }
    if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) {
        return st_callback_device_del;
    }

    if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) {
        return st_callback_notify_history;
    }
    if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
                     pcmk__str_none)) {
        return st_callback_notify_history_synced;
    }

    return st_callback_unknown;
}
/*!
 * \internal
 * \brief Send a notification to one IPC client if it subscribed to that type
 *
 * fenced_send_notification() passes this to pcmk__foreach_ipc_client().
 *
 * \param[in] key        Not used here
 * \param[in] value      Client to notify (pcmk__client_t *)
 * \param[in] user_data  Notification XML (const xmlNode *)
 */
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
    const xmlNode *update_msg = user_data;
    pcmk__client_t *client = value;
    const char *type = NULL;
    int rc = pcmk_rc_ok;

    CRM_CHECK(client != NULL, return);
    CRM_CHECK(update_msg != NULL, return);

    type = crm_element_value(update_msg, PCMK__XA_SUBT);
    CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);

    if (client->ipcs == NULL) {
        crm_trace("Skipping client with NULL channel");
        return;
    }

    if (!pcmk_is_set(client->flags, get_stonith_flag(type))) {
        // Client did not ask for this notification type
        return;
    }

    rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event);
    if (rc == pcmk_rc_ok) {
        crm_trace("Sent %s notification to client %s",
                  type, pcmk__client_name(client));
    } else {
        crm_warn("%s notification of client %s failed: %s "
                 QB_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
                 pcmk_rc_str(rc), client->id, rc);
    }
}
/*!
 * \internal
 * \brief Tell an IPC client the updated timeout of an asynchronous call
 *
 * Does nothing if \p timeout is 0, if either ID is NULL, or if no client with
 * ID \p client_id is currently connected.
 *
 * \param[in] client_id  ID of the client to notify
 * \param[in] call_id    ID of the call whose timeout changed
 * \param[in] timeout    New timeout value
 */
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
    pcmk__client_t *client = NULL;
    xmlNode *notify_data = NULL;

    if (!timeout || !call_id || !client_id) {
        return;
    }

    client = pcmk__find_client_by_id(client_id);
    if (!client) {
        return;
    }

    notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE);
    crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE);
    crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id);
    crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout);

    crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);

    // client cannot be NULL here (checked above), so send unconditionally
    pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);

    pcmk__xml_free(notify_data);
}
/*!
 * \internal
 * \brief Notify relevant IPC clients of a fencing operation result
 *
 * \param[in] type    Notification type
 * \param[in] result  Result of fencing operation (assume success if NULL)
 * \param[in] data    If not NULL, add to notification as call data
 */
void
fenced_send_notification(const char *type, const pcmk__action_result_t *result,
                         xmlNode *data)
{
    /* TODO: Standardize the contents of data */
    xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY);

    CRM_LOG_ASSERT(type != NULL);

    // The type is recorded both as the subtype and as the operation
    crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
    crm_xml_add(update_msg, PCMK__XA_SUBT, type);
    crm_xml_add(update_msg, PCMK__XA_ST_OP, type);
    stonith__xe_set_result(update_msg, result);

    if (data != NULL) {
        // Wrap a copy of the call data in its own child element
        pcmk__xml_copy(pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA),
                       data);
    }

    crm_trace("Notifying clients");
    pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
    pcmk__xml_free(update_msg);
    crm_trace("Notify complete");
}
/*!
 * \internal
 * \brief Send notifications for a configuration change to subscribed clients
 *
 * \param[in] op      Notification type (\c STONITH_OP_DEVICE_ADD,
 *                    \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or
 *                    \c STONITH_OP_LEVEL_DEL)
 * \param[in] result  Operation result
 * \param[in] desc    Description of what changed (either device ID or string
 *                    representation of level
 *                    (<tt><target>[<level_index>]</tt>))
 */
void
fenced_send_config_notification(const char *op,
                                const pcmk__action_result_t *result,
                                const char *desc)
{
    // The payload element is named after the operation
    xmlNode *payload = pcmk__xe_create(NULL, op);

    crm_xml_add(payload, PCMK__XA_ST_DEVICE_ID, desc);

    fenced_send_notification(op, result, payload);
    pcmk__xml_free(payload);
}
/*!
* \internal
* \brief Check whether a node does watchdog-fencing
*
* \param[in] node Name of node to check
*
* \return TRUE if node found in stonith_watchdog_targets
* or stonith_watchdog_targets is empty indicating
* all nodes are doing watchdog-fencing
*/
gboolean
node_does_watchdog_fencing(const char *node)
{
return ((stonith_watchdog_targets == NULL) ||
pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
}
/*!
 * \internal
 * \brief Begin fencer shutdown
 *
 * Marks the fencer as shutting down and, if the mainloop is running, asks it
 * to quit.
 *
 * \param[in] nsig  Signal number (not used here)
 */
void
stonith_shutdown(int nsig)
{
    crm_info("Terminating with %d clients", pcmk__ipc_client_count());
    stonith_shutdown_flag = TRUE;

    if (mainloop == NULL) {
        return;
    }
    if (g_main_loop_is_running(mainloop)) {
        g_main_loop_quit(mainloop);
    }
}
/*!
 * \internal
 * \brief Release the fencer's global resources before exit
 *
 * main() calls this on every exit path, so each step has to cope with state
 * that was never initialized.
 */
static void
stonith_cleanup(void)
{
    fenced_cib_cleanup();

    if (ipcs != NULL) {
        qb_ipcs_destroy(ipcs);
    }

    pcmk__cluster_destroy_node_caches();
    pcmk__client_cleanup();

    free_stonith_remote_op_list();
    free_topology_list();
    free_device_list();
    free_metadata_cache();
    fenced_unregister_handlers();

    free(stonith_our_uname);
    stonith_our_uname = NULL;
}
/*!
 * \internal
 * \brief Option callback for --stand-alone-w-cpg
 *
 * Keeps the cluster connection enabled but disables the CIB connection.
 *
 * \param[in]  option_name  Not used here
 * \param[in]  optarg       Not used here
 * \param[in]  data         Not used here
 * \param[out] error        Not used here
 *
 * \return Always TRUE
 */
static gboolean
stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data,
                   GError **error)
{
    options.no_cib_connect = true;
    stand_alone = FALSE;

    return TRUE;
}
// IPC server callbacks that main() passes to pcmk__serve_fenced_ipc()
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
.connection_created = NULL,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*!
 * \internal
 * \brief Callback for peer status changes
 *
 * \param[in] type  What changed
 * \param[in] node  What peer had the change
 * \param[in] data  Previous value of what changed
 */
static void
st_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
                        const void *data)
{
    xmlNode *query = NULL;

    // Process-list changes and remote nodes don't need a broadcast
    if ((type == pcmk__node_update_processes)
        || pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return;
    }

    /*
     * This is a hack until we can send to a nodeid and/or we fix node name lookups
     * These messages are ignored in stonith_peer_callback()
     */
    query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);
    crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
    crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE);

    crm_debug("Broadcasting our uname because of node %" PRIu32,
              node->cluster_layer_id);
    pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, query);

    pcmk__xml_free(query);
}
/* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or
 * crm_resource --list-options=fencing instead of querying daemon metadata.
 */
/*!
 * \internal
 * \brief Output the fencer's daemon metadata
 *
 * \return Return value of pcmk__daemon_metadata() (main() treats anything
 *         other than pcmk_rc_ok as an error)
 */
static int
fencer_metadata(void)
{
    return pcmk__daemon_metadata(out, "pacemaker-fenced",
                                 N_("Instance attributes available for all "
                                    "\"stonith\"-class resources"),
                                 N_("Instance attributes available for all "
                                    "\"stonith\"-class resources and used by "
                                    "Pacemaker's fence daemon"),
                                 pcmk__opt_fencing);
}
// Fencer-specific command-line options (added to the context in
// build_arg_context())
static GOptionEntry entries[] = {
// Stored directly in stand_alone; main() then skips the cluster connection
{ "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
N_("Deprecated (will be removed in a future release)"), NULL },
// Handled by stand_alone_cpg_cb(), which disables only the CIB connection
{ "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL },
// Collected in options.log_files and opened by main()
{ "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
&options.log_files, N_("Send logs to the additional named logfile"), NULL },
{ NULL }
};
/*!
 * \internal
 * \brief Create the command-line option context for the fencer
 *
 * \param[in,out] args   Common argument storage
 * \param[out]    group  Where to store the output-format option group
 *
 * \return Newly created option context with the fencer's entries added
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
{
    GOptionContext *context = pcmk__build_arg_context(args,
                                                      "text (default), xml",
                                                      group, NULL);

    pcmk__add_main_args(context, entries);
    return context;
}
/*!
 * \brief Entry point of the fencer daemon
 *
 * Parses the command line, handles the version and metadata requests, refuses
 * to start if another instance already answers on the "stonith-ng" IPC
 * endpoint, connects to the cluster (unless in stand-alone mode), starts the
 * IPC server, and runs the mainloop until shutdown. Every exit path goes
 * through the done label, which releases resources and exits with exit_code.
 *
 * \param[in] argc  Number of command-line arguments
 * \param[in] argv  Command-line arguments
 *
 * \return Does not return normally; the process ends via crm_exit()
 */
int
main(int argc, char **argv)
{
int rc = pcmk_rc_ok;
pcmk_cluster_t *cluster = NULL;
crm_ipc_t *old_instance = NULL;
GError *error = NULL;
GOptionGroup *output_group = NULL;
pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
GOptionContext *context = build_arg_context(args, &output_group);
crm_log_preinit(NULL, argc, argv);
pcmk__register_formats(output_group, formats);
if (!g_option_context_parse_strv(context, &processed_args, &error)) {
exit_code = CRM_EX_USAGE;
goto done;
}
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error creating output format %s: %s",
args->output_ty, pcmk_rc_str(rc));
goto done;
}
if (args->version) {
out->version(out, false);
goto done;
}
// A first positional argument of "metadata" prints metadata and exits
if ((g_strv_length(processed_args) >= 2)
&& pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
rc = fencer_metadata();
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Unable to display metadata: %s", pcmk_rc_str(rc));
}
goto done;
}
// Open additional log files
pcmk__add_logfiles(options.log_files, out);
crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE,
(args->verbosity > 0), argc, argv, FALSE);
crm_notice("Starting Pacemaker fencer");
// Probe the IPC endpoint to detect an already-running fencer
old_instance = crm_ipc_new("stonith-ng", 0);
if (old_instance == NULL) {
/* crm_ipc_new() will have already logged an error message with
* crm_err()
*/
exit_code = CRM_EX_FATAL;
goto done;
}
if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
// IPC endpoint already up
crm_ipc_close(old_instance);
crm_ipc_destroy(old_instance);
crm_err("pacemaker-fenced is already active, aborting startup");
/* NOTE(review): exit_code is still CRM_EX_OK on this path -- presumably
 * intentional; confirm
 */
goto done;
} else {
// Not up or not authentic, we'll proceed either way
crm_ipc_destroy(old_instance);
old_instance = NULL;
}
mainloop_add_signal(SIGTERM, stonith_shutdown);
pcmk__cluster_init_node_caches();
rc = fenced_scheduler_init();
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Error initializing scheduler data: %s", pcmk_rc_str(rc));
goto done;
}
cluster = pcmk_cluster_new();
if (!stand_alone) {
#if SUPPORT_COROSYNC
if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy);
pcmk_cpg_set_deliver_fn(cluster, stonith_peer_ais_callback);
pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb);
}
#endif // SUPPORT_COROSYNC
pcmk__cluster_set_status_callback(&st_peer_update_callback);
if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) {
exit_code = CRM_EX_FATAL;
crm_crit("Cannot sign in to the cluster... terminating");
goto done;
}
- pcmk__str_update(&stonith_our_uname, cluster->uname);
+ pcmk__str_update(&stonith_our_uname, cluster->priv->node_name);
if (!options.no_cib_connect) {
setup_cib();
}
} else {
pcmk__str_update(&stonith_our_uname, "localhost");
crm_warn("Stand-alone mode is deprecated and will be removed "
"in a future release");
}
init_device_list();
init_topology_list();
pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
// Create the mainloop and run it...
mainloop = g_main_loop_new(NULL, FALSE);
crm_notice("Pacemaker fencer successfully started and accepting connections");
g_main_loop_run(mainloop);
// Reached on every path, including early exits before most setup has run
done:
g_strfreev(processed_args);
pcmk__free_arg_context(context);
g_strfreev(options.log_files);
stonith_cleanup();
pcmk_cluster_free(cluster);
fenced_scheduler_cleanup();
pcmk__output_and_clear_error(&error, out);
if (out != NULL) {
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
pcmk__unregister_formats();
crm_exit(exit_code);
}
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index b3e2d8e55a..43d53e5b6f 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,143 +1,142 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER__H
# define PCMK__CRM_CLUSTER__H
# include <stdint.h> // uint32_t, uint64_t
# include <glib.h> // gboolean, GHashTable
# include <libxml/tree.h> // xmlNode
# include <crm/common/xml.h>
# include <crm/common/util.h>
#ifdef __cplusplus
extern "C" {
#endif
# if SUPPORT_COROSYNC
# include <corosync/cpg.h>
# endif
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern GHashTable *crm_remote_peer_cache;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
extern unsigned long long crm_peer_seq;
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_LOST "lost"
// @COMPAT Make this internal when we can break API backward compatibility
//! \deprecated Do not use (public access will be removed in a future release)
#define CRM_NODE_MEMBER "member"
// @COMPAT Make this internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
/* NOTE(review): judging by the names, negative values mark a rejected node and
 * positive values increase with progress through the join -- confirm against
 * the controller's join code before relying on ordering comparisons
 */
enum crm_join_phase {
/* @COMPAT: crm_join_nack_quiet can be replaced by
* pcmk__node_status_t:user_data at a compatibility break
*/
//! Not allowed to join, but don't send a nack message
crm_join_nack_quiet = -2,
crm_join_nack = -1,
crm_join_none = 0,
crm_join_welcomed = 1,
crm_join_integrated = 2,
crm_join_finalized = 3,
crm_join_confirmed = 4,
};
//!@}
//! \internal Do not use
typedef struct pcmk__cluster_private pcmk__cluster_private_t;
// Implementation of pcmk_cluster_t
// @COMPAT Make contents internal when we can break API backward compatibility
//!@{
//! \deprecated Do not use (public access will be removed in a future release)
struct pcmk__cluster {
/* @COMPAT Once all members are moved to pcmk__cluster_private_t, we can
* make that the pcmk_cluster_t implementation and drop this struct
* altogether, leaving pcmk_cluster_t as an opaque public type.
*/
//! \internal Do not use
pcmk__cluster_private_t *priv;
// This change drops uname; the fencer now reads priv->node_name instead
- char *uname;
// NOTE(review): presumably the local node's cluster-layer ID -- confirm
uint32_t nodeid;
// NOTE: sbd (as of at least 1.5.2) uses this
//! \deprecated Call pcmk_cluster_set_destroy_fn() to set this
void (*destroy) (gpointer);
# if SUPPORT_COROSYNC
/* @TODO When we can break public API compatibility, make these members a
* separate struct and use void *cluster_data here instead, to abstract the
* cluster layer further.
*/
struct cpg_name group;
// NOTE: sbd (as of at least 1.5.2) uses this
/*!
* \deprecated Call pcmk_cpg_set_deliver_fn() and pcmk_cpg_set_confchg_fn()
* to set these
*/
cpg_callbacks_t cpg;
cpg_handle_t cpg_handle;
# endif
};
//!@}
//! Connection to a cluster layer
typedef struct pcmk__cluster pcmk_cluster_t;
/* Cluster connection lifecycle: allocate with pcmk_cluster_new(), optionally
 * register callbacks with the setter functions below, connect with
 * pcmk_cluster_connect(), and release with pcmk_cluster_free().
 *
 * NOTE(review): callers in the daemons compare the result of
 * pcmk_cluster_connect() against pcmk_rc_ok; the int-returning functions here
 * presumably all return standard Pacemaker return codes -- confirm.
 */
int pcmk_cluster_connect(pcmk_cluster_t *cluster);
int pcmk_cluster_disconnect(pcmk_cluster_t *cluster);
pcmk_cluster_t *pcmk_cluster_new(void);
void pcmk_cluster_free(pcmk_cluster_t *cluster);
// Replaces direct assignment of the deprecated destroy member
int pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer));
#if SUPPORT_COROSYNC
// These replace direct assignment of the deprecated cpg callback members
int pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn);
int pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn);
#endif // SUPPORT_COROSYNC
/*!
* \enum pcmk_cluster_layer
* \brief Types of cluster layer
*/
enum pcmk_cluster_layer {
// The deprecated get_cluster_type() casts these values directly to
// enum cluster_type_e, so the numeric values must not be changed
pcmk_cluster_layer_unknown = 1, //!< Unknown cluster layer
pcmk_cluster_layer_invalid = 2, //!< Invalid cluster layer
pcmk_cluster_layer_corosync = 32, //!< Corosync Cluster Engine
};
enum pcmk_cluster_layer pcmk_get_cluster_layer(void);
const char *pcmk_cluster_layer_text(enum pcmk_cluster_layer layer);
#ifdef __cplusplus
}
#endif
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/cluster/compat.h>
#endif
#endif
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
index 3686b77f9f..0c15006212 100644
--- a/include/crm/cluster/internal.h
+++ b/include/crm/cluster/internal.h
@@ -1,313 +1,315 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CLUSTER_INTERNAL__H
#define PCMK__CRM_CLUSTER_INTERNAL__H
#include <stdbool.h>
#include <stdint.h> // uint32_t, uint64_t
#include <glib.h> // gboolean
#include <crm/cluster.h>
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \internal
* \enum pcmk__cluster_msg
* \brief Types of message sent via the cluster layer
*/
enum pcmk__cluster_msg {
// The strings below are the names that pcmk__cluster_parse_msg_type() maps
// to each value
pcmk__cluster_msg_unknown, // Any unrecognized (or NULL) name
pcmk__cluster_msg_attrd, // "attrd"
pcmk__cluster_msg_based, // CRM_SYSTEM_CIB
pcmk__cluster_msg_controld, // CRM_SYSTEM_CRMD or CRM_SYSTEM_DC
pcmk__cluster_msg_execd, // CRM_SYSTEM_LRMD
pcmk__cluster_msg_fenced, // "stonith-ng"
};
enum crm_proc_flag {
/* @COMPAT When pcmk__node_status_t:processes is made internal, we can merge
* this into node flags or turn it into a boolean. Until then, in theory
* something could depend on these particular numeric values.
*/
// Returned by crm_get_cluster_proc() for any non-Corosync cluster layer
// (note that the value is nonzero)
crm_proc_none = 0x00000001,
// Cluster layers
// Returned by crm_get_cluster_proc() when the cluster layer is Corosync
crm_proc_cpg = 0x04000000,
};
/*!
* \internal
* \enum pcmk__node_status_flags
* \brief Boolean flags for a \c pcmk__node_status_t object
*
* Some flags may not be related to status specifically. However, we keep these
* separate from <tt>enum pcmk__node_flags</tt> because they're used with
* different object types.
*/
enum pcmk__node_status_flags {
// Bit values intended for pcmk__node_status_t:flags (documented there as a
// group of this enum)
/*!
* Node is a Pacemaker Remote node and should not be considered for cluster
* membership
*/
pcmk__node_status_remote = (UINT32_C(1) << 0),
//! Node's cache entry is dirty
pcmk__node_status_dirty = (UINT32_C(1) << 1),
};
// Used with node cache search functions
enum pcmk__node_search_flags {
// Bit flags; combine with bitwise OR (pcmk__node_search_any below is such a
// combination). Passed, for example, as the flags argument of
// pcmk__get_node().
//! Does not affect search
pcmk__node_search_none = 0,
//! Search for cluster nodes from membership cache
pcmk__node_search_cluster_member = (1 << 0),
//! Search for remote nodes
pcmk__node_search_remote = (1 << 1),
//! Search for cluster member nodes and remote nodes
pcmk__node_search_any = pcmk__node_search_cluster_member
|pcmk__node_search_remote,
//! Search for cluster nodes from CIB (as of last cache refresh)
pcmk__node_search_cluster_cib = (1 << 2),
};
/*!
* \internal
* \enum pcmk__node_update
* \brief Type of update to a \c pcmk__node_status_t object
*/
enum pcmk__node_update {
// First argument of the callback registered with
// pcmk__cluster_set_status_callback()
pcmk__node_update_name, //!< Node name updated
pcmk__node_update_state, //!< Node connection state updated
pcmk__node_update_processes, //!< Node process group membership updated
};
//! Implementation of pcmk__cluster_private_t
struct pcmk__cluster_private {
// Members here are owned by the cluster object: pcmk_cluster_free() frees
// node_name before freeing this struct
+ // @TODO Drop and replace with per-daemon node name global variables?
+ char *node_name; //!< Local node name at cluster layer
};
//! Node status data (may be a cluster node or a Pacemaker Remote node)
typedef struct pcmk__node_status {
//! Node name as known to cluster layer, or Pacemaker Remote node name
char *name;
/* @COMPAT This is less than ideal since the value is not a valid XML ID
* (for Corosync, it's the string equivalent of the node's numeric node ID,
* but XML IDs can't start with a number) and the three elements should have
* different IDs.
*
* Ideally, we would use something like node-NODEID, node_state-NODEID, and
* transient_attributes-NODEID as the element IDs. Unfortunately changing it
* would be impractical due to backward compatibility; older nodes in a
* rolling upgrade will always write and expect the value in the old format.
*/
/*!
* Value of the PCMK_XA_ID XML attribute to use with the node's
* PCMK_XE_NODE, PCMK_XE_NODE_STATE, and PCMK_XE_TRANSIENT_ATTRIBUTES
* XML elements in the CIB
*/
// Filled in lazily by pcmk__cluster_node_uuid() when still NULL
char *xml_id;
// Updated through pcmk__update_peer_state() (for example to CRM_NODE_MEMBER
// when the Corosync quorum callback sees the node in the membership view)
char *state; // @TODO change to enum
//! Group of <tt>enum pcmk__node_status_flags</tt>
uint32_t flags;
/*!
* Most recent cluster membership in which node was seen (0 for Pacemaker
* Remote nodes)
*/
// The Corosync quorum callback resets this to 0 for every cached node before
// processing a new membership view
uint64_t membership_id;
uint32_t processes; // @TODO most not needed, merge into flags
/* @TODO When we can break public API compatibility, we can make the rest of
* these members separate structs and use void *cluster_data and
* void *user_data here instead, to abstract the cluster layer further.
*/
// Only used by controller
enum crm_join_phase join;
char *expected;
time_t peer_lost;
char *conn_host;
time_t when_member; // Since when node has been a cluster member
time_t when_online; // Since when peer has been online in CPG
/* @TODO The following are currently needed only by the Corosync stack.
* Eventually consider moving them to a cluster-layer-specific data object.
*/
// pcmk__corosync_uuid() formats this as the node's UUID string and treats 0
// as "not yet known by Corosync"
uint32_t cluster_layer_id; //!< Cluster-layer numeric node ID
time_t when_lost; //!< When CPG membership was last lost
} pcmk__node_status_t;
/*!
* \internal
* \brief Return the process bit corresponding to the current cluster stack
*
* \return Process flag if detectable, otherwise \c crm_proc_none
*/
static inline uint32_t
crm_get_cluster_proc(void)
{
    // Corosync (CPG) is the only cluster layer with a process flag of its own
    if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
        return crm_proc_cpg;
    }
    return crm_proc_none;
}
/*!
* \internal
* \brief Get log-friendly string description of a Corosync return code
*
* \param[in] error Corosync return code
*
* \return Log-friendly string description corresponding to \p error
*/
static inline const char *
pcmk__cs_err_str(int error)
{
    // Used for unrecognized codes and for builds without Corosync support
    const char *desc = "Corosync error";

# if SUPPORT_COROSYNC
    switch (error) {
        case CS_OK:                      desc = "OK"; break;
        case CS_ERR_LIBRARY:             desc = "Library error"; break;
        case CS_ERR_VERSION:             desc = "Version error"; break;
        case CS_ERR_INIT:                desc = "Initialization error"; break;
        case CS_ERR_TIMEOUT:             desc = "Timeout"; break;
        case CS_ERR_TRY_AGAIN:           desc = "Try again"; break;
        case CS_ERR_INVALID_PARAM:       desc = "Invalid parameter"; break;
        case CS_ERR_NO_MEMORY:           desc = "No memory"; break;
        case CS_ERR_BAD_HANDLE:          desc = "Bad handle"; break;
        case CS_ERR_BUSY:                desc = "Busy"; break;
        case CS_ERR_ACCESS:              desc = "Access error"; break;
        case CS_ERR_NOT_EXIST:           desc = "Doesn't exist"; break;
        case CS_ERR_NAME_TOO_LONG:       desc = "Name too long"; break;
        case CS_ERR_EXIST:               desc = "Exists"; break;
        case CS_ERR_NO_SPACE:            desc = "No space"; break;
        case CS_ERR_INTERRUPT:           desc = "Interrupt"; break;
        case CS_ERR_NAME_NOT_FOUND:      desc = "Name not found"; break;
        case CS_ERR_NO_RESOURCES:        desc = "No resources"; break;
        case CS_ERR_NOT_SUPPORTED:       desc = "Not supported"; break;
        case CS_ERR_BAD_OPERATION:       desc = "Bad operation"; break;
        case CS_ERR_FAILED_OPERATION:    desc = "Failed operation"; break;
        case CS_ERR_MESSAGE_ERROR:       desc = "Message error"; break;
        case CS_ERR_QUEUE_FULL:          desc = "Queue full"; break;
        case CS_ERR_QUEUE_NOT_AVAILABLE: desc = "Queue not available"; break;
        case CS_ERR_BAD_FLAGS:           desc = "Bad flags"; break;
        case CS_ERR_TOO_BIG:             desc = "Too big"; break;
        case CS_ERR_NO_SECTIONS:         desc = "No sections"; break;
        default:                         break;
    }
# endif
    return desc;
}
# if SUPPORT_COROSYNC
// pcmk__init_cmap(handle): open a CMAP connection, storing it through handle.
// pcmk__corosync_name() compares the result against CS_OK.
#if 0
/* This is the new way to do it, but we still support all Corosync 2 versions,
* and this isn't always available. A better alternative here would be to check
* for support in the configure script and enable this conditionally.
*/
#define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP)
#else
#define pcmk__init_cmap(handle) cmap_initialize(handle)
#endif
char *pcmk__corosync_cluster_name(void);
// pcmk__corosync_quorum_connect() calls this with NULL once quorum tracking
// has been started
bool pcmk__corosync_add_nodes(xmlNode *xml_parent);
void pcmk__cpg_confchg_cb(cpg_handle_t handle,
const struct cpg_name *group_name,
const struct cpg_address *member_list,
size_t member_list_entries,
const struct cpg_address *left_list,
size_t left_list_entries,
const struct cpg_address *joined_list,
size_t joined_list_entries);
char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id,
uint32_t pid, void *content, const char **from);
# endif
// Node identity helpers (implemented in lib/cluster/cluster.c)
// Returns node->xml_id, setting it from the cluster layer first if unset
const char *pcmk__cluster_node_uuid(pcmk__node_status_t *node);
// Returns a newly allocated name (caller frees); nodeid 0 means the local node
char *pcmk__cluster_node_name(uint32_t nodeid);
// Returns the local node name, looked up once and then cached
const char *pcmk__cluster_local_node_name(void);
// Searches the remote and cluster peer caches; NULL if uuid is not found
const char *pcmk__node_name_from_uuid(const char *uuid);
pcmk__node_status_t *crm_update_peer_proc(const char *source,
pcmk__node_status_t *peer,
uint32_t flag, const char *status);
pcmk__node_status_t *pcmk__update_peer_state(const char *source,
pcmk__node_status_t *node,
const char *state,
uint64_t membership);
void pcmk__update_peer_expected(const char *source, pcmk__node_status_t *node,
const char *expected);
void pcmk__reap_unseen_nodes(uint64_t ring_id);
// dispatch is invoked with (ring ID, quorate) after each quorum notification
// has been processed; destroy becomes the quorum connection's destroy callback
void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
gboolean),
void (*destroy) (gpointer));
// Cluster messaging (implemented in lib/cluster/cluster.c)
enum pcmk__cluster_msg pcmk__cluster_parse_msg_type(const char *text);
// Returns false when the cluster layer is unsupported
bool pcmk__cluster_send_message(const pcmk__node_status_t *node,
enum pcmk__cluster_msg service,
const xmlNode *data);
// Membership
bool pcmk__cluster_has_quorum(void);
void pcmk__cluster_init_node_caches(void);
void pcmk__cluster_destroy_node_caches(void);
void pcmk__cluster_set_autoreap(bool enable);
void pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update,
pcmk__node_status_t *,
const void *));
bool pcmk__cluster_is_node_active(const pcmk__node_status_t *node);
unsigned int pcmk__cluster_num_active_nodes(void);
unsigned int pcmk__cluster_num_remote_nodes(void);
pcmk__node_status_t *pcmk__cluster_lookup_remote_node(const char *node_name);
void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name);
void pcmk__cluster_forget_remote_node(const char *node_name);
pcmk__node_status_t *pcmk__search_node_caches(unsigned int id,
const char *uname,
uint32_t flags);
void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id);
void pcmk__refresh_node_caches_from_cib(xmlNode *cib);
// flags takes enum pcmk__node_search_flags values
pcmk__node_status_t *pcmk__get_node(unsigned int id, const char *uname,
const char *uuid, uint32_t flags);
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_CLUSTER_INTERNAL__H
diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c
index 21246be462..313450e88d 100644
--- a/lib/cluster/cluster.c
+++ b/lib/cluster/cluster.c
@@ -1,488 +1,488 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <dlfcn.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/utsname.h> // uname()
#include <glib.h> // gboolean
#include <crm/crm.h>
#include <crm/common/ipc.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include "crmcluster_private.h"
CRM_TRACE_INIT_DATA(cluster);
/*!
* \internal
* \brief Get the message type equivalent of a string
*
* \param[in] text String of message type
*
* \return Message type equivalent of \p text
*/
enum pcmk__cluster_msg
pcmk__cluster_parse_msg_type(const char *text)
{
    const char *msg_name = NULL;

    CRM_CHECK(text != NULL, return pcmk__cluster_msg_unknown);

    // All comparisons are made against the result of pcmk__message_name()
    msg_name = pcmk__message_name(text);

    if (pcmk__str_eq(msg_name, "attrd", pcmk__str_none)) {
        return pcmk__cluster_msg_attrd;
    }
    if (pcmk__str_eq(msg_name, CRM_SYSTEM_CIB, pcmk__str_none)) {
        return pcmk__cluster_msg_based;
    }
    if (pcmk__str_any_of(msg_name, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) {
        return pcmk__cluster_msg_controld;
    }
    if (pcmk__str_eq(msg_name, CRM_SYSTEM_LRMD, pcmk__str_none)) {
        return pcmk__cluster_msg_execd;
    }
    if (pcmk__str_eq(msg_name, "stonith-ng", pcmk__str_none)) {
        return pcmk__cluster_msg_fenced;
    }
    return pcmk__cluster_msg_unknown;
}
/*!
* \internal
* \brief Get a node's cluster-layer UUID, setting it if not already set
*
* \param[in,out] node Node to check
*
* \return Cluster-layer node UUID of \p node, or \c NULL if unknown
*/
const char *
pcmk__cluster_node_uuid(pcmk__node_status_t *node)
{
    // Looked up before the NULL check, exactly as callers have always seen
    const enum pcmk_cluster_layer layer = pcmk_get_cluster_layer();

    if (node == NULL) {
        return NULL;
    }

    if (node->xml_id == NULL) {
        // Not cached yet, so ask the cluster layer and remember the answer
        switch (layer) {
#if SUPPORT_COROSYNC
            case pcmk_cluster_layer_corosync:
                node->xml_id = pcmk__corosync_uuid(node);
                break;
#endif  // SUPPORT_COROSYNC

            default:
                crm_err("Unsupported cluster layer %s",
                        pcmk_cluster_layer_text(layer));
                return NULL;
        }
    }
    return node->xml_id;
}
/*!
* \internal
* \brief Connect to the cluster layer
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_connect(pcmk_cluster_t *cluster)
{
    const enum pcmk_cluster_layer layer = pcmk_get_cluster_layer();
    const char *layer_text = pcmk_cluster_layer_text(layer);

    // cts-lab looks for this message
    crm_notice("Connecting to %s cluster layer", layer_text);

#if SUPPORT_COROSYNC
    if (layer == pcmk_cluster_layer_corosync) {
        return pcmk__corosync_connect(cluster);
    }
#endif  // SUPPORT_COROSYNC

    // Every other layer (or a build without Corosync support) is unsupported
    crm_err("Failed to connect to unsupported cluster layer %s",
            layer_text);
    return EPROTONOSUPPORT;
}
/*!
* \brief Disconnect from the cluster layer
*
* \param[in,out] cluster Cluster object to disconnect
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_disconnect(pcmk_cluster_t *cluster)
{
    const enum pcmk_cluster_layer layer = pcmk_get_cluster_layer();
    const char *layer_text = pcmk_cluster_layer_text(layer);

    crm_info("Disconnecting from %s cluster layer", layer_text);

#if SUPPORT_COROSYNC
    if (layer == pcmk_cluster_layer_corosync) {
        // Drop the connection first, then the node caches
        pcmk__corosync_disconnect(cluster);
        pcmk__cluster_destroy_node_caches();
        return pcmk_rc_ok;
    }
#endif  // SUPPORT_COROSYNC

    crm_err("Failed to disconnect from unsupported cluster layer %s",
            layer_text);
    return EPROTONOSUPPORT;
}
/*!
* \brief Allocate a new \p pcmk_cluster_t object
*
* \return A newly allocated \p pcmk_cluster_t object (guaranteed not \c NULL)
* \note The caller is responsible for freeing the return value using
* \p pcmk_cluster_free().
*/
pcmk_cluster_t *
pcmk_cluster_new(void)
{
pcmk_cluster_t *cluster = pcmk__assert_alloc(1, sizeof(pcmk_cluster_t));
cluster->priv = pcmk__assert_alloc(1, sizeof(pcmk__cluster_private_t));
return cluster;
}
/*!
* \brief Free a \p pcmk_cluster_t object and its dynamically allocated members
*
* \param[in,out] cluster Cluster object to free
*/
void
pcmk_cluster_free(pcmk_cluster_t *cluster)
{
// Freeing a NULL cluster object is a no-op
if (cluster == NULL) {
return;
}
// Members of the private data must be released before the private data itself
- free(cluster->uname);
+ free(cluster->priv->node_name);
free(cluster->priv);
free(cluster);
}
/*!
* \brief Set the destroy function for a cluster object
*
* \param[in,out] cluster Cluster object
* \param[in] fn Destroy function to set
*
* \return Standard Pacemaker return code
*/
int
pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer))
{
    if (cluster != NULL) {
        // fn is stored as given (including NULL)
        cluster->destroy = fn;
        return pcmk_rc_ok;
    }
    return EINVAL;
}
/*!
* \internal
* \brief Send an XML message via the cluster messaging layer
*
* \param[in] node Cluster node to send message to
* \param[in] service Message type to use in message host info
* \param[in] data XML message to send
*
* \return \c true on success, or \c false otherwise
*/
bool
pcmk__cluster_send_message(const pcmk__node_status_t *node,
                           enum pcmk__cluster_msg service, const xmlNode *data)
{
    // @TODO Return standard Pacemaker return code
    bool sent = false;

    switch (pcmk_get_cluster_layer()) {
#if SUPPORT_COROSYNC
        case pcmk_cluster_layer_corosync:
            sent = pcmk__cpg_send_xml(data, node, service);
            break;
#endif  // SUPPORT_COROSYNC

        default:
            // Nothing can be sent via an unsupported cluster layer
            break;
    }
    return sent;
}
/*!
* \internal
* \brief Get the node name corresponding to a cluster-layer node ID
*
* Get the node name from the cluster layer if possible. Otherwise, if for the
* local node, call \c uname() and get the \c nodename member from the
* <tt>struct utsname</tt> object.
*
* \param[in] nodeid Node ID to check (or 0 for the local node)
*
* \return Node name corresponding to \p nodeid
*
* \note This will fatally exit if \c uname() fails to get the local node name
* or we run out of memory.
* \note The caller is responsible for freeing the return value using \c free().
*/
char *
pcmk__cluster_node_name(uint32_t nodeid)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
switch (cluster_layer) {
#if SUPPORT_COROSYNC
case pcmk_cluster_layer_corosync:
return pcmk__corosync_name(0, nodeid);
#else
break;
#endif // SUPPORT_COROSYNC
default:
crm_err("Unsupported cluster layer: %s", cluster_layer_s);
break;
}
if (nodeid == 0) {
struct utsname hostinfo;
crm_notice("Could not get local node name from %s cluster layer, "
"defaulting to local hostname",
cluster_layer_s);
if (uname(&hostinfo) < 0) {
// @TODO Maybe let the caller decide what to do
crm_err("Failed to get the local hostname");
crm_exit(CRM_EX_FATAL);
}
return pcmk__str_copy(hostinfo.nodename);
}
crm_notice("Could not obtain a node name for node with "
PCMK_XA_ID "=" PRIu32,
nodeid);
return NULL;
}
/*!
* \internal
* \brief Get the local node's cluster-layer node name
*
* If getting the node name from the cluster layer is impossible, call
* \c uname() and get the \c nodename member from the <tt>struct utsname</tt>
* object.
*
* \return Local node's name
*
* \note This will fatally exit if \c uname() fails to get the local node name
* or we run out of memory.
*/
const char *
pcmk__cluster_local_node_name(void)
{
    // @TODO Refactor to avoid trivially leaking name at exit
    static char *cached_name = NULL;

    if (cached_name != NULL) {
        return cached_name;
    }

    // First call (or no name obtained so far): look up node ID 0 (local node)
    cached_name = pcmk__cluster_node_name(0);
    return cached_name;
}
/*!
* \internal
* \brief Get the node name corresponding to a node UUID
*
* Look for the UUID in both the remote node cache and the cluster member cache.
*
* \param[in] uuid UUID to search for
*
* \return Node name corresponding to \p uuid if found, or \c NULL otherwise
*/
const char *
pcmk__node_name_from_uuid(const char *uuid)
{
/* @TODO There are too many functions in libcrmcluster that look up a node
* from the node caches (possibly creating a cache entry if none exists).
* There are at least the following:
* * pcmk__cluster_lookup_remote_node()
* * pcmk__get_node()
* * pcmk__node_name_from_uuid()
* * pcmk__search_node_caches()
*
* There's a lot of duplication among them, but they all do slightly
* different things. We should try to clean them up and consolidate them to
* the extent possible, likely with new helper functions.
*/
GHashTableIter iter;
pcmk__node_status_t *node = NULL;
CRM_CHECK(uuid != NULL, return NULL);
// Remote nodes have the same uname and uuid
if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) {
return uuid;
}
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
if (pcmk__str_eq(node->xml_id, uuid, pcmk__str_casei)) {
return node->name;
}
}
return NULL;
}
/*!
* \brief Get a log-friendly string equivalent of a cluster layer
*
* \param[in] layer Cluster layer
*
* \return Log-friendly string corresponding to \p layer
*/
const char *
pcmk_cluster_layer_text(enum pcmk_cluster_layer layer)
{
    // Both the explicit "invalid" layer and out-of-range values map to this
    const char *text = "invalid";

    switch (layer) {
        case pcmk_cluster_layer_corosync:
            text = "corosync";
            break;

        case pcmk_cluster_layer_unknown:
            text = "unknown";
            break;

        case pcmk_cluster_layer_invalid:
            break;

        default:
            crm_err("Invalid cluster layer: %d", layer);
            break;
    }
    return text;
}
/*!
* \brief Get and validate the local cluster layer
*
* If a cluster layer is not configured via the \c PCMK__ENV_CLUSTER_TYPE local
* option, this will try to detect an active cluster from among the supported
* cluster layers.
*
* \return Local cluster layer
*
* \note This will fatally exit if the configured cluster layer is invalid.
*/
enum pcmk_cluster_layer
pcmk_get_cluster_layer(void)
{
static enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown;
const char *cluster = NULL;
// Cluster layer is stable once set
if (cluster_layer != pcmk_cluster_layer_unknown) {
return cluster_layer;
}
cluster = pcmk__env_option(PCMK__ENV_CLUSTER_TYPE);
if (cluster != NULL) {
crm_info("Verifying configured cluster layer '%s'", cluster);
cluster_layer = pcmk_cluster_layer_invalid;
#if SUPPORT_COROSYNC
if (pcmk__str_eq(cluster, PCMK_VALUE_COROSYNC, pcmk__str_casei)) {
cluster_layer = pcmk_cluster_layer_corosync;
}
#endif // SUPPORT_COROSYNC
if (cluster_layer == pcmk_cluster_layer_invalid) {
crm_notice("This installation does not support the '%s' cluster "
"infrastructure: terminating",
cluster);
crm_exit(CRM_EX_FATAL);
}
crm_info("Assuming an active '%s' cluster", cluster);
} else {
// Nothing configured, so test supported cluster layers
#if SUPPORT_COROSYNC
crm_debug("Testing with Corosync");
if (pcmk__corosync_is_active()) {
cluster_layer = pcmk_cluster_layer_corosync;
}
#endif // SUPPORT_COROSYNC
if (cluster_layer == pcmk_cluster_layer_unknown) {
crm_notice("Could not determine the current cluster layer");
} else {
crm_info("Detected an active '%s' cluster",
pcmk_cluster_layer_text(cluster_layer));
}
}
return cluster_layer;
}
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
#include <crm/cluster/compat.h>
gboolean
crm_cluster_connect(pcmk_cluster_t *cluster)
{
    // Deprecated wrapper: collapse the return code into a boolean result
    const int rc = pcmk_cluster_connect(cluster);

    return (rc == pcmk_rc_ok);
}
const char *
name_for_cluster_type(enum cluster_type_e type)
{
    // Deprecated counterpart of pcmk_cluster_layer_text()
    if (type == pcmk_cluster_corosync) {
        return "corosync";
    }
    if (type == pcmk_cluster_unknown) {
        return "unknown";
    }
    if (type == pcmk_cluster_invalid) {
        return "invalid";
    }

    crm_err("Invalid cluster type: %d", type);
    return "invalid";
}
enum cluster_type_e
get_cluster_type(void)
{
return (enum cluster_type_e) pcmk_get_cluster_layer();
}
// LCOV_EXCL_STOP
// End deprecated API
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
index 3505efef1a..71d95925e2 100644
--- a/lib/cluster/corosync.c
+++ b/lib/cluster/corosync.c
@@ -1,816 +1,816 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <arpa/inet.h>
#include <inttypes.h> // PRIu64, etc.
#include <netdb.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <bzlib.h>
#include <corosync/cfg.h>
#include <corosync/cmap.h>
#include <corosync/corodefs.h>
#include <corosync/corotypes.h>
#include <corosync/hdb.h>
#include <corosync/quorum.h>
#include <qb/qbipcc.h>
#include <qb/qbutil.h>
#include <crm/cluster/internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h> // PCMK__SPECIAL_PID
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include "crmcluster_private.h"
static quorum_handle_t pcmk_quorum_handle = 0;
static gboolean (*quorum_app_callback)(unsigned long long seq,
gboolean quorate) = NULL;
/*!
* \internal
* \brief Get the Corosync UUID associated with a Pacemaker node
*
* \param[in] node Pacemaker node
*
* \return Newly allocated string with node's Corosync UUID, or NULL if unknown
* \note It is the caller's responsibility to free the result with free().
*/
char *
pcmk__corosync_uuid(const pcmk__node_status_t *node)
{
    CRM_ASSERT(pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync);

    if (node == NULL) {
        return NULL;
    }

    // An ID of 0 means Corosync has not assigned the node an ID yet
    if (node->cluster_layer_id == 0) {
        crm_info("Node %s is not yet known by Corosync", node->name);
        return NULL;
    }

    // The UUID is simply the decimal form of the numeric node ID
    return crm_strdup_printf("%" PRIu32, node->cluster_layer_id);
}
/*!
 * \internal
 * \brief Check whether a CMAP value is usable as a node name
 *
 * \param[in] key   CMAP key that \p name was read from (used only in logs)
 * \param[in] name  Candidate node name (may be NULL)
 *
 * \return false if \p name is NULL, begins with a dotted quad of integers, or
 *         contains a colon; otherwise true
 */
static bool
node_name_is_valid(const char *key, const char *name)
{
    int octets[4];

    if (name == NULL) {
        crm_trace("%s is empty", key);
        return false;
    }

    if (sscanf(name, "%d.%d.%d.%d",
               &octets[0], &octets[1], &octets[2], &octets[3]) == 4) {
        crm_trace("%s contains an IPv4 address (%s), ignoring", key, name);
        return false;
    }

    if (strchr(name, ':') != NULL) {
        crm_trace("%s contains an IPv6 address (%s), ignoring", key, name);
        return false;
    }

    crm_trace("'%s: %s' is valid", key, name);
    return true;
}
/*!
* \internal
* \brief Get Corosync node name corresponding to a node ID
*
* \param[in] cmap_handle Connection to Corosync CMAP
* \param[in] nodeid Node ID to check
*
* \return Newly allocated string with name or (if no name) IP address
* associated with first address assigned to a Corosync node ID (or NULL
* if unknown)
* \note It is the caller's responsibility to free the result with free().
*/
char *
pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid)
{
// Originally based on corosync-quorumtool.c:node_name()
int lpc = 0;
cs_error_t rc = CS_OK;
int retries = 0;
char *name = NULL;
cmap_handle_t local_handle = 0;
int fd = -1;
uid_t found_uid = 0;
gid_t found_gid = 0;
pid_t found_pid = 0;
int rv;
// A node ID of 0 is replaced by the local node's CPG node ID
if (nodeid == 0) {
nodeid = pcmk__cpg_local_nodeid(0);
}
// No handle supplied by the caller: open a temporary CMAP connection.
// Up to 5 attempts are made, sleeping 1, 2, ... seconds after each failure
// (including the last one).
if (cmap_handle == 0 && local_handle == 0) {
retries = 0;
crm_trace("Initializing CMAP connection");
do {
rc = pcmk__init_cmap(&local_handle);
if (rc != CS_OK) {
retries++;
crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc),
retries);
sleep(retries);
}
} while (retries < 5 && rc != CS_OK);
if (rc != CS_OK) {
crm_warn("Could not connect to Cluster Configuration Database API, error %s",
cs_strerror(rc));
local_handle = 0;
}
}
// Only a connection opened here is checked for authenticity; a handle passed
// in by the caller is used as-is
if (cmap_handle == 0) {
cmap_handle = local_handle;
rc = cmap_fd_get(cmap_handle, &fd);
if (rc != CS_OK) {
crm_err("Could not obtain the CMAP API connection: %s (%d)",
cs_strerror(rc), rc);
goto bail;
}
/* CMAP provider run as root (in given user namespace, anyway)? */
// rv == 0 is reported as "not authentic"; rv < 0 is reported as a failure to
// verify, with -rv passed to strerror()
if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
&found_uid, &found_gid))) {
crm_err("CMAP provider is not authentic:"
" process %lld (uid: %lld, gid: %lld)",
(long long) PCMK__SPECIAL_PID_AS_0(found_pid),
(long long) found_uid, (long long) found_gid);
goto bail;
} else if (rv < 0) {
crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
strerror(-rv), -rv);
goto bail;
}
}
// Walk nodelist.node.0, nodelist.node.1, ... until a nodeid lookup fails or
// the entry whose nodeid matches has been examined
while (name == NULL && cmap_handle != 0) {
uint32_t id = 0;
char *key = NULL;
key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc);
rc = cmap_get_uint32(cmap_handle, key, &id);
crm_trace("Checking %u vs %u from %s", nodeid, id, key);
free(key);
if (rc != CS_OK) {
break;
}
if (nodeid == id) {
crm_trace("Searching for node name for %u in nodelist.node.%d %s",
nodeid, lpc, pcmk__s(name, "<null>"));
// Prefer the entry's explicit "name" value
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.name", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
free(key);
}
// Otherwise fall back to ring0_addr, unless node_name_is_valid() rejects it
if (name == NULL) {
key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc);
cmap_get_string(cmap_handle, key, &name);
crm_trace("%s = %s", key, pcmk__s(name, "<null>"));
if (!node_name_is_valid(key, name)) {
free(name);
name = NULL;
}
free(key);
}
break;
}
lpc++;
}
bail:
// Only a connection opened by this function is closed here
if(local_handle) {
cmap_finalize(local_handle);
}
if (name == NULL) {
crm_info("Unable to get node name for nodeid %u", nodeid);
}
return name;
}
/*!
* \internal
* \brief Disconnect from Corosync cluster
*
* \param[in,out] cluster Cluster object to disconnect
*/
void
pcmk__corosync_disconnect(pcmk_cluster_t *cluster)
{
    // CPG goes first, then the quorum connection if one is open
    pcmk__cpg_disconnect(cluster);

    if (pcmk_quorum_handle) {
        quorum_finalize(pcmk_quorum_handle);
        pcmk_quorum_handle = 0;     // Mark the connection as closed
    }

    crm_notice("Disconnected from Corosync");
}
/*!
* \internal
* \brief Dispatch function for quorum connection file descriptor
*
* \param[in] user_data Ignored
*
* \return 0 on success, -1 on error (per mainloop_io_t interface)
*/
static int
quorum_dispatch_cb(gpointer user_data)
{
    /* quorum_dispatch() returns a cs_error_t, whose error codes are positive
     * values, so failure must be detected by comparing against CS_OK (a
     * "less than zero" test never matches)
     */
    cs_error_t rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL);

    if (rc != CS_OK) {
        crm_err("Connection to the Quorum API failed: %d", rc);
        quorum_finalize(pcmk_quorum_handle);
        pcmk_quorum_handle = 0;     // Avoid finalizing again at disconnect
        return -1;
    }
    return 0;
}
/*!
* \internal
* \brief Notification callback for Corosync quorum connection
*
* \param[in] handle Corosync quorum connection
* \param[in] quorate Whether cluster is quorate
* \param[in] ring_id Corosync ring ID
* \param[in] view_list_entries Number of entries in \p view_list
* \param[in] view_list Corosync node IDs in membership
*/
static void
quorum_notification_cb(quorum_handle_t handle, uint32_t quorate,
                       uint64_t ring_id, uint32_t view_list_entries,
                       uint32_t *view_list)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;

    // Stays TRUE until the first notification with a non-empty view list
    static gboolean init_phase = TRUE;

    bool is_quorate = (quorate != 0);
    bool was_quorate = pcmk__cluster_has_quorum();

    if (is_quorate && !was_quorate) {
        crm_notice("Quorum acquired " QB_XS " membership=%" PRIu64
                   " members=%" PRIu32,
                   ring_id, view_list_entries);
        pcmk__cluster_set_quorum(true);

    } else if (!is_quorate && was_quorate) {
        // Each PRI* macro needs its own explicit '%'
        crm_warn("Quorum lost " QB_XS " membership=%" PRIu64
                 " members=%" PRIu32,
                 ring_id, view_list_entries);
        pcmk__cluster_set_quorum(false);

    } else {
        crm_info("Quorum %s " QB_XS " membership=%" PRIu64 " members=%" PRIu32,
                 (is_quorate? "retained" : "still lost"), ring_id,
                 view_list_entries);
    }

    if (view_list_entries == 0 && init_phase) {
        crm_info("Corosync membership is still forming, ignoring");
        return;
    }

    init_phase = FALSE;

    /* Reset membership_id for all cached nodes so we can tell which ones aren't
     * in the view list */
    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
        node->membership_id = 0;
    }

    /* Update the peer cache for each node in view list */
    for (uint32_t i = 0; i < view_list_entries; i++) {
        uint32_t id = view_list[i];

        crm_debug("Member[%" PRIu32 "] %" PRIu32 " ", i, id);

        /* Get this node's peer cache entry (adding one if not already there) */
        node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member);
        if (node->name == NULL) {
            char *name = pcmk__corosync_name(0, id);

            crm_info("Obtaining name for new node %u", id);
            node = pcmk__get_node(id, name, NULL,
                                  pcmk__node_search_cluster_member);
            free(name);
        }

        // Update the node state (including updating membership_id to ring_id)
        pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id);
    }

    /* Remove any peer cache entries we didn't update */
    pcmk__reap_unseen_nodes(ring_id);

    // Finally hand the new state to the application's own callback, if any
    if (quorum_app_callback) {
        quorum_app_callback(ring_id, is_quorate);
    }
}
/*!
* \internal
* \brief Connect to Corosync quorum service
*
* \param[in] dispatch Connection dispatch callback
* \param[in] destroy Connection destroy callback
*/
void
pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long,
                                                   gboolean),
                              void (*destroy)(gpointer))
{
    cs_error_t rc;
    int fd = 0;
    int quorate = 0;
    uint32_t quorum_type = 0;
    struct mainloop_fd_callbacks quorum_fd_callbacks;
    uid_t found_uid = 0;
    gid_t found_gid = 0;
    pid_t found_pid = 0;
    int rv;

    quorum_fd_callbacks.dispatch = quorum_dispatch_cb;
    quorum_fd_callbacks.destroy = destroy;

    crm_debug("Configuring Pacemaker to obtain quorum from Corosync");

    {
#if 0
        // New way but not supported by all Corosync 2 versions
        quorum_model_v0_data_t quorum_model_data = {
            .model = QUORUM_MODEL_V0,
            .quorum_notify_fn = quorum_notification_cb,
        };

        rc = quorum_model_initialize(&pcmk_quorum_handle, QUORUM_MODEL_V0,
                                     (quorum_model_data_t *) &quorum_model_data,
                                     &quorum_type, NULL);
#else
        quorum_callbacks_t quorum_callbacks = {
            .quorum_notify_fn = quorum_notification_cb,
        };

        rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks,
                               &quorum_type);
#endif
    }

    if (rc != CS_OK) {
        crm_err("Could not connect to the Quorum API: %s (%d)",
                cs_strerror(rc), rc);
        goto bail;

    } else if (quorum_type != QUORUM_SET) {
        crm_err("Corosync quorum is not configured");
        goto bail;
    }

    rc = quorum_fd_get(pcmk_quorum_handle, &fd);
    if (rc != CS_OK) {
        // rc is a Corosync error code, not an errno value, so it must be
        // described with cs_strerror() rather than strerror()
        crm_err("Could not obtain the Quorum API connection: %s (%d)",
                cs_strerror(rc), rc);
        goto bail;
    }

    /* Quorum provider run as root (in given user namespace, anyway)? */
    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
                                            &found_uid, &found_gid))) {
        crm_err("Quorum provider is not authentic:"
                " process %lld (uid: %lld, gid: %lld)",
                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
                (long long) found_uid, (long long) found_gid);
        rc = CS_ERR_ACCESS;
        goto bail;

    } else if (rv < 0) {
        crm_err("Could not verify authenticity of Quorum provider: %s (%d)",
                strerror(-rv), -rv);
        rc = CS_ERR_ACCESS;
        goto bail;
    }

    rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
    if (rc != CS_OK) {
        crm_err("Could not obtain the current Quorum API state: %d", rc);
        goto bail;
    }

    if (quorate) {
        crm_notice("Quorum acquired");
    } else {
        crm_warn("No quorum");
    }
    quorum_app_callback = dispatch;
    pcmk__cluster_set_quorum(quorate != 0);

    rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT);
    if (rc != CS_OK) {
        crm_err("Could not setup Quorum API notifications: %d", rc);
        goto bail;
    }

    // The application callback is handed to the mainloop as the user data
    mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);

    pcmk__corosync_add_nodes(NULL);

  bail:
    if (rc != CS_OK) {
        quorum_finalize(pcmk_quorum_handle);

        /* Forget the finalized handle so that a later
         * pcmk__corosync_disconnect() does not finalize it a second time
         */
        pcmk_quorum_handle = 0;
    }
}
/*!
* \internal
* \brief Connect to Corosync cluster layer
*
* Initializes the node caches, checks that the detected cluster layer is
* Corosync, connects via pcmk__cpg_connect(), and then stores the local node
* ID and node name in \p cluster before making sure the local node has an
* entry in the peer cache.
*
* \param[in,out] cluster Initialized cluster object to connect
*
* \return Standard Pacemaker return code (\c pcmk_rc_ok on success, \c EINVAL
*         if the detected cluster layer is not Corosync, the result of
*         pcmk__cpg_connect() if that fails, or \c ENXIO if the local node ID
*         or node name cannot be determined)
*/
int
pcmk__corosync_connect(pcmk_cluster_t *cluster)
{
const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer();
const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer);
int rc = pcmk_rc_ok;
pcmk__cluster_init_node_caches();
// This function only handles Corosync; reject any other detected layer
if (cluster_layer != pcmk_cluster_layer_corosync) {
crm_err("Invalid cluster layer: %s " QB_XS " cluster_layer=%d",
cluster_layer_s, cluster_layer);
return EINVAL;
}
rc = pcmk__cpg_connect(cluster);
if (rc != pcmk_rc_ok) {
// Error message was logged by pcmk__cpg_connect()
return rc;
}
crm_info("Connection to %s established", cluster_layer_s);
// A local node ID of 0 is treated as "could not be determined"
cluster->nodeid = pcmk__cpg_local_nodeid(0);
if (cluster->nodeid == 0) {
crm_err("Could not determine local node ID");
return ENXIO;
}
// NOTE(review): the lines prefixed with '-' and '+' below are diff markers
// from the patch this text was taken from; the '+' lines store the local
// node name in cluster->priv->node_name instead of cluster->uname
- cluster->uname = pcmk__cluster_node_name(0);
- if (cluster->uname == NULL) {
+ cluster->priv->node_name = pcmk__cluster_node_name(0);
+ if (cluster->priv->node_name == NULL) {
crm_err("Could not determine local node name");
return ENXIO;
}
// Ensure local node always exists in peer cache
- pcmk__get_node(cluster->nodeid, cluster->uname, NULL,
+ pcmk__get_node(cluster->nodeid, cluster->priv->node_name, NULL,
pcmk__node_search_cluster_member);
return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Check whether a Corosync cluster is active
 *
 * Corosync is treated as active when a CMAP connection can be initialized.
 * The connection is finalized again right away; it is only a probe.
 *
 * \return \c true if Corosync is found active, or \c false otherwise
 */
bool
pcmk__corosync_is_active(void)
{
    cmap_handle_t probe_handle;
    int cs_rc = pcmk__init_cmap(&probe_handle);

    if (cs_rc != CS_OK) {
        crm_info("Failed to initialize the cmap API: %s (%d)",
                 pcmk__cs_err_str(cs_rc), cs_rc);
        return false;
    }

    // The probe succeeded, so the connection is no longer needed
    cmap_finalize(probe_handle);
    return true;
}
/*!
 * \internal
 * \brief Check whether a Corosync cluster peer is active
 *
 * A peer counts as active only if it is non-NULL, its state is
 * \c CRM_NODE_MEMBER, and the \c crm_proc_cpg flag is set in its process
 * flags. The reason for any negative result is logged at trace level.
 *
 * \param[in] node  Node to check
 *
 * \return \c true if \p node is an active Corosync peer, or \c false otherwise
 */
bool
pcmk__corosync_is_peer_active(const pcmk__node_status_t *node)
{
    bool active = false;

    if (node == NULL) {
        crm_trace("Corosync peer inactive: NULL");

    } else if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_none)) {
        crm_trace("Corosync peer %s inactive: state=%s",
                  node->name, node->state);

    } else if (!pcmk_is_set(node->processes, crm_proc_cpg)) {
        crm_trace("Corosync peer %s inactive " QB_XS " processes=%.16" PRIx32,
                  node->name, node->processes);

    } else {
        // Passed every check above
        active = true;
    }

    return active;
}
/*!
 * \internal
 * \brief Load Corosync node list (via CMAP) into peer cache and optionally XML
 *
 * Connects to CMAP (retrying with an increasing sleep on failure), checks that
 * the CMAP provider passes the root (uid 0, gid 0) authenticity check, and
 * then walks the \c nodelist.node.N.nodeid keys starting at N = 0 until one
 * cannot be read. Each node found is added to the peer cache. If two cached
 * nodes with different IDs share the same name (compared case-insensitively),
 * the process exits with \c CRM_EX_FATAL.
 *
 * \param[in,out] xml_parent  If not NULL, add <node> entry here for each node
 *
 * \return true if any nodes were found, false otherwise
 */
bool
pcmk__corosync_add_nodes(xmlNode *xml_parent)
{
    int index = 0;
    cs_error_t cs_rc = CS_OK;
    int attempts = 0;
    bool found_any = false;
    cmap_handle_t cmap_handle;
    int cmap_fd = -1;
    uid_t peer_uid = 0;
    gid_t peer_gid = 0;
    pid_t peer_pid = 0;
    int auth_rc;

    // Try to connect, sleeping one second longer after each failure
    for (;;) {
        cs_rc = pcmk__init_cmap(&cmap_handle);
        if (cs_rc == CS_OK) {
            break;
        }

        attempts++;
        crm_debug("API connection setup failed: %s. Retrying in %ds",
                  cs_strerror(cs_rc), attempts);
        sleep(attempts);

        if (attempts >= 5) {
            break;
        }
    }

    if (cs_rc != CS_OK) {
        crm_warn("Could not connect to Cluster Configuration Database API, error %d",
                 cs_rc);
        return false;
    }

    cs_rc = cmap_fd_get(cmap_handle, &cmap_fd);
    if (cs_rc != CS_OK) {
        crm_err("Could not obtain the CMAP API connection: %s (%d)",
                cs_strerror(cs_rc), cs_rc);
        goto bail;
    }

    /* CMAP provider run as root (in given user namespace, anyway)? */
    auth_rc = crm_ipc_is_authentic_process(cmap_fd, (uid_t) 0, (gid_t) 0,
                                           &peer_pid, &peer_uid, &peer_gid);
    if (auth_rc == 0) {
        crm_err("CMAP provider is not authentic:"
                " process %lld (uid: %lld, gid: %lld)",
                (long long) PCMK__SPECIAL_PID_AS_0(peer_pid),
                (long long) peer_uid, (long long) peer_gid);
        goto bail;

    } else if (auth_rc < 0) {
        crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
                strerror(-auth_rc), -auth_rc);
        goto bail;
    }

    pcmk__cluster_init_node_caches();
    crm_trace("Initializing Corosync node list");

    // Walk nodelist.node.<index>.nodeid until a key cannot be read
    for (index = 0; ; index++) {
        uint32_t nodeid = 0;
        char *name = NULL;
        char *key = crm_strdup_printf("nodelist.node.%d.nodeid", index);

        cs_rc = cmap_get_uint32(cmap_handle, key, &nodeid);
        free(key);
        if (cs_rc != CS_OK) {
            break;
        }

        name = pcmk__corosync_name(cmap_handle, nodeid);

        if (name != NULL) {
            // Refuse to continue if another cached node ID uses this name
            GHashTableIter cache_iter;
            pcmk__node_status_t *peer = NULL;

            g_hash_table_iter_init(&cache_iter, crm_peer_cache);
            while (g_hash_table_iter_next(&cache_iter, NULL,
                                          (gpointer *) &peer)) {
                if ((peer != NULL)
                    && (peer->cluster_layer_id > 0)
                    && (peer->cluster_layer_id != nodeid)
                    && pcmk__str_eq(peer->name, name, pcmk__str_casei)) {

                    crm_crit("Nodes %" PRIu32 " and %" PRIu32 " share the "
                             "same name '%s': shutting down",
                             peer->cluster_layer_id, nodeid, name);
                    crm_exit(CRM_EX_FATAL);
                }
            }
        }

        if ((nodeid > 0) || (name != NULL)) {
            crm_trace("Initializing node[%d] %u = %s", index, nodeid, name);
            pcmk__get_node(nodeid, name, NULL,
                           pcmk__node_search_cluster_member);

            // Only nodes with both an ID and a name count as "found"
            if ((nodeid > 0) && (name != NULL)) {
                found_any = true;

                if (xml_parent != NULL) {
                    xmlNode *xml_node = pcmk__xe_create(xml_parent,
                                                        PCMK_XE_NODE);

                    pcmk__xe_set_id(xml_node, "%u", nodeid);
                    crm_xml_add(xml_node, PCMK_XA_UNAME, name);
                }
            }
        }

        free(name);
    }

bail:
    cmap_finalize(cmap_handle);
    return found_any;
}
/*!
 * \internal
 * \brief Get cluster name from Corosync configuration (via CMAP)
 *
 * Connects to CMAP, checks that the CMAP provider passes the root (uid 0,
 * gid 0) authenticity check, and reads the \c totem.cluster_name key. The
 * CMAP connection is finalized before returning.
 *
 * \return Newly allocated string with cluster name if configured, or NULL
 */
char *
pcmk__corosync_cluster_name(void)
{
    cmap_handle_t cmap_handle;
    char *name = NULL;
    cs_error_t cs_rc = CS_OK;
    int cmap_fd = -1;
    uid_t peer_uid = 0;
    gid_t peer_gid = 0;
    pid_t peer_pid = 0;
    int auth_rc;

    cs_rc = pcmk__init_cmap(&cmap_handle);
    if (cs_rc != CS_OK) {
        crm_info("Failed to initialize the cmap API: %s (%d)",
                 cs_strerror(cs_rc), cs_rc);
        return NULL;
    }

    cs_rc = cmap_fd_get(cmap_handle, &cmap_fd);
    if (cs_rc != CS_OK) {
        crm_err("Could not obtain the CMAP API connection: %s (%d)",
                cs_strerror(cs_rc), cs_rc);
        goto bail;
    }

    /* CMAP provider run as root (in given user namespace, anyway)? */
    auth_rc = crm_ipc_is_authentic_process(cmap_fd, (uid_t) 0, (gid_t) 0,
                                           &peer_pid, &peer_uid, &peer_gid);
    if (auth_rc == 0) {
        crm_err("CMAP provider is not authentic:"
                " process %lld (uid: %lld, gid: %lld)",
                (long long) PCMK__SPECIAL_PID_AS_0(peer_pid),
                (long long) peer_uid, (long long) peer_gid);
        goto bail;

    } else if (auth_rc < 0) {
        crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
                strerror(-auth_rc), -auth_rc);
        goto bail;
    }

    cs_rc = cmap_get_string(cmap_handle, "totem.cluster_name", &name);
    if (cs_rc == CS_OK) {
        crm_debug("cmap totem.cluster_name = '%s'", name);
    } else {
        // Not fatal: the caller simply gets whatever name holds (NULL here)
        crm_info("Cannot get totem.cluster_name: %s (%d)",
                 cs_strerror(cs_rc), cs_rc);
    }

bail:
    cmap_finalize(cmap_handle);
    return name;
}
/*!
 * \internal
 * \brief Check (via CMAP) whether Corosync configuration has a node list
 *
 * The answer is cached in static variables once it has been determined, so
 * later calls return it without contacting CMAP again. If CMAP cannot be
 * reached, authenticated, or read, the function warns, assumes there is no
 * node list, and does not cache that assumption.
 *
 * \return true if Corosync has node list, otherwise false
 */
bool
pcmk__corosync_has_nodelist(void)
{
    cs_error_t cs_rc = CS_OK;
    int attempts = 0;
    cmap_handle_t cmap_handle;
    cmap_iter_handle_t iter_handle;
    char key_name[CMAP_KEYNAME_MAXLEN + 1];
    int cmap_fd = -1;
    uid_t peer_uid = 0;
    gid_t peer_gid = 0;
    pid_t peer_pid = 0;
    int rc = pcmk_ok;

    static bool got_result = false;
    static bool result = false;

    if (got_result) {
        return result;
    }

    // Connect to CMAP, sleeping one second longer after each failure
    for (;;) {
        cs_rc = pcmk__init_cmap(&cmap_handle);
        if (cs_rc == CS_OK) {
            break;
        }

        attempts++;
        crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)",
                  cs_strerror(cs_rc), cs_rc, attempts);
        sleep(attempts);

        if (attempts >= 5) {
            break;
        }
    }

    if (cs_rc != CS_OK) {
        crm_warn("Assuming Corosync does not have node list: "
                 "CMAP connection failed (%s) " QB_XS " rc=%d",
                 cs_strerror(cs_rc), cs_rc);
        return false;
    }

    // Get the file descriptor behind the CMAP connection
    cs_rc = cmap_fd_get(cmap_handle, &cmap_fd);
    if (cs_rc != CS_OK) {
        crm_warn("Assuming Corosync does not have node list: "
                 "CMAP unusable (%s) " QB_XS " rc=%d",
                 cs_strerror(cs_rc), cs_rc);
        goto bail;
    }

    // Check whether CMAP connection is authentic (i.e. provided by root)
    rc = crm_ipc_is_authentic_process(cmap_fd, (uid_t) 0, (gid_t) 0,
                                      &peer_pid, &peer_uid, &peer_gid);
    if (rc < 0) {
        crm_warn("Assuming Corosync does not have node list: "
                 "Could not verify CMAP authenticity (%s) " QB_XS " rc=%d",
                 pcmk_strerror(rc), rc);
        goto bail;

    } else if (rc == 0) {
        crm_warn("Assuming Corosync does not have node list: "
                 "CMAP provider is inauthentic "
                 QB_XS " pid=%lld uid=%lld gid=%lld",
                 (long long) PCMK__SPECIAL_PID_AS_0(peer_pid),
                 (long long) peer_uid, (long long) peer_gid);
        goto bail;
    }

    // Check whether the nodelist section is present
    cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle);
    if (cs_rc != CS_OK) {
        crm_warn("Assuming Corosync does not have node list: "
                 "CMAP not readable (%s) " QB_XS " rc=%d",
                 cs_strerror(cs_rc), cs_rc);
        goto bail;
    }

    // A single key under "nodelist" is enough to say a node list exists
    if (cmap_iter_next(cmap_handle, iter_handle, key_name, NULL,
                       NULL) == CS_OK) {
        result = true;
    }
    cmap_iter_finalize(cmap_handle, iter_handle);

    got_result = true;
    crm_debug("Corosync %s node list", (result? "has" : "does not have"));

bail:
    cmap_finalize(cmap_handle);
    return result;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Nov 23, 4:50 PM (14 h, 22 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1014874
Default Alt Text
(205 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment