diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index fdc238375e..a18076f7a3 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -1,284 +1,476 @@
/*
- * Copyright 2013-2024 the Pacemaker project contributors
+ * Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <glib.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static attribute_t *
attrd_create_attribute(xmlNode *xml)
{
int is_private = 0;
long long dampen = 0;
const char *name = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *set_type = crm_element_value(xml, PCMK__XA_ATTR_SET_TYPE);
const char *dampen_s = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING);
attribute_t *a = NULL;
if (set_type == NULL) {
set_type = PCMK_XE_INSTANCE_ATTRIBUTES;
}
/* Set type is meaningful only when writing to the CIB. Private
* attributes are not written.
*/
crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &is_private);
if (!is_private && !pcmk__str_any_of(set_type,
PCMK_XE_INSTANCE_ATTRIBUTES,
PCMK_XE_UTILIZATION, NULL)) {
crm_warn("Ignoring attribute %s with invalid set type %s",
pcmk__s(name, "(unidentified)"), set_type);
return NULL;
}
a = pcmk__assert_alloc(1, sizeof(attribute_t));
a->id = pcmk__str_copy(name);
a->set_type = pcmk__str_copy(set_type);
a->set_id = crm_element_value_copy(xml, PCMK__XA_ATTR_SET);
a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER);
a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value);
if (is_private) {
attrd_set_attr_flags(a, attrd_attr_is_private);
}
if (dampen_s != NULL) {
dampen = crm_get_msec(dampen_s);
}
if (dampen > 0) {
a->timeout_ms = (int) QB_MIN(dampen, INT_MAX);
a->timer = attrd_add_timer(a->id, a->timeout_ms, a);
} else if (dampen < 0) {
crm_warn("Ignoring invalid delay %s for attribute %s", dampen_s, a->id);
}
crm_trace("Created attribute %s with %s write delay and %s CIB user",
a->id,
((dampen > 0)? pcmk__readable_interval(a->timeout_ms) : "no"),
pcmk__s(a->user, "default"));
g_hash_table_replace(attributes, a->id, a);
return a;
}
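An illustrative aside, not part of the patch: the dampening string above is parsed by crm_get_msec(), which accepts interval specifications with unit suffixes and returns a negative value for unparseable input, which is what the dampen < 0 check relies on. The example values here are hypothetical:
long long ms;
ms = crm_get_msec("5s");    /* 5000: seconds converted to milliseconds */
ms = crm_get_msec("750ms"); /* 750: already in milliseconds */
ms = crm_get_msec("bogus"); /* negative: treated as an invalid delay above */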
static int
attrd_update_dampening(attribute_t *a, xmlNode *xml, const char *attr)
{
const char *dvalue = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING);
long long dampen = 0;
if (dvalue == NULL) {
crm_warn("Could not update %s: peer did not specify value for delay",
attr);
return EINVAL;
}
dampen = crm_get_msec(dvalue);
if (dampen < 0) {
crm_warn("Could not update %s: invalid delay value %dms (%s)",
attr, dampen, dvalue);
return EINVAL;
}
if (a->timeout_ms != dampen) {
mainloop_timer_del(a->timer);
a->timeout_ms = (int) QB_MIN(dampen, INT_MAX);
if (dampen > 0) {
a->timer = attrd_add_timer(attr, a->timeout_ms, a);
crm_info("Update attribute %s delay to %dms (%s)",
attr, dampen, dvalue);
} else {
a->timer = NULL;
crm_info("Update attribute %s to remove delay", attr);
}
/* If dampening changed, do an immediate write-out,
* otherwise repeated dampening changes would prevent write-outs
*/
attrd_write_or_elect_attribute(a);
}
return pcmk_rc_ok;
}
GHashTable *attributes = NULL;
/*!
* \internal
* \brief Create an XML representation of an attribute for use in peer messages
*
* \param[in,out] parent Create attribute XML as child element of this
* \param[in] a Attribute to represent
* \param[in] v Attribute value to represent
* \param[in] force_write If true, value should be written even if unchanged
*
* \return XML representation of attribute
*/
xmlNode *
attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
const attribute_value_t *v, bool force_write)
{
xmlNode *xml = pcmk__xe_create(parent, __func__);
crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type);
crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id);
crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
crm_xml_add(xml, PCMK__XA_ATTR_HOST, v->nodename);
/* @COMPAT Prior to 2.1.10 and 3.0.1, the node's cluster ID was added
* instead of its XML ID. For Corosync and Pacemaker Remote nodes, those are
* the same, but if we ever support node XML IDs that differ from their
* cluster IDs, we will have to drop support for rolling upgrades from
* versions before those.
*/
crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, attrd_get_node_xml_id(v->nodename));
crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING,
pcmk__timeout_ms2s(a->timeout_ms));
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE,
pcmk_is_set(a->flags, attrd_attr_is_private));
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE,
pcmk_is_set(v->flags, attrd_value_remote));
crm_xml_add_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, force_write);
return xml;
}
void
attrd_clear_value_seen(void)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a;
attribute_value_t *v = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
attrd_clear_value_flags(v, attrd_value_from_peer);
}
}
}
attribute_t *
attrd_populate_attribute(xmlNode *xml, const char *attr)
{
attribute_t *a = NULL;
bool update_both = false;
const char *op = crm_element_value(xml, PCMK_XA_TASK);
// NULL because PCMK__ATTRD_CMD_SYNC_RESPONSE has no PCMK_XA_TASK
update_both = pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH,
pcmk__str_null_matches);
// Look up or create attribute entry
a = g_hash_table_lookup(attributes, attr);
if (a == NULL) {
if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_none)) {
a = attrd_create_attribute(xml);
if (a == NULL) {
return NULL;
}
} else {
crm_warn("Could not update %s: attribute not found", attr);
return NULL;
}
}
// Update attribute dampening
if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
int rc = attrd_update_dampening(a, xml, attr);
if (rc != pcmk_rc_ok || !update_both) {
return NULL;
}
}
return a;
}
/*!
* \internal
* \brief Get the XML ID used to write out an attribute set
*
* \param[in] attr Attribute to get set ID for
* \param[in] node_state_id XML ID of node state that attribute value is for
*
* \return Newly allocated string with XML ID to use for \p attr set
*/
char *
attrd_set_id(const attribute_t *attr, const char *node_state_id)
{
char *set_id = NULL;
pcmk__assert((attr != NULL) && (node_state_id != NULL));
if (pcmk__str_empty(attr->set_id)) {
/* @COMPAT This should really take the set type into account. Currently
* we use the same XML ID for transient attributes and utilization
* attributes. It doesn't cause problems because the status section is
* not limited by the schema in any way, but it's still unfortunate.
* For backward compatibility reasons, we can't change this.
*/
set_id = crm_strdup_printf("%s-%s", PCMK_XE_STATUS, node_state_id);
} else {
/* @COMPAT When the user specifies a set ID for an attribute, it is the
* same for every node. That is less than ideal, but again, the schema
* doesn't enforce anything for the status section. We couldn't change
* it without allowing the set ID to vary per value rather than per
* attribute, which would break backward compatibility, pose design
* challenges, and potentially cause problems in rolling upgrades.
*/
set_id = pcmk__str_copy(attr->set_id);
}
pcmk__xml_sanitize_id(set_id);
return set_id;
}
/*!
* \internal
* \brief Get the XML ID used to write out an attribute value
*
* \param[in] attr Attribute to get value XML ID for
* \param[in] node_state_id UUID of node that attribute value is for
*
* \return Newly allocated string with XML ID of \p attr value
*/
char *
attrd_nvpair_id(const attribute_t *attr, const char *node_state_id)
{
char *nvpair_id = NULL;
if (attr->set_id != NULL) {
nvpair_id = crm_strdup_printf("%s-%s", attr->set_id, attr->id);
} else {
nvpair_id = crm_strdup_printf(PCMK_XE_STATUS "-%s-%s",
node_state_id, attr->id);
}
pcmk__xml_sanitize_id(nvpair_id);
return nvpair_id;
}
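An illustrative sketch, not part of the patch, of the IDs these two helpers produce; the attribute name and node state ID are hypothetical:
attribute_t attr = { .id = "my-attr", .set_id = NULL };
char *set_id = attrd_set_id(&attr, "node1");       /* "status-node1" */
char *nvpair_id = attrd_nvpair_id(&attr, "node1"); /* "status-node1-my-attr" */
/* If attr.set_id were "my-set", the results would instead be "my-set" and
* "my-set-my-attr", identical for every node.
*/
free(set_id);
free(nvpair_id);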
+
+/*!
+ * \internal
+ * \brief Check whether an attribute is one that must be written to the CIB
+ *
+ * \param[in] a Attribute to check
+ *
+ * \return false if we are in standalone mode or \p a is private, otherwise true
+ */
+bool
+attrd_for_cib(const attribute_t *a)
+{
+ return !stand_alone && (a != NULL)
+ && !pcmk_is_set(a->flags, attrd_attr_is_private);
+}
+
+/*!
+ * \internal
+ * \brief Drop NULL attribute values as indicated by given function
+ *
+ * Drop all NULL node attribute values that a given function indicates should
+ * be dropped, based on the XML ID of an element that was removed from the CIB.
+ *
+ * \param[in] cib_id ID of XML element that was removed from CIB
+ * (a name/value pair, an attribute set, or a node state)
+ * \param[in] set_type If not NULL, drop only attributes with this set type
+ * \param[in] func Call this function for every attribute/value
+ * combination; keep the value if it returns 0, drop
+ * the value and keep checking the attribute's other
+ * values if it returns 1, or drop the value and stop
+ * checking entirely if it returns 2
+ */
+static void
+drop_removed_values(const char *cib_id, const char *set_type,
+ int (*func)(const attribute_t *, const attribute_value_t *,
+ const char *))
+{
+ attribute_t *a = NULL;
+ GHashTableIter attr_iter;
+ const char *entry_type = pcmk__s(set_type, "status entry"); // for log
+
+ CRM_CHECK((cib_id != NULL) && (func != NULL), return);
+
+ // Check every attribute ...
+ g_hash_table_iter_init(&attr_iter, attributes);
+ while (g_hash_table_iter_next(&attr_iter, NULL, (gpointer *) &a)) {
+ attribute_value_t *v = NULL;
+ GHashTableIter value_iter;
+
+ if (!attrd_for_cib(a)
+ || ((set_type != NULL)
+ && !pcmk__str_eq(a->set_type, set_type, pcmk__str_none))) {
+ continue;
+ }
+
+ // Check every value of the attribute ...
+ g_hash_table_iter_init(&value_iter, a->values);
+ while (g_hash_table_iter_next(&value_iter, NULL, (gpointer *) &v)) {
+ int rc = 0;
+
+ if (v->current != NULL) {
+ continue;
+ }
+
+ if (attrd_get_node_xml_id(v->nodename) == NULL) {
+ /* This shouldn't be a significant issue, since we will know the
+ * XML ID if *any* attribute for the node has ever been written.
+ */
+ crm_trace("Ignoring %s[%s] after CIB erasure of %s %s because "
+ "its node XML ID is unknown (possibly attribute was "
+ "never written to CIB)",
+ a->id, v->nodename, entry_type, cib_id);
+ continue;
+ }
+
+ rc = func(a, v, cib_id);
+ if (rc > 0) {
+ crm_debug("Dropping %s[%s] after CIB erasure of %s %s",
+ a->id, v->nodename, entry_type, cib_id);
+ g_hash_table_iter_remove(&value_iter);
+ if (rc > 1) {
+ return;
+ }
+ }
+ }
+ }
+}
+
+/*!
+ * \internal
+ * \brief Check whether an attribute value has a given XML ID
+ *
+ * \param[in] a Attribute being checked
+ * \param[in] v Attribute value being checked
+ * \param[in] cib_id ID of name/value pair element that was removed from CIB
+ *
+ * \return 2 if value matches XML ID, otherwise 0
+ */
+static int
+nvpair_matches(const attribute_t *a, const attribute_value_t *v,
+ const char *cib_id)
+{
+ char *id = attrd_nvpair_id(a, attrd_get_node_xml_id(v->nodename));
+
+ /* The attribute manager doesn't enforce uniqueness for value XML IDs
+ * (schema validation could be disabled), but in practice they should be,
+ * so we can stop looping if we find a match.
+ */
+ int rc = pcmk__str_eq(id, cib_id, pcmk__str_none)? 2 : 0;
+
+ free(id);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Drop attribute value corresponding to given removed CIB entry
+ *
+ * \param[in] cib_id ID of name/value pair element that was removed from CIB
+ */
+void
+attrd_drop_removed_value(const char *cib_id)
+{
+ drop_removed_values(cib_id, NULL, nvpair_matches);
+}
+
+/*!
+ * \internal
+ * \brief Check whether an attribute value has a given attribute set ID
+ *
+ * \param[in] a Attribute being checked
+ * \param[in] v Attribute value being checked
+ * \param[in] cib_id ID of attribute set that was removed from CIB
+ *
+ * \return 1 if value matches XML ID, otherwise 0
+ */
+static int
+set_id_matches(const attribute_t *a, const attribute_value_t *v,
+ const char *cib_id)
+{
+ char *id = attrd_set_id(a, attrd_get_node_xml_id(v->nodename));
+ int rc = pcmk__str_eq(id, cib_id, pcmk__str_none)? 1: 0;
+
+ free(id);
+ return rc;
+}
+
+/*!
+ * \internal
+ * \brief Drop all removed attribute values for an attribute set
+ *
+ * \param[in] set_type XML element name of set that was removed
+ * \param[in] cib_id ID of attribute set that was removed from CIB
+ */
+void
+attrd_drop_removed_set(const char *set_type, const char *cib_id)
+{
+ drop_removed_values(cib_id, set_type, set_id_matches);
+}
+
+/*!
+ * \internal
+ * \brief Check whether an attribute value has a given node state XML ID
+ *
+ * \param[in] a Attribute being checked
+ * \param[in] v Attribute value being checked
+ * \param[in] cib_id ID of node state that was removed from CIB
+ *
+ * \return 1 if value matches removed ID, otherwise 0
+ */
+static int
+node_matches(const attribute_t *a, const attribute_value_t *v,
+ const char *cib_id)
+{
+ if (pcmk__str_eq(cib_id, attrd_get_node_xml_id(v->nodename),
+ pcmk__str_none)) {
+ return 1;
+ }
+ return 0;
+}
+
+/*!
+ * \internal
+ * \brief Drop all removed attribute values for a node
+ *
+ * \param[in] cib_id ID of node state that was removed from CIB
+ */
+void
+attrd_drop_removed_values(const char *cib_id)
+{
+ drop_removed_values(cib_id, NULL, node_matches);
+}
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 4231e4a668..588e779348 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -1,712 +1,871 @@
/*
- * Copyright 2013-2024 the Pacemaker project contributors
+ * Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
+#include <sys/types.h> // for regex.h
#include <errno.h>
+#include <string.h> // strndup()
+#include <regex.h> // regcomp(), regexec(), regex_t, regmatch_t, regoff_t
#include <stdbool.h>
#include <stdlib.h>
#include <glib.h>
#include <crm/cib/internal.h> // cib__*
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h> // pcmk__get_node()
#include "pacemaker-attrd.h"
static int last_cib_op_done = 0;
static void write_attribute(attribute_t *a, bool ignore_delay);
static void
attrd_cib_destroy_cb(gpointer user_data)
{
cib_t *cib = user_data;
cib->cmds->signoff(cib);
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
crm_info("Disconnected from the CIB manager");
} else {
// @TODO This should trigger a reconnect, not a shutdown
crm_crit("Lost connection to the CIB manager, shutting down");
attrd_exit_status = CRM_EX_DISCONNECT;
attrd_shutdown(0);
}
}
+/* In a CIB patchset, deletions have the XPath to the deleted element, like:
+ *
+ * /cib/status/node_state[@id='X']
+ *
+ * This regular expression checks whether a node state was deleted, or transient
+ * attributes beneath that, or an attribute set beneath that, or a name/value
+ * pair beneath that.
+ */
+#define ID_REGEX "\\[@" PCMK_XA_ID "='([^']+)'\\]"
+#define DELETION_REGEX "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
+ "/" PCMK__XE_NODE_STATE ID_REGEX \
+ "(/" PCMK__XE_TRANSIENT_ATTRIBUTES ID_REGEX ")?" \
+ "(/([^[/]+)" ID_REGEX ")?" "(/" PCMK_XE_NVPAIR ID_REGEX ")?$"
+
+// Number of parenthesized submatches in DELETION_REGEX plus 1 for entire match
+#define DELETION_NMATCH 9
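As a sketch of what the pattern accepts (all IDs here are hypothetical, and instance_attributes could equally be utilization):
/* /cib/status/node_state[@id='1']
*     entire node state removed: submatch 1 = node state ID
* /cib/status/node_state[@id='1']/transient_attributes[@id='1']
*     all of the node's transient attributes removed
* /cib/status/node_state[@id='1']/transient_attributes[@id='1']
*         /instance_attributes[@id='status-1']
*     one attribute set removed: submatches 5 and 6 = set element name and ID
* /cib/status/node_state[@id='1']/transient_attributes[@id='1']
*         /instance_attributes[@id='status-1']/nvpair[@id='status-1-foo']
*     single name/value pair removed: submatch 8 = nvpair ID
*/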
+
+/*!
+ * \internal
+ * \brief Duplicate a regular expression submatch
+ *
+ * \param[in] string String being matched against a regular expression
+ * \param[in] matches Submatches, as determined by regexec()
+ * \param[in] submatch Desired index into \p matches
+ *
+ * \return Newly allocated string with desired submatch
+ * \note This asserts on allocation failure, so the result is guaranteed to be
+ * non-NULL.
+ */
+static char *
+re_submatch(const char *string, regmatch_t matches[], size_t submatch)
+{
+ const regoff_t start = matches[submatch].rm_so;
+ char *match = strndup(string + start, matches[submatch].rm_eo - start);
+
+ pcmk__mem_assert(match);
+ return match;
+}
+
+/*!
+ * \internal
+ * \brief Check a patchset change for deletion of node attribute values
+ *
+ * \param[in] xml Patchset change element
+ * \param[in] data Ignored
+ *
+ * \return pcmk_rc_ok (to always continue to next patchset change)
+ */
+static int
+drop_values_in_deletion(xmlNode *xml, void *data)
+{
+ const char *value = NULL;
+ char *id = NULL;
+ regmatch_t matches[DELETION_NMATCH];
+
+ static regex_t re;
+ static bool re_compiled = false;
+
+ // Skip this change if it does not look like a deletion
+ value = crm_element_value(xml, PCMK_XA_OPERATION);
+ if (!pcmk__str_eq(value, "delete", pcmk__str_none)) {
+ return pcmk_rc_ok;
+ }
+ value = crm_element_value(xml, PCMK_XA_PATH);
+ if (value == NULL) {
+ crm_warn("Ignoring malformed deletion in "
+ "CIB change notification: No " PCMK_XA_PATH);
+ return pcmk_rc_ok;
+ }
+
+ // Check whether deleted XPath could contain node attribute values
+ if (!re_compiled) {
+ CRM_CHECK(regcomp(&re, DELETION_REGEX, REG_EXTENDED) == 0,
+ return pcmk_rc_ok);
+ re_compiled = true;
+ }
+ if (regexec(&re, value, DELETION_NMATCH, matches, 0) != 0) {
+ return pcmk_rc_ok; // This is not an attribute deletion
+ }
+
+ /* matches[0] = entire node state match
+ * matches[1] = node state ID
+ *
+ * Optional:
+ * matches[2] = transient attributes element with ID
+ * matches[3] = transient attributes ID
+ * matches[4] = attribute set element with ID
+ * matches[5] = attribute set element name
+ * matches[6] = attribute set ID
+ * matches[7] = name/value pair element with ID
+ * matches[8] = name/value pair ID
+ */
+
+ // If we get here, we must have matched at least node state and its ID
+ CRM_CHECK((matches[0].rm_so == 0) && (matches[1].rm_so > 0),
+ return pcmk_rc_ok);
+
+ /* Check whether all of node's attributes were deleted (if no matches[2],
+ * entire node state was deleted; if no matches[4], entire
+ * transient attributes section was deleted)
+ */
+ if ((matches[2].rm_so < 0) || (matches[4].rm_so < 0)) {
+ id = re_submatch(value, matches, 1);
+ attrd_drop_removed_values(id);
+ free(id);
+ return pcmk_rc_ok;
+ }
+
+ /* We matched transient attributes, so we must have its ID, as well as an
+ * attribute set, so we must have its element name and ID
+ */
+ CRM_CHECK((matches[3].rm_so > 0) && (matches[5].rm_so > 0)
+ && (matches[6].rm_so > 0), return pcmk_rc_ok);
+
+ // Check whether the entire set was deleted
+ if (matches[7].rm_so < 0) {
+ char *set_type = re_submatch(value, matches, 5);
+
+ id = re_submatch(value, matches, 6);
+ attrd_drop_removed_set(set_type, id);
+ free(id);
+ free(set_type);
+ return pcmk_rc_ok;
+ }
+
+ // We matched a single name/value pair, so we must have its ID
+ CRM_CHECK(matches[8].rm_so > 0, return pcmk_rc_ok);
+
+ // Drop the one value
+ id = re_submatch(value, matches, 8);
+ attrd_drop_removed_value(id);
+ free(id);
+ return pcmk_rc_ok;
+}
+
static void
attrd_cib_updated_cb(const char *event, xmlNode *msg)
{
const xmlNode *patchset = NULL;
const char *client_name = NULL;
bool status_changed = false;
if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) {
return;
}
if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_ALERTS)) {
- if (attrd_shutting_down(true)) {
+ if (attrd_shutting_down()) {
crm_debug("Ignoring alerts change in CIB during shutdown");
} else {
mainloop_set_trigger(attrd_config_read);
}
}
status_changed = pcmk__cib_element_in_patchset(patchset, PCMK_XE_STATUS);
client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME);
if (!cib__client_triggers_refresh(client_name)) {
/* This change came from a source that ensured the CIB is consistent
* with our attributes table, so we don't need to write anything out.
+ * If a removed attribute has been erased, we can forget it now.
*/
+ int format = 1;
+
+ if ((crm_element_value_int(patchset, PCMK_XA_FORMAT, &format) != 0)
+ || (format != 2)) {
+ crm_warn("Can't handle CIB patch format %d", format);
+ return;
+ }
+
+ /* This won't modify patchset, but we need to break const to match the
+ * function signature.
+ */
+ pcmk__xe_foreach_child((xmlNode *) patchset, PCMK_XE_CHANGE,
+ drop_values_in_deletion, NULL);
return;
}
if (!attrd_election_won()) {
// Don't write attributes if we're not the writer
return;
}
if (status_changed
|| pcmk__cib_element_in_patchset(patchset, PCMK_XE_NODES)) {
- if (attrd_shutting_down(true)) {
+ if (attrd_shutting_down()) {
crm_debug("Ignoring node change in CIB during shutdown");
return;
}
/* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS
* section. Write transient attributes to ensure they're up-to-date in
* the CIB.
*/
if (client_name == NULL) {
client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTID);
}
crm_notice("Updating all attributes after %s event triggered by %s",
event, pcmk__s(client_name, "unidentified client"));
attrd_write_attributes(attrd_write_all);
}
}
int
attrd_cib_connect(int max_retry)
{
static int attempts = 0;
int rc = -ENOTCONN;
the_cib = cib_new();
if (the_cib == NULL) {
return -ENOTCONN;
}
do {
if (attempts > 0) {
sleep(attempts);
}
attempts++;
crm_debug("Connection attempt %d to the CIB manager", attempts);
rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command);
} while ((rc != pcmk_ok) && (attempts < max_retry));
if (rc != pcmk_ok) {
crm_err("Connection to the CIB manager failed: %s " QB_XS " rc=%d",
pcmk_strerror(rc), rc);
goto cleanup;
}
crm_debug("Connected to the CIB manager after %d attempts", attempts);
rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb);
if (rc != pcmk_ok) {
crm_err("Could not set disconnection callback");
goto cleanup;
}
rc = the_cib->cmds->add_notify_callback(the_cib,
PCMK__VALUE_CIB_DIFF_NOTIFY,
attrd_cib_updated_cb);
if (rc != pcmk_ok) {
crm_err("Could not set CIB notification callback");
goto cleanup;
}
return pcmk_ok;
cleanup:
cib__clean_up_connection(&the_cib);
return -ENOTCONN;
}
void
attrd_cib_disconnect(void)
{
CRM_CHECK(the_cib != NULL, return);
the_cib->cmds->del_notify_callback(the_cib, PCMK__VALUE_CIB_DIFF_NOTIFY,
attrd_cib_updated_cb);
cib__clean_up_connection(&the_cib);
mainloop_destroy_trigger(attrd_config_read);
}
static void
attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
const char *node = pcmk__s((const char *) user_data, "a node");
if (rc == pcmk_ok) {
crm_info("Cleared transient node attributes for %s from CIB", node);
} else {
crm_err("Unable to clear transient node attributes for %s from CIB: %s",
node, pcmk_strerror(rc));
}
}
#define XPATH_TRANSIENT "//" PCMK__XE_NODE_STATE \
"[@" PCMK_XA_UNAME "='%s']" \
"/" PCMK__XE_TRANSIENT_ATTRIBUTES
/*!
* \internal
* \brief Wipe all transient node attributes for a node from the CIB
*
* \param[in] node Node to clear attributes for
*/
void
attrd_cib_erase_transient_attrs(const char *node)
{
int call_id = 0;
char *xpath = NULL;
CRM_CHECK(node != NULL, return);
xpath = crm_strdup_printf(XPATH_TRANSIENT, node);
crm_debug("Clearing transient node attributes for %s from CIB using %s",
node, xpath);
call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath);
free(xpath);
the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE,
pcmk__str_copy(node),
"attrd_erase_cb", attrd_erase_cb,
free);
}
/*!
* \internal
* \brief Prepare the CIB after cluster is connected
*/
void
attrd_cib_init(void)
{
/* We have no attribute values in memory, so wipe the CIB to match. This is
- * normally done by the DC's controller when this node leaves the cluster, but
- * this handles the case where the node restarted so quickly that the
+ * normally done by the writer when this node leaves the cluster, but this
+ * handles the case where the node restarted so quickly that the
* cluster layer didn't notice.
*
* \todo If the attribute manager respawns after crashing (see
* PCMK_ENV_RESPAWNED), ideally we'd skip this and sync our attributes
* from the writer. However, currently we reject any values for us
* that the writer has, in attrd_peer_update().
*/
attrd_cib_erase_transient_attrs(attrd_cluster->priv->node_name);
// Set a trigger for reading the CIB (for the alerts section)
attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL);
// Always read the CIB at start-up
mainloop_set_trigger(attrd_config_read);
}
static gboolean
attribute_timer_cb(gpointer data)
{
attribute_t *a = data;
crm_trace("Dampen interval expired for %s", a->id);
attrd_write_or_elect_attribute(a);
return FALSE;
}
static void
attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data)
{
int level = LOG_ERR;
GHashTableIter iter;
const char *peer = NULL;
attribute_value_t *v = NULL;
char *name = user_data;
attribute_t *a = g_hash_table_lookup(attributes, name);
if(a == NULL) {
crm_info("Attribute %s no longer exists", name);
return;
}
a->update = 0;
if (rc == pcmk_ok && call_id < 0) {
rc = call_id;
}
switch (rc) {
case pcmk_ok:
level = LOG_INFO;
last_cib_op_done = call_id;
if (a->timer && !a->timeout_ms) {
// Remove temporary dampening for failed writes
mainloop_timer_del(a->timer);
a->timer = NULL;
}
break;
case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */
case -ETIME: /* When an attr changes while there is a DC election */
case -ENXIO: /* When an attr changes while the CIB is syncing a
* newer config from a node that just came up
*/
level = LOG_WARNING;
break;
}
do_crm_log(level, "CIB update %d result for %s: %s " QB_XS " rc=%d",
call_id, a->id, pcmk_strerror(rc), rc);
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) {
if (rc == pcmk_ok) {
crm_info("* Wrote %s[%s]=%s",
a->id, peer, pcmk__s(v->requested, "(unset)"));
pcmk__str_update(&(v->requested), NULL);
} else {
do_crm_log(level, "* Could not write %s[%s]=%s",
a->id, peer, pcmk__s(v->requested, "(unset)"));
/* Reattempt write below if we are still the writer */
attrd_set_attr_flags(a, attrd_attr_changed);
}
}
if (pcmk_is_set(a->flags, attrd_attr_changed) && attrd_election_won()) {
if (rc == pcmk_ok) {
/* We deferred a write of a new update because this update was in
* progress. Write out the new value without additional delay.
*/
crm_debug("Pending update for %s can be written now", a->id);
write_attribute(a, false);
/* We're re-attempting a write because the original failed; delay
* the next attempt so we don't potentially flood the CIB manager
* and logs with a zillion attempts per second.
*
* @TODO We could elect a new writer instead. However, we'd have to
* somehow downgrade our vote, and we'd still need something like this
* if all peers similarly fail to write this attribute (which may
* indicate a corrupted attribute entry rather than a CIB issue).
*/
} else if (a->timer) {
// Attribute has a dampening value, so use that as delay
if (!mainloop_timer_running(a->timer)) {
crm_trace("Delayed re-attempted write for %s by %s",
name, pcmk__readable_interval(a->timeout_ms));
mainloop_timer_start(a->timer);
}
} else {
/* Set a temporary dampening of 2 seconds (timer will continue
* to exist until the attribute's dampening gets set or the
* write succeeds).
*/
a->timer = attrd_add_timer(a->id, 2000, a);
mainloop_timer_start(a->timer);
}
}
}
/*!
* \internal
* \brief Add a set-attribute update request to the current CIB transaction
*
* \param[in] attr Attribute to update
* \param[in] attr_id ID of attribute to update
* \param[in] node_id ID of node for which to update attribute value
* \param[in] set_id ID of attribute set
* \param[in] value New value for attribute
*
* \return Standard Pacemaker return code
*/
static int
add_set_attr_update(const attribute_t *attr, const char *attr_id,
const char *node_id, const char *set_id, const char *value)
{
xmlNode *update = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
xmlNode *child = update;
int rc = ENOMEM;
crm_xml_add(child, PCMK_XA_ID, node_id);
child = pcmk__xe_create(child, PCMK__XE_TRANSIENT_ATTRIBUTES);
crm_xml_add(child, PCMK_XA_ID, node_id);
child = pcmk__xe_create(child, attr->set_type);
crm_xml_add(child, PCMK_XA_ID, set_id);
child = pcmk__xe_create(child, PCMK_XE_NVPAIR);
crm_xml_add(child, PCMK_XA_ID, attr_id);
crm_xml_add(child, PCMK_XA_NAME, attr->id);
crm_xml_add(child, PCMK_XA_VALUE, value);
rc = the_cib->cmds->modify(the_cib, PCMK_XE_STATUS, update,
cib_can_create|cib_transaction);
rc = pcmk_legacy2rc(rc);
pcmk__xml_free(update);
return rc;
}
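A sketch of the status update this function queues, with hypothetical IDs (the set element name comes from attr->set_type, typically instance_attributes):
/* <node_state id="1">
*   <transient_attributes id="1">
*     <instance_attributes id="status-1">
*       <nvpair id="status-1-my-attr" name="my-attr" value="5"/>
*     </instance_attributes>
*   </transient_attributes>
* </node_state>
*/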
/*!
* \internal
* \brief Add an unset-attribute update request to the current CIB transaction
*
* \param[in] attr Attribute to update
* \param[in] attr_id ID of attribute to update
* \param[in] node_id ID of node for which to update attribute value
* \param[in] set_id ID of attribute set
*
* \return Standard Pacemaker return code
*/
static int
add_unset_attr_update(const attribute_t *attr, const char *attr_id,
const char *node_id, const char *set_id)
{
char *xpath = crm_strdup_printf("/" PCMK_XE_CIB
"/" PCMK_XE_STATUS
"/" PCMK__XE_NODE_STATE
"[@" PCMK_XA_ID "='%s']"
"/" PCMK__XE_TRANSIENT_ATTRIBUTES
"[@" PCMK_XA_ID "='%s']"
"/%s[@" PCMK_XA_ID "='%s']"
"/" PCMK_XE_NVPAIR
"[@" PCMK_XA_ID "='%s' "
"and @" PCMK_XA_NAME "='%s']",
node_id, node_id, attr->set_type, set_id,
attr_id, attr->id);
int rc = the_cib->cmds->remove(the_cib, xpath, NULL,
cib_xpath|cib_transaction);
free(xpath);
return pcmk_legacy2rc(rc);
}
/*!
* \internal
* \brief Add an attribute update request to the current CIB transaction
*
* \param[in] attr Attribute to update
* \param[in] value New value for attribute
* \param[in] node_id ID of node for which to update attribute value
*
* \return Standard Pacemaker return code
*/
static int
add_attr_update(const attribute_t *attr, const char *value, const char *node_id)
{
char *set_id = attrd_set_id(attr, node_id);
char *nvpair_id = attrd_nvpair_id(attr, node_id);
int rc = pcmk_rc_ok;
if (value == NULL) {
rc = add_unset_attr_update(attr, nvpair_id, node_id, set_id);
} else {
rc = add_set_attr_update(attr, nvpair_id, node_id, set_id, value);
}
free(set_id);
free(nvpair_id);
return rc;
}
static void
send_alert_attributes_value(attribute_t *a, GHashTable *t)
{
int rc = 0;
attribute_value_t *at = NULL;
GHashTableIter vIter;
g_hash_table_iter_init(&vIter, t);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) {
const char *node_xml_id = attrd_get_node_xml_id(at->nodename);
rc = attrd_send_attribute_alert(at->nodename, node_xml_id,
a->id, at->current);
crm_trace("Sent alerts for %s[%s]=%s with node XML ID %s "
"(%s agents failed)",
a->id, at->nodename, at->current,
pcmk__s(node_xml_id, "unknown"),
((rc == 0)? "no" : ((rc == -1)? "some" : "all")));
}
}
static void
set_alert_attribute_value(GHashTable *t, attribute_value_t *v)
{
attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t));
a_v->nodename = pcmk__str_copy(v->nodename);
a_v->current = pcmk__str_copy(v->current);
g_hash_table_replace(t, a_v->nodename, a_v);
}
mainloop_timer_t *
attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr)
{
return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr);
}
/*!
* \internal
* \brief Write an attribute's values to the CIB if appropriate
*
* \param[in,out] a Attribute to write
* \param[in] ignore_delay If true, write attribute now regardless of any
* configured delay
*/
static void
write_attribute(attribute_t *a, bool ignore_delay)
{
int private_updates = 0, cib_updates = 0;
attribute_value_t *v = NULL;
GHashTableIter iter;
GHashTable *alert_attribute_value = NULL;
int rc = pcmk_ok;
- bool should_write = true;
+ bool should_write = attrd_for_cib(a);
if (a == NULL) {
return;
}
- // Private attributes (or any in standalone mode) are not written to the CIB
- if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
- should_write = false;
- }
-
- /* If this attribute will be written to the CIB ... */
if (should_write) {
/* Defer the write if now's not a good time */
if (a->update && (a->update < last_cib_op_done)) {
crm_info("Write out of '%s' continuing: update %d considered lost",
a->id, a->update);
a->update = 0; // Don't log this message again
} else if (a->update) {
crm_info("Write out of '%s' delayed: update %d in progress",
a->id, a->update);
goto done;
} else if (mainloop_timer_running(a->timer)) {
if (ignore_delay) {
mainloop_timer_stop(a->timer);
crm_debug("Overriding '%s' write delay", a->id);
} else {
crm_info("Delaying write of '%s'", a->id);
goto done;
}
}
// Initiate a transaction for all the peer value updates
CRM_CHECK(the_cib != NULL, goto done);
the_cib->cmds->set_user(the_cib, a->user);
rc = the_cib->cmds->init_transaction(the_cib);
if (rc != pcmk_ok) {
crm_err("Failed to write %s (set %s): Could not initiate "
"CIB transaction",
a->id, pcmk__s(a->set_id, "unspecified"));
goto done;
}
}
/* The changed and force-write flags apply only to the next write,
* which this is, so clear them now. Also clear the "node unknown" flag
* because we will check whether it is known below and reset if appropriate.
*/
attrd_clear_attr_flags(a, attrd_attr_changed
|attrd_attr_force_write
|attrd_attr_node_unknown);
/* Build a table of the values written, for sending alerts later */
alert_attribute_value = pcmk__strikey_table(NULL,
attrd_free_attribute_value);
/* Iterate over each peer value of this attribute */
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
const char *node_xml_id = NULL;
const char *prev_xml_id = NULL;
+ pcmk__node_status_t *peer = NULL;
if (!should_write) {
private_updates++;
continue;
}
/* We need the node's CIB XML ID to write out its attributes, so look
* for it now. Check the node caches first, even if the ID was
* previously known (in case it changed), but use any previous value as
* a fallback.
*/
prev_xml_id = attrd_get_node_xml_id(v->nodename);
if (pcmk_is_set(v->flags, attrd_value_remote)) {
// A Pacemaker Remote node's XML ID is the same as its name
node_xml_id = v->nodename;
+ } else if (v->current == NULL) {
+ /* If a value was removed, check the caches for the node XML ID,
+ * but don't create a new cache entry. We don't want to re-create a
+ * purged node.
+ */
+ peer = pcmk__search_node_caches(0, v->nodename, prev_xml_id,
+ pcmk__node_search_any
+ |pcmk__node_search_cluster_cib);
+ node_xml_id = pcmk__cluster_get_xml_id(peer);
+ if (node_xml_id == NULL) {
+ node_xml_id = prev_xml_id;
+ }
+
} else {
// This creates a cluster node cache entry if none exists
- pcmk__node_status_t *peer = pcmk__get_node(0, v->nodename,
- prev_xml_id,
- pcmk__node_search_any);
-
+ peer = pcmk__get_node(0, v->nodename, prev_xml_id,
+ pcmk__node_search_any);
node_xml_id = pcmk__cluster_get_xml_id(peer);
if (node_xml_id == NULL) {
node_xml_id = prev_xml_id;
}
}
// Defer write if this is a cluster node that's never been seen
if (node_xml_id == NULL) {
attrd_set_attr_flags(a, attrd_attr_node_unknown);
crm_notice("Cannot write %s[%s]='%s' to CIB because node's XML ID "
"is unknown (will retry if learned)",
a->id, v->nodename, v->current);
continue;
}
if (!pcmk__str_eq(prev_xml_id, node_xml_id, pcmk__str_none)) {
crm_trace("Setting %s[%s] node XML ID to %s (was %s)",
a->id, v->nodename, node_xml_id,
pcmk__s(prev_xml_id, "unknown"));
attrd_set_node_xml_id(v->nodename, node_xml_id);
}
// Update this value as part of the CIB transaction we're building
rc = add_attr_update(a, v->current, node_xml_id);
if (rc != pcmk_rc_ok) {
crm_err("Couldn't add %s[%s]='%s' to CIB transaction: %s "
QB_XS " node XML ID %s",
- a->id, v->nodename, v->current, pcmk_rc_str(rc),
- node_xml_id);
+ a->id, v->nodename, pcmk__s(v->current, "(unset)"),
+ pcmk_rc_str(rc), node_xml_id);
continue;
}
crm_debug("Added %s[%s]=%s to CIB transaction (node XML ID %s)",
a->id, v->nodename, pcmk__s(v->current, "(unset)"),
node_xml_id);
cib_updates++;
/* Save the value so an alert can be sent for it */
set_alert_attribute_value(alert_attribute_value, v);
// Save this value so we can log it when write completes
pcmk__str_update(&(v->requested), v->current);
}
if (private_updates) {
crm_info("Processed %d private change%s for %s (set %s)",
private_updates, pcmk__plural_s(private_updates),
a->id, pcmk__s(a->set_id, "unspecified"));
}
if (cib_updates > 0) {
char *id = pcmk__str_copy(a->id);
// Commit transaction
a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none);
crm_info("Sent CIB request %d with %d change%s for %s (set %s)",
a->update, cib_updates, pcmk__plural_s(cib_updates),
a->id, pcmk__s(a->set_id, "unspecified"));
if (the_cib->cmds->register_callback_full(the_cib, a->update,
CIB_OP_TIMEOUT_S, FALSE, id,
"attrd_cib_callback",
attrd_cib_callback, free)) {
// Send alerts for the attribute values
+ // @TODO Do this in callback only if write was successful
send_alert_attributes_value(a, alert_attribute_value);
}
}
done:
// Discard transaction (if any)
if (the_cib != NULL) {
the_cib->cmds->end_transaction(the_cib, false, cib_none);
the_cib->cmds->set_user(the_cib, NULL);
}
if (alert_attribute_value != NULL) {
g_hash_table_destroy(alert_attribute_value);
}
}
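Condensed to its CIB manager calls, the transaction pattern used above looks like this sketch (iteration and error handling omitted):
the_cib->cmds->set_user(the_cib, a->user);
the_cib->cmds->init_transaction(the_cib);
/* ... queue one modify() or remove() per value with cib_transaction set ... */
a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none); /* commit */
/* in the done path, discard anything still queued and reset the ACL user */
the_cib->cmds->end_transaction(the_cib, false, cib_none);
the_cib->cmds->set_user(the_cib, NULL);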
/*!
* \internal
* \brief Write out attributes
*
* \param[in] options Group of enum attrd_write_options
*/
void
attrd_write_attributes(uint32_t options)
{
GHashTableIter iter;
attribute_t *a = NULL;
crm_debug("Writing out %s attributes",
pcmk_is_set(options, attrd_write_all)? "all" : "changed");
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
if (!pcmk_is_set(options, attrd_write_all)
&& pcmk_is_set(a->flags, attrd_attr_node_unknown)) {
// Try writing this attribute again, in case peer ID was learned
attrd_set_attr_flags(a, attrd_attr_changed);
} else if (pcmk_is_set(a->flags, attrd_attr_force_write)) {
/* If the force_write flag is set, write the attribute. */
attrd_set_attr_flags(a, attrd_attr_changed);
}
if (pcmk_is_set(options, attrd_write_all) ||
pcmk_is_set(a->flags, attrd_attr_changed)) {
bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay);
if (pcmk_is_set(a->flags, attrd_attr_force_write)) {
// Always ignore delay when forced write flag is set
ignore_delay = true;
}
write_attribute(a, ignore_delay);
} else {
crm_trace("Skipping unchanged attribute %s", a->id);
}
}
}
void
attrd_write_or_elect_attribute(attribute_t *a)
{
if (attrd_election_won()) {
write_attribute(a, false);
} else {
attrd_start_election_if_needed();
}
}
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index e681fb24d4..af4bc015de 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -1,633 +1,645 @@
/*
- * Copyright 2013-2024 the Pacemaker project contributors
+ * Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static xmlNode *
attrd_confirmation(int callid)
{
xmlNode *node = pcmk__xe_create(NULL, __func__);
crm_xml_add(node, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(node, PCMK__XA_SRC, pcmk__cluster_local_node_name());
crm_xml_add(node, PCMK_XA_TASK, PCMK__ATTRD_CMD_CONFIRM);
crm_xml_add_int(node, PCMK__XA_CALL_ID, callid);
return node;
}
static void
attrd_peer_message(pcmk__node_status_t *peer, xmlNode *xml)
{
const char *election_op = crm_element_value(xml, PCMK__XA_CRM_TASK);
if (election_op) {
attrd_handle_election_op(peer, xml);
return;
}
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
/* If we're shutting down, we want to continue responding to election
* ops as long as we're a cluster member (because our vote may be
* needed). Ignore all other messages.
*/
return;
} else {
pcmk__request_t request = {
.ipc_client = NULL,
.ipc_id = 0,
.ipc_flags = 0,
.peer = peer->name,
.xml = xml,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);
CRM_CHECK(request.op != NULL, return);
attrd_handle_request(&request);
/* Having finished handling the request, check to see if the originating
* peer requested confirmation. If so, send that confirmation back now.
*/
if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) &&
!pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) {
int callid = 0;
xmlNode *reply = NULL;
/* Add the confirmation ID for the message we are confirming to the
* response so the originating peer knows what they're a confirmation
* for.
*/
crm_element_value_int(xml, PCMK__XA_CALL_ID, &callid);
reply = attrd_confirmation(callid);
/* And then send the confirmation back to the originating peer. This
* ends up right back in this same function (attrd_peer_message) on the
* peer where it will have to do something with a PCMK__XA_CONFIRM type
* message.
*/
crm_debug("Sending %s a confirmation", peer->name);
attrd_send_message(peer, reply, false);
pcmk__xml_free(reply);
}
pcmk__reset_request(&request);
}
}
static void
attrd_cpg_dispatch(cpg_handle_t handle,
const struct cpg_name *groupName,
uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
xmlNode *xml = NULL;
const char *from = NULL;
char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);
if(data == NULL) {
return;
}
xml = pcmk__xml_parse(data);
if (xml == NULL) {
crm_err("Bad message received from %s[%" PRIu32 "]: '%.120s'",
from, nodeid, data);
} else {
attrd_peer_message(pcmk__get_node(nodeid, from, NULL,
pcmk__node_search_cluster_member),
xml);
}
pcmk__xml_free(xml);
free(data);
}
static void
attrd_cpg_destroy(gpointer unused)
{
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
crm_info("Disconnected from Corosync process group");
} else {
crm_crit("Lost connection to Corosync process group, shutting down");
attrd_exit_status = CRM_EX_DISCONNECT;
attrd_shutdown(0);
}
}
/*!
* \internal
* \brief Broadcast an update for a single attribute value
*
* \param[in] a Attribute to broadcast
* \param[in] v Attribute value to broadcast
*/
void
attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v)
{
xmlNode *op = pcmk__xe_create(NULL, PCMK_XE_OP);
crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
attrd_add_value_xml(op, a, v, false);
attrd_send_message(NULL, op, false);
pcmk__xml_free(op);
}
#define state_text(state) pcmk__s((state), "in unknown state")
static void
attrd_peer_change_cb(enum pcmk__node_update kind, pcmk__node_status_t *peer,
const void *data)
{
bool gone = false;
bool is_remote = pcmk_is_set(peer->flags, pcmk__node_status_remote);
switch (kind) {
case pcmk__node_update_name:
crm_debug("%s node %s[%" PRIu32 "] is now %s",
(is_remote? "Remote" : "Cluster"),
pcmk__s(peer->name, "unknown"), peer->cluster_layer_id,
state_text(peer->state));
break;
case pcmk__node_update_processes:
if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
gone = true;
}
crm_debug("Node %s[%" PRIu32 "] is %s a peer",
pcmk__s(peer->name, "unknown"), peer->cluster_layer_id,
(gone? "no longer" : "now"));
break;
case pcmk__node_update_state:
crm_debug("%s node %s[%" PRIu32 "] is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
pcmk__s(peer->name, "unknown"), peer->cluster_layer_id,
state_text(peer->state), state_text(data));
if (pcmk__str_eq(peer->state, PCMK_VALUE_MEMBER, pcmk__str_none)) {
/* If we're the writer, send new peers a list of all attributes
* (unless it's a remote node, which doesn't run its own attrd)
*/
if (!is_remote) {
if (attrd_election_won()) {
attrd_peer_sync(peer);
} else {
// Send a message anyway, so that the peer learns our name
attrd_send_protocol(peer);
}
}
} else {
// Remove all attribute values associated with lost nodes
if (peer->name != NULL) {
attrd_peer_remove(peer->name, false, "loss");
}
gone = true;
}
break;
}
// Remove votes from cluster nodes that leave, in case election in progress
if (gone && !is_remote && peer->name != NULL) {
attrd_remove_voter(peer);
attrd_remove_peer_protocol_ver(peer->name);
attrd_do_not_expect_from_peer(peer->name);
}
}
#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")
#define readable_peer(p) \
(((p) == NULL)? "all peers" : pcmk__s((p)->name, "unknown peer"))
static void
update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
const xmlNode *xml, const char *attr, const char *value,
const char *host, bool filter)
{
int is_remote = 0;
bool changed = false;
attribute_value_t *v = NULL;
const char *prev_xml_id = NULL;
const char *node_xml_id = crm_element_value(xml, PCMK__XA_ATTR_HOST_ID);
// Create entry for value if not already existing
v = g_hash_table_lookup(a->values, host);
if (v == NULL) {
v = pcmk__assert_alloc(1, sizeof(attribute_value_t));
v->nodename = pcmk__str_copy(host);
g_hash_table_replace(a->values, v->nodename, v);
}
/* If update doesn't contain the node XML ID, fall back to any previously
* known value (for logging)
*/
prev_xml_id = attrd_get_node_xml_id(v->nodename);
if (node_xml_id == NULL) {
node_xml_id = prev_xml_id;
}
// If value is for a Pacemaker Remote node, remember that
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
if (is_remote) {
attrd_set_value_flags(v, attrd_value_remote);
pcmk__assert(pcmk__cluster_lookup_remote_node(host) != NULL);
}
// Check whether the value changed
changed = !pcmk__str_eq(v->current, value, pcmk__str_casei);
if (changed && filter
&& pcmk__str_eq(host, attrd_cluster->priv->node_name,
pcmk__str_casei)) {
/* Broadcast the local value for an attribute that differs from the
* value provided in a peer's attribute synchronization response. This
* ensures a node's values for itself take precedence and all peers are
* kept in sync.
*/
v = g_hash_table_lookup(a->values, attrd_cluster->priv->node_name);
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
attr, host, readable_value(v), value, peer->name);
attrd_broadcast_value(a, v);
} else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
QB_XS " from %s with %s write delay and node XML ID %s",
attr, host, a->set_type ? " in " : "",
pcmk__s(a->set_type, ""), readable_value(v),
pcmk__s(value, "(unset)"), peer->name,
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms),
pcmk__s(node_xml_id, "unknown"));
pcmk__str_update(&v->current, value);
attrd_set_attr_flags(a, attrd_attr_changed);
- if (pcmk__str_eq(host, attrd_cluster->priv->node_name, pcmk__str_casei)
- && pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) {
-
- if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) {
- attrd_set_requesting_shutdown();
-
- } else {
- attrd_clear_requesting_shutdown();
- }
- }
-
// Write out new value or start dampening timer
if (a->timeout_ms && a->timer) {
crm_trace("Delaying write of %s %s for dampening",
attr, pcmk__readable_interval(a->timeout_ms));
mainloop_timer_start(a->timer);
} else {
attrd_write_or_elect_attribute(a);
}
} else {
int is_force_write = 0;
crm_element_value_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE,
&is_force_write);
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Remember the forced write and set the change flag; the attribute
* is actually written by the writer after the election. */
crm_trace("%s[%s] from %s is unchanged (%s), forcing write",
attr, host, peer->name, pcmk__s(value, "unset"));
attrd_set_attr_flags(a, attrd_attr_force_write);
} else {
crm_trace("%s[%s] from %s is unchanged (%s)",
attr, host, peer->name, pcmk__s(value, "unset"));
}
}
// This allows us to later detect local values that peer doesn't know about
attrd_set_value_flags(v, attrd_value_from_peer);
// Remember node's XML ID if we're just learning it
if ((node_xml_id != NULL)
&& !pcmk__str_eq(node_xml_id, prev_xml_id, pcmk__str_none)) {
// Remember node's name in case unknown in the membership cache
pcmk__node_status_t *known_peer =
pcmk__get_node(0, host, node_xml_id,
pcmk__node_search_cluster_member);
crm_trace("Learned %s[%s] node XML ID is %s (was %s)",
a->id, known_peer->name, node_xml_id,
pcmk__s(prev_xml_id, "unknown"));
attrd_set_node_xml_id(v->nodename, node_xml_id);
if (attrd_election_won()) {
// In case we couldn't write a value missing the XML ID before
attrd_write_attributes(attrd_write_changed);
}
}
}
static void
attrd_peer_update_one(const pcmk__node_status_t *peer, xmlNode *xml,
bool filter)
{
attribute_t *a = NULL;
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
if (attr == NULL) {
crm_warn("Could not update attribute: peer did not specify name");
return;
}
a = attrd_populate_attribute(xml, attr);
if (a == NULL) {
return;
}
if (host == NULL) {
// If no host was specified, update all hosts
GHashTableIter vIter;
crm_debug("Setting %s for all hosts to %s", attr, value);
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_HOST_ID);
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
} else {
// Update attribute value for the given host
update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
/* If this is a message from some attrd instance broadcasting its protocol
* version, check to see if it's a new minimum version.
*/
if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) {
attrd_update_minimum_protocol_ver(peer->name, value);
}
}
static void
broadcast_unseen_local_values(void)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
if (!pcmk_is_set(v->flags, attrd_value_from_peer)
&& pcmk__str_eq(v->nodename, attrd_cluster->priv->node_name,
pcmk__str_casei)) {
crm_trace("* %s[%s]='%s' is local-only",
a->id, v->nodename, readable_value(v));
if (sync == NULL) {
sync = pcmk__xe_create(NULL, __func__);
crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
}
attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer);
}
}
}
if (sync != NULL) {
crm_debug("Broadcasting local-only values");
attrd_send_message(NULL, sync, false);
pcmk__xml_free(sync);
}
}
int
attrd_cluster_connect(void)
{
int rc = pcmk_rc_ok;
attrd_cluster = pcmk_cluster_new();
pcmk_cluster_set_destroy_fn(attrd_cluster, attrd_cpg_destroy);
pcmk_cpg_set_deliver_fn(attrd_cluster, attrd_cpg_dispatch);
pcmk_cpg_set_confchg_fn(attrd_cluster, pcmk__cpg_confchg_cb);
pcmk__cluster_set_status_callback(&attrd_peer_change_cb);
rc = pcmk_cluster_connect(attrd_cluster);
rc = pcmk_rc2legacy(rc);
if (rc != pcmk_ok) {
crm_err("Cluster connection failed");
return rc;
}
return pcmk_ok;
}
void
attrd_peer_clear_failure(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
const char *op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
const char *interval_spec = crm_element_value(xml,
PCMK__XA_ATTR_CLEAR_INTERVAL);
guint interval_ms = 0U;
char *attr = NULL;
GHashTableIter iter;
regex_t regex;
pcmk__node_status_t *peer =
pcmk__get_node(0, request->peer, NULL,
pcmk__node_search_cluster_member);
pcmk_parse_interval_spec(interval_spec, &interval_ms);
if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) {
crm_info("Ignoring invalid request to clear failures for %s",
pcmk__s(rsc, "all resources"));
return;
}
crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
/* Make sure value is not set, so we delete */
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) {
if (regexec(&regex, attr, 0, NULL, 0) == 0) {
crm_trace("Matched %s when clearing %s",
attr, pcmk__s(rsc, "all resources"));
crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr);
attrd_peer_update(peer, xml, host, false);
}
}
regfree(&regex);
}
/*!
* \internal
* \brief Load attributes from a peer sync response
*
* \param[in] peer Peer that sent sync response
* \param[in] peer_won Whether peer is the attribute writer
* \param[in,out] xml Request XML
*/
void
attrd_peer_sync_response(const pcmk__node_status_t *peer, bool peer_won,
xmlNode *xml)
{
crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
peer->name);
if (peer_won) {
/* Initialize the "seen" flag for all attributes to cleared, so we can
* detect attributes that local node has but the writer doesn't.
*/
attrd_clear_value_seen();
}
// Process each attribute update in the sync response
for (xmlNode *child = pcmk__xe_first_child(xml, NULL, NULL, NULL);
child != NULL; child = pcmk__xe_next(child, NULL)) {
attrd_peer_update(peer, child,
crm_element_value(child, PCMK__XA_ATTR_HOST), true);
}
if (peer_won) {
/* If any attributes are still not marked as seen, the writer doesn't
* know about them, so send all peers an update with them.
*/
broadcast_unseen_local_values();
}
}
/*!
* \internal
* \brief Remove all attributes and optionally peer cache entries for a node
*
* \param[in] host Name of node to purge
* \param[in] uncache If true, remove node from peer caches
* \param[in] source Who requested removal (only used for logging)
*/
void
attrd_peer_remove(const char *host, bool uncache, const char *source)
{
attribute_t *a = NULL;
GHashTableIter aIter;
CRM_CHECK(host != NULL, return);
crm_notice("Removing all %s attributes for node %s "
QB_XS " %s reaping node from cache",
host, source, (uncache? "and" : "without"));
g_hash_table_iter_init(&aIter, attributes);
- while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
- if(g_hash_table_remove(a->values, host)) {
- crm_debug("Removed %s[%s] for peer %s", a->id, host, source);
+ while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) &a)) {
+ /* If the attribute won't be written to the CIB, we can drop the value
+ * now. Otherwise we need to set it NULL and wait for a notification
+ * that it was erased, because if there's no writer or the current
+ * writer fails to write it then leaves, we may become the writer and
+ * need to do it.
+ */
+ if (attrd_for_cib(a)) {
+ attribute_value_t *v = g_hash_table_lookup(a->values, host);
+
+ if ((v != NULL) && (v->current != NULL)) {
+ crm_debug("Removed %s[%s] (by setting NULL) for %s",
+ a->id, host, source);
+ pcmk__str_update(&(v->current), NULL);
+ attrd_set_attr_flags(a, attrd_attr_changed);
+ }
+ } else if (g_hash_table_remove(a->values, host)) {
+ crm_debug("Removed %s[%s] immediately for %s",
+ a->id, host, source);
}
}
+ if (attrd_election_won()) {
+ attrd_cib_erase_transient_attrs(host); // Wipe from CIB
+ } else {
+ attrd_start_election_if_needed(); // Make sure CIB gets updated
+ }
+
+ // Remove node from caches if requested
if (uncache) {
pcmk__purge_node_from_cache(host, 0);
attrd_forget_node_xml_id(host);
}
}
/*!
* \internal
* \brief Send all known attributes and values to a peer
*
* \param[in] peer Peer to send sync to (if NULL, broadcast to all peers)
*/
void
attrd_peer_sync(pcmk__node_status_t *peer)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = pcmk__xe_create(NULL, __func__);
crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
crm_debug("Syncing %s[%s]='%s' to %s",
a->id, v->nodename, readable_value(v),
readable_peer(peer));
attrd_add_value_xml(sync, a, v, false);
}
}
crm_debug("Syncing values to %s", readable_peer(peer));
attrd_send_message(peer, sync, false);
pcmk__xml_free(sync);
}
void
attrd_peer_update(const pcmk__node_status_t *peer, xmlNode *xml,
const char *host, bool filter)
{
bool handle_sync_point = false;
CRM_CHECK((peer != NULL) && (xml != NULL), return);
if (xml->children != NULL) {
for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL);
child != NULL; child = pcmk__xe_next(child, PCMK_XE_OP)) {
pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
attrd_peer_update_one(peer, child, filter);
if (attrd_request_has_sync_point(child)) {
handle_sync_point = true;
}
}
} else {
attrd_peer_update_one(peer, xml, filter);
if (attrd_request_has_sync_point(xml)) {
handle_sync_point = true;
}
}
/* If the update XML specified that the client wanted to wait for a sync
* point, process that now.
*/
if (handle_sync_point) {
crm_trace("Hit local sync point for attribute update");
attrd_ack_waitlist_clients(attrd_sync_point_local, xml);
}
}
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index 281ec12c2f..1a79a93df2 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -1,176 +1,176 @@
/*
- * Copyright 2013-2024 the Pacemaker project contributors
+ * Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/cluster.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static char *peer_writer = NULL;
static void
attrd_election_cb(pcmk_cluster_t *cluster)
{
attrd_declare_winner();
/* Update the peers after an election */
attrd_peer_sync(NULL);
/* After winning an election, update the CIB with the values of all
* attributes as the winner knows them.
*/
attrd_write_attributes(attrd_write_all);
}
void
attrd_election_init(void)
{
election_init(attrd_cluster, attrd_election_cb);
}
void
attrd_start_election_if_needed(void)
{
if ((peer_writer == NULL)
&& (election_state(attrd_cluster) != election_in_progress)
- && !attrd_shutting_down(false)) {
+ && !attrd_shutting_down()) {
crm_info("Starting an election to determine the writer");
election_vote(attrd_cluster);
}
}
bool
attrd_election_won(void)
{
return (election_state(attrd_cluster) == election_won);
}
void
attrd_handle_election_op(const pcmk__node_status_t *peer, xmlNode *xml)
{
enum election_result rc = 0;
enum election_result previous = election_state(attrd_cluster);
crm_xml_add(xml, PCMK__XA_SRC, peer->name);
// Don't become writer if we're shutting down
- rc = election_count_vote(attrd_cluster, xml, !attrd_shutting_down(false));
+ rc = election_count_vote(attrd_cluster, xml, !attrd_shutting_down());
switch(rc) {
case election_start:
crm_debug("Unsetting writer (was %s) and starting new election",
peer_writer? peer_writer : "unset");
free(peer_writer);
peer_writer = NULL;
election_vote(attrd_cluster);
break;
case election_lost:
/* The election API should really distinguish between "we just lost
* to this peer" and "we already lost previously, and we are
* discarding this vote for some reason", but it doesn't.
*
* In the first case, we want to tentatively set the peer writer to
* this peer, even though another peer may eventually win (which we
* will learn via attrd_check_for_new_writer()), so
* attrd_start_election_if_needed() doesn't start a new election.
*
* Approximate a test for that case as best we can.
*/
if ((peer_writer == NULL) || (previous != election_lost)) {
pcmk__str_update(&peer_writer, peer->name);
crm_debug("Election lost, presuming %s is writer for now",
peer_writer);
}
break;
case election_in_progress:
election_check(attrd_cluster);
break;
default:
crm_info("Ignoring election op from %s due to error", peer->name);
break;
}
}
bool
attrd_check_for_new_writer(const pcmk__node_status_t *peer, const xmlNode *xml)
{
int peer_state = 0;
crm_element_value_int(xml, PCMK__XA_ATTR_WRITER, &peer_state);
if (peer_state == election_won) {
if ((election_state(attrd_cluster) == election_won)
&& !pcmk__str_eq(peer->name, attrd_cluster->priv->node_name,
pcmk__str_casei)) {
crm_notice("Detected another attribute writer (%s), starting new "
"election",
peer->name);
election_vote(attrd_cluster);
} else if (!pcmk__str_eq(peer->name, peer_writer, pcmk__str_casei)) {
crm_notice("Recorded new attribute writer: %s (was %s)",
peer->name, pcmk__s(peer_writer, "unset"));
pcmk__str_update(&peer_writer, peer->name);
}
}
return (peer_state == election_won);
}
void
attrd_declare_winner(void)
{
crm_notice("Recorded local node as attribute writer (was %s)",
(peer_writer? peer_writer : "unset"));
pcmk__str_update(&peer_writer, attrd_cluster->priv->node_name);
}
void
attrd_remove_voter(const pcmk__node_status_t *peer)
{
election_remove(attrd_cluster, peer->name);
if ((peer_writer != NULL)
&& pcmk__str_eq(peer->name, peer_writer, pcmk__str_casei)) {
free(peer_writer);
peer_writer = NULL;
crm_notice("Lost attribute writer %s", peer->name);
/* Clear any election dampening in effect. Otherwise, if the lost writer
* had just won, the election could fizzle out with no new writer.
*/
election_clear_dampening(attrd_cluster);
/* If the writer received attribute updates during its shutdown, it will
* not have written them to the CIB. Ensure we get a new writer so they
* are written out. This means that every node that sees the writer
* leave will start a new election, but that's better than losing
* attributes.
*/
attrd_start_election_if_needed();
/* If an election is in progress, we need to call election_check(), in case
* this lost peer is the only one that hasn't voted, otherwise the election
* would be pending until it's timed out.
*/
} else if (election_state(attrd_cluster) == election_in_progress) {
crm_debug("Checking election status upon loss of voter %s", peer->name);
election_check(attrd_cluster);
}
}
void
attrd_xml_add_writer(xmlNode *xml)
{
crm_xml_add_int(xml, PCMK__XA_ATTR_WRITER, election_state(attrd_cluster));
}
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
index 07fe84399a..4e83f7623a 100644
--- a/daemons/attrd/attrd_ipc.c
+++ b/daemons/attrd/attrd_ipc.c
@@ -1,630 +1,630 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <inttypes.h> // PRIu32
#include <sys/types.h>
#include <crm/cluster.h>
#include <crm/cluster/internal.h>
#include <crm/common/acl_internal.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/logging.h>
#include <crm/common/results.h>
#include <crm/common/strings_internal.h>
#include <crm/common/util.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
static qb_ipcs_service_t *ipcs = NULL;
/*!
* \internal
* \brief Build the XML reply to a client query
*
* \param[in] attr Name of requested attribute
* \param[in] host Name of requested host (or NULL for all hosts)
*
* \return New XML reply
* \note Caller is responsible for freeing the resulting XML
*/
static xmlNode *build_query_reply(const char *attr, const char *host)
{
xmlNode *reply = pcmk__xe_create(NULL, __func__);
attribute_t *a;
crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_ATTRD);
crm_xml_add(reply, PCMK__XA_SUBT, PCMK__ATTRD_CMD_QUERY);
crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION);
/* If desired attribute exists, add its value(s) to the reply */
a = g_hash_table_lookup(attributes, attr);
if (a) {
attribute_value_t *v;
xmlNode *host_value;
crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr);
/* Allow caller to use "localhost" to refer to local node */
if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) {
host = attrd_cluster->priv->node_name;
crm_trace("Mapped localhost to %s", host);
}
/* If a specific node was requested, add its value */
if (host) {
v = g_hash_table_lookup(a->values, host);
host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
crm_xml_add(host_value, PCMK__XA_ATTR_HOST, host);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE,
(v? v->current : NULL));
/* Otherwise, add all nodes' values */
} else {
GHashTableIter iter;
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
host_value = pcmk__xe_create(reply, PCMK_XE_NODE);
crm_xml_add(host_value, PCMK__XA_ATTR_HOST, v->nodename);
crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current);
}
}
}
return reply;
}
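/* A sketch of the reply shape (host and attribute names are illustrative,
* and the literal XML attribute names are whatever the PCMK__XA_* constants
* expand to):
*
*   <build_query_reply t="attrd" subt="query" name="my-attr">
*     <node host="node1" value="5"/>
*     <node host="node2" value="7"/>
*   </build_query_reply>
*/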
xmlNode *
attrd_client_clear_failure(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
const char *rsc, *op, *interval_spec;
if (minimum_protocol_version >= 2) {
/* Propagate to all peers (including ourselves).
* This ends up at attrd_peer_message().
*/
attrd_send_message(NULL, xml, false);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE);
op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION);
interval_spec = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_INTERVAL);
/* Map this to an update */
crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
/* Add regular expression matching desired attributes */
if (rsc) {
char *pattern;
if (op == NULL) {
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
} else {
guint interval_ms = 0U;
pcmk_parse_interval_spec(interval_spec, &interval_ms);
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP,
rsc, op, interval_ms);
}
crm_xml_add(xml, PCMK__XA_ATTR_REGEX, pattern);
free(pattern);
} else {
crm_xml_add(xml, PCMK__XA_ATTR_REGEX, ATTRD_RE_CLEAR_ALL);
}
/* Make sure attribute and value are not set, so we delete via regex */
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_NAME);
pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE);
return attrd_client_update(request);
}
xmlNode *
attrd_client_peer_remove(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
// Host and ID are not used in combination; rather, host has precedence
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
char *host_alloc = NULL;
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
if (host == NULL) {
int nodeid = 0;
crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID, &nodeid);
if (nodeid > 0) {
pcmk__node_status_t *node = NULL;
node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_cluster_member);
if ((node != NULL) && (node->name != NULL)) {
// Use cached name if available
host = node->name;
} else {
// Otherwise ask cluster layer
host_alloc = pcmk__cluster_node_name(nodeid);
host = host_alloc;
}
crm_xml_add(xml, PCMK__XA_ATTR_HOST, host);
}
}
if (host) {
crm_info("Client %s is requesting all values for %s be removed",
pcmk__client_name(request->ipc_client), host);
attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
free(host_alloc);
} else {
crm_info("Ignoring request by client %s to remove all peer values without specifying peer",
pcmk__client_name(request->ipc_client));
}
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
xmlNode *
attrd_client_query(pcmk__request_t *request)
{
xmlNode *query = request->xml;
xmlNode *reply = NULL;
const char *attr = NULL;
crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client));
/* Request must specify attribute name to query */
attr = crm_element_value(query, PCMK__XA_ATTR_NAME);
if (attr == NULL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Ignoring malformed query from %s (no attribute name given)",
pcmk__client_name(request->ipc_client));
return NULL;
}
/* Build the XML reply */
reply = build_query_reply(attr,
crm_element_value(query, PCMK__XA_ATTR_HOST));
if (reply == NULL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Could not respond to query from %s: could not create XML reply",
pcmk__client_name(request->ipc_client));
return NULL;
} else {
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
request->ipc_client->request_id = 0;
return reply;
}
xmlNode *
attrd_client_refresh(pcmk__request_t *request)
{
crm_info("Updating all attributes");
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
attrd_write_attributes(attrd_write_all|attrd_write_no_delay);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
static void
handle_missing_host(xmlNode *xml)
{
if (crm_element_value(xml, PCMK__XA_ATTR_HOST) == NULL) {
crm_trace("Inferring local node %s with XML ID %s",
attrd_cluster->priv->node_name,
attrd_cluster->priv->node_xml_id);
crm_xml_add(xml, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name);
crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID,
attrd_cluster->priv->node_xml_id);
}
}
/* Convert a single IPC message with a regex into one with multiple children, one
* for each regex match.
*/
static int
expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex)
{
if (attr == NULL && regex) {
bool matched = false;
GHashTableIter aIter;
regex_t r_patt;
crm_debug("Setting %s to %s", regex, value);
if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) {
return EINVAL;
}
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
int status = regexec(&r_patt, attr, 0, NULL, 0);
if (status == 0) {
xmlNode *child = pcmk__xe_create(xml, PCMK_XE_OP);
crm_trace("Matched %s with %s", attr, regex);
matched = true;
/* Copy all the non-conflicting attributes from the parent over,
* but remove the regex and replace it with the name.
*/
pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite);
pcmk__xe_remove_attr(child, PCMK__XA_ATTR_REGEX);
crm_xml_add(child, PCMK__XA_ATTR_NAME, attr);
}
}
regfree(&r_patt);
/* Return a code if we never matched anything. This should not be treated
* as an error: there was a valid regex, it simply did not match anything,
* and the caller should not continue doing any regex-related processing.
*/
if (!matched) {
return pcmk_rc_op_unsatisfied;
}
} else if (attr == NULL) {
return pcmk_rc_bad_nvpair;
}
return pcmk_rc_ok;
}
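/* For example (a sketch; element and attribute names are illustrative): an
* update selecting attributes by regex, such as
*
*   <op task="update" regex="^fail-count-" value="0"/>
*
* that matches two known attributes would be rewritten in place to carry one
* PCMK_XE_OP child per match, each inheriting the parent's attributes but
* with the regex replaced by a concrete name:
*
*   <op task="update" regex="^fail-count-" value="0">
*     <op task="update" value="0" name="fail-count-rsc1#start_0"/>
*     <op task="update" value="0" name="fail-count-rsc2#monitor_10000"/>
*   </op>
*/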
static int
handle_regexes(pcmk__request_t *request)
{
xmlNode *xml = request->xml;
int rc = pcmk_rc_ok;
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX);
rc = expand_regexes(xml, attr, value, regex);
if (rc == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Bad regex '%s' for update from client %s", regex,
pcmk__client_name(request->ipc_client));
} else if (rc == pcmk_rc_bad_nvpair) {
crm_err("Update request did not specify attribute or regular expression");
pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
"Client %s update request did not specify attribute or regular expression",
pcmk__client_name(request->ipc_client));
}
return rc;
}
static int
handle_value_expansion(const char **value, xmlNode *xml, const char *op,
const char *attr)
{
attribute_t *a = g_hash_table_lookup(attributes, attr);
if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) {
return EINVAL;
}
if (*value && attrd_value_needs_expansion(*value)) {
int int_value;
attribute_value_t *v = NULL;
if (a) {
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);
v = g_hash_table_lookup(a->values, host);
}
int_value = attrd_expand_value(*value, (v? v->current : NULL));
crm_info("Expanded %s=%s to %d", attr, *value, int_value);
crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value);
/* Replacing the value frees the previous memory, so re-query it */
*value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
}
return pcmk_rc_ok;
}
static void
send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml)
{
if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) {
/* The client is waiting on the cluster-wide sync point. In this case,
* the response ACK is not sent until this attrd broadcasts the update
* and receives its own confirmation back from all peers.
*/
attrd_expect_confirmations(request, attrd_cluster_sync_point_update);
attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */
} else {
/* The client is either waiting on the local sync point or was not
* waiting on any sync point at all. For the local sync point, the
* response ACK is sent in attrd_peer_update. For clients not
* waiting on any sync point, the response ACK is sent in
* handle_update_request immediately before this function was called.
*/
attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */
}
}
static int
send_child_update(xmlNode *child, void *data)
{
pcmk__request_t *request = (pcmk__request_t *) data;
/* Calling pcmk__set_result is handled by one of these calls to
* attrd_client_update, so no need to do it again here.
*/
request->xml = child;
attrd_client_update(request);
return pcmk_rc_ok;
}
xmlNode *
attrd_client_update(pcmk__request_t *request)
{
xmlNode *xml = NULL;
const char *attr, *value, *regex;
CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL);
xml = request->xml;
/* If the message has children, that means it is a message from a newer
* client that supports sending multiple operations at a time. There are
* two ways we can handle that.
*/
if (xml->children != NULL) {
if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) {
/* First, if all peers support a certain protocol version, we can
* just broadcast the big message and they'll handle it. However,
* we also need to apply all the transformations in this function
* to the children since they don't happen anywhere else.
*/
for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL,
NULL);
child != NULL; child = pcmk__xe_next(child, PCMK_XE_OP)) {
attr = crm_element_value(child, PCMK__XA_ATTR_NAME);
value = crm_element_value(child, PCMK__XA_ATTR_VALUE);
handle_missing_host(child);
if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
}
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
/* Save the original xml node pointer so it can be restored after iterating
* over all the children.
*/
xmlNode *orig_xml = request->xml;
/* Second, if they do not support that protocol version, split it
* up into individual messages and call attrd_client_update on
* each one.
*/
pcmk__xe_foreach_child(xml, PCMK_XE_OP, send_child_update, request);
request->xml = orig_xml;
}
return NULL;
}
attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX);
if (handle_regexes(request) != pcmk_rc_ok) {
/* Error handling was already dealt with in handle_regexes, so just return. */
return NULL;
} else if (regex) {
/* Recursively call attrd_client_update on the new message with regexes
* expanded. If supported by the attribute daemon, this means that all
* matches can also be handled atomically.
*/
return attrd_client_update(request);
}
handle_missing_host(xml);
if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) {
pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR,
"Attribute %s does not exist", attr);
return NULL;
}
crm_debug("Broadcasting %s[%s]=%s%s",
attr, crm_element_value(xml, PCMK__XA_ATTR_HOST),
value, (attrd_election_won()? " (writer)" : ""));
send_update_msg_to_cluster(request, xml);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
}
/*!
* \internal
* \brief Accept a new client IPC connection
*
* \param[in,out] c New connection
* \param[in] uid Client user id
* \param[in] gid Client group id
*
* \return pcmk_ok on success, -errno otherwise
*/
static int32_t
attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("New client connection %p", c);
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
crm_info("Ignoring new connection from pid %d during shutdown",
pcmk__client_pid(c));
return -ECONNREFUSED;
}
if (pcmk__new_client(c, uid, gid) == NULL) {
return -ENOMEM;
}
return pcmk_ok;
}
/*!
* \internal
* \brief Destroy a client IPC connection
*
* \param[in] c Connection to destroy
*
* \return FALSE (i.e. do not re-run this callback)
*/
static int32_t
attrd_ipc_closed(qb_ipcs_connection_t *c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client == NULL) {
crm_trace("Ignoring request to clean up unknown connection %p", c);
} else {
crm_trace("Cleaning up closed client connection %p", c);
/* Remove the client from the sync point waitlist if it's present. */
attrd_remove_client_from_waitlist(client);
/* And no longer wait for confirmations from any peers. */
attrd_do_not_wait_for_client(client);
pcmk__free_client(client);
}
return FALSE;
}
/*!
* \internal
* \brief Destroy a client IPC connection
*
* \param[in,out] c Connection to destroy
*
* \note We handle a destroyed connection the same as a closed one,
* but we need a separate handler because the return type is different.
*/
static void
attrd_ipc_destroy(qb_ipcs_connection_t *c)
{
crm_trace("Destroying client connection %p", c);
attrd_ipc_closed(c);
}
static int32_t
attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *xml = NULL;
// Sanity-check, and parse XML from IPC data
CRM_CHECK((c != NULL) && (client != NULL), return 0);
if (data == NULL) {
crm_debug("No IPC data from PID %d", pcmk__client_pid(c));
return 0;
}
xml = pcmk__client_data2xml(client, data, &id, &flags);
if (xml == NULL) {
crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c));
pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
CRM_EX_PROTOCOL);
return 0;
} else {
pcmk__request_t request = {
.ipc_client = client,
.ipc_id = id,
.ipc_flags = flags,
.peer = NULL,
.xml = xml,
.call_options = 0,
.result = PCMK__UNKNOWN_RESULT,
};
pcmk__assert(client->user != NULL);
pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user);
request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK);
CRM_CHECK(request.op != NULL, return 0);
attrd_handle_request(&request);
pcmk__reset_request(&request);
}
pcmk__xml_free(xml);
return 0;
}
static struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = attrd_ipc_accept,
.connection_created = NULL,
.msg_process = attrd_ipc_dispatch,
.connection_closed = attrd_ipc_closed,
.connection_destroyed = attrd_ipc_destroy
};
void
attrd_ipc_fini(void)
{
if (ipcs != NULL) {
pcmk__drop_all_clients(ipcs);
qb_ipcs_destroy(ipcs);
ipcs = NULL;
}
attrd_unregister_handlers();
pcmk__client_cleanup();
}
/*!
* \internal
* \brief Set up attrd IPC communication
*/
void
attrd_init_ipc(void)
{
pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks);
}
diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c
index f219b8862d..f6c6f0ac52 100644
--- a/daemons/attrd/attrd_utils.c
+++ b/daemons/attrd/attrd_utils.c
@@ -1,306 +1,277 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <glib.h>
#include <regex.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include "pacemaker-attrd.h"
cib_t *the_cib = NULL;
-static bool requesting_shutdown = false;
static bool shutting_down = false;
static GMainLoop *mloop = NULL;
/* A hash table storing information on the protocol version of each peer attrd.
* The key is the peer's uname, and the value is the protocol version number.
*/
GHashTable *peer_protocol_vers = NULL;
-/*!
- * \internal
- * \brief Set requesting_shutdown state
- */
-void
-attrd_set_requesting_shutdown(void)
-{
- requesting_shutdown = true;
-}
-
-/*!
- * \internal
- * \brief Clear requesting_shutdown state
- */
-void
-attrd_clear_requesting_shutdown(void)
-{
- requesting_shutdown = false;
-}
-
/*!
* \internal
* \brief Check whether local attribute manager is shutting down
*
- * \param[in] if_requested If \c true, also consider presence of
- * \c PCMK__NODE_ATTR_SHUTDOWN attribute
- *
- * \return \c true if local attribute manager has begun shutdown sequence
- * or (if \p if_requested is \c true) whether local node has a nonzero
- * \c PCMK__NODE_ATTR_SHUTDOWN attribute set, otherwise \c false
- * \note Most callers should pass \c false for \p if_requested, because the
- * attribute manager needs to continue performing while the controller is
- * shutting down, and even needs to be eligible for election in case all
- * nodes are shutting down.
+ * \return \c true if local attribute manager has begun shutdown sequence,
+ * otherwise \c false
*/
bool
-attrd_shutting_down(bool if_requested)
+attrd_shutting_down(void)
{
- return shutting_down || (if_requested && requesting_shutdown);
+ return shutting_down;
}
/*!
* \internal
* \brief Exit (using mainloop or not, as appropriate)
*
* \param[in] nsig Ignored
*/
void
attrd_shutdown(int nsig)
{
// Tell various functions not to do anything
shutting_down = true;
// Don't respond to signals while shutting down
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGCHLD);
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGUSR2);
mainloop_destroy_signal(SIGTRAP);
attrd_free_waitlist();
attrd_free_confirmations();
if (peer_protocol_vers != NULL) {
g_hash_table_destroy(peer_protocol_vers);
peer_protocol_vers = NULL;
}
if ((mloop == NULL) || !g_main_loop_is_running(mloop)) {
/* If there's no main loop active, just exit. This should be possible
* only if we get SIGTERM in brief windows at start-up and shutdown.
*/
crm_exit(CRM_EX_OK);
} else {
g_main_loop_quit(mloop);
g_main_loop_unref(mloop);
}
}
/*!
* \internal
* \brief Create a main loop for attrd
*/
void
attrd_init_mainloop(void)
{
mloop = g_main_loop_new(NULL, FALSE);
}
/*!
* \internal
* \brief Run attrd main loop
*/
void
attrd_run_mainloop(void)
{
g_main_loop_run(mloop);
}
/* strlen("value") */
#define plus_plus_len (5)
/*!
* \internal
* \brief Check whether an attribute value should be expanded
*
* \param[in] value Attribute value to check
*
* \return true if value needs expansion, false otherwise
*/
bool
attrd_value_needs_expansion(const char *value)
{
return ((strlen(value) >= (plus_plus_len + 2))
&& (value[plus_plus_len] == '+')
&& ((value[plus_plus_len + 1] == '+')
|| (value[plus_plus_len + 1] == '=')));
}
/*!
* \internal
* \brief Expand an increment expression into an integer
*
* \param[in] value Attribute increment expression to expand
* \param[in] old_value Previous value of attribute
*
* \return Expanded value
*/
int
attrd_expand_value(const char *value, const char *old_value)
{
int increment = 1;
int score = 0;
if (pcmk_parse_score(old_value, &score, 0) != pcmk_rc_ok) {
return 0; // Original value is not a score
}
// value++ means increment by one, value+=OFFSET means increment by OFFSET
if ((value[plus_plus_len + 1] != '+')
&& (pcmk_parse_score(value + plus_plus_len + 2, &increment,
0) != pcmk_rc_ok)) {
increment = 0; // Invalid increment
}
if (increment < 0) {
return QB_MAX(score + increment, -PCMK_SCORE_INFINITY);
}
return QB_MIN(score + increment, PCMK_SCORE_INFINITY);
}
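/* For example (hypothetical values, not part of this code): given a previous
* value of "5", "value++" expands to 6 and "value+=10" expands to 15, with
* results clamped to +/- PCMK_SCORE_INFINITY:
*
*   attrd_expand_value("value++", "5");    // returns 6
*   attrd_expand_value("value+=10", "5");  // returns 15
*   attrd_expand_value("value++", "junk"); // returns 0 (old value not a score)
*/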
/*!
* \internal
* \brief Create regular expression matching failure-related attributes
*
* \param[out] regex Where to store created regular expression
* \param[in] rsc Name of resource to clear (or NULL for all)
* \param[in] op Operation to clear if rsc is specified (or NULL for all)
* \param[in] interval_ms Interval of operation to clear if op is specified
*
* \return pcmk_ok on success, -EINVAL if arguments are invalid
*
* \note The caller is responsible for freeing the result with regfree().
*/
int
attrd_failure_regex(regex_t *regex, const char *rsc, const char *op,
guint interval_ms)
{
char *pattern = NULL;
int rc;
/* Create a pattern that matches desired attributes */
if (rsc == NULL) {
pattern = pcmk__str_copy(ATTRD_RE_CLEAR_ALL);
} else if (op == NULL) {
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc);
} else {
pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms);
}
/* Compile pattern into regular expression */
crm_trace("Clearing attributes matching %s", pattern);
rc = regcomp(regex, pattern, REG_EXTENDED|REG_NOSUB);
free(pattern);
return (rc == 0)? pcmk_ok : -EINVAL;
}
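/* A minimal usage sketch (hypothetical resource and operation names): build a
* pattern for the 10-second monitor of "myrsc", then test candidate attribute
* names against it. With these arguments, the compiled pattern should match
* names such as "fail-count-myrsc#monitor_10000":
*
*   regex_t regex;
*
*   if (attrd_failure_regex(&regex, "myrsc", "monitor", 10000) == pcmk_ok) {
*       if (regexec(&regex, attr_name, 0, NULL, 0) == 0) {
*           // attr_name is a failure attribute for that operation
*       }
*       regfree(&regex);
*   }
*/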
void
attrd_free_attribute_value(gpointer data)
{
attribute_value_t *v = data;
free(v->nodename);
free(v->current);
free(v->requested);
free(v);
}
void
attrd_free_attribute(gpointer data)
{
attribute_t *a = data;
if(a) {
free(a->id);
free(a->set_id);
free(a->set_type);
free(a->user);
mainloop_timer_del(a->timer);
g_hash_table_destroy(a->values);
free(a);
}
}
/*!
* \internal
* \brief When a peer node leaves the cluster, stop tracking its protocol version.
*
* \param[in] host The peer node's uname to be removed
*/
void
attrd_remove_peer_protocol_ver(const char *host)
{
if (peer_protocol_vers != NULL) {
g_hash_table_remove(peer_protocol_vers, host);
}
}
/*!
* \internal
* \brief When a peer node broadcasts a message with its protocol version, keep
* track of that information.
*
* We keep track of each peer's protocol version so we know which peers to
* expect confirmation messages from when handling cluster-wide sync points.
* We additionally keep track of the lowest protocol version supported by all
* peers so we know when we can send IPC messages containing more than one
* request.
*
* \param[in] host The peer node's uname to be tracked
* \param[in] value The peer node's protocol version
*/
void
attrd_update_minimum_protocol_ver(const char *host, const char *value)
{
int ver;
if (peer_protocol_vers == NULL) {
peer_protocol_vers = pcmk__strkey_table(free, NULL);
}
pcmk__scan_min_int(value, &ver, 0);
if (ver > 0) {
/* Record the peer attrd's protocol version. */
g_hash_table_insert(peer_protocol_vers, pcmk__str_copy(host),
GINT_TO_POINTER(ver));
/* If the protocol version is a new minimum, record it as such. */
if (minimum_protocol_version == -1 || ver < minimum_protocol_version) {
minimum_protocol_version = ver;
crm_trace("Set minimum attrd protocol version to %d",
minimum_protocol_version);
}
}
}
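/* For example (hypothetical peers): after receiving version "5" from node1
* and "4" from node2, minimum_protocol_version is 4, so multi-request IPC
* messages are allowed (ATTRD_SUPPORTS_MULTI_MESSAGE requires 4) but
* confirmations are not (ATTRD_SUPPORTS_CONFIRMATION requires 5).
*/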
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index d9423c8915..627dd97976 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -1,260 +1,262 @@
/*
* Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#ifndef PACEMAKER_ATTRD__H
# define PACEMAKER_ATTRD__H
#include <regex.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/cluster.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/messages_internal.h>
#include <crm/cib/cib_types.h>
/*
* Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used
* with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned.
*
* With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every
* peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will
* also set a private attribute for itself with its version, so any attrd can
* determine the minimum version supported by all peers.
*
* Protocol Pacemaker Significant changes
* -------- --------- -------------------
* 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only),
* PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH,
* "flush", PCMK__ATTRD_CMD_SYNC_RESPONSE
* 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_REGEX),
* PCMK__ATTRD_CMD_QUERY
* 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH,
* PCMK__ATTRD_CMD_UPDATE_DELAY
* 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE
* 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes
* 4 2.1.5 Multiple attributes can be updated in a single IPC
* message
* 5 2.1.5 Peers can request confirmation of a sent message
* 6 2.1.7 PCMK__ATTRD_CMD_PEER_REMOVE supports PCMK__XA_REAP
* 7 3.0.0 "flush" support dropped
*/
#define ATTRD_PROTOCOL_VERSION "7"
#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4)
#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5)
#define attrd_send_ack(client, id, flags) \
pcmk__ipc_send_ack((client), (id), (flags), PCMK__XE_ACK, \
ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE)
void attrd_init_mainloop(void);
void attrd_run_mainloop(void);
-void attrd_set_requesting_shutdown(void);
-void attrd_clear_requesting_shutdown(void);
void attrd_free_waitlist(void);
-bool attrd_shutting_down(bool if_requested);
+bool attrd_shutting_down(void);
void attrd_shutdown(int nsig);
void attrd_init_ipc(void);
void attrd_ipc_fini(void);
int attrd_cib_connect(int max_retry);
void attrd_cib_disconnect(void);
void attrd_cib_init(void);
void attrd_cib_erase_transient_attrs(const char *node);
bool attrd_value_needs_expansion(const char *value);
int attrd_expand_value(const char *value, const char *old_value);
/* regular expression to clear failures of all resources */
#define ATTRD_RE_CLEAR_ALL \
"^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-"
/* regular expression to clear failure of all operations for one resource
* (format takes resource name)
*/
#define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s#.+_[0-9]+$"
/* regular expression to clear failure of one operation for one resource
* (format takes resource name, operation name, and interval)
*/
#define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s#%s_%u$"
int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op,
guint interval_ms);
extern cib_t *the_cib;
extern crm_exit_t attrd_exit_status;
/* Alerts */
extern lrmd_t *the_lrmd;
extern crm_trigger_t *attrd_config_read;
void attrd_lrmd_disconnect(void);
gboolean attrd_read_options(gpointer user_data);
int attrd_send_attribute_alert(const char *node, const char *node_xml_id,
const char *attr, const char *value);
// Elections
void attrd_election_init(void);
void attrd_start_election_if_needed(void);
bool attrd_election_won(void);
void attrd_handle_election_op(const pcmk__node_status_t *peer, xmlNode *xml);
bool attrd_check_for_new_writer(const pcmk__node_status_t *peer,
const xmlNode *xml);
void attrd_declare_winner(void);
void attrd_remove_voter(const pcmk__node_status_t *peer);
void attrd_xml_add_writer(xmlNode *xml);
enum attrd_attr_flags {
attrd_attr_none = 0U,
// At least one of attribute's values has changed since last write
attrd_attr_changed = (1U << 0),
// At least one of attribute's values has an unknown node XML ID
attrd_attr_node_unknown = (1U << 1),
// This attribute should never be written to the CIB
attrd_attr_is_private = (1U << 2),
// Ignore any configured delay for next write of this attribute
attrd_attr_force_write = (1U << 3),
};
typedef struct attribute_s {
char *id; // Attribute name
char *set_type; // PCMK_XE_INSTANCE_ATTRIBUTES or PCMK_XE_UTILIZATION
char *set_id; // Set's XML ID to use when writing
char *user; // ACL user to use for CIB writes
int update; // Call ID of pending write
int timeout_ms; // How long to wait for more changes before writing
uint32_t flags; // Group of enum attrd_attr_flags
GHashTable *values; // Key: node name, value: attribute_value_t
mainloop_timer_t *timer; // Timer to use for timeout_ms
} attribute_t;
#define attrd_set_attr_flags(attr, flags_to_set) do { \
(attr)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Value for attribute", (attr)->id, \
(attr)->flags, (flags_to_set), #flags_to_set); \
} while (0)
#define attrd_clear_attr_flags(attr, flags_to_clear) do { \
(attr)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Value for attribute", (attr)->id, \
(attr)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
enum attrd_value_flags {
attrd_value_none = 0U,
attrd_value_remote = (1U << 0), // Value is for Pacemaker Remote node
attrd_value_from_peer = (1U << 1), // Value is from peer sync response
};
typedef struct attribute_value_s {
char *nodename; // Node that this value is for
char *current; // Attribute value
char *requested; // Value specified in pending CIB write, if any
uint32_t flags; // Group of attrd_value_flags
} attribute_value_t;
#define attrd_set_value_flags(attr_value, flags_to_set) do { \
(attr_value)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Value for node", (attr_value)->nodename, \
(attr_value)->flags, (flags_to_set), #flags_to_set); \
} while (0)
#define attrd_clear_value_flags(attr_value, flags_to_clear) do { \
(attr_value)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
LOG_TRACE, "Value for node", (attr_value)->nodename, \
(attr_value)->flags, (flags_to_clear), #flags_to_clear); \
} while (0)
extern pcmk_cluster_t *attrd_cluster;
extern GHashTable *attributes;
extern GHashTable *peer_protocol_vers;
#define CIB_OP_TIMEOUT_S 120
int attrd_cluster_connect(void);
void attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v);
void attrd_peer_update(const pcmk__node_status_t *peer, xmlNode *xml,
const char *host, bool filter);
void attrd_peer_sync(pcmk__node_status_t *peer);
void attrd_peer_remove(const char *host, bool uncache, const char *source);
void attrd_peer_clear_failure(pcmk__request_t *request);
void attrd_peer_sync_response(const pcmk__node_status_t *peer, bool peer_won,
xmlNode *xml);
void attrd_send_protocol(const pcmk__node_status_t *peer);
xmlNode *attrd_client_peer_remove(pcmk__request_t *request);
xmlNode *attrd_client_clear_failure(pcmk__request_t *request);
xmlNode *attrd_client_update(pcmk__request_t *request);
xmlNode *attrd_client_refresh(pcmk__request_t *request);
xmlNode *attrd_client_query(pcmk__request_t *request);
gboolean attrd_send_message(const pcmk__node_status_t *node, xmlNode *data,
bool confirm);
xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
const attribute_value_t *v, bool force_write);
void attrd_clear_value_seen(void);
void attrd_free_attribute(gpointer data);
void attrd_free_attribute_value(gpointer data);
attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
char *attrd_set_id(const attribute_t *attr, const char *node_state_id);
char *attrd_nvpair_id(const attribute_t *attr, const char *node_state_id);
+bool attrd_for_cib(const attribute_t *a);
+void attrd_drop_removed_value(const char *cib_id);
+void attrd_drop_removed_set(const char *set_type, const char *cib_id);
+void attrd_drop_removed_values(const char *cib_id);
enum attrd_write_options {
attrd_write_changed = 0,
attrd_write_all = (1 << 0),
attrd_write_no_delay = (1 << 1),
};
void attrd_write_attributes(uint32_t options);
void attrd_write_or_elect_attribute(attribute_t *a);
extern int minimum_protocol_version;
void attrd_remove_peer_protocol_ver(const char *host);
void attrd_update_minimum_protocol_ver(const char *host, const char *value);
mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr);
void attrd_unregister_handlers(void);
void attrd_handle_request(pcmk__request_t *request);
enum attrd_sync_point {
attrd_sync_point_local,
attrd_sync_point_cluster,
};
typedef int (*attrd_confirmation_action_fn)(xmlNode *);
void attrd_add_client_to_waitlist(pcmk__request_t *request);
void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml);
int attrd_cluster_sync_point_update(xmlNode *xml);
void attrd_do_not_expect_from_peer(const char *host);
void attrd_do_not_wait_for_client(pcmk__client_t *client);
void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn);
void attrd_free_confirmations(void);
void attrd_handle_confirmation(int callid, const char *host);
void attrd_remove_client_from_waitlist(pcmk__client_t *client);
const char *attrd_request_sync_point(xmlNode *xml);
bool attrd_request_has_sync_point(xmlNode *xml);
extern gboolean stand_alone;
// Node utilities (from attrd_nodes.c)
const char *attrd_get_node_xml_id(const char *node_name);
void attrd_set_node_xml_id(const char *node_name, const char *node_xml_id);
void attrd_forget_node_xml_id(const char *node_name);
void attrd_cleanup_xml_ids(void);
#endif /* PACEMAKER_ATTRD__H */
diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c
index eff8070818..32fcbe2568 100644
--- a/daemons/controld/controld_attrd.c
+++ b/daemons/controld/controld_attrd.c
@@ -1,158 +1,165 @@
/*
- * Copyright 2006-2024 the Pacemaker project contributors
+ * Copyright 2006-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/attrs_internal.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_attrd_internal.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
static pcmk_ipc_api_t *attrd_api = NULL;
void
controld_close_attrd_ipc(void)
{
if (attrd_api != NULL) {
crm_trace("Closing connection to " PCMK__SERVER_ATTRD);
pcmk_disconnect_ipc(attrd_api);
pcmk_free_ipc_api(attrd_api);
attrd_api = NULL;
}
}
static inline const char *
node_type(bool is_remote)
{
return is_remote? "Pacemaker Remote" : "cluster";
}
static inline const char *
when(void)
{
return pcmk_is_set(controld_globals.fsa_input_register,
R_SHUTDOWN)? " at shutdown" : "";
}
static void
handle_attr_error(void)
{
if (AM_I_DC) {
/* We are unable to provide accurate information to the
* scheduler, so allow another node to take over DC.
* @TODO Should we do this unconditionally on any failure?
*/
crmd_exit(CRM_EX_FATAL);
} else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
// Fast-track shutdown since unable to request via attribute
register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
void
update_attrd(const char *host, const char *name, const char *value,
const char *user_name, gboolean is_remote_node)
{
int rc = pcmk_rc_ok;
if (attrd_api == NULL) {
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
}
if (rc == pcmk_rc_ok) {
uint32_t attrd_opts = pcmk__node_attr_value;
if (is_remote_node) {
pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
}
rc = pcmk__attrd_api_update(attrd_api, host, name, value,
NULL, NULL, user_name, attrd_opts);
}
if (rc != pcmk_rc_ok) {
do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
"Could not update attribute %s=%s for %s node %s%s: %s "
QB_XS " rc=%d", name, value, node_type(is_remote_node),
host, when(), pcmk_rc_str(rc), rc);
handle_attr_error();
}
}
void
update_attrd_list(GList *attrs, uint32_t opts)
{
int rc = pcmk_rc_ok;
if (attrd_api == NULL) {
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
}
if (rc == pcmk_rc_ok) {
rc = pcmk__attrd_api_update_list(attrd_api, attrs, NULL, NULL, NULL,
opts | pcmk__node_attr_value);
}
if (rc != pcmk_rc_ok) {
do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
"Could not update multiple node attributes: %s "
QB_XS " rc=%d", pcmk_rc_str(rc), rc);
handle_attr_error();
}
}
+/*!
+ * \internal
+ * \brief Ask attribute manager to purge a node and its transient attributes
+ *
+ * \param[in] node_name Node to purge
+ * \param[in] from_cache If true, purge from node caches as well
+ */
void
-update_attrd_remote_node_removed(const char *host, const char *user_name)
+controld_purge_node_attrs(const char *node_name, bool from_cache)
{
int rc = pcmk_rc_ok;
if (attrd_api == NULL) {
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
}
if (rc == pcmk_rc_ok) {
- crm_trace("Asking attribute manager to purge Pacemaker Remote node %s",
- host);
- rc = pcmk__attrd_api_purge(attrd_api, host, true);
+ crm_debug("Asking %s to purge transient attributes%s for %s ",
+ pcmk_ipc_name(attrd_api, true),
+ (from_cache? " and node cache" : ""), node_name);
+ rc = pcmk__attrd_api_purge(attrd_api, node_name, from_cache);
}
if (rc != pcmk_rc_ok) {
- crm_err("Could not purge Pacemaker Remote node %s "
- "in attribute manager%s: %s " QB_XS " rc=%d",
- host, when(), pcmk_rc_str(rc), rc);
+ crm_err("Could not purge node %s from attribute manager%s: %s "
+ QB_XS " rc=%d", node_name, when(), pcmk_rc_str(rc), rc);
}
}
void
update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
const char *interval_spec, gboolean is_remote_node)
{
int rc = pcmk_rc_ok;
if (attrd_api == NULL) {
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
}
if (rc == pcmk_rc_ok) {
uint32_t attrd_opts = pcmk__node_attr_none;
if (is_remote_node) {
pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
}
rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op,
interval_spec, NULL, attrd_opts);
}
if (rc != pcmk_rc_ok) {
const char *interval_desc = "all";
if (op != NULL) {
interval_desc = pcmk__s(interval_spec, "nonrecurring");
}
crm_err("Could not clear failure of %s %s for %s on %s node %s%s: %s "
QB_XS " rc=%d", interval_desc, pcmk__s(op, "operations"),
pcmk__s(rsc, "all resources"), node_type(is_remote_node), host,
when(), pcmk_rc_str(rc), rc);
}
}
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index 48c255ed19..cd4b077dd5 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -1,396 +1,382 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h> // bool
#include <stdint.h> // uint32_t
#include <stdio.h> // NULL
#include <sys/param.h>
#include <string.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crm/cib.h>
#include <pacemaker-controld.h>
/* From join_dc... */
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
void
crmd_ha_msg_filter(xmlNode * msg)
{
if (AM_I_DC) {
const char *sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
const char *from = crm_element_value(msg, PCMK__XA_SRC);
if (!controld_is_local_node(from)) {
int level = LOG_INFO;
const char *op = crm_element_value(msg, PCMK__XA_CRM_TASK);
/* make sure the election happens NOW */
if (controld_globals.fsa_state != S_ELECTION) {
ha_msg_input_t new_input;
level = LOG_WARNING;
new_input.msg = msg;
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
__func__);
}
do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
goto done;
}
}
} else {
const char *sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
return;
}
}
/* crm_log_xml_trace(msg, "HA[inbound]"); */
route_message(C_HA_MESSAGE, msg);
done:
controld_trigger_fsa();
}
/*!
* \internal
* \brief Check whether a node is online
*
* \param[in] node Node to check
*
* \retval -1 if completely dead
* \retval 0 if partially alive
* \retval 1 if completely alive
*/
static int
node_alive(const pcmk__node_status_t *node)
{
if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
// Pacemaker Remote nodes can't be partially alive
if (pcmk__str_eq(node->state, PCMK_VALUE_MEMBER, pcmk__str_none)) {
return 1;
}
return -1;
} else if (pcmk__cluster_is_node_active(node)) {
// Completely up cluster node: both cluster member and peer
return 1;
} else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
&& !pcmk__str_eq(node->state, PCMK_VALUE_MEMBER,
pcmk__str_none)) {
// Completely down cluster node: neither cluster member nor peer
return -1;
}
// Partially up cluster node: only cluster member or only peer
return 0;
}
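/* A sketch of how a caller can interpret the tri-state result (illustrative;
* the actual callers below compare against 0 directly):
*
*   switch (node_alive(node)) {
*       case 1:   // cluster member and peer (or active remote node)
*           break;
*       case 0:   // partially alive: member or peer, but not both
*           break;
*       default:  // -1: neither member nor peer, completely down
*           break;
*   }
*/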
#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
// @TODO This is insanely long, and some parts should be functionized
void
peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
const void *data)
{
uint32_t old = 0;
bool appeared = FALSE;
bool is_remote = pcmk_is_set(node->flags, pcmk__node_status_remote);
controld_node_pending_timer(node);
/* The controller waits to receive some information from the membership
* layer before declaring itself operational. If this is being called for a
* cluster node, indicate that we have it.
*/
if (!is_remote) {
controld_set_fsa_input_flags(R_PEER_DATA);
}
if ((type == pcmk__node_update_processes)
&& pcmk_is_set(node->processes, crm_get_cluster_proc())
&& !AM_I_DC
&& !is_remote) {
/* relay_message() on the recipient ignores these messages, but
* libcrmcluster will have cached the node name by then
*/
xmlNode *query = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD,
NULL, CRM_SYSTEM_CRMD, CRM_OP_HELLO,
NULL);
crm_debug("Sending hello to node %" PRIu32 " so that it learns our "
"node name",
node->cluster_layer_id);
pcmk__cluster_send_message(node, pcmk_ipc_controld, query);
pcmk__xml_free(query);
}
if (node->name == NULL) {
return;
}
switch (type) {
case pcmk__node_update_name:
/* If we've never seen the node, then it also won't be in the status section */
crm_info("%s node %s is now %s",
(is_remote? "Remote" : "Cluster"),
node->name, state_text(node->state));
return;
case pcmk__node_update_state:
/* This callback should not be called unless the state actually
* changed, but here's a failsafe just in case.
*/
CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
return);
crm_info("%s node %s is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
node->name, state_text(node->state), state_text(data));
if (pcmk__str_eq(PCMK_VALUE_MEMBER, node->state, pcmk__str_none)) {
appeared = TRUE;
if (!is_remote) {
remove_stonith_cleanup(node->name);
}
} else {
controld_remove_failed_sync_node(node->name);
controld_remove_voter(node->name);
}
crmd_alert_node_event(node);
break;
case pcmk__node_update_processes:
CRM_CHECK(data != NULL, return);
old = *(const uint32_t *)data;
appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
{
const char *dc_s = controld_globals.dc_name;
if ((dc_s == NULL) && AM_I_DC) {
dc_s = PCMK_VALUE_TRUE;
}
crm_info("Node %s is %s a peer " QB_XS
" DC=%s old=%#07x new=%#07x",
node->name, (appeared? "now" : "no longer"),
pcmk__s(dc_s, "<none>"), old, node->processes);
}
if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
/* Peer status did not change. This should not be possible,
* since we don't track process flags other than peer status.
*/
crm_trace("Process flag %#7x did not change from %#7x to %#7x",
crm_get_cluster_proc(), old, node->processes);
return;
}
if (!appeared) {
node->peer_lost = time(NULL);
controld_remove_failed_sync_node(node->name);
controld_remove_voter(node->name);
}
if (!pcmk_is_set(controld_globals.fsa_input_register,
R_CIB_CONNECTED)) {
crm_trace("Ignoring peer status change because not connected to CIB");
return;
} else if (controld_globals.fsa_state == S_STOPPING) {
crm_trace("Ignoring peer status change because stopping");
return;
}
if (!appeared && controld_is_local_node(node->name)) {
/* Did we get evicted? */
crm_notice("Our peer connection failed");
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
} else if (pcmk__str_eq(node->name, controld_globals.dc_name,
pcmk__str_casei)
&& !pcmk__cluster_is_node_active(node)) {
- /* The DC has left, so delete its transient attributes and
- * trigger a new election.
- *
- * A DC sends its shutdown request to all peers, who update the
- * DC's expected state to down. This avoids fencing upon
- * deletion of its transient attributes.
- */
+ // The DC has left, so trigger a new election
crm_notice("Our peer on the DC (%s) is dead",
controld_globals.dc_name);
-
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
- controld_delete_node_state(node->name, controld_section_attrs,
- cib_none);
-
} else if (AM_I_DC
|| pcmk_is_set(controld_globals.flags, controld_dc_left)
|| (controld_globals.dc_name == NULL)) {
/* This only needs to be done once, so normally the DC should do
* it. However if there is no DC, every node must do it, since
* there is no other way to ensure that at least one node does it.
*/
if (appeared) {
te_trigger_stonith_history_sync(FALSE);
- } else {
- controld_delete_node_state(node->name,
- controld_section_attrs,
- cib_none);
}
}
break;
}
if (AM_I_DC) {
xmlNode *update = NULL;
uint32_t flags = controld_node_update_peer;
int alive = node_alive(node);
pcmk__graph_action_t *down = match_down_event(node->xml_id);
crm_trace("Alive=%d, appeared=%d, down=%d",
alive, appeared, (down? down->id : -1));
if (appeared && (alive > 0) && !is_remote) {
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
if (down) {
const char *task = crm_element_value(down->xml, PCMK_XA_OPERATION);
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
const bool confirmed =
pcmk_is_set(down->flags, pcmk__graph_action_confirmed);
/* tengine_stonith_callback() confirms fence actions */
crm_trace("Updating CIB %s fencer reported fencing of %s complete",
(confirmed? "after" : "before"), node->name);
} else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN,
pcmk__str_casei)) {
// Shutdown actions are immediately confirmed (i.e. no_wait)
if (!is_remote) {
flags |= controld_node_update_join
|controld_node_update_expected;
crmd_peer_down(node, FALSE);
check_join_state(controld_globals.fsa_state, __func__);
}
if (alive >= 0) {
crm_info("%s of peer %s is in progress " QB_XS " action=%d",
task, node->name, down->id);
} else {
crm_notice("%s of peer %s is complete " QB_XS " action=%d",
task, node->name, down->id);
pcmk__update_graph(controld_globals.transition_graph, down);
trigger_graph();
}
} else {
const char *liveness = "alive";
if (alive == 0) {
liveness = "partially alive";
} else if (alive < 0) {
liveness = "dead";
}
crm_trace("Node %s is %s, was expected to %s (op %d)",
node->name, liveness, task, down->id);
}
} else if (appeared == FALSE) {
if ((controld_globals.transition_graph == NULL)
|| (controld_globals.transition_graph->id <= 0)) {
crm_info("Stonith/shutdown of node %s is unknown to the "
"current DC", node->name);
} else {
crm_warn("Stonith/shutdown of node %s was not expected",
node->name);
}
if (!is_remote) {
crm_update_peer_join(__func__, node, controld_join_none);
check_join_state(controld_globals.fsa_state, __func__);
}
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Node failure", NULL);
fail_incompletable_actions(controld_globals.transition_graph,
node->xml_id);
} else {
crm_trace("Node %s came up, was not expected to be down",
node->name);
}
if (is_remote) {
/* A pacemaker_remote node won't have its cluster status updated
* in the CIB by membership-layer callbacks, so do it here.
*/
flags |= controld_node_update_cluster;
/* Trigger resource placement on newly integrated nodes */
if (appeared) {
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"Pacemaker Remote node integrated", NULL);
}
}
if (!appeared && (type == pcmk__node_update_processes)
&& (node->when_member > 1)) {
/* The node left CPG but is still a cluster member. Set its
* membership time to 1 to record it in the cluster state as a
* boolean, so we don't fence it due to
* PCMK_OPT_NODE_PENDING_TIMEOUT.
*/
node->when_member = 1;
flags |= controld_node_update_cluster;
controld_node_pending_timer(node);
}
/* Update the CIB node state */
update = create_node_state_update(node, flags, NULL, __func__);
if (update == NULL) {
crm_debug("Node state update not yet possible for %s", node->name);
} else {
fsa_cib_anon_update(PCMK_XE_STATUS, update);
}
pcmk__xml_free(update);
}
controld_trigger_fsa();
}
gboolean
crm_fsa_trigger(gpointer user_data)
{
crm_trace("Invoked (queue len: %d)",
g_list_length(controld_globals.fsa_message_queue));
s_crmd_fsa(C_FSA_INTERNAL);
crm_trace("Exited (queue len: %d)",
g_list_length(controld_globals.fsa_message_queue));
return TRUE;
}
diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c
index e2a0d50179..b5e6a29146 100644
--- a/daemons/controld/controld_cib.c
+++ b/daemons/controld/controld_cib.c
@@ -1,1061 +1,1028 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <unistd.h> /* sleep */
#include <crm/common/alerts_internal.h>
#include <crm/common/xml.h>
#include <crm/crm.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-controld.h>
// Call ID of the most recent in-progress CIB resource update (or 0 if none)
static int pending_rsc_update = 0;
/*!
* \internal
* \brief Respond to a dropped CIB connection
*
* \param[in] user_data CIB connection that dropped
*/
static void
handle_cib_disconnect(gpointer user_data)
{
CRM_LOG_ASSERT(user_data == controld_globals.cib_conn);
controld_trigger_fsa();
controld_globals.cib_conn->state = cib_disconnected;
if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
// @TODO This should trigger a reconnect, not a shutdown
crm_crit("Lost connection to the CIB manager, shutting down");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
controld_clear_fsa_input_flags(R_CIB_CONNECTED);
} else { // Expected
crm_info("Disconnected from the CIB manager");
}
}
static void
do_cib_updated(const char *event, xmlNode * msg)
{
const xmlNode *patchset = NULL;
const char *client_name = NULL;
crm_debug("Received CIB diff notification: DC=%s", pcmk__btoa(AM_I_DC));
if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) {
return;
}
if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_ALERTS)
|| pcmk__cib_element_in_patchset(patchset, PCMK_XE_CRM_CONFIG)) {
controld_trigger_config();
}
if (!AM_I_DC) {
// We're not in control of the join sequence
return;
}
client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME);
if (!cib__client_triggers_refresh(client_name)) {
// The CIB is still accurate
return;
}
if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_NODES)
|| pcmk__cib_element_in_patchset(patchset, PCMK_XE_STATUS)) {
/* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS
* section. Ensure the node list is up-to-date, and start the join
* process again so we get everyone's current resource history.
*/
if (client_name == NULL) {
client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTID);
}
crm_notice("Populating nodes and starting an election after %s event "
"triggered by %s",
event, pcmk__s(client_name, "(unidentified client)"));
populate_cib_nodes(controld_node_update_quick|controld_node_update_all,
__func__);
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
}
}
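/* Editor's summary of the dispatch above: changes to the alerts or
 * crm_config sections always trigger a configuration re-read; changes to the
 * nodes or status sections matter only on the DC, and only when made by a
 * client that cib__client_triggers_refresh() says can leave the CIB
 * inaccurate, in which case the DC repopulates the node list and starts a
 * new election.
 */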
void
controld_disconnect_cib_manager(void)
{
cib_t *cib_conn = controld_globals.cib_conn;
pcmk__assert(cib_conn != NULL);
crm_debug("Disconnecting from the CIB manager");
controld_clear_fsa_input_flags(R_CIB_CONNECTED);
cib_conn->cmds->del_notify_callback(cib_conn, PCMK__VALUE_CIB_DIFF_NOTIFY,
do_cib_updated);
cib_free_callbacks(cib_conn);
if (cib_conn->state != cib_disconnected) {
cib_conn->cmds->set_secondary(cib_conn, cib_discard_reply);
cib_conn->cmds->signoff(cib_conn);
}
}
/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */
void
do_cib_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static int cib_retries = 0;
cib_t *cib_conn = controld_globals.cib_conn;
void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect;
void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated;
int rc = pcmk_ok;
pcmk__assert(cib_conn != NULL);
if (pcmk_is_set(action, A_CIB_STOP)) {
if ((cib_conn->state != cib_disconnected)
&& (pending_rsc_update != 0)) {
crm_info("Waiting for resource update %d to complete",
pending_rsc_update);
crmd_fsa_stall(FALSE);
return;
}
controld_disconnect_cib_manager();
}
if (!pcmk_is_set(action, A_CIB_START)) {
return;
}
if (cur_state == S_STOPPING) {
crm_err("Ignoring request to connect to the CIB manager after "
"shutdown");
return;
}
rc = cib_conn->cmds->signon(cib_conn, crm_system_name,
cib_command_nonblocking);
if (rc != pcmk_ok) {
// A short wait that usually avoids stalling the FSA
sleep(1);
rc = cib_conn->cmds->signon(cib_conn, crm_system_name,
cib_command_nonblocking);
}
if (rc != pcmk_ok) {
crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc));
} else if (cib_conn->cmds->set_connection_dnotify(cib_conn,
dnotify_fn) != pcmk_ok) {
crm_err("Could not set dnotify callback");
} else if (cib_conn->cmds->add_notify_callback(cib_conn,
PCMK__VALUE_CIB_DIFF_NOTIFY,
update_cb) != pcmk_ok) {
crm_err("Could not set CIB notification callback (update)");
} else {
controld_set_fsa_input_flags(R_CIB_CONNECTED);
cib_retries = 0;
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
cib_retries++;
if (cib_retries < 30) {
crm_warn("Couldn't complete CIB registration %d times... "
"pause and retry", cib_retries);
controld_start_wait_timer();
crmd_fsa_stall(FALSE);
} else {
crm_err("Could not complete CIB registration %d times... "
"hard error", cib_retries);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
}
#define MIN_CIB_OP_TIMEOUT (30)
/*!
* \internal
* \brief Get the timeout (in seconds) that should be used with CIB operations
*
* \return The maximum of 30 seconds, or 10 seconds times one more than the
* combined number of active cluster nodes and remote nodes
*/
unsigned int
cib_op_timeout(void)
{
unsigned int calculated_timeout = 10U * (pcmk__cluster_num_active_nodes()
+ pcmk__cluster_num_remote_nodes()
+ 1U);
calculated_timeout = QB_MAX(calculated_timeout, MIN_CIB_OP_TIMEOUT);
crm_trace("Calculated timeout: %s",
pcmk__readable_interval(calculated_timeout * 1000));
if (controld_globals.cib_conn) {
controld_globals.cib_conn->call_timeout = calculated_timeout;
}
return calculated_timeout;
}
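/* Worked example (editor's illustration, not part of the change): with 3
 * active cluster nodes and 2 Pacemaker Remote nodes, the calculation yields
 * 10 * (3 + 2 + 1) = 60 seconds; a single-node cluster yields
 * 10 * (1 + 0 + 1) = 20 seconds, which QB_MAX() raises to the 30-second
 * floor.
 */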
/*!
* \internal
* \brief Get CIB call options to use local scope if primary is unavailable
*
* \return CIB call options
*/
int
crmd_cib_smart_opt(void)
{
int call_opt = cib_none;
if ((controld_globals.fsa_state == S_ELECTION)
|| (controld_globals.fsa_state == S_PENDING)) {
crm_info("Sending update to local CIB in state: %s",
fsa_state2string(controld_globals.fsa_state));
cib__set_call_options(call_opt, "update", cib_none);
}
return call_opt;
}
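/* Usage sketch (editor's addition): callers fetch the options once and pass
 * them along with a CIB request, as controld_update_resource_history() does
 * below:
 *
 * int call_opt = crmd_cib_smart_opt();
 * controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback);
 *
 * so that updates made during an election or while pending are applied
 * locally rather than waiting on an unreachable primary.
 */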
static void
cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
char *desc = user_data;
if (rc == 0) {
crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id);
} else {
crm_warn("Deletion of %s (via CIB call %d) failed: %s " QB_XS " rc=%d",
desc, call_id, pcmk_strerror(rc), rc);
}
}
// Searches for various portions of PCMK__XE_NODE_STATE to delete
// Match a particular node's PCMK__XE_NODE_STATE (takes node name 1x)
#define XPATH_NODE_STATE "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
// Node's lrm section (name 1x)
#define XPATH_NODE_LRM XPATH_NODE_STATE "/" PCMK__XE_LRM
/* Node's PCMK__XE_LRM_RSC_OP entries and PCMK__XE_LRM_RESOURCE entries without
* unexpired lock
* (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x)
*/
#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" PCMK__XE_LRM_RSC_OP \
"|" XPATH_NODE_STATE \
"//" PCMK__XE_LRM_RESOURCE \
"[not(@" PCMK_OPT_SHUTDOWN_LOCK ") " \
"or " PCMK_OPT_SHUTDOWN_LOCK "<%lld]"
-// Node's PCMK__XE_TRANSIENT_ATTRIBUTES section (name 1x)
-#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" PCMK__XE_TRANSIENT_ATTRIBUTES
-
-// Everything under PCMK__XE_NODE_STATE (name 1x)
-#define XPATH_NODE_ALL XPATH_NODE_STATE "/*"
-
-/* Unlocked history + transient attributes
- * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x, name 1x)
- */
-#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS
-
/*!
* \internal
- * \brief Get the XPath and description of a node state section to be deleted
+ * \brief Get the XPath and description of resource history to be deleted
*
- * \param[in] uname Desired node
- * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to be deleted
- * \param[out] xpath Where to store XPath of \p section
- * \param[out] desc If not \c NULL, where to store description of \p section
+ * \param[in] uname Name of node to delete resource history for
+ * \param[in] unlocked_only If true, delete history of only unlocked resources
+ * \param[out] xpath Where to store XPath for history deletion
+ * \param[out] desc If not NULL, where to store loggable description
*/
void
-controld_node_state_deletion_strings(const char *uname,
- enum controld_section_e section,
- char **xpath, char **desc)
+controld_node_history_deletion_strings(const char *uname, bool unlocked_only,
+ char **xpath, char **desc)
{
const char *desc_pre = NULL;
// Shutdown locks that started before this time are expired
long long expire = (long long) time(NULL)
- controld_globals.shutdown_lock_limit;
- switch (section) {
- case controld_section_lrm:
- *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
- desc_pre = "resource history";
- break;
- case controld_section_lrm_unlocked:
- *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED,
- uname, uname, expire);
- desc_pre = "resource history (other than shutdown locks)";
- break;
- case controld_section_attrs:
- *xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname);
- desc_pre = "transient attributes";
- break;
- case controld_section_all:
- *xpath = crm_strdup_printf(XPATH_NODE_ALL, uname);
- desc_pre = "all state";
- break;
- case controld_section_all_unlocked:
- *xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED,
- uname, uname, expire, uname);
- desc_pre = "all state (other than shutdown locks)";
- break;
- default:
- // We called this function incorrectly
- pcmk__assert(false);
- break;
+ if (unlocked_only) {
+ *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED,
+ uname, uname, expire);
+ desc_pre = "resource history (other than shutdown locks)";
+ } else {
+ *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
+ desc_pre = "resource history";
}
if (desc != NULL) {
*desc = crm_strdup_printf("%s for node %s", desc_pre, uname);
}
}
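/* Example output (editor's illustration, node name hypothetical): for node
 * "node1" with unlocked_only=false, *xpath matches
 * //node_state[@uname='node1']/lrm and *desc becomes "resource history for
 * node node1"; with unlocked_only=true, the XPath instead matches the
 * lrm_rsc_op entries plus any lrm_resource entries whose shutdown lock is
 * missing or already expired.
 */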
+
/*!
* \internal
- * \brief Delete subsection of a node's CIB \c PCMK__XE_NODE_STATE
+ * \brief Delete a node's resource history from the CIB
*
- * \param[in] uname Desired node
- * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to delete
- * \param[in] options CIB call options to use
+ * \param[in] uname Desired node
+ * \param[in] unlocked_only If true, delete history of only unlocked resources
+ * \param[in] options CIB call options to use
*/
void
-controld_delete_node_state(const char *uname, enum controld_section_e section,
- int options)
+controld_delete_node_history(const char *uname, bool unlocked_only, int options)
{
cib_t *cib = controld_globals.cib_conn;
char *xpath = NULL;
char *desc = NULL;
int cib_rc = pcmk_ok;
pcmk__assert((uname != NULL) && (cib != NULL));
- controld_node_state_deletion_strings(uname, section, &xpath, &desc);
-
+ controld_node_history_deletion_strings(uname, unlocked_only, &xpath, &desc);
cib__set_call_options(options, "node state deletion",
cib_xpath|cib_multiple);
cib_rc = cib->cmds->remove(cib, xpath, NULL, options);
fsa_register_cib_callback(cib_rc, desc, cib_delete_callback);
crm_info("Deleting %s (via CIB call %d) " QB_XS " xpath=%s",
desc, cib_rc, xpath);
// CIB library handles freeing desc
free(xpath);
}
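/* Usage sketch (editor's addition, values hypothetical): to wipe a node's
 * resource history while preserving unexpired shutdown locks, a caller could
 * do
 *
 * controld_delete_node_history("node1", true, cib_none);
 *
 * The deletion runs asynchronously, and cib_delete_callback() logs the
 * outcome.
 */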
// Takes node name and resource ID
#define XPATH_RESOURCE_HISTORY "//" PCMK__XE_NODE_STATE \
"[@" PCMK_XA_UNAME "='%s']/" \
PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \
"/" PCMK__XE_LRM_RESOURCE \
"[@" PCMK_XA_ID "='%s']"
// @TODO could add "and @PCMK_OPT_SHUTDOWN_LOCK" to limit to locks
/*!
* \internal
* \brief Clear resource history from CIB for a given resource and node
*
* \param[in] rsc_id ID of resource to be cleared
* \param[in] node Node whose resource history should be cleared
* \param[in] user_name ACL user name to use
* \param[in] call_options CIB call options
*
* \return Standard Pacemaker return code
*/
int
controld_delete_resource_history(const char *rsc_id, const char *node,
const char *user_name, int call_options)
{
char *desc = NULL;
char *xpath = NULL;
int rc = pcmk_rc_ok;
cib_t *cib = controld_globals.cib_conn;
CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL);
desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node);
if (cib == NULL) {
crm_err("Unable to clear %s: no CIB connection", desc);
free(desc);
return ENOTCONN;
}
// Ask CIB to delete the entry
xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id);
cib->cmds->set_user(cib, user_name);
rc = cib->cmds->remove(cib, xpath, NULL, call_options|cib_xpath);
cib->cmds->set_user(cib, NULL);
if (rc < 0) {
rc = pcmk_legacy2rc(rc);
crm_err("Could not delete resource status of %s on %s%s%s: %s "
QB_XS " rc=%d", rsc_id, node,
(user_name? " for user " : ""), (user_name? user_name : ""),
pcmk_rc_str(rc), rc);
free(desc);
free(xpath);
return rc;
}
if (pcmk_is_set(call_options, cib_sync_call)) {
if (pcmk_is_set(call_options, cib_dryrun)) {
crm_debug("Deletion of %s would succeed", desc);
} else {
crm_debug("Deletion of %s succeeded", desc);
}
free(desc);
} else {
crm_info("Clearing %s (via CIB call %d) " QB_XS " xpath=%s",
desc, rc, xpath);
fsa_register_cib_callback(rc, desc, cib_delete_callback);
// CIB library handles freeing desc
}
free(xpath);
return pcmk_rc_ok;
}
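/* Editor's illustration: for a hypothetical resource "rsc1" on node "node1",
 * XPATH_RESOURCE_HISTORY expands to
 * //node_state[@uname='node1']/lrm/lrm_resources/lrm_resource[@id='rsc1'],
 * so the remove() call deletes that resource's entire lrm_resource entry.
 */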
/*!
* \internal
* \brief Build XML and string of parameters meeting some criteria, for digest
*
* \param[in] op Executor event with parameter table to use
* \param[in] metadata Parsed meta-data for executed resource agent
* \param[in] param_type Flag used for selection criteria
* \param[out] result Will be set to newly created XML with selected
* parameters as attributes
*
* \return Newly allocated space-separated string of parameter names
* \note Selection criteria varies by param_type: for the restart digest, we
* want parameters that are *not* marked reloadable (OCF 1.1) or that
* *are* marked unique (pre-1.1), for both string and XML results; for the
* secure digest, we want parameters that *are* marked private for the
* string, but parameters that are *not* marked private for the XML.
* \note It is the caller's responsibility to free the string return value with
* \p g_string_free() and the XML result with \p pcmk__xml_free().
*/
static GString *
build_parameter_list(const lrmd_event_data_t *op,
const struct ra_metadata_s *metadata,
enum ra_param_flags_e param_type, xmlNode **result)
{
GString *list = NULL;
*result = pcmk__xe_create(NULL, PCMK_XE_PARAMETERS);
/* Consider all parameters except private ones, to be consistent with what
* the scheduler does with calculate_secure_digest().
*/
if (param_type == ra_param_private
&& compare_version(controld_globals.dc_version, "3.16.0") >= 0) {
g_hash_table_foreach(op->params, hash2field, *result);
pcmk__filter_op_for_digest(*result);
}
for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) {
struct ra_param_s *param = (struct ra_param_s *) iter->data;
bool accept_for_list = false;
bool accept_for_xml = false;
switch (param_type) {
case ra_param_reloadable:
accept_for_list = !pcmk_is_set(param->rap_flags, param_type);
accept_for_xml = accept_for_list;
break;
case ra_param_unique:
accept_for_list = pcmk_is_set(param->rap_flags, param_type);
accept_for_xml = accept_for_list;
break;
case ra_param_private:
accept_for_list = pcmk_is_set(param->rap_flags, param_type);
accept_for_xml = !accept_for_list;
break;
}
if (accept_for_list) {
crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));
if (list == NULL) {
// We will later search for " WORD ", so start list with a space
pcmk__add_word(&list, 256, " ");
}
pcmk__add_word(&list, 0, param->rap_name);
} else {
crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
}
if (accept_for_xml) {
const char *v = g_hash_table_lookup(op->params, param->rap_name);
if (v != NULL) {
crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
crm_xml_add(*result, param->rap_name, v);
}
} else {
crm_trace("Removing attr %s from the xml result", param->rap_name);
pcmk__xe_remove_attr(*result, param->rap_name);
}
}
if (list != NULL) {
// We will later search for " WORD ", so end list with a space
pcmk__add_word(&list, 0, " ");
}
return list;
}
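/* Editor's restatement of the selection note above: for ra_param_reloadable
 * and ra_param_unique, the string list and the XML digest input agree
 * (parameters that are not reloadable, respectively that are unique); for
 * ra_param_private, the string lists the private parameters while the XML
 * keeps only the non-private ones.
 */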
static void
append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
GString *list = NULL;
char *digest = NULL;
xmlNode *restart = NULL;
CRM_LOG_ASSERT(op->params != NULL);
if (op->interval_ms > 0) {
/* monitors are not reloadable */
return;
}
if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) {
/* Add parameters not marked reloadable to the PCMK__XA_OP_FORCE_RESTART
* list
*/
list = build_parameter_list(op, metadata, ra_param_reloadable,
&restart);
} else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) {
/* @COMPAT pre-OCF-1.1 resource agents
*
* Before OCF 1.1, Pacemaker abused "unique=0" to indicate
* reloadability. Add any parameters with unique="1" to the
* PCMK__XA_OP_FORCE_RESTART list.
*/
list = build_parameter_list(op, metadata, ra_param_unique, &restart);
} else {
// Resource does not support agent reloads
return;
}
digest = pcmk__digest_operation(restart);
/* Add PCMK__XA_OP_FORCE_RESTART and PCMK__XA_OP_RESTART_DIGEST to indicate
* that the resource supports reload, regardless of whether it actually has
* any reloadable parameters
*/
crm_xml_add(update, PCMK__XA_OP_FORCE_RESTART,
(list == NULL)? "" : (const char *) list->str);
crm_xml_add(update, PCMK__XA_OP_RESTART_DIGEST, digest);
if ((list != NULL) && (list->len > 0)) {
crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str);
} else {
crm_trace("%s: %s", op->rsc_id, digest);
}
if (list != NULL) {
g_string_free(list, TRUE);
}
pcmk__xml_free(restart);
free(digest);
}
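/* Editor's illustration with hypothetical parameters: if an OCF 1.1 agent
 * marks "ip" reloadable but not "netmask", the update gains attributes along
 * the lines of op-force-restart=" netmask " and an op-restart-digest hashed
 * over the XML that contains only "netmask", so a later change to "ip" can
 * be handled by reload while a "netmask" change forces a restart.
 */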
static void
append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
xmlNode *update, const char *version)
{
GString *list = NULL;
char *digest = NULL;
xmlNode *secure = NULL;
CRM_LOG_ASSERT(op->params != NULL);
/* To keep PCMK__XA_OP_SECURE_PARAMS short, we want it to contain the secure
* parameters but PCMK__XA_OP_SECURE_DIGEST to be based on the insecure ones
*/
list = build_parameter_list(op, metadata, ra_param_private, &secure);
if (list != NULL) {
digest = pcmk__digest_operation(secure);
crm_xml_add(update, PCMK__XA_OP_SECURE_PARAMS,
(const char *) list->str);
crm_xml_add(update, PCMK__XA_OP_SECURE_DIGEST, digest);
crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str);
g_string_free(list, TRUE);
} else {
crm_trace("%s: no secure parameters", op->rsc_id);
}
pcmk__xml_free(secure);
free(digest);
}
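/* Editor's illustration: for a hypothetical agent with a private "password"
 * parameter, op-secure-params would list " password " while op-secure-digest
 * is computed from the remaining non-private parameters, letting tools
 * compare configurations without exposing the secret value.
 */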
/*!
* \internal
* \brief Create XML for a resource history entry
*
* \param[in] func Function name of caller
* \param[in,out] parent XML to add entry to
* \param[in] rsc Affected resource
* \param[in,out] op Action to add an entry for (or NULL to do nothing)
* \param[in] node_name Node where action occurred
*/
void
controld_add_resource_history_xml_as(const char *func, xmlNode *parent,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op,
const char *node_name)
{
int target_rc = 0;
xmlNode *xml_op = NULL;
struct ra_metadata_s *metadata = NULL;
const char *caller_version = NULL;
lrm_state_t *lrm_state = NULL;
if (op == NULL) {
return;
}
target_rc = rsc_op_expected_rc(op);
caller_version = g_hash_table_lookup(op->params, PCMK_XA_CRM_FEATURE_SET);
CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET);
xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
controld_globals.cluster->priv->node_name,
func);
if (xml_op == NULL) {
return;
}
if ((rsc == NULL) || (op->params == NULL)
|| !crm_op_needs_metadata(rsc->standard, op->op_type)) {
crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
op->op_type, op->rsc_id, op->params, rsc);
return;
}
lrm_state = controld_get_executor_state(node_name, false);
if (lrm_state == NULL) {
crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT
" because we have no connection to executor for %s",
op->rsc_id, op->op_type, op->interval_ms, node_name);
return;
}
/* Ideally the metadata is cached, and the agent is just a fallback.
*
* @TODO Go through all callers and ensure they get metadata asynchronously
* first.
*/
metadata = controld_get_rsc_metadata(lrm_state, rsc,
controld_metadata_from_agent
|controld_metadata_from_cache);
if (metadata == NULL) {
return;
}
crm_trace("Including additional digests for %s:%s:%s",
rsc->standard, rsc->provider, rsc->type);
append_restart_list(op, metadata, xml_op, caller_version);
append_secure_list(op, metadata, xml_op, caller_version);
return;
}
/*!
* \internal
* \brief Record an action as pending in the CIB, if appropriate
*
* \param[in] node_name Node where the action is pending
* \param[in] rsc Resource that action is for
* \param[in,out] op Pending action
*
* \return true if action was recorded in CIB, otherwise false
*/
bool
controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op)
{
const char *record_pending = NULL;
CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL),
return false);
// Never record certain operation types as pending
if ((op->op_type == NULL) || (op->params == NULL)
|| !controld_action_is_recordable(op->op_type)) {
return false;
}
// Check action's PCMK_META_RECORD_PENDING meta-attribute (defaults to true)
record_pending = crm_meta_value(op->params, PCMK_META_RECORD_PENDING);
if ((record_pending != NULL) && !crm_is_true(record_pending)) {
pcmk__warn_once(pcmk__wo_record_pending,
"The " PCMK_META_RECORD_PENDING " option (for example, "
"for the %s resource's %s operation) is deprecated and "
"will be removed in a future release",
rsc->id, op->op_type);
return false;
}
op->call_id = -1;
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
crm_debug("Recording pending %s-interval %s for %s on %s in the CIB",
pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
node_name);
controld_update_resource_history(node_name, rsc, op, 0);
return true;
}
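/* Editor's sketch of the recorded entry (values illustrative): the resulting
 * lrm_rsc_op history entry carries call-id="-1", the PCMK_OCF_UNKNOWN return
 * code, and PCMK_EXEC_PENDING status, which readers of the CIB interpret as
 * an action still in flight rather than a completed result.
 */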
static void
cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
switch (rc) {
case pcmk_ok:
case -pcmk_err_diff_failed:
case -pcmk_err_diff_resync:
crm_trace("Resource history update completed (call=%d rc=%d)",
call_id, rc);
break;
default:
if (call_id > 0) {
crm_warn("Resource history update %d failed: %s "
QB_XS " rc=%d", call_id, pcmk_strerror(rc), rc);
} else {
crm_warn("Resource history update failed: %s " QB_XS " rc=%d",
pcmk_strerror(rc), rc);
}
}
if (call_id == pending_rsc_update) {
pending_rsc_update = 0;
controld_trigger_fsa();
}
}
/* Only successful stops, and probes that found the resource inactive, get locks
* recorded in the history. This ensures the resource stays locked to the node
* until it is active there again after the node comes back up.
*/
static bool
should_preserve_lock(lrmd_event_data_t *op)
{
if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
return false;
}
if (!strcmp(op->op_type, PCMK_ACTION_STOP) && (op->rc == PCMK_OCF_OK)) {
return true;
}
if (!strcmp(op->op_type, PCMK_ACTION_MONITOR)
&& (op->rc == PCMK_OCF_NOT_RUNNING)) {
return true;
}
return false;
}
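/* Editor's summary of the rule above: a lock survives only a successful stop
 * (rc == PCMK_OCF_OK) or a probe that found the resource inactive
 * (rc == PCMK_OCF_NOT_RUNNING); any other result clears the lock.
 */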
/*!
* \internal
* \brief Request a CIB update
*
* \param[in] section Section of CIB to update
* \param[in] data New XML of CIB section to update
* \param[in] options CIB call options
* \param[in] callback If not \c NULL, set this as the operation callback
*
* \return Standard Pacemaker return code
*
* \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is
* stored in \p pending_rsc_update on success.
*/
int
controld_update_cib(const char *section, xmlNode *data, int options,
void (*callback)(xmlNode *, int, int, xmlNode *, void *))
{
cib_t *cib = controld_globals.cib_conn;
int cib_rc = -ENOTCONN;
pcmk__assert(data != NULL);
if (cib != NULL) {
cib_rc = cib->cmds->modify(cib, section, data, options);
if (cib_rc >= 0) {
crm_debug("Submitted CIB update %d for %s section",
cib_rc, section);
}
}
if (callback == NULL) {
if (cib_rc < 0) {
crm_err("Failed to update CIB %s section: %s",
section, pcmk_rc_str(pcmk_legacy2rc(cib_rc)));
}
} else {
if ((cib_rc >= 0) && (callback == cib_rsc_callback)) {
/* Checking for a particular callback is a little hacky, but it
* didn't seem worth adding an output argument for cib_rc for just
* one use case.
*/
pending_rsc_update = cib_rc;
}
fsa_register_cib_callback(cib_rc, NULL, callback);
}
return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc);
}
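/* Usage sketch (editor's addition): resource history updates pass
 * cib_rsc_callback so that the call ID lands in pending_rsc_update, e.g.
 *
 * controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback);
 *
 * while callers that only need error logging may pass a NULL callback.
 */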
/*!
* \internal
* \brief Update resource history entry in CIB
*
* \param[in] node_name Node where action occurred
* \param[in] rsc Resource that action is for
* \param[in,out] op Action to record
* \param[in] lock_time If nonzero, when resource was locked to node
*
* \note On success, the CIB update's call ID will be stored in
* pending_rsc_update.
*/
void
controld_update_resource_history(const char *node_name,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op, time_t lock_time)
{
xmlNode *update = NULL;
xmlNode *xml = NULL;
int call_opt = crmd_cib_smart_opt();
const char *node_id = NULL;
const char *container = NULL;
CRM_CHECK((node_name != NULL) && (op != NULL), return);
if (rsc == NULL) {
crm_warn("Resource %s no longer exists in the executor", op->rsc_id);
controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id);
return;
}
// <status>
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
// <node_state ...>
xml = pcmk__xe_create(update, PCMK__XE_NODE_STATE);
if (controld_is_local_node(node_name)) {
node_id = controld_globals.our_uuid;
} else {
node_id = node_name;
pcmk__xe_set_bool_attr(xml, PCMK_XA_REMOTE_NODE, true);
}
crm_xml_add(xml, PCMK_XA_ID, node_id);
crm_xml_add(xml, PCMK_XA_UNAME, node_name);
crm_xml_add(xml, PCMK_XA_CRM_DEBUG_ORIGIN, __func__);
// <lrm ...>
xml = pcmk__xe_create(xml, PCMK__XE_LRM);
crm_xml_add(xml, PCMK_XA_ID, node_id);
// <lrm_resources>
xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCES);
// <lrm_resource ...>
xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCE);
crm_xml_add(xml, PCMK_XA_ID, op->rsc_id);
crm_xml_add(xml, PCMK_XA_CLASS, rsc->standard);
crm_xml_add(xml, PCMK_XA_PROVIDER, rsc->provider);
crm_xml_add(xml, PCMK_XA_TYPE, rsc->type);
if (lock_time != 0) {
/* Actions on a locked resource should either preserve the lock by
* recording it with the action result, or clear it.
*/
if (!should_preserve_lock(op)) {
lock_time = 0;
}
crm_xml_add_ll(xml, PCMK_OPT_SHUTDOWN_LOCK, (long long) lock_time);
}
if (op->params != NULL) {
container = g_hash_table_lookup(op->params,
CRM_META "_" PCMK__META_CONTAINER);
if (container != NULL) {
crm_trace("Resource %s is a part of container resource %s",
op->rsc_id, container);
crm_xml_add(xml, PCMK__META_CONTAINER, container);
}
}
// <lrm_resource_op ...> (possibly more than one)
controld_add_resource_history_xml(xml, rsc, op, node_name);
/* Update the CIB asynchronously. Even if the update fails, the resource
* state should be discovered during the next election. Worst case, the node
* is wrongly fenced for a resource it isn't actually running.
*/
crm_log_xml_trace(update, __func__);
controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback);
pcmk__xml_free(update);
}
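/* Editor's sketch of the XML built above (attribute values hypothetical):
 *
 * <status>
 *   <node_state id="1" uname="node1" crm-debug-origin="...">
 *     <lrm id="1">
 *       <lrm_resources>
 *         <lrm_resource id="rsc1" class="ocf" provider="heartbeat"
 *                       type="IPaddr2">
 *           <lrm_rsc_op ... />
 */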
/*!
* \internal
* \brief Erase an LRM history entry from the CIB, given the operation data
*
* \param[in] op Operation whose history should be deleted
*/
void
controld_delete_action_history(const lrmd_event_data_t *op)
{
xmlNode *xml_top = NULL;
CRM_CHECK(op != NULL, return);
xml_top = pcmk__xe_create(NULL, PCMK__XE_LRM_RSC_OP);
crm_xml_add_int(xml_top, PCMK__XA_CALL_ID, op->call_id);
crm_xml_add(xml_top, PCMK__XA_TRANSITION_KEY, op->user_data);
if (op->interval_ms > 0) {
char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
/* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
crm_xml_add(xml_top, PCMK_XA_ID, op_id);
free(op_id);
}
crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)",
op->rsc_id, op->op_type, op->interval_ms, op->call_id);
controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn,
PCMK_XE_STATUS, xml_top, cib_none);
crm_log_xml_trace(xml_top, "op:cancel");
pcmk__xml_free(xml_top);
}
/* Define xpath to find LRM resource history entry by node and resource */
#define XPATH_HISTORY \
"/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
"/" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" \
"/" PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \
"/" PCMK__XE_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']" \
"/" PCMK__XE_LRM_RSC_OP
/* ... and also by operation key */
#define XPATH_HISTORY_ID XPATH_HISTORY "[@" PCMK_XA_ID "='%s']"
/* ... and also by operation key and operation call ID */
#define XPATH_HISTORY_CALL XPATH_HISTORY \
"[@" PCMK_XA_ID "='%s' and @" PCMK__XA_CALL_ID "='%d']"
/* ... and also by operation key and original operation key */
#define XPATH_HISTORY_ORIG XPATH_HISTORY \
"[@" PCMK_XA_ID "='%s' and @" PCMK__XA_OPERATION_KEY "='%s']"
/*!
* \internal
* \brief Delete a last_failure resource history entry from the CIB
*
* \param[in] rsc_id Name of resource to clear history for
* \param[in] node Name of node to clear history for
* \param[in] action If specified, delete only if this was failed action
* \param[in] interval_ms If \p action is specified, it has this interval
*/
void
controld_cib_delete_last_failure(const char *rsc_id, const char *node,
const char *action, guint interval_ms)
{
char *xpath = NULL;
char *last_failure_key = NULL;
CRM_CHECK((rsc_id != NULL) && (node != NULL), return);
// Generate XPath to match desired entry
last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0);
if (action == NULL) {
xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id,
last_failure_key);
} else {
char *action_key = pcmk__op_key(rsc_id, action, interval_ms);
xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id,
last_failure_key, action_key);
free(action_key);
}
free(last_failure_key);
controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
NULL, cib_xpath);
free(xpath);
}
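/* Editor's illustration: operation keys follow the rsc_action_interval
 * pattern, so for resource "rsc1" the last_failure key is
 * "rsc1_last_failure_0", and clearing a failed 10-second monitor would use
 * XPATH_HISTORY_ORIG with the original key "rsc1_monitor_10000".
 */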
/*!
* \internal
* \brief Delete resource history entry from the CIB, given operation key
*
* \param[in] rsc_id Name of resource to clear history for
* \param[in] node Name of node to clear history for
* \param[in] key Operation key of operation to clear history for
* \param[in] call_id If specified, delete entry only if it has this call ID
*/
void
controld_delete_action_history_by_key(const char *rsc_id, const char *node,
const char *key, int call_id)
{
char *xpath = NULL;
CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return);
if (call_id > 0) {
xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key,
call_id);
} else {
xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key);
}
controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath,
NULL, cib_xpath);
free(xpath);
}
diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h
index b8622d5228..116db64924 100644
--- a/daemons/controld/controld_cib.h
+++ b/daemons/controld/controld_cib.h
@@ -1,121 +1,112 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CONTROLD_CIB__H
#define PCMK__CONTROLD_CIB__H
#include <crm_internal.h>
#include <glib.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cib/internal.h> // cib__*
#include "controld_globals.h" // controld_globals.cib_conn
static inline void
fsa_cib_anon_update(const char *section, xmlNode *data) {
if (controld_globals.cib_conn == NULL) {
crm_err("No CIB connection available");
} else {
controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
section, data, cib_can_create);
}
}
static inline void
fsa_cib_anon_update_discard_reply(const char *section, xmlNode *data) {
if (controld_globals.cib_conn == NULL) {
crm_err("No CIB connection available");
} else {
controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
section, data,
cib_can_create
|cib_discard_reply);
}
}
int controld_update_cib(const char *section, xmlNode *data, int options,
void (*callback)(xmlNode *, int, int, xmlNode *,
void *));
unsigned int cib_op_timeout(void);
-// Subsections of PCMK__XE_NODE_STATE
-enum controld_section_e {
- controld_section_lrm,
- controld_section_lrm_unlocked,
- controld_section_attrs,
- controld_section_all,
- controld_section_all_unlocked
-};
-
-void controld_node_state_deletion_strings(const char *uname,
- enum controld_section_e section,
- char **xpath, char **desc);
-void controld_delete_node_state(const char *uname,
- enum controld_section_e section, int options);
+void controld_node_history_deletion_strings(const char *uname,
+ bool unlocked_only,
+ char **xpath, char **desc);
+void controld_delete_node_history(const char *uname, bool unlocked_only,
+ int options);
int controld_delete_resource_history(const char *rsc_id, const char *node,
const char *user_name, int call_options);
/* Convenience macro for registering a CIB callback
* (assumes that data can be freed with free())
*/
# define fsa_register_cib_callback(id, data, fn) do { \
cib_t *cib_conn = controld_globals.cib_conn; \
\
pcmk__assert(cib_conn != NULL); \
cib_conn->cmds->register_callback_full(cib_conn, id, cib_op_timeout(), \
FALSE, data, #fn, fn, free); \
} while(0)
void controld_add_resource_history_xml_as(const char *func, xmlNode *parent,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op,
const char *node_name);
#define controld_add_resource_history_xml(parent, rsc, op, node_name) \
controld_add_resource_history_xml_as(__func__, (parent), (rsc), \
(op), (node_name))
bool controld_record_pending_op(const char *node_name,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op);
void controld_update_resource_history(const char *node_name,
const lrmd_rsc_info_t *rsc,
lrmd_event_data_t *op, time_t lock_time);
void controld_delete_action_history(const lrmd_event_data_t *op);
void controld_cib_delete_last_failure(const char *rsc_id, const char *node,
const char *action, guint interval_ms);
void controld_delete_action_history_by_key(const char *rsc_id, const char *node,
const char *key, int call_id);
void controld_disconnect_cib_manager(void);
int crmd_cib_smart_opt(void);
/*!
* \internal
* \brief Check whether an action type should be recorded in the CIB
*
* \param[in] action Action type
*
* \return true if action should be recorded, false otherwise
*/
static inline bool
controld_action_is_recordable(const char *action)
{
return !pcmk__str_any_of(action, PCMK_ACTION_CANCEL, PCMK_ACTION_DELETE,
PCMK_ACTION_NOTIFY, PCMK_ACTION_META_DATA, NULL);
}
#endif // PCMK__CONTROLD_CIB__H
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 2ec689394d..977acf01ab 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1,2406 +1,2405 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <regex.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <crm/crm.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_rsc_info_t, etc.
#include <crm/services.h>
#include <crm/common/xml.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
#define START_DELAY_THRESHOLD 5 * 60 * 1000
#define MAX_LRM_REG_FAILS 30
struct delete_event_s {
int rc;
const char *rsc;
lrm_state_t *lrm_state;
};
static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state,
const xmlNode *rsc_op,
const char *rsc_id,
const char *operation);
static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
xmlNode *msg, struct ra_metadata_s *md);
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
int log_level);
static void
lrm_connection_destroy(void)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
crm_crit("Lost connection to local executor");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
}
}
static char *
make_stop_id(const char *rsc, int call_id)
{
return crm_strdup_printf("%s:%d", rsc, call_id);
}
static void
copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") == NULL) {
pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
}
}
static void
copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
{
if (strstr(key, CRM_META "_") != NULL) {
pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
}
}
/*!
* \internal
* \brief Remove a recurring operation from a resource's history
*
* \param[in,out] history Resource history to modify
* \param[in] op Operation to remove
*
* \return TRUE if the operation was found and removed, FALSE otherwise
*/
static gboolean
history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_event_data_t *existing = iter->data;
if ((op->interval_ms == existing->interval_ms)
&& pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none)
&& pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) {
history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
lrmd_free_event(existing);
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Free all recurring operations in resource history
*
* \param[in,out] history Resource history to modify
*/
static void
history_free_recurring_ops(rsc_history_t *history)
{
GList *iter;
for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
lrmd_free_event(iter->data);
}
g_list_free(history->recurring_op_list);
history->recurring_op_list = NULL;
}
/*!
* \internal
* \brief Free resource history
*
* \param[in,out] history Resource history to free
*/
void
history_free(gpointer data)
{
rsc_history_t *history = (rsc_history_t*)data;
if (history->stop_params) {
g_hash_table_destroy(history->stop_params);
}
/* Don't need to free history->rsc.id because it's set to history->id */
free(history->rsc.type);
free(history->rsc.standard);
free(history->rsc.provider);
lrmd_free_event(history->failed);
lrmd_free_event(history->last);
free(history->id);
history_free_recurring_ops(history);
free(history);
}
static void
update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
{
int target_rc = 0;
rsc_history_t *entry = NULL;
if (op->rsc_deleted) {
crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
NULL, crmd_cib_smart_opt());
return;
}
if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
return;
}
crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
if (entry == NULL && rsc) {
entry = pcmk__assert_alloc(1, sizeof(rsc_history_t));
entry->id = pcmk__str_copy(op->rsc_id);
g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
entry->rsc.id = entry->id;
entry->rsc.type = pcmk__str_copy(rsc->type);
entry->rsc.standard = pcmk__str_copy(rsc->standard);
entry->rsc.provider = pcmk__str_copy(rsc->provider);
} else if (entry == NULL) {
crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
return;
}
entry->last_callid = op->call_id;
target_rc = rsc_op_expected_rc(op);
if (op->op_status == PCMK_EXEC_CANCELLED) {
if (op->interval_ms > 0) {
crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
history_remove_recurring_op(entry, op);
return;
} else {
crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
op->rsc_id, op->op_type, op->interval_ms, op->rc,
op->op_status);
}
} else if (did_rsc_op_fail(op, target_rc)) {
/* Store failed monitors here, otherwise the block below will cause them
* to be forgotten when a stop happens.
*/
if (entry->failed) {
lrmd_free_event(entry->failed);
}
entry->failed = lrmd_copy_event(op);
} else if (op->interval_ms == 0) {
if (entry->last) {
lrmd_free_event(entry->last);
}
entry->last = lrmd_copy_event(op);
if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START,
PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_MONITOR, NULL)) {
if (entry->stop_params) {
g_hash_table_destroy(entry->stop_params);
}
entry->stop_params = pcmk__strkey_table(free, free);
g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
}
}
if (op->interval_ms > 0) {
/* Ensure there are no duplicates */
history_remove_recurring_op(entry, op);
crm_trace("Adding recurring op: " PCMK__OP_FMT,
op->rsc_id, op->op_type, op->interval_ms);
entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
} else if ((entry->recurring_op_list != NULL)
&& !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
g_list_length(entry->recurring_op_list), op->rsc_id,
op->op_type, op->interval_ms);
history_free_recurring_ops(entry);
}
}
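/* Editor's summary of the caching rules above: each resource keeps at most
 * one failed result, one last non-recurring result (plus its non-meta
 * parameters for use by later stops), and a list of active recurring
 * results. A cancellation removes one recurring entry, while any new
 * non-recurring result other than a monitor drops all recurring entries for
 * that resource.
 */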
/*!
* \internal
* \brief Send a direct OK ack for a resource task
*
* \param[in] lrm_state LRM connection
* \param[in] input Input message being ack'ed
* \param[in] rsc_id ID of affected resource
* \param[in] rsc Affected resource (if available)
* \param[in] task Operation task being ack'ed
* \param[in] ack_host Name of host to send ack to
* \param[in] ack_sys IPC system name to ack
*/
static void
send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input,
const char *rsc_id, const lrmd_rsc_info_t *rsc,
const char *task, const char *ack_host, const char *ack_sys)
{
lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
lrmd_free_event(op);
}
static inline const char *
op_node_name(lrmd_event_data_t *op)
{
return pcmk__s(op->remote_nodename,
controld_globals.cluster->priv->node_name);
}
void
lrm_op_callback(lrmd_event_data_t * op)
{
CRM_CHECK(op != NULL, return);
switch (op->type) {
case lrmd_event_disconnect:
if (op->remote_nodename == NULL) {
/* If this is the local executor IPC connection, set the right
* bits in the controller when the connection goes down.
*/
lrm_connection_destroy();
}
break;
case lrmd_event_exec_complete:
{
lrm_state_t *lrm_state =
controld_get_executor_state(op_node_name(op), false);
pcmk__assert(lrm_state != NULL);
process_lrm_event(lrm_state, op, NULL, NULL);
}
break;
default:
break;
}
}
static void
try_local_executor_connect(long long action, fsa_data_t *msg_data,
lrm_state_t *lrm_state)
{
int rc = pcmk_rc_ok;
crm_debug("Connecting to the local executor");
// If we can connect, great
rc = controld_connect_local_executor(lrm_state);
if (rc == pcmk_rc_ok) {
controld_set_fsa_input_flags(R_LRM_CONNECTED);
crm_info("Connection to the local executor established");
return;
}
// Otherwise, if we can try again, set a timer to do so
if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
crm_warn("Failed to connect to the local executor %d time%s "
"(%d max): %s", lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails),
MAX_LRM_REG_FAILS, pcmk_rc_str(rc));
controld_start_wait_timer();
crmd_fsa_stall(FALSE);
return;
}
// Otherwise give up
crm_err("Failed to connect to the executor the max allowed "
"%d time%s: %s", lrm_state->num_lrm_register_fails,
pcmk__plural_s(lrm_state->num_lrm_register_fails),
pcmk_rc_str(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
/* A_LRM_CONNECT */
void
do_lrm_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* This only pertains to local executor connections. Remote connections are
* handled as resources within the scheduler. Connecting and disconnecting
* from remote executor instances is handled differently.
*/
lrm_state_t *lrm_state = NULL;
if (controld_globals.cluster->priv->node_name == NULL) {
return; // Shouldn't be possible
}
lrm_state = controld_get_executor_state(NULL, true);
if (lrm_state == NULL) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
if (action & A_LRM_DISCONNECT) {
if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
if (action == A_LRM_DISCONNECT) {
crmd_fsa_stall(FALSE);
return;
}
}
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_disconnect(lrm_state);
lrm_state_reset_tables(lrm_state, FALSE);
}
if (action & A_LRM_CONNECT) {
try_local_executor_connect(action, msg_data, lrm_state);
}
if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
static gboolean
lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
{
int counter = 0;
gboolean rc = TRUE;
const char *when = "lrm disconnect";
GHashTableIter gIter;
const char *key = NULL;
rsc_history_t *entry = NULL;
active_op_t *pending = NULL;
crm_debug("Checking for active resources before exit");
if (cur_state == S_TERMINATE) {
log_level = LOG_ERR;
when = "shutdown";
} else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
when = "shutdown... waiting";
}
if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) {
guint removed = g_hash_table_foreach_remove(lrm_state->active_ops,
stop_recurring_actions,
lrm_state);
guint nremaining = g_hash_table_size(lrm_state->active_ops);
if (removed || nremaining) {
crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
removed, pcmk__plural_s(removed), when, nremaining);
}
}
if (lrm_state->active_ops != NULL) {
g_hash_table_iter_init(&gIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
/* Ignore recurring actions in the shutdown calculations */
if (pending->interval_ms == 0) {
counter++;
}
}
}
if (counter > 0) {
do_crm_log(log_level, "%d pending executor operation%s at %s",
counter, pcmk__plural_s(counter), when);
if ((cur_state == S_TERMINATE)
|| !pcmk_is_set(controld_globals.fsa_input_register,
R_SENT_RSC_STOP)) {
g_hash_table_iter_init(&gIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
}
} else {
rc = FALSE;
}
return rc;
}
if (lrm_state->resource_history == NULL) {
return rc;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
/* At this point we're not waiting, we're just shutting down */
when = "shutdown";
}
counter = 0;
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
if (is_rsc_active(lrm_state, entry->id) == FALSE) {
continue;
}
counter++;
if (log_level == LOG_ERR) {
crm_info("Found %s active at %s", entry->id, when);
} else {
crm_trace("Found %s active at %s", entry->id, when);
}
if (lrm_state->active_ops != NULL) {
GHashTableIter hIter;
g_hash_table_iter_init(&hIter, lrm_state->active_ops);
while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) {
crm_notice("%sction %s (%s) incomplete at %s",
pending->interval_ms == 0 ? "A" : "Recurring a",
key, pending->op_key, when);
}
}
}
}
if (counter) {
crm_err("%d resource%s active at %s",
counter, (counter == 1)? " was" : "s were", when);
}
return rc;
}
static gboolean
is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
{
rsc_history_t *entry = NULL;
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->last == NULL) {
return FALSE;
}
crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
entry->last->interval_ms, entry->last->rc);
if ((entry->last->rc == PCMK_OCF_OK)
&& pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP,
pcmk__str_casei)) {
return FALSE;
} else if (entry->last->rc == PCMK_OCF_OK
&& pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO,
pcmk__str_casei)) {
// A stricter check is too complex ... leave that to the scheduler
return FALSE;
} else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
return FALSE;
} else if ((entry->last->interval_ms == 0)
&& (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
/* Badly configured resources can't be reliably stopped */
return FALSE;
}
return TRUE;
}
static gboolean
build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
{
GHashTableIter iter;
rsc_history_t *entry = NULL;
g_hash_table_iter_init(&iter, lrm_state->resource_history);
while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
GList *gIter = NULL;
xmlNode *xml_rsc = pcmk__xe_create(rsc_list, PCMK__XE_LRM_RESOURCE);
crm_xml_add(xml_rsc, PCMK_XA_ID, entry->id);
crm_xml_add(xml_rsc, PCMK_XA_TYPE, entry->rsc.type);
crm_xml_add(xml_rsc, PCMK_XA_CLASS, entry->rsc.standard);
crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, entry->rsc.provider);
if (entry->last && entry->last->params) {
static const char *name = CRM_META "_" PCMK__META_CONTAINER;
const char *container = g_hash_table_lookup(entry->last->params,
name);
if (container) {
crm_trace("Resource %s is a part of container resource %s", entry->id, container);
crm_xml_add(xml_rsc, PCMK__META_CONTAINER, container);
}
}
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed,
lrm_state->node_name);
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last,
lrm_state->node_name);
for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data,
lrm_state->node_name);
}
}
return FALSE;
}
xmlNode *
controld_query_executor_state(void)
{
// @TODO Ensure all callers handle NULL returns
xmlNode *xml_state = NULL;
xmlNode *xml_data = NULL;
xmlNode *rsc_list = NULL;
pcmk__node_status_t *peer = NULL;
lrm_state_t *lrm_state = controld_get_executor_state(NULL, false);
if (!lrm_state) {
crm_err("Could not get executor state for local node");
return NULL;
}
peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any);
CRM_CHECK(peer != NULL, return NULL);
xml_state = create_node_state_update(peer,
controld_node_update_cluster
|controld_node_update_peer,
NULL, __func__);
if (xml_state == NULL) {
return NULL;
}
xml_data = pcmk__xe_create(xml_state, PCMK__XE_LRM);
crm_xml_add(xml_data, PCMK_XA_ID, peer->xml_id);
rsc_list = pcmk__xe_create(xml_data, PCMK__XE_LRM_RESOURCES);
// Build a list of active (not necessarily running) resources
build_active_RAs(lrm_state, rsc_list);
crm_log_xml_trace(xml_state, "Current executor state");
return xml_state;
}
/*!
* \internal
* \brief Map standard Pacemaker return code to operation status and OCF code
*
* \param[out] event Executor event whose status and return code should be set
* \param[in] rc Standard Pacemaker return code
*/
void
controld_rc2event(lrmd_event_data_t *event, int rc)
{
/* This is called for cleanup requests from controller peers/clients, not
* for resource actions, so no exit reason is needed.
*/
switch (rc) {
case pcmk_rc_ok:
lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
break;
case EACCES:
lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV,
PCMK_EXEC_ERROR, NULL);
break;
default:
lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
NULL);
break;
}
}
/*!
* \internal
* \brief Trigger a new transition after CIB status was deleted
*
* If a CIB status delete was not expected (as part of the transition graph),
* trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
* cluster property.
*
* \param[in] from_sys IPC name that requested the delete
* \param[in] rsc_id Resource whose status was deleted (for logging only)
*/
void
controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
{
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
cib__update_node_attr(controld_globals.logger_out,
controld_globals.cib_conn, cib_none,
PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL,
"last-lrm-refresh", now_s, NULL, NULL);
free(now_s);
}
}
static void
notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
{
lrmd_event_data_t *op = NULL;
const char *from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
const char *from_host = crm_element_value(input->msg, PCMK__XA_SRC);
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "localhost"), rsc_id,
((rc == pcmk_ok)? "" : " not"));
op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE);
controld_rc2event(op, pcmk_legacy2rc(rc));
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
static gboolean
lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
{
struct delete_event_s *event = user_data;
struct pending_deletion_op_s *op = value;
if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) {
notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
return TRUE;
}
return FALSE;
}
static gboolean
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
{
const char *rsc = user_data;
active_op_t *pending = value;
if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) {
crm_info("Removing op %s:%d for deleted resource %s",
pending->op_key, pending->call_id, rsc);
return TRUE;
}
return FALSE;
}
static void
delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input,
const char *rsc_id, GHashTableIter *rsc_iter, int rc,
const char *user_name, bool from_cib)
{
struct delete_event_s event;
CRM_CHECK(rsc_id != NULL, return);
if (rc == pcmk_ok) {
char *rsc_id_copy = pcmk__str_copy(rsc_id);
if (rsc_iter) {
g_hash_table_iter_remove(rsc_iter);
} else {
g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
}
if (from_cib) {
controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
user_name, crmd_cib_smart_opt());
}
g_hash_table_foreach_remove(lrm_state->active_ops,
lrm_remove_deleted_op, rsc_id_copy);
free(rsc_id_copy);
}
if (input) {
notify_deleted(lrm_state, input, rsc_id, rc);
}
event.rc = rc;
event.rsc = rsc_id;
event.lrm_state = lrm_state;
g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
}
static inline gboolean
last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
{
if (entry == NULL) {
return FALSE;
}
if (op == NULL) {
return TRUE;
}
return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei)
&& (interval_ms == entry->failed->interval_ms));
}
/*!
* \internal
* \brief Clear a resource's last failure
*
* Erase a resource's last failure on a particular node from both the
* LRM resource history in the CIB, and the resource history remembered
* for the LRM state.
*
* \param[in] rsc_id Resource name
* \param[in] node_name Node name
* \param[in] operation If specified, only clear if matching this operation
* \param[in] interval_ms If operation is specified, it has this interval
*/
void
lrm_clear_last_failure(const char *rsc_id, const char *node_name,
const char *operation, guint interval_ms)
{
lrm_state_t *lrm_state = controld_get_executor_state(node_name, false);
if (lrm_state == NULL) {
return;
}
if (lrm_state->resource_history != NULL) {
rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
rsc_id);
if (last_failed_matches_op(entry, operation, interval_ms)) {
lrmd_free_event(entry->failed);
entry->failed = NULL;
}
}
}
/* Returns: gboolean - cancellation is in progress */
static gboolean
cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
{
int rc = pcmk_ok;
char *local_key = NULL;
active_op_t *pending = NULL;
CRM_CHECK(op != 0, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
if (key == NULL) {
local_key = make_stop_id(rsc_id, op);
key = local_key;
}
pending = g_hash_table_lookup(lrm_state->active_ops, key);
if (pending) {
if (remove && !pcmk_is_set(pending->flags, active_op_remove)) {
controld_set_active_op_flags(pending, active_op_remove);
crm_debug("Scheduling %s for removal", key);
}
if (pcmk_is_set(pending->flags, active_op_cancelled)) {
crm_debug("Operation %s already cancelled", key);
free(local_key);
return FALSE;
}
controld_set_active_op_flags(pending, active_op_cancelled);
} else {
crm_info("No pending op found for %s", key);
free(local_key);
return FALSE;
}
crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
pending->interval_ms);
if (rc == pcmk_ok) {
crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
free(local_key);
return TRUE;
}
crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
/* The caller is responsible for removing the entry from the active
* operations list, usually by returning TRUE from the worker function
* supplied to g_hash_table_foreach_remove().
*
* Leaving the entry in the active operations list would block the node
* from shutting down.
*/
free(local_key);
return FALSE;
}
struct cancel_data {
gboolean done;
gboolean remove;
const char *key;
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
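/*!
* \internal
* \brief Cancel an active operation matching a given key (hash table helper)
*
* \param[in] key Stop ID of active operation
* \param[in] value Active operation (active_op_t)
* \param[in,out] user_data Cancellation data (struct cancel_data)
*
* \return TRUE if operation should be removed from the active-ops table
*/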
static gboolean
cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct cancel_data *data = user_data;
active_op_t *op = value;
if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) {
data->done = TRUE;
remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
}
return remove;
}
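/*!
* \internal
* \brief Cancel any active operation for a resource with a given operation key
*
* \param[in,out] lrm_state Executor state containing the operations
* \param[in] rsc Resource that operations are for
* \param[in] key Operation key to match
* \param[in] remove If TRUE, mark matching operations for removal
*
* \return TRUE if any matching operation was found, otherwise FALSE
*/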
static gboolean
cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
{
guint removed = 0;
struct cancel_data data;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(key != NULL, return FALSE);
data.key = key;
data.rsc = rsc;
data.done = FALSE;
data.remove = remove;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->active_ops,
cancel_action_by_key, &data);
crm_trace("Removed %u op cache entries, new size: %u",
removed, g_hash_table_size(lrm_state->active_ops));
return data.done;
}
/*!
* \internal
* \brief Retrieve resource information from LRM
*
* \param[in,out] lrm_state Executor connection state to use
* \param[in] rsc_xml XML containing resource configuration
* \param[in] do_create If true, register resource if not already registered
* \param[out] rsc_info Where to store information obtained from executor
*
* \retval pcmk_ok Success (and rsc_info holds newly allocated result)
* \retval -EINVAL Required information is missing from arguments
* \retval -ENOTCONN No active connection to LRM
* \retval -ENODEV Resource not found
* \retval -errno Error communicating with executor when registering resource
*
* \note Caller is responsible for freeing result on success.
*/
static int
get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
gboolean do_create, lrmd_rsc_info_t **rsc_info)
{
const char *id = pcmk__xe_id(rsc_xml);
CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
CRM_CHECK(id, return -EINVAL);
if (lrm_state_is_connected(lrm_state) == FALSE) {
return -ENOTCONN;
}
crm_trace("Retrieving resource information for %s from the executor", id);
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
// If resource isn't known by ID, try clone name, if provided
if (!*rsc_info) {
const char *long_id = crm_element_value(rsc_xml, PCMK__XA_LONG_ID);
if (long_id) {
*rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
}
}
if ((*rsc_info == NULL) && do_create) {
const char *class = crm_element_value(rsc_xml, PCMK_XA_CLASS);
const char *provider = crm_element_value(rsc_xml, PCMK_XA_PROVIDER);
const char *type = crm_element_value(rsc_xml, PCMK_XA_TYPE);
int rc;
crm_trace("Registering resource %s with the executor", id);
rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
lrmd_opt_drop_recurring);
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Could not register resource %s with the executor on %s: %s "
QB_XS " rc=%d",
id, lrm_state->node_name, pcmk_strerror(rc), rc);
/* Register this as an internal error if this involves the local
* executor. Otherwise, we're likely dealing with an unresponsive
* remote node, which is not an FSA failure.
*/
if (lrm_state_is_local(lrm_state) == TRUE) {
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
return rc;
}
*rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
}
return *rsc_info? pcmk_ok : -ENODEV;
}
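/*!
* \internal
* \brief Remove a resource from the executor and forget its history
*
* \param[in,out] lrm_state Executor state containing the resource
* \param[in] id ID of resource to remove
* \param[in] rsc Resource information (NULL to skip unregistering)
* \param[in,out] iter If not NULL, iterator positioned at history entry
* \param[in] sys IPC name of requester (for logging)
* \param[in] user ACL user making the request (if any)
* \param[in,out] request Request to reply to (if any)
* \param[in] unregister If true, unregister the resource with the executor
* \param[in] from_cib If true, also erase resource history from the CIB
*/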
static void
delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
GHashTableIter *iter, const char *sys, const char *user,
ha_msg_input_t *request, bool unregister, bool from_cib)
{
int rc = pcmk_ok;
crm_info("Removing resource %s from executor for %s%s%s",
id, sys, (user? " as " : ""), (user? user : ""));
if (rsc && unregister) {
rc = lrm_state_unregister_rsc(lrm_state, id, 0);
}
if (rc == pcmk_ok) {
crm_trace("Resource %s deleted from executor", id);
} else if (rc == -EINPROGRESS) {
crm_info("Deletion of resource '%s' from executor is pending", id);
if (request) {
struct pending_deletion_op_s *op = NULL;
char *ref = crm_element_value_copy(request->msg, PCMK_XA_REFERENCE);
op = pcmk__assert_alloc(1, sizeof(struct pending_deletion_op_s));
op->rsc = pcmk__str_copy(rsc->id);
op->input = copy_ha_msg_input(request);
g_hash_table_insert(lrm_state->deletion_ops, ref, op);
}
return;
} else {
crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
QB_XS " rc=%d", id, sys, (user? " as " : ""),
(user? user : ""), pcmk_strerror(rc), rc);
}
delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib);
}
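/*!
* \internal
* \brief Choose a call ID for a synthesized operation result
*
* \param[in] lrm_state Executor state with resource history (if any)
* \param[in] rsc_id ID of resource the result is for
*
* \return Call ID greater than the resource's last recorded call ID (or an
*         arbitrarily high value if the resource has no recorded history)
*/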
static int
get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
{
int call_id = 999999999;
rsc_history_t *entry = NULL;
if(lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* Make sure the call ID is greater than that of the last successful
* operation; otherwise the failure could appear to predate the successful
* start and would not trigger recovery of the resource. */
if (entry) {
call_id = entry->last_callid + 1;
}
if (call_id < 0) {
call_id = 1;
}
return call_id;
}
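/*!
* \internal
* \brief Fill in a synthesized result for an executor event
*
* \param[in] lrm_state Executor state (for choosing a call ID)
* \param[in,out] op Executor event to update (run time is set to now)
* \param[in] op_status Execution status to set
* \param[in] op_exitcode Exit code to set
* \param[in] exit_reason Exit reason to set (if any)
*/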
static void
fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
enum ocf_exitcode op_exitcode, const char *exit_reason)
{
op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
lrmd__set_result(op, op_exitcode, op_status, exit_reason);
}
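/*!
* \internal
* \brief Clear a node's entire resource history to force re-detection
*
* \param[in,out] lrm_state Executor state for node to reprobe
* \param[in] from_sys IPC name of requester
* \param[in] from_host Host of requester
* \param[in] user_name ACL user making the request (if any)
* \param[in] is_remote_node Whether node is a Pacemaker Remote node
* \param[in] reprobe_all_nodes Whether to reprobe connected remote nodes too
*/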
static void
force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node, bool reprobe_all_nodes)
{
GHashTableIter gIter;
rsc_history_t *entry = NULL;
crm_info("Clearing resource history on node %s", lrm_state->node_name);
g_hash_table_iter_init(&gIter, lrm_state->resource_history);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
/* Only unregister the resource during a reprobe if it is not a remote
* connection resource; otherwise, unregistering the connection would
* terminate remote node membership. */
bool unregister = true;
if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
unregister = false;
if (reprobe_all_nodes) {
lrm_state_t *remote_lrm_state =
controld_get_executor_state(entry->id, false);
if (remote_lrm_state != NULL) {
/* If reprobing all nodes, be sure to reprobe the remote
* node before clearing its connection resource
*/
force_reprobe(remote_lrm_state, from_sys, from_host,
user_name, TRUE, reprobe_all_nodes);
}
}
}
/* Don't delete from the CIB, since we'll delete the whole node's LRM
* state from the CIB soon
*/
delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
user_name, NULL, unregister, false);
}
/* Now delete the copy in the CIB */
- controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
- cib_none);
+ controld_delete_node_history(lrm_state->node_name, false, cib_none);
}
/*!
* \internal
* \brief Fail a requested action without actually executing it
*
* For an action that can't be executed, process it similarly to an actual
* execution result, with specified error status (except for notify actions,
* which will always be treated as successful).
*
* \param[in,out] lrm_state Executor connection that action is for
* \param[in] action Action XML from request
* \param[in] rc Desired return code to use
* \param[in] op_status Desired operation status to use
* \param[in] exit_reason Human-friendly detail, if error
*/
static void
synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
int op_status, enum ocf_exitcode rc,
const char *exit_reason)
{
lrmd_event_data_t *op = NULL;
const char *operation = crm_element_value(action, PCMK_XA_OPERATION);
const char *target_node = crm_element_value(action, PCMK__META_ON_NODE);
xmlNode *xml_rsc = pcmk__xe_first_child(action, PCMK_XE_PRIMITIVE, NULL,
NULL);
if ((xml_rsc == NULL) || (pcmk__xe_id(xml_rsc) == NULL)) {
/* @TODO Should we do something else, like direct ack? */
crm_info("Can't fake %s failure (%d) on %s without resource configuration",
crm_element_value(action, PCMK__XA_OPERATION_KEY), rc,
target_node);
return;
} else if(operation == NULL) {
/* This probably came from crm_resource -C, nothing to do */
crm_info("Can't fake %s failure (%d) on %s without operation",
pcmk__xe_id(xml_rsc), rc, target_node);
return;
}
op = construct_op(lrm_state, action, pcmk__xe_id(xml_rsc), operation);
if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
// Notifications can't fail
fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
} else {
fake_op_status(lrm_state, op, op_status, rc, exit_reason);
}
crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
// Process the result as if it came from the LRM
process_lrm_event(lrm_state, op, NULL, action);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Get target of an LRM operation (replacing \p NULL with local node
* name)
*
* \param[in] xml LRM operation data XML
*
* \return LRM operation target node name (local node or Pacemaker Remote node)
*/
static const char *
lrm_op_target(const xmlNode *xml)
{
const char *target = NULL;
if (xml) {
target = crm_element_value(xml, PCMK__META_ON_NODE);
}
if (target == NULL) {
target = controld_globals.cluster->priv->node_name;
}
return target;
}
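/*!
* \internal
* \brief Fail a resource by synthesizing a failed monitor result
*
* \param[in] xml Request XML containing the resource to fail
* \param[in,out] lrm_state Executor state for the resource's node
* \param[in] user_name ACL user making the request (if any)
* \param[in] from_host Host the request originated from
* \param[in] from_sys IPC name the request originated from
*/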
static void
fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
const char *from_host, const char *from_sys)
{
lrmd_event_data_t *op = NULL;
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = pcmk__xe_first_child(xml, PCMK_XE_PRIMITIVE, NULL, NULL);
CRM_CHECK(xml_rsc != NULL, return);
/* The executor simply executes operations and reports the results, without
* any concept of success or failure, so to fail a resource, we must fake
* what a failure looks like.
*
* To do this, we create a fake executor operation event for the resource,
* and pass that event to the executor client callback so it will be
* processed as if it came from the executor.
*/
op = construct_op(lrm_state, xml, pcmk__xe_id(xml_rsc), "asyncmon");
free((char*) op->user_data);
op->user_data = NULL;
op->interval_ms = 0;
if (user_name && !pcmk__is_privileged(user_name)) {
crm_err("%s does not have permission to fail %s",
user_name, pcmk__xe_id(xml_rsc));
fake_op_status(lrm_state, op, PCMK_EXEC_ERROR,
PCMK_OCF_INSUFFICIENT_PRIV,
"Unprivileged user cannot fail resources");
controld_ack_event_directly(from_host, from_sys, NULL, op,
pcmk__xe_id(xml_rsc));
lrmd_free_event(op);
return;
}
if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
crm_info("Failing resource %s...", rsc->id);
fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
"Simulated failure");
process_lrm_event(lrm_state, op, NULL, xml);
op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded
lrmd_free_rsc_info(rsc);
} else {
crm_info("Cannot find/create resource in order to fail it...");
crm_log_xml_warn(xml, "bad input");
fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR,
"Cannot fail unknown resource");
}
controld_ack_event_directly(from_host, from_sys, NULL, op,
pcmk__xe_id(xml_rsc));
lrmd_free_event(op);
}
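/*!
* \internal
* \brief Handle a reprobe request
*
* \param[in,out] lrm_state Executor state for node to reprobe
* \param[in,out] msg Request XML (replied to unless from the scheduler)
* \param[in] from_sys IPC name of requester
* \param[in] from_host Host of requester
* \param[in] user_name ACL user making the request (if any)
* \param[in] is_remote_node Whether node is a Pacemaker Remote node
* \param[in] reprobe_all_nodes Whether to reprobe connected remote nodes too
*/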
static void
handle_reprobe_op(lrm_state_t *lrm_state, xmlNode *msg, const char *from_sys,
const char *from_host, const char *user_name,
gboolean is_remote_node, bool reprobe_all_nodes)
{
crm_notice("Forcing the status of all resources to be redetected");
force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node,
reprobe_all_nodes);
if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
xmlNode *reply = pcmk__new_reply(msg, NULL);
crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
pcmk__xml_free(reply);
}
}
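/*!
* \internal
* \brief Cancel an operation at the scheduler's request
*
* \param[in] input Request XML specifying the operation to cancel
* \param[in,out] lrm_state Executor state for the operation's node
* \param[in,out] rsc Resource the operation is for
* \param[in] from_host Host the request originated from
* \param[in] from_sys IPC name the request originated from
*
* \return FALSE if required information was missing from the request,
*         otherwise TRUE
*/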
static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
{
char *op_key = NULL;
char *meta_key = NULL;
int call = 0;
const char *call_id = NULL;
const char *op_task = NULL;
guint interval_ms = 0;
gboolean in_progress = FALSE;
xmlNode *params = pcmk__xe_first_child(input->xml, PCMK__XE_ATTRIBUTES,
NULL, NULL);
CRM_CHECK(params != NULL, return FALSE);
meta_key = crm_meta_name(PCMK_XA_OPERATION);
op_task = crm_element_value(params, meta_key);
free(meta_key);
CRM_CHECK(op_task != NULL, return FALSE);
meta_key = crm_meta_name(PCMK_META_INTERVAL);
if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
free(meta_key);
return FALSE;
}
free(meta_key);
op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
meta_key = crm_meta_name(PCMK__XA_CALL_ID);
call_id = crm_element_value(params, meta_key);
free(meta_key);
crm_debug("Scheduler requested op %s (call=%s) be cancelled",
op_key, (call_id? call_id : "NA"));
pcmk__scan_min_int(call_id, &call, 0);
if (call == 0) {
// Normal case when the scheduler cancels a recurring op
in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
} else {
// Normal case when the scheduler cancels an orphan op
in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
}
// Acknowledge cancellation operation if for a remote connection resource
if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
char *op_id = make_stop_id(rsc->id, call);
if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
crm_info("Nothing known about operation %d for %s", call, op_key);
}
controld_delete_action_history_by_key(rsc->id, lrm_state->node_name,
op_key, call);
send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
from_host, from_sys);
/* Needed at least for cancellation of a remote operation */
if (lrm_state->active_ops != NULL) {
g_hash_table_remove(lrm_state->active_ops, op_id);
}
free(op_id);
}
free(op_key);
return TRUE;
}
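/*!
* \internal
* \brief Handle a request to delete a resource's history
*
* \param[in,out] input Request XML
* \param[in,out] lrm_state Executor state for the resource's node
* \param[in,out] rsc Resource to delete
* \param[in] from_sys IPC name of requester
* \param[in] from_host Host of requester
* \param[in] crm_rsc_delete Whether request came from crm_resource
* \param[in] user_name ACL user making the request (if any)
*/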
static void
do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
bool crm_rsc_delete, const char *user_name)
{
bool unregister = true;
int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
user_name,
cib_dryrun|cib_sync_call);
if (cib_rc != pcmk_rc_ok) {
lrmd_event_data_t *op = NULL;
op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE);
/* These are resource clean-ups, not actions, so no exit reason is
* needed.
*/
lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL);
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
lrmd_free_event(op);
return;
}
if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
unregister = false;
}
delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
user_name, input, unregister, true);
}
// User data for asynchronous metadata execution
struct metadata_cb_data {
lrmd_rsc_info_t *rsc; // Copy of resource information
xmlNode *input_xml; // Copy of FSA input XML
};
static struct metadata_cb_data *
new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
{
struct metadata_cb_data *data = NULL;
data = pcmk__assert_alloc(1, sizeof(struct metadata_cb_data));
data->input_xml = pcmk__xml_copy(NULL, input_xml);
data->rsc = lrmd_copy_rsc_info(rsc);
return data;
}
static void
free_metadata_cb_data(struct metadata_cb_data *data)
{
lrmd_free_rsc_info(data->rsc);
pcmk__xml_free(data->input_xml);
free(data);
}
/*!
* \internal
* \brief Execute an action after metadata has been retrieved
*
* \param[in] pid Ignored
* \param[in] result Result of metadata action
* \param[in] user_data Metadata callback data
*/
static void
metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
{
struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
struct ra_metadata_s *md = NULL;
lrm_state_t *lrm_state =
controld_get_executor_state(lrm_op_target(data->input_xml), false);
if ((lrm_state != NULL) && pcmk__result_ok(result)) {
md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
result->action_stdout);
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
}
free_metadata_cb_data(data);
}
/* A_LRM_INVOKE */
void
do_lrm_invoke(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
lrm_state_t *lrm_state = NULL;
const char *crm_op = NULL;
const char *from_sys = NULL;
const char *from_host = NULL;
const char *operation = NULL;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *user_name = NULL;
const char *target_node = lrm_op_target(input->xml);
gboolean is_remote_node = FALSE;
bool crm_rsc_delete = FALSE;
// Message routed to the local node is targeting a specific, non-local node
is_remote_node = !controld_is_local_node(target_node);
lrm_state = controld_get_executor_state(target_node, false);
if ((lrm_state == NULL) && is_remote_node) {
crm_err("Failing action because local node has never had connection to remote node %s",
target_node);
synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR,
"Local node has no connection to remote");
return;
}
pcmk__assert(lrm_state != NULL);
user_name = pcmk__update_acl_user(input->msg, PCMK__XA_CRM_USER, NULL);
crm_op = crm_element_value(input->msg, PCMK__XA_CRM_TASK);
from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
from_host = crm_element_value(input->msg, PCMK__XA_SRC);
}
if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
crm_rsc_delete = TRUE; // from crm_resource
}
operation = PCMK_ACTION_DELETE;
} else if (input->xml != NULL) {
operation = crm_element_value(input->xml, PCMK_XA_OPERATION);
}
CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return);
crm_trace("'%s' execution request from %s as %s user",
pcmk__s(crm_op, operation),
pcmk__s(from_sys, "unknown subsystem"),
pcmk__s(user_name, "current"));
if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) {
fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
from_sys);
} else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none)
|| pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
const char *raw_target = NULL;
if (input->xml != NULL) {
// For CRM_OP_REPROBE, a NULL target means we're targeting all nodes
raw_target = crm_element_value(input->xml, PCMK__META_ON_NODE);
}
handle_reprobe_op(lrm_state, input->msg, from_sys, from_host, user_name,
is_remote_node, (raw_target == NULL));
} else if (operation != NULL) {
lrmd_rsc_info_t *rsc = NULL;
xmlNode *xml_rsc = pcmk__xe_first_child(input->xml, PCMK_XE_PRIMITIVE,
NULL, NULL);
gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE,
pcmk__str_none);
int rc;
// We can't return anything meaningful without a resource ID
CRM_CHECK((xml_rsc != NULL) && (pcmk__xe_id(xml_rsc) != NULL), return);
rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
if (rc == -ENOTCONN) {
synthesize_lrmd_failure(lrm_state, input->xml,
PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR,
"Not connected to remote executor");
return;
} else if ((rc < 0) && !create_rsc) {
/* Delete of malformed or nonexistent resource
* (deleting something that does not exist is a success)
*/
crm_debug("Not registering resource '%s' for a %s event "
QB_XS " get-rc=%d (%s) transition-key=%s",
pcmk__xe_id(xml_rsc), operation,
rc, pcmk_strerror(rc), pcmk__xe_id(input->xml));
delete_rsc_entry(lrm_state, input, pcmk__xe_id(xml_rsc), NULL,
pcmk_ok, user_name, true);
return;
} else if (rc == -EINVAL) {
// Resource operation on malformed resource
crm_err("Invalid resource definition for %s", pcmk__xe_id(xml_rsc));
crm_log_xml_warn(input->msg, "invalid resource");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
PCMK_OCF_NOT_CONFIGURED, // fatal error
"Invalid resource definition");
return;
} else if (rc < 0) {
// Error communicating with the executor
crm_err("Could not register resource '%s' with executor: %s "
QB_XS " rc=%d",
pcmk__xe_id(xml_rsc), pcmk_strerror(rc), rc);
crm_log_xml_warn(input->msg, "failed registration");
synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
PCMK_OCF_INVALID_PARAM, // hard error
"Could not register resource with executor");
return;
}
if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) {
if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
crm_log_xml_warn(input->xml, "Bad command");
}
} else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE,
pcmk__str_none)) {
do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
crm_rsc_delete, user_name);
} else {
struct ra_metadata_s *md = NULL;
/* Getting metadata from cache is OK except for start actions --
* always refresh from the agent for those, in case the resource
* agent was updated.
*
* @TODO Only refresh metadata for starts if the agent actually
* changed (using something like inotify, or a hash or modification
* time of the agent executable).
*/
if (strcmp(operation, PCMK_ACTION_START) != 0) {
md = controld_get_rsc_metadata(lrm_state, rsc,
controld_metadata_from_cache);
}
if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
operation)) {
/* Most likely, we'll need the agent metadata to record the
* pending operation and the operation result. Get it now rather
* than wait until then, so the metadata action doesn't eat into
* the real action's timeout.
*
* @TODO Metadata is retrieved via direct execution of the
* agent, which has a couple of related issues: the executor
* should execute agents, not the controller; and metadata for
* Pacemaker Remote nodes should be collected on those nodes,
* not locally.
*/
struct metadata_cb_data *data = NULL;
data = new_metadata_cb_data(rsc, input->xml);
crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
rsc->id, rsc->standard,
((rsc->provider == NULL)? "" : ":"),
((rsc->provider == NULL)? "" : rsc->provider),
rsc->type);
(void) lrmd__metadata_async(rsc, metadata_complete,
(void *) data);
} else {
do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
}
}
lrmd_free_rsc_info(rsc);
} else {
crm_err("Invalid execution request: unknown command '%s' (bug?)",
crm_op);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
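/*!
* \internal
* \brief Create a new executor event for a resource action
*
* \param[in] lrm_state Executor state (for cached stop parameters)
* \param[in] rsc_op Action XML to construct event from (if any)
* \param[in] rsc_id ID of resource the event is for
* \param[in] operation Name of action the event is for
*
* \return Newly constructed executor event
* \note The caller is responsible for freeing the result with
*       lrmd_free_event().
*/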
static lrmd_event_data_t *
construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
const char *rsc_id, const char *operation)
{
lrmd_event_data_t *op = NULL;
const char *op_delay = NULL;
const char *op_timeout = NULL;
GHashTable *params = NULL;
xmlNode *primitive = NULL;
const char *class = NULL;
const char *transition = NULL;
pcmk__assert((rsc_id != NULL) && (operation != NULL));
op = lrmd_new_event(rsc_id, operation, 0);
op->type = lrmd_event_exec_complete;
op->timeout = 0;
op->start_delay = 0;
lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
if (rsc_op == NULL) {
CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP,
pcmk__str_casei));
op->user_data = NULL;
/* This is the stop_all_resources() case: by definition there is no DC
* (or it would be shutting us down), so record our own feature set here.
*/
op->params = pcmk__strkey_table(free, free);
pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_trace("Constructed %s op for %s", operation, rsc_id);
return op;
}
params = xml2list(rsc_op);
g_hash_table_remove(params, CRM_META "_" PCMK__META_OP_TARGET_RC);
op_delay = crm_meta_value(params, PCMK_META_START_DELAY);
pcmk__scan_min_int(op_delay, &op->start_delay, 0);
op_timeout = crm_meta_value(params, PCMK_META_TIMEOUT);
pcmk__scan_min_int(op_timeout, &op->timeout, 0);
if (pcmk__guint_from_hash(params, CRM_META "_" PCMK_META_INTERVAL, 0,
&(op->interval_ms)) != pcmk_rc_ok) {
op->interval_ms = 0;
}
/* For a stonith resource's recurring monitor, use pcmk_monitor_timeout
* instead of the meta-attribute timeout, if set */
primitive = pcmk__xe_first_child(rsc_op, PCMK_XE_PRIMITIVE, NULL, NULL);
class = crm_element_value(primitive, PCMK_XA_CLASS);
if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params)
&& pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei)
&& (op->interval_ms > 0)) {
op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout");
if (op_timeout != NULL) {
long long timeout_ms = crm_get_msec(op_timeout);
op->timeout = (int) QB_MIN(timeout_ms, INT_MAX);
}
}
if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) {
op->params = params;
} else {
rsc_history_t *entry = NULL;
if (lrm_state) {
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
}
/* If we do not have stop parameters cached, use
* whatever we are given */
if (!entry || !entry->stop_params) {
op->params = params;
} else {
/* Copy the cached parameter list so that we stop the resource
* with the old attributes, not the new ones */
op->params = pcmk__strkey_table(free, free);
g_hash_table_foreach(params, copy_meta_keys, op->params);
g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
g_hash_table_destroy(params);
params = NULL;
}
}
/* sanity */
if (op->timeout <= 0) {
op->timeout = op->interval_ms;
}
if (op->start_delay < 0) {
op->start_delay = 0;
}
transition = crm_element_value(rsc_op, PCMK__XA_TRANSITION_KEY);
CRM_CHECK(transition != NULL, return op);
op->user_data = pcmk__str_copy(transition);
if (op->interval_ms != 0) {
if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP,
NULL)) {
crm_err("Start and Stop actions cannot have an interval: %u",
op->interval_ms);
op->interval_ms = 0;
}
}
crm_trace("Constructed %s op for %s: interval=%u",
operation, rsc_id, op->interval_ms);
return op;
}
/*!
* \internal
* \brief Send a (synthesized) event result
*
* Reply with a synthesized event result directly, as opposed to going through
* the executor.
*
* \param[in] to_host Host to send result to
* \param[in] to_sys IPC name to send result (NULL for transition engine)
* \param[in] rsc Type information about resource the result is for
* \param[in,out] op Event with result to send
* \param[in] rsc_id ID of resource the result is for
*/
void
controld_ack_event_directly(const char *to_host, const char *to_sys,
const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
const char *rsc_id)
{
xmlNode *reply = NULL;
xmlNode *update, *iter;
pcmk__node_status_t *peer = NULL;
CRM_CHECK(op != NULL, return);
if (op->rsc_id == NULL) {
// op->rsc_id is a (const char *) but lrmd_free_event() frees it
pcmk__assert(rsc_id != NULL);
op->rsc_id = pcmk__str_copy(rsc_id);
}
if (to_sys == NULL) {
to_sys = CRM_SYSTEM_TENGINE;
}
peer = controld_get_local_node_status();
update = create_node_state_update(peer, controld_node_update_none, NULL,
__func__);
iter = pcmk__xe_create(update, PCMK__XE_LRM);
crm_xml_add(iter, PCMK_XA_ID, controld_globals.our_uuid);
iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCES);
iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCE);
crm_xml_add(iter, PCMK_XA_ID, op->rsc_id);
controld_add_resource_history_xml(iter, rsc, op,
controld_globals.cluster->priv->node_name);
/* We don't have the original message ID, so use "direct-ack" (we just need
* something non-NULL for this to create a reply)
*
* @TODO It would be better to use the server, message ID, and task from the
* original request when callers have it available
*/
reply = pcmk__new_message(pcmk_ipc_controld, "direct-ack", CRM_SYSTEM_LRMD,
to_host, to_sys, CRM_OP_INVOKE_LRM, update);
crm_log_xml_trace(update, "[direct ACK]");
crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
op->rsc_id, op->op_type, op->interval_ms, op->user_data,
crm_element_value(reply, PCMK_XA_REFERENCE));
if (relay_message(reply, TRUE) == FALSE) {
crm_log_xml_err(reply, "Unable to route reply");
}
pcmk__xml_free(update);
pcmk__xml_free(reply);
}
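/*!
* \internal
* \brief Check whether all executor connections have stopped all resources
*
* \param[in] cur_state Current FSA state (for logging)
* \param[in] log_level Log level to use for resources not yet stopped
*
* \return TRUE if all resources on all executor connections are stopped,
*         otherwise FALSE
*/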
gboolean
verify_stopped(enum crmd_fsa_state cur_state, int log_level)
{
gboolean res = TRUE;
GList *lrm_state_list = lrm_state_get_list();
GList *state_entry;
for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
lrm_state_t *lrm_state = state_entry->data;
if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
/* Keep iterating through all executor states even after a failure */
res = FALSE;
}
}
controld_set_fsa_input_flags(R_SENT_RSC_STOP);
g_list_free(lrm_state_list);
lrm_state_list = NULL;
return res;
}
struct stop_recurring_action_s {
lrmd_rsc_info_t *rsc;
lrm_state_t *lrm_state;
};
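/*!
* \internal
* \brief Cancel a recurring operation for a given resource (hash table helper)
*
* \param[in] key Stop ID of active operation
* \param[in] value Active operation (active_op_t)
* \param[in,out] user_data Resource and executor state to match against
*
* \return TRUE if operation should be removed from the active-ops table
*/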
static gboolean
stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
struct stop_recurring_action_s *event = user_data;
active_op_t *op = value;
if ((op->interval_ms != 0)
&& pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) {
crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
}
return remove;
}
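/*!
* \internal
* \brief Cancel all recurring operations for a node (hash table helper)
*
* \param[in] key Stop ID of active operation
* \param[in] value Active operation (active_op_t)
* \param[in,out] user_data Executor state for the node
*
* \return TRUE if operation should be removed from the active-ops table
*/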
static gboolean
stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
{
gboolean remove = FALSE;
lrm_state_t *lrm_state = user_data;
active_op_t *op = value;
if (op->interval_ms != 0) {
crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
(const char *) key);
remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
}
return remove;
}
/*!
* \internal
* \brief Check whether recurring actions should be cancelled before an action
*
* \param[in] rsc_id Resource that action is for
* \param[in] action Action being performed
* \param[in] interval_ms Operation interval of \p action (in milliseconds)
*
* \return true if recurring actions should be cancelled, otherwise false
*/
static bool
should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
{
if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
&& (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) {
/* Don't stop monitoring a migrating Pacemaker Remote connection
* resource until the entire migration has completed. We must detect if
* the connection is unexpectedly severed, even during a migration.
*/
return false;
}
// Cancel recurring actions before changing resource state
return (interval_ms == 0)
&& !pcmk__str_any_of(action, PCMK_ACTION_MONITOR,
PCMK_ACTION_NOTIFY, NULL);
}
/*!
* \internal
* \brief Check whether an action should not be performed at this time
*
* \param[in] operation Action to be performed
*
* \return Readable description of why action should not be performed,
* or NULL if it should be performed
*/
static const char *
should_nack_action(const char *action)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
&& pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) {
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
return "Not attempting start due to shutdown in progress";
}
switch (controld_globals.fsa_state) {
case S_NOT_DC:
case S_POLICY_ENGINE: // Recalculating
case S_TRANSITION_ENGINE:
break;
default:
if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) {
return "Controller cannot attempt actions at this time";
}
break;
}
return NULL;
}
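/*!
* \internal
* \brief Initiate a requested resource action via the executor
*
* \param[in,out] lrm_state Executor state to perform the action with
* \param[in,out] rsc Information about the resource to act on
* \param[in,out] msg Action XML from request
* \param[in,out] md Resource agent metadata (if cached)
*/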
static void
do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
struct ra_metadata_s *md)
{
int rc;
int call_id = 0;
char *op_id = NULL;
lrmd_event_data_t *op = NULL;
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
const char *operation = NULL;
const char *nack_reason = NULL;
CRM_CHECK((rsc != NULL) && (msg != NULL), return);
operation = crm_element_value(msg, PCMK_XA_OPERATION);
CRM_CHECK(!pcmk__str_empty(operation), return);
transition = crm_element_value(msg, PCMK__XA_TRANSITION_KEY);
if (pcmk__str_empty(transition)) {
crm_log_xml_err(msg, "Missing transition number");
}
if (lrm_state == NULL) {
// This shouldn't be possible, but provide a failsafe just in case
crm_err("Cannot execute %s of %s: No executor connection "
QB_XS " transition_key=%s",
operation, rsc->id, pcmk__s(transition, ""));
synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
PCMK_OCF_UNKNOWN_ERROR,
"No executor connection");
return;
}
if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
* will schedule reload-agent actions only. In either case, we need
* to map that to whatever the resource agent actually supports.
* Default to the OCF 1.1 name.
*/
if ((md != NULL)
&& pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
operation = PCMK_ACTION_RELOAD;
} else {
operation = PCMK_ACTION_RELOAD_AGENT;
}
}
op = construct_op(lrm_state, msg, rsc->id, operation);
CRM_CHECK(op != NULL, return);
if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) {
guint removed = 0;
struct stop_recurring_action_s data;
data.rsc = rsc;
data.lrm_state = lrm_state;
removed = g_hash_table_foreach_remove(lrm_state->active_ops,
stop_recurring_action_by_rsc,
&data);
if (removed) {
crm_debug("Stopped %u recurring operation%s in preparation for "
PCMK__OP_FMT, removed, pcmk__plural_s(removed),
rsc->id, operation, op->interval_ms);
}
}
nack_reason = should_nack_action(operation);
if (nack_reason != NULL) {
crm_notice("Not requesting local execution of %s operation for %s on %s"
" in state %s: %s",
pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
lrm_state->node_name,
fsa_state2string(controld_globals.fsa_state), nack_reason);
lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
nack_reason);
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
lrmd_free_event(op);
free(op_id);
return;
}
crm_notice("Requesting local execution of %s operation for %s on %s "
QB_XS " transition %s",
pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
lrm_state->node_name, pcmk__s(transition, ""));
controld_record_pending_op(lrm_state->node_name, rsc, op);
op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
if (op->interval_ms > 0) {
/* cancel it so we can then restart it without conflict */
cancel_op_key(lrm_state, rsc, op_id, FALSE);
}
rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type,
op->user_data, op->interval_ms,
op->timeout, op->start_delay,
op->params, &call_id);
if (rc == pcmk_rc_ok) {
/* record all operations so we can wait
* for them to complete during shutdown
*/
char *call_id_s = make_stop_id(rsc->id, call_id);
active_op_t *pending = NULL;
pending = pcmk__assert_alloc(1, sizeof(active_op_t));
crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
pending->call_id = call_id;
pending->interval_ms = op->interval_ms;
pending->op_type = pcmk__str_copy(operation);
pending->op_key = pcmk__str_copy(op_id);
pending->rsc_id = pcmk__str_copy(rsc->id);
pending->start_time = time(NULL);
pending->user_data = pcmk__str_copy(op->user_data);
if (crm_element_value_epoch(msg, PCMK_OPT_SHUTDOWN_LOCK,
&(pending->lock_time)) != pcmk_ok) {
pending->lock_time = 0;
}
g_hash_table_replace(lrm_state->active_ops, call_id_s, pending);
if ((op->interval_ms > 0)
&& (op->start_delay > START_DELAY_THRESHOLD)) {
int target_rc = PCMK_OCF_OK;
crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL);
controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
}
pending->params = op->params;
op->params = NULL;
} else if (lrm_state_is_local(lrm_state)) {
crm_err("Could not initiate %s action for resource %s locally: %s "
QB_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc);
fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
process_lrm_event(lrm_state, op, NULL, NULL);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
} else {
crm_err("Could not initiate %s action for resource %s remotely on %s: "
"%s " QB_XS " rc=%d",
operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
process_lrm_event(lrm_state, op, NULL, NULL);
}
free(op_id);
lrmd_free_event(op);
}
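/*!
* \internal
* \brief Replace escaped newlines ("\n") in a string with real ones
*
* \param[in] string String to unescape
*
* \return Newly allocated copy of \p string with newlines unescaped, or
*         NULL if \p string is NULL
* \note Each two-character escape is replaced by a newline and a space,
*       so the string length is unchanged.
*/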
static char *
unescape_newlines(const char *string)
{
char *pch = NULL;
char *ret = NULL;
static const char *escaped_newline = "\\n";
if (!string) {
return NULL;
}
ret = pcmk__str_copy(string);
pch = strstr(ret, escaped_newline);
while (pch != NULL) {
/* Replace newline escape pattern with actual newline (and a space so we
* don't have to shuffle the rest of the buffer)
*/
pch[0] = '\n';
pch[1] = ' ';
pch = strstr(pch, escaped_newline);
}
return ret;
}
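/*!
* \internal
* \brief Check whether a resource's last recorded failure matches an operation
*
* \param[in] lrm_state Executor state containing resource history
* \param[in] rsc_id ID of resource to check
* \param[in] op_type Action name to match
* \param[in] interval_ms Action interval (in milliseconds) to match
*
* \return TRUE if the resource's last failure matches, otherwise FALSE
*/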
static bool
did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
const char * op_type, guint interval_ms)
{
rsc_history_t *entry = NULL;
CRM_CHECK(lrm_state != NULL, return FALSE);
CRM_CHECK(rsc_id != NULL, return FALSE);
CRM_CHECK(op_type != NULL, return FALSE);
entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
if (entry == NULL || entry->failed == NULL) {
return FALSE;
}
if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
&& pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
&& entry->failed->interval_ms == interval_ms) {
return TRUE;
}
return FALSE;
}
/*!
* \internal
* \brief Log the result of an executor action (actual or synthesized)
*
* \param[in] op Executor action to log result for
* \param[in] op_key Operation key for action
* \param[in] node_name Name of node action was performed on, if known
* \param[in] confirmed Whether to log that graph action was confirmed
*/
static void
log_executor_event(const lrmd_event_data_t *op, const char *op_key,
const char *node_name, gboolean confirmed)
{
int log_level = LOG_ERR;
GString *str = g_string_sized_new(100); // reasonable starting size
pcmk__g_strcat(str,
"Result of ",
pcmk__readable_action(op->op_type, op->interval_ms),
" operation for ", op->rsc_id, NULL);
if (node_name != NULL) {
pcmk__g_strcat(str, " on ", node_name, NULL);
}
switch (op->op_status) {
case PCMK_EXEC_DONE:
log_level = LOG_NOTICE;
pcmk__g_strcat(str, ": ", crm_exit_str((crm_exit_t) op->rc), NULL);
break;
case PCMK_EXEC_TIMEOUT:
pcmk__g_strcat(str,
": ", pcmk_exec_status_str(op->op_status), " after ",
pcmk__readable_interval(op->timeout), NULL);
break;
case PCMK_EXEC_CANCELLED:
log_level = LOG_INFO;
pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
NULL);
break;
default:
pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
NULL);
break;
}
if ((op->exit_reason != NULL)
&& ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) {
pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL);
}
g_string_append(str, " " QB_XS);
g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s",
(confirmed? "" : "un"), op->call_id, op_key);
if (op->op_status == PCMK_EXEC_DONE) {
g_string_append_printf(str, " rc=%d", op->rc);
}
do_crm_log(log_level, "%s", str->str);
g_string_free(str, TRUE);
/* The services library has already logged the output at info or debug
* level, so just raise to notice if it looks like a failure.
*/
if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
op->rsc_id, op->op_type,
op->interval_ms, node_name);
crm_log_output(LOG_NOTICE, prefix, op->output);
free(prefix);
}
}
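/*!
* \internal
* \brief Process the result of an executor operation (actual or synthesized)
*
* \param[in,out] lrm_state Executor state for the event's node (if available)
* \param[in,out] op Executor event to process
* \param[in,out] pending Pending operation that the event completes (if known)
* \param[in] action_xml Action XML for synthesized events (if any)
*/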
void
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
active_op_t *pending, const xmlNode *action_xml)
{
char *op_id = NULL;
char *op_key = NULL;
gboolean remove = FALSE;
gboolean removed = FALSE;
bool need_direct_ack = FALSE;
lrmd_rsc_info_t *rsc = NULL;
const char *node_name = NULL;
CRM_CHECK(op != NULL, return);
CRM_CHECK(op->rsc_id != NULL, return);
// Remap new status codes for older DCs
if (compare_version(controld_globals.dc_version, "3.2.0") < 0) {
switch (op->op_status) {
case PCMK_EXEC_NOT_CONNECTED:
lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED,
PCMK_EXEC_ERROR, op->exit_reason);
break;
case PCMK_EXEC_INVALID:
lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR,
op->exit_reason);
break;
default:
break;
}
}
op_id = make_stop_id(op->rsc_id, op->call_id);
op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
// Get resource info if available (from executor state or action XML)
if (lrm_state) {
rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
}
if ((rsc == NULL) && action_xml) {
xmlNode *xml = pcmk__xe_first_child(action_xml, PCMK_XE_PRIMITIVE, NULL,
NULL);
const char *standard = crm_element_value(xml, PCMK_XA_CLASS);
const char *provider = crm_element_value(xml, PCMK_XA_PROVIDER);
const char *type = crm_element_value(xml, PCMK_XA_TYPE);
if (standard && type) {
crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
op->rsc_id, standard,
(provider? ":" : ""), (provider? provider : ""), type);
rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
} else {
crm_err("Can't process %s result because %s agent information not cached or in XML",
op_key, op->rsc_id);
}
}
// Get node name if available (from executor state or action XML)
if (lrm_state) {
node_name = lrm_state->node_name;
} else if (action_xml) {
node_name = crm_element_value(action_xml, PCMK__META_ON_NODE);
}
if(pending == NULL) {
remove = TRUE;
if (lrm_state) {
pending = g_hash_table_lookup(lrm_state->active_ops, op_id);
}
}
if (op->op_status == PCMK_EXEC_ERROR) {
switch(op->rc) {
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_PROMOTED:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_PROMOTED:
// Leave it to the TE/scheduler to decide if this is an error
op->op_status = PCMK_EXEC_DONE;
break;
default:
/* Nothing to do */
break;
}
}
if (op->op_status != PCMK_EXEC_CANCELLED) {
/* We might not record the result, so directly acknowledge it to the
* originator instead, so it doesn't time out waiting for the result
* (especially important if part of a transition).
*/
need_direct_ack = TRUE;
if (controld_action_is_recordable(op->op_type)) {
if (node_name && rsc) {
// We should record the result, and happily, we can
time_t lock_time = (pending == NULL)? 0 : pending->lock_time;
controld_update_resource_history(node_name, rsc, op, lock_time);
need_direct_ack = FALSE;
} else if (op->rsc_deleted) {
/* We shouldn't record the result (likely the resource was
* refreshed, cleaned, or removed while this operation was
* in flight).
*/
crm_notice("Not recording %s result in CIB because "
"resource information was removed since it was initiated",
op_key);
} else {
/* This shouldn't be possible; the executor didn't consider the
* resource deleted, but we couldn't find resource or node
* information.
*/
crm_err("Unable to record %s result in CIB: %s", op_key,
(node_name? "No resource information" : "No node name"));
}
}
} else if (op->interval_ms == 0) {
/* A non-recurring operation was cancelled. Most likely, the
* never-initiated action was removed from the executor's pending
* operations list upon resource removal.
*/
need_direct_ack = TRUE;
} else if (pending == NULL) {
/* This recurring operation was cancelled, but was not pending. No
* transition actions are waiting on it, nothing needs to be done.
*/
} else if (op->user_data == NULL) {
/* This recurring operation was cancelled and pending, but we don't
* have a transition key. This should never happen.
*/
crm_err("Recurring operation %s was cancelled without transition information",
op_key);
} else if (pcmk_is_set(pending->flags, active_op_remove)) {
/* This recurring operation was cancelled (by us) and pending, and we
* have been waiting for it to finish.
*/
if (lrm_state) {
controld_delete_action_history(op);
}
/* Directly acknowledge failed recurring actions here. The above call to
* controld_delete_action_history() will not erase any corresponding
* last_failure entry, which means that the DC won't confirm the
* cancellation via process_op_deletion(), and the transition would
* otherwise wait for the action timer to pop.
*/
if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
pending->op_type, pending->interval_ms)) {
need_direct_ack = TRUE;
}
} else if (op->rsc_deleted) {
/* This recurring operation was cancelled (but not by us, and the
* executor does not have resource information, likely due to resource
* cleanup, refresh, or removal) and pending.
*/
crm_debug("Recurring op %s was cancelled due to resource deletion",
op_key);
need_direct_ack = TRUE;
} else {
/* This recurring operation was cancelled (but not by us, likely by the
* executor before stopping the resource) and pending. We don't need to
* do anything special.
*/
}
if (need_direct_ack) {
controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
}
if(remove == FALSE) {
/* The caller will do this afterwards, but keep the logging consistent */
removed = TRUE;
} else if (lrm_state && ((op->interval_ms == 0)
|| (op->op_status == PCMK_EXEC_CANCELLED))) {
gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id);
if (op->interval_ms != 0) {
removed = TRUE;
} else if (found) {
removed = TRUE;
crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
op_key, op->call_id, op_id,
g_hash_table_size(lrm_state->active_ops));
}
}
log_executor_event(op, op_key, node_name, removed);
if (lrm_state) {
if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA,
pcmk__str_casei)) {
crmd_alert_resource_op(lrm_state->node_name, op);
} else if (rsc && (op->rc == PCMK_OCF_OK)) {
char *metadata = unescape_newlines(op->output);
controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
free(metadata);
}
}
if (op->rsc_deleted) {
crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
if (lrm_state) {
delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL,
true);
}
}
/* If a shutdown was escalated while operations were pending,
* then the FSA will be stalled right now... allow it to continue
*/
controld_trigger_fsa();
if (lrm_state && rsc) {
update_history_cache(lrm_state, rsc, op);
}
lrmd_free_rsc_info(rsc);
free(op_key);
free(op_id);
}
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 4304ae799e..3d0e017355 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -1,1113 +1,1117 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <pacemaker-controld.h>
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
/*
* stonith failure counting
*
* We don't want to get stuck in a permanent fencing loop. Keep track of the
* number of fencing failures for each target node, and stop restarting the
* transition once a target reaches the maximum number of attempts.
*/
struct st_fail_rec {
int count;
};
#define DEFAULT_STONITH_MAX_ATTEMPTS 10
static bool fence_reaction_panic = false;
static unsigned long int stonith_max_attempts = DEFAULT_STONITH_MAX_ATTEMPTS;
static GHashTable *stonith_failures = NULL;
/*!
* \internal
* \brief Update max fencing attempts before giving up
*
* \param[in] value New max fencing attempts
*/
static void
update_stonith_max_attempts(const char *value)
{
int score = 0;
int rc = pcmk_parse_score(value, &score, DEFAULT_STONITH_MAX_ATTEMPTS);
// The option validator should make invalid values impossible here
CRM_CHECK((rc == pcmk_rc_ok) && (score > 0), return);
if (stonith_max_attempts != score) {
crm_debug("Maximum fencing attempts per transition is now %d (was %lu)",
score, stonith_max_attempts);
}
stonith_max_attempts = score;
}
/*!
* \internal
* \brief Configure reaction to notification of local node being fenced
*
* \param[in] reaction_s Reaction type
*/
static void
set_fence_reaction(const char *reaction_s)
{
if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) {
fence_reaction_panic = true;
} else {
if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) {
crm_warn("Invalid value '%s' for %s, using 'stop'",
reaction_s, PCMK_OPT_FENCE_REACTION);
}
fence_reaction_panic = false;
}
}
/*!
* \internal
* \brief Configure fencing options based on the CIB
*
* \param[in,out] options Name/value pairs for configured options
*/
void
controld_configure_fencing(GHashTable *options)
{
const char *value = NULL;
value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION);
set_fence_reaction(value);
value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS);
update_stonith_max_attempts(value);
}
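/*!
* \internal
* \brief Check whether a fencing target has failed too many times
*
* \param[in] target Name of node to check, or NULL to check all nodes
*
* \return TRUE if \p target (or any node, if \p target is NULL) has reached
*         the maximum number of failed fencing attempts, otherwise FALSE
*/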
static gboolean
too_many_st_failures(const char *target)
{
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *value = NULL;
if (stonith_failures == NULL) {
return FALSE;
}
if (target == NULL) {
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &value)) {
if (value->count >= stonith_max_attempts) {
target = (const char*)key;
goto too_many;
}
}
} else {
value = g_hash_table_lookup(stonith_failures, target);
if ((value != NULL) && (value->count >= stonith_max_attempts)) {
goto too_many;
}
}
return FALSE;
too_many:
crm_warn("Too many failures (%d) to fence %s, giving up",
value->count, target);
return TRUE;
}
/*!
* \internal
* \brief Reset a stonith fail count
*
* \param[in] target Name of node to reset, or NULL for all
*/
void
st_fail_count_reset(const char *target)
{
if (stonith_failures == NULL) {
return;
}
if (target) {
struct st_fail_rec *rec = NULL;
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count = 0;
}
} else {
GHashTableIter iter;
const char *key = NULL;
struct st_fail_rec *rec = NULL;
g_hash_table_iter_init(&iter, stonith_failures);
while (g_hash_table_iter_next(&iter, (gpointer *) &key,
(gpointer *) &rec)) {
rec->count = 0;
}
}
}
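/*!
* \internal
* \brief Increment a fencing target's failure count
*
* \param[in] target Name of node to increment failure count for
*/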
static void
st_fail_count_increment(const char *target)
{
struct st_fail_rec *rec = NULL;
if (stonith_failures == NULL) {
stonith_failures = pcmk__strkey_table(free, free);
}
rec = g_hash_table_lookup(stonith_failures, target);
if (rec) {
rec->count++;
} else {
rec = malloc(sizeof(struct st_fail_rec));
if(rec == NULL) {
return;
}
rec->count = 1;
g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec);
}
}
/* end stonith fail count functions */
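/*!
* \internal
* \brief Log the result of a CIB update after fencing, aborting the
*        transition on failure (CIB callback)
*
* \param[in] msg CIB update request (logged on failure)
* \param[in] call_id CIB call ID
* \param[in] rc Result of CIB update
* \param[in] output Ignored
* \param[in] user_data Name of fenced node
*/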
static void
cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output,
void *user_data)
{
if (rc < pcmk_ok) {
crm_err("Fencing update %d for %s: failed - %s (%d)",
call_id, (char *)user_data, pcmk_strerror(rc), rc);
crm_log_xml_warn(msg, "Failed update");
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown,
"CIB update failed", NULL);
} else {
crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
}
}
/*!
* \internal
* \brief Update a fencing target's node state
*
* \param[in] target Node that was successfully fenced
* \param[in] target_xml_id CIB XML ID of target
*/
static void
update_node_state_after_fencing(const char *target, const char *target_xml_id)
{
int rc = pcmk_ok;
pcmk__node_status_t *peer = NULL;
xmlNode *node_state = NULL;
/* We (usually) rely on the membership layer to do
* controld_node_update_cluster, and the peer status callback to do
* controld_node_update_peer, because the node might have already rejoined
* before we get the stonith result here.
*/
uint32_t flags = controld_node_update_join|controld_node_update_expected;
CRM_CHECK((target != NULL) && (target_xml_id != NULL), return);
// Ensure target is cached
peer = pcmk__get_node(0, target, target_xml_id, pcmk__node_search_any);
CRM_CHECK(peer != NULL, return);
if (peer->state == NULL) {
/* Usually, we rely on the membership layer to update the cluster state
* in the CIB. However, if the node has never been seen, do it here, so
* the node is not considered unclean.
*/
flags |= controld_node_update_cluster;
}
if (peer->xml_id == NULL) {
crm_info("Recording XML ID '%s' for node '%s'", target_xml_id, target);
peer->xml_id = pcmk__str_copy(target_xml_id);
}
crmd_peer_down(peer, TRUE);
node_state = create_node_state_update(peer, flags, NULL, __func__);
crm_xml_add(node_state, PCMK_XA_ID, target_xml_id);
if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
char *now_s = pcmk__ttoa(time(NULL));
crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s);
free(now_s);
}
rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn,
PCMK_XE_STATUS, node_state,
cib_can_create);
pcmk__xml_free(node_state);
crm_debug("Updating node state for %s after fencing (call %d)", target, rc);
fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
- controld_delete_node_state(peer->name, controld_section_all, cib_none);
+ // Delete node's resource history from CIB
+ controld_delete_node_history(peer->name, false, cib_none);
+
+ // Ask attribute manager to delete node's transient attributes
+ controld_purge_node_attrs(peer->name, false);
}
/*!
* \internal
* \brief Abort transition due to stonith failure
*
* \param[in] abort_action Whether to restart or stop transition
* \param[in] target Don't restart if this (NULL for any) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
static void
abort_for_stonith_failure(enum pcmk__graph_next abort_action,
const char *target, const xmlNode *reason)
{
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) {
abort_action = pcmk__graph_wait;
}
abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed",
reason);
}
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GList *stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list,
pcmk__str_copy(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] target Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GList *iter = stonith_cleanup_list;
while (iter != NULL) {
GList *tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup(void)
{
if (stonith_cleanup_list) {
GList *iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup(void)
{
GList *iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
pcmk__node_status_t *target_node =
pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member);
const char *uuid = pcmk__cluster_get_xml_id(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
update_node_state_after_fencing(target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
/* stonith API client
*
* Functions that need to interact directly with the fencer via its API
*/
static stonith_t *stonith_api = NULL;
static mainloop_timer_t *controld_fencer_connect_timer = NULL;
static char *te_client_id = NULL;
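/*!
* \internal
* \brief Fail all unconfirmed fencing actions in a transition graph
*
* \param[in,out] graph Transition graph to check
*
* \return TRUE if any action was failed (and the transition aborted),
*         otherwise FALSE
*/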
static gboolean
fail_incompletable_stonith(pcmk__graph_t *graph)
{
GList *lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GList *lpc2 = NULL;
pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data;
if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data;
if ((action->type != pcmk__cluster_graph_action)
|| pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
continue;
}
task = crm_element_value(action->xml, PCMK_XA_OPERATION);
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
last_action = action->xml;
pcmk__update_graph(graph, action);
crm_notice("Failing action %d (%s): fencer terminated",
action->id, pcmk__xe_id(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("Fencer failure resulted in unrunnable actions");
abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
{
te_cleanup_stonith_history_sync(st, FALSE);
if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) {
crm_err("Lost fencer connection (will attempt to reconnect)");
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
} else {
crm_info("Disconnected from fencer");
}
if (stonith_api) {
/* The client API won't properly re-establish notifications
* if they are still in the table, so remove them
*/
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(st);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (AM_I_DC) {
fail_incompletable_stonith(controld_globals.transition_graph);
trigger_graph();
}
}
/*!
* \internal
* \brief Handle an event notification from the fencing API
*
* \param[in] st Fencing API connection (ignored)
* \param[in] event Fencing API event notification
*/
static void
handle_fence_notification(stonith_t *st, stonith_event_t *event)
{
bool succeeded = true;
const char *executioner = "the cluster";
const char *client = "a client";
const char *reason = NULL;
int exec_status;
if (te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
(unsigned long) getpid());
}
if (event == NULL) {
crm_err("Notify data not found");
return;
}
if (event->executioner != NULL) {
executioner = event->executioner;
}
if (event->client_origin != NULL) {
client = event->client_origin;
}
exec_status = stonith__event_execution_status(event);
if ((stonith__event_exit_status(event) != CRM_EX_OK)
|| (exec_status != PCMK_EXEC_DONE)) {
succeeded = false;
if (exec_status == PCMK_EXEC_DONE) {
exec_status = PCMK_EXEC_ERROR;
}
}
reason = stonith__event_exit_reason(event);
crmd_alert_fencing_op(event);
if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) {
// Unfencing doesn't need special handling, just a log message
if (succeeded) {
crm_notice("%s was unfenced by %s at the request of %s@%s",
event->target, executioner, client, event->origin);
} else {
crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
event->target, executioner,
pcmk_exec_status_str(exec_status),
((reason == NULL)? "" : ": "),
((reason == NULL)? "" : reason),
stonith__event_exit_status(event));
}
return;
}
if (succeeded && controld_is_local_node(event->target)) {
/* We were notified of our own fencing. Most likely, either fencing was
* misconfigured, or fabric fencing that doesn't cut cluster
* communication is in use.
*
* Either way, shutting down the local host is a good idea, to require
* administrator intervention. Also, other nodes would otherwise likely
* set our status to lost because of the fencing callback and discard
* our subsequent election votes as "not part of our cluster".
*/
crm_crit("We were allegedly just fenced by %s for %s!",
executioner, event->origin); // Dumps blackbox if enabled
if (fence_reaction_panic) {
pcmk__panic("Notified of own fencing");
} else {
crm_exit(CRM_EX_FATAL);
}
return; // Should never get here
}
/* Update the count of fencing failures for this target, in case we become
* DC later. The current DC has already updated its fail count in
* tengine_stonith_callback().
*/
if (!AM_I_DC) {
if (succeeded) {
st_fail_count_reset(event->target);
} else {
st_fail_count_increment(event->target);
}
}
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
"%s%s%s%s " QB_XS " event=%s",
event->target, (succeeded? "" : " not"),
event->action, executioner, client, event->origin,
(succeeded? "OK" : pcmk_exec_status_str(exec_status)),
((reason == NULL)? "" : " ("),
((reason == NULL)? "" : reason),
((reason == NULL)? "" : ")"),
event->id);
if (succeeded) {
const uint32_t flags = pcmk__node_search_any
|pcmk__node_search_cluster_cib;
pcmk__node_status_t *peer = pcmk__search_node_caches(0, event->target,
NULL, flags);
const char *uuid = NULL;
if (peer == NULL) {
return;
}
uuid = pcmk__cluster_get_xml_id(peer);
if (AM_I_DC) {
/* The DC always sends updates */
update_node_state_after_fencing(event->target, uuid);
/* @TODO Ideally, at this point, we'd check whether the fenced node
* hosted any guest nodes, and call remote_node_down() for them.
* Unfortunately, the controller doesn't have a simple, reliable way
* to map hosts to guests. It might be possible to track this in the
* peer cache via refresh_remote_nodes(). For now, we rely on the
* scheduler creating fence pseudo-events for the guests.
*/
if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
/* Abort the current transition if it wasn't the cluster that
* initiated fencing.
*/
crm_info("External fencing operation from %s fenced %s",
client, event->target);
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
"External Fencing Operation", NULL);
}
} else if (pcmk__str_eq(controld_globals.dc_name, event->target,
pcmk__str_null_matches|pcmk__str_casei)
&& !pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
// Assume the target was our DC if we don't currently have one
if (controld_globals.dc_name != NULL) {
crm_notice("Fencing target %s was our DC", event->target);
} else {
crm_notice("Fencing target %s may have been our DC",
event->target);
}
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (controld_is_local_node(event->executioner)) {
update_node_state_after_fencing(event->target, uuid);
}
add_stonith_cleanup(event->target);
}
/* If the target is a remote node, and we host its connection,
* immediately fail all monitors so it can be recovered quickly.
* The connection won't necessarily drop when a remote node is fenced,
* so the failure might not otherwise be detected until the next poke.
*/
if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) {
remote_ra_fail(event->target);
}
crmd_peer_down(peer, TRUE);
}
}
/*!
* \brief Connect to fencer
*
* \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer
*
* \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry
* \note If user_data is NULL, this will wait 2s between attempts, for up to
* 30 attempts, meaning the controller could be blocked as long as 58s.
*/
gboolean
controld_timer_fencer_connect(gpointer user_data)
{
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
if (stonith_api == NULL) {
crm_err("Could not connect to fencer: API memory allocation failed");
return G_SOURCE_REMOVE;
}
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Already connected to fencer, no need to retry");
return G_SOURCE_REMOVE;
}
if (user_data == NULL) {
// Blocking (retry failures now until successful)
rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
if (rc != pcmk_ok) {
crm_err("Could not connect to fencer in 30 attempts: %s "
QB_XS " rc=%d", pcmk_strerror(rc), rc);
}
} else {
// Non-blocking (retry failures later in main loop)
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (controld_fencer_connect_timer == NULL) {
controld_fencer_connect_timer =
mainloop_timer_add("controld_fencer_connect", 1000,
TRUE, controld_timer_fencer_connect,
GINT_TO_POINTER(TRUE));
}
if (rc != pcmk_ok) {
if (pcmk_is_set(controld_globals.fsa_input_register,
R_ST_REQUIRED)) {
crm_notice("Fencer connection failed (will retry): %s "
QB_XS " rc=%d", pcmk_strerror(rc), rc);
if (!mainloop_timer_running(controld_fencer_connect_timer)) {
mainloop_timer_start(controld_fencer_connect_timer);
}
return G_SOURCE_CONTINUE;
} else {
crm_info("Fencer connection failed (ignoring because no longer required): %s "
QB_XS " rc=%d", pcmk_strerror(rc), rc);
}
return G_SOURCE_REMOVE;
}
}
if (rc == pcmk_ok) {
stonith_api_operations_t *cmds = stonith_api->cmds;
cmds->register_notification(stonith_api,
PCMK__VALUE_ST_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE,
handle_fence_notification);
cmds->register_notification(stonith_api,
PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
tengine_stonith_history_synced);
te_trigger_stonith_history_sync(TRUE);
crm_notice("Fencer successfully connected");
}
return G_SOURCE_REMOVE;
}
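/* Illustration only (not part of the original file): the two invocation modes
* of the callback above, per its doc comment.
*
*     controld_timer_fencer_connect(NULL);                  // blocking: retry now,
*                                                           // up to 30 attempts
*     controld_timer_fencer_connect(GINT_TO_POINTER(TRUE)); // non-blocking: retry
*                                                           // via 1s mainloop timer
*/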
void
controld_disconnect_fencer(bool destroy)
{
if (stonith_api) {
// Prevent fencer connection from coming up again
controld_clear_fsa_input_flags(R_ST_REQUIRED);
if (stonith_api->state != stonith_disconnected) {
stonith_api->cmds->disconnect(stonith_api);
}
stonith_api->cmds->remove_notification(stonith_api, NULL);
}
if (destroy) {
if (stonith_api) {
stonith_api->cmds->free(stonith_api);
stonith_api = NULL;
}
if (controld_fencer_connect_timer) {
mainloop_timer_del(controld_fencer_connect_timer);
controld_fencer_connect_timer = NULL;
}
if (te_client_id) {
free(te_client_id);
te_client_id = NULL;
}
}
}
static gboolean
do_stonith_history_sync(gpointer user_data)
{
if (stonith_api && (stonith_api->state != stonith_disconnected)) {
stonith_history_t *history = NULL;
te_cleanup_stonith_history_sync(stonith_api, FALSE);
stonith_api->cmds->history(stonith_api,
st_opt_sync_call | st_opt_broadcast,
NULL, &history, 5);
stonith_history_free(history);
return TRUE;
} else {
crm_info("Skip triggering stonith history-sync as stonith is disconnected");
return FALSE;
}
}
static void
tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
{
char *uuid = NULL;
int stonith_id = -1;
int transition_id = -1;
pcmk__graph_action_t *action = NULL;
const char *target = NULL;
if ((data == NULL) || (data->userdata == NULL)) {
crm_err("Ignoring fence operation %d result: "
"No transition key given (bug?)",
((data == NULL)? -1 : data->call_id));
return;
}
if (!AM_I_DC) {
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
reason = pcmk_exec_status_str(stonith__execution_status(data));
}
crm_notice("Result of fence operation %d: %d (%s) " QB_XS " key=%s",
data->call_id, stonith__exit_status(data), reason,
(const char *) data->userdata);
return;
}
CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
&stonith_id, NULL),
goto bail);
if (controld_globals.transition_graph->complete || (stonith_id < 0)
|| !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none)
|| (controld_globals.transition_graph->id != transition_id)) {
crm_info("Ignoring fence operation %d result: "
"Not from current transition " QB_XS
" complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
data->call_id,
pcmk__btoa(controld_globals.transition_graph->complete),
stonith_id, uuid, controld_globals.te_uuid, transition_id,
controld_globals.transition_graph->id);
goto bail;
}
action = controld_get_action(stonith_id);
if (action == NULL) {
crm_err("Ignoring fence operation %d result: "
"Action %d not found in transition graph (bug?) "
QB_XS " uuid=%s transition=%d",
data->call_id, stonith_id, uuid, transition_id);
goto bail;
}
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
if (target == NULL) {
crm_err("Ignoring fence operation %d result: No target given (bug?)",
data->call_id);
goto bail;
}
stop_te_timer(action);
if (stonith__exit_status(data) == CRM_EX_OK) {
const char *uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
const char *op = crm_meta_value(action->params,
PCMK__META_STONITH_ACTION);
crm_info("Fence operation %d for %s succeeded", data->call_id, target);
if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
te_action_confirmed(action, NULL);
if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) {
const char *value = NULL;
char *now = pcmk__ttoa(time(NULL));
gboolean is_remote_node = FALSE;
/* This check is not 100% reliable, since this node is not
* guaranteed to have the remote node cached. However, it
* doesn't have to be reliable, since the attribute manager can
* learn a node's "remoteness" by other means sooner or later.
* This allows it to learn more quickly if this node does have
* the information.
*/
if (g_hash_table_lookup(pcmk__remote_peer_cache,
uuid) != NULL) {
is_remote_node = TRUE;
}
update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
is_remote_node);
free(now);
value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL);
update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
is_remote_node);
value = crm_meta_value(action->params,
PCMK__META_DIGESTS_SECURE);
update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
is_remote_node);
} else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) {
update_node_state_after_fencing(target, uuid);
pcmk__set_graph_action_flags(action,
pcmk__graph_action_sent_update);
}
}
st_fail_count_reset(target);
} else {
enum pcmk__graph_next abort_action = pcmk__graph_restart;
int status = stonith__execution_status(data);
const char *reason = stonith__exit_reason(data);
if (reason == NULL) {
if (status == PCMK_EXEC_DONE) {
reason = "Agent returned error";
} else {
reason = pcmk_exec_status_str(status);
}
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
/* If no fence devices were available, there's no use in immediately
* checking again, so don't start a new transition in that case.
*/
if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
crm_warn("Fence operation %d for %s failed: %s "
"(aborting transition and giving up for now)",
data->call_id, target, reason);
abort_action = pcmk__graph_wait;
} else {
crm_notice("Fence operation %d for %s failed: %s "
"(aborting transition)", data->call_id, target, reason);
}
/* Increment the fail count now, so abort_for_stonith_failure() can
* check it. Non-DC nodes will increment it in
* handle_fence_notification().
*/
st_fail_count_increment(target);
abort_for_stonith_failure(abort_action, target, NULL);
}
pcmk__update_graph(controld_globals.transition_graph, action);
trigger_graph();
bail:
free(data->userdata);
free(uuid);
return;
}
static int
fence_with_delay(const char *target, const char *type, int delay)
{
uint32_t options = st_opt_none; // Group of enum stonith_call_options
int timeout_sec = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);
if (crmd_join_phase_count(controld_join_confirmed) == 1) {
stonith__set_call_options(options, target, st_opt_allow_self_fencing);
}
return stonith_api->cmds->fence_with_delay(stonith_api, options, target,
type, timeout_sec, 0, delay);
}
/*!
* \internal
* \brief Execute a fencing action from a transition graph
*
* \param[in] graph Transition graph being executed (ignored)
* \param[in] action Fencing action to execute
*
* \return Standard Pacemaker return code
*/
int
controld_execute_fence_action(pcmk__graph_t *graph,
pcmk__graph_action_t *action)
{
int rc = 0;
const char *id = pcmk__xe_id(action->xml);
const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *type = crm_meta_value(action->params,
PCMK__META_STONITH_ACTION);
char *transition_key = NULL;
const char *priority_delay = NULL;
int delay_i = 0;
gboolean invalid_action = FALSE;
int stonith_timeout = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout);
CRM_CHECK(id != NULL, invalid_action = TRUE);
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
CRM_CHECK(type != NULL, invalid_action = TRUE);
CRM_CHECK(target != NULL, invalid_action = TRUE);
if (invalid_action) {
crm_log_xml_warn(action->xml, "BadAction");
return EPROTO;
}
priority_delay = crm_meta_value(action->params,
PCMK_OPT_PRIORITY_FENCING_DELAY);
crm_notice("Requesting fencing (%s) targeting node %s "
QB_XS " action=%s timeout=%i%s%s",
type, target, id, stonith_timeout,
priority_delay ? " priority_delay=" : "",
priority_delay ? priority_delay : "");
/* Passing NULL means block until we can connect... */
controld_timer_fencer_connect(NULL);
pcmk__scan_min_int(priority_delay, &delay_i, 0);
rc = fence_with_delay(target, type, delay_i);
transition_key = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, 0,
controld_globals.te_uuid),
stonith_api->cmds->register_callback(stonith_api, rc,
(stonith_timeout
+ (delay_i > 0 ? delay_i : 0)),
st_opt_timeout_updates, transition_key,
"tengine_stonith_callback",
tengine_stonith_callback);
return pcmk_rc_ok;
}
bool
controld_verify_stonith_watchdog_timeout(const char *value)
{
long long st_timeout = (value != NULL)? crm_get_msec(value) : 0;
const char *our_nodename = controld_globals.cluster->priv->node_name;
if (st_timeout == 0
|| (stonith_api && (stonith_api->state != stonith_disconnected) &&
stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
our_nodename))) {
return pcmk__valid_stonith_watchdog_timeout(value);
}
return true;
}
/* end stonith API client functions */
/*
* stonith history synchronization
*
* Each node's fencer keeps track of a cluster-wide fencing history. When a node
* joins or leaves, we need to synchronize the history across all nodes.
*/
static crm_trigger_t *stonith_history_sync_trigger = NULL;
static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
void
te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
{
if (free_timers) {
mainloop_timer_del(stonith_history_sync_timer_short);
stonith_history_sync_timer_short = NULL;
mainloop_timer_del(stonith_history_sync_timer_long);
stonith_history_sync_timer_long = NULL;
} else {
mainloop_timer_stop(stonith_history_sync_timer_short);
mainloop_timer_stop(stonith_history_sync_timer_long);
}
if (st) {
st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED);
}
}
static void
tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
{
te_cleanup_stonith_history_sync(st, FALSE);
crm_debug("Fence-history synced - cancel all timers");
}
static gboolean
stonith_history_sync_set_trigger(gpointer user_data)
{
mainloop_set_trigger(stonith_history_sync_trigger);
return FALSE;
}
void
te_trigger_stonith_history_sync(bool long_timeout)
{
/* Trigger a sync in 5s, to give more nodes a chance to show up so that
* we don't create unnecessary stonith-history-sync traffic.
*
* The long timeout of 30s is a fallback: after a successful connection
* to the fencer, we wait up to 30s for the DC to trigger a history sync.
* If that doesn't happen (e.g. the fencer segfaulted and was restarted
* by pacemakerd), we trigger a sync locally.
*/
/* Since do_stonith_history_sync() checks the fencer connection anyway,
* we are fine leaving the sync timers and stonith_history_sync_trigger
* around.
*/
if (stonith_history_sync_trigger == NULL) {
stonith_history_sync_trigger =
mainloop_add_trigger(G_PRIORITY_LOW,
do_stonith_history_sync, NULL);
}
if (long_timeout) {
if (stonith_history_sync_timer_long == NULL) {
stonith_history_sync_timer_long =
mainloop_timer_add("history_sync_long", 30000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
mainloop_timer_start(stonith_history_sync_timer_long);
} else {
if (stonith_history_sync_timer_short == NULL) {
stonith_history_sync_timer_short =
mainloop_timer_add("history_sync_short", 5000,
FALSE, stonith_history_sync_set_trigger,
NULL);
}
crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
mainloop_timer_start(stonith_history_sync_timer_short);
}
}
/* end stonith history synchronization functions */
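/* Illustration only (not part of the original file): expected timing of the
* trigger function above.
*
*     te_trigger_stonith_history_sync(FALSE); // DC-initiated sync within 5s
*     te_trigger_stonith_history_sync(TRUE);  // fallback sync within 30s,
*                                             // armed after fencer connect
*/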
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index a91fbfa3a7..8c494b97e0 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -1,1096 +1,1095 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h> // bool, true, false
#include <stdio.h> // NULL
#include <stdlib.h> // free(), etc.
#include <glib.h> // gboolean, etc.
#include <libxml/tree.h> // xmlNode
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-controld.h>
static char *max_generation_from = NULL;
static xmlNodePtr max_generation_xml = NULL;
/*!
* \internal
* \brief Nodes from which a CIB sync has failed since the peer joined
*
* This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
* the name of a client node from which a CIB \p sync_from() call has failed in
* \p do_dc_join_finalize() since the client joined the cluster as a peer.
* \p join_id is the ID of the join round in which the \p sync_from() failed,
* and is intended for use in nack log messages.
*/
static GHashTable *failed_sync_nodes = NULL;
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* Numeric counter used to identify join rounds (an unsigned int would be
* appropriate, except we get and set it in XML as int)
*/
static int current_join_id = 0;
/*!
* \internal
* \brief Get log-friendly string equivalent of a controller group join phase
*
* \param[in] phase Join phase
*
* \return Log-friendly string equivalent of \p phase
*/
static const char *
join_phase_text(enum controld_join_phase phase)
{
switch (phase) {
case controld_join_nack:
return "nack";
case controld_join_none:
return "none";
case controld_join_welcomed:
return "welcomed";
case controld_join_integrated:
return "integrated";
case controld_join_finalized:
return "finalized";
case controld_join_confirmed:
return "confirmed";
default:
return "invalid";
}
}
/*!
* \internal
* \brief Destroy the hash table containing failed sync nodes
*/
void
controld_destroy_failed_sync_table(void)
{
if (failed_sync_nodes != NULL) {
g_hash_table_destroy(failed_sync_nodes);
failed_sync_nodes = NULL;
}
}
/*!
* \internal
* \brief Remove a node from the failed sync nodes table if present
*
* \param[in] node_name Node name to remove
*/
void
controld_remove_failed_sync_node(const char *node_name)
{
if (failed_sync_nodes != NULL) {
g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
}
}
/*!
* \internal
* \brief Add to a hash table a node whose CIB failed to sync
*
* \param[in] node_name Name of node whose CIB failed to sync
* \param[in] join_id Join round when the failure occurred
*/
static void
record_failed_sync_node(const char *node_name, gint join_id)
{
if (failed_sync_nodes == NULL) {
failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
}
/* If the node is already in the table then we failed to nack it during the
* filter offer step
*/
CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
GINT_TO_POINTER(join_id)));
}
/*!
* \internal
* \brief Look up a node name in the failed sync table
*
* \param[in] node_name Name of node to look up
* \param[out] join_id Where to store the join ID of when the sync failed
*
* \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the
* node name was found, or \p pcmk_rc_node_unknown otherwise.
* \note \p *join_id is set to -1 if the node is not found.
*/
static int
lookup_failed_sync_node(const char *node_name, gint *join_id)
{
*join_id = -1;
if (failed_sync_nodes != NULL) {
gpointer result = g_hash_table_lookup(failed_sync_nodes,
(gchar *) node_name);
if (result != NULL) {
*join_id = GPOINTER_TO_INT(result);
return pcmk_rc_ok;
}
}
return pcmk_rc_node_unknown;
}
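/* Illustration only (not part of the original file): expected semantics of
* the table above, assuming record_failed_sync_node("nodeA", 3) was called
* earlier. "nodeA" and "nodeB" are hypothetical node names.
*
*     gint join_id = 0;
*     lookup_failed_sync_node("nodeA", &join_id); // pcmk_rc_ok, join_id == 3
*     lookup_failed_sync_node("nodeB", &join_id); // pcmk_rc_node_unknown,
*                                                 // join_id == -1
*/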
void
crm_update_peer_join(const char *source, pcmk__node_status_t *node,
enum controld_join_phase phase)
{
enum controld_join_phase last = controld_get_join_phase(node);
CRM_CHECK(node != NULL, return);
/* Remote nodes do not participate in joins */
if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
return;
}
if (phase == last) {
crm_trace("Node %s join-%d phase is still %s "
QB_XS " nodeid=%" PRIu32 " source=%s",
node->name, current_join_id, join_phase_text(last),
node->cluster_layer_id, source);
return;
}
if ((phase <= controld_join_none) || (phase == (last + 1))) {
struct controld_node_status_data *data = NULL;
if (node->user_data == NULL) {
node->user_data =
pcmk__assert_alloc(1, sizeof(struct controld_node_status_data));
}
data = node->user_data;
data->join_phase = phase;
crm_trace("Node %s join-%d phase is now %s (was %s) "
QB_XS " nodeid=%" PRIu32 " source=%s",
node->name, current_join_id, join_phase_text(phase),
join_phase_text(last), node->cluster_layer_id,
source);
return;
}
crm_warn("Rejecting join-%d phase update for node %s because can't go from "
"%s to %s " QB_XS " nodeid=%" PRIu32 " source=%s",
current_join_id, node->name, join_phase_text(last),
join_phase_text(phase), node->cluster_layer_id, source);
}
static void
start_join_round(void)
{
GHashTableIter iter;
pcmk__node_status_t *peer = NULL;
crm_debug("Starting new join round join-%d", current_join_id);
g_hash_table_iter_init(&iter, pcmk__peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
crm_update_peer_join(__func__, peer, controld_join_none);
}
if (max_generation_from != NULL) {
free(max_generation_from);
max_generation_from = NULL;
}
if (max_generation_xml != NULL) {
pcmk__xml_free(max_generation_xml);
max_generation_xml = NULL;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
}
/*!
* \internal
* \brief Create a join message from the DC
*
* \param[in] join_op Join operation name
* \param[in] host_to Recipient of message
*/
static xmlNode *
create_dc_message(const char *join_op, const char *host_to)
{
xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_DC, host_to,
CRM_SYSTEM_CRMD, join_op, NULL);
/* Identify which election this is a part of */
crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
/* Add a field specifying whether the DC is shutting down. This keeps the
* joining node from fencing the old DC if it becomes the new DC.
*/
pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
pcmk_is_set(controld_globals.fsa_input_register,
R_SHUTDOWN));
return msg;
}
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
/* @TODO We don't use user_data except to distinguish one particular call
* from others. Make this clearer.
*/
xmlNode *offer = NULL;
pcmk__node_status_t *member = (pcmk__node_status_t *) value;
pcmk__assert(member != NULL);
if (!pcmk__cluster_is_node_active(member)) {
crm_info("Not making join-%d offer to inactive node %s",
current_join_id, pcmk__s(member->name, "with unknown name"));
if ((member->expected == NULL)
&& pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) {
/* You would think this unsafe, but in fact this plus an
* active resource is what causes it to be fenced.
*
* Yes, this does mean that any node that dies at the same
* time as the old DC and is not running resource (still)
* won't be fenced.
*
* I'm not happy about this either.
*/
pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
}
return;
}
if (member->name == NULL) {
crm_info("Not making join-%d offer to node uuid %s with unknown name",
current_join_id, member->xml_id);
return;
}
if (controld_globals.membership_id != controld_globals.peer_seq) {
controld_globals.membership_id = controld_globals.peer_seq;
crm_info("Making join-%d offers based on membership event %llu",
current_join_id, controld_globals.peer_seq);
}
if (user_data != NULL) {
enum controld_join_phase phase = controld_get_join_phase(member);
if (phase > controld_join_none) {
crm_info("Not making join-%d offer to already known node %s (%s)",
current_join_id, member->name, join_phase_text(phase));
return;
}
}
crm_update_peer_join(__func__, (pcmk__node_status_t*) member,
controld_join_none);
offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name);
// Advertise our feature set so the joining node can bail if not compatible
crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_info("Sending join-%d offer to %s", current_join_id, member->name);
pcmk__cluster_send_message(member, pcmk_ipc_controld, offer);
pcmk__xml_free(offer);
crm_update_peer_join(__func__, member, controld_join_welcomed);
}
/* A_DC_JOIN_OFFER_ALL */
void
do_dc_join_offer_all(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int count;
/* Reset everyone's status back to down or in_ccm in the CIB.
* Any nodes that are active in the CIB but not in the cluster membership
* will be seen as offline by the scheduler anyway.
*/
current_join_id++;
start_join_round();
update_dc(NULL);
if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
crm_info("A new node joined the cluster");
}
g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL);
count = crmd_join_phase_count(controld_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
/* A_DC_JOIN_OFFER_ONE */
void
do_dc_join_offer_one(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
pcmk__node_status_t *member = NULL;
ha_msg_input_t *welcome = NULL;
int count;
const char *join_to = NULL;
if (msg_data->data == NULL) {
crm_info("Making join-%d offers to any unconfirmed nodes "
"because an unknown node joined", current_join_id);
g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member);
check_join_state(cur_state, __func__);
return;
}
welcome = fsa_typed_data(fsa_dt_ha_msg);
if (welcome == NULL) {
// fsa_typed_data() already logged an error
return;
}
join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
if (join_to == NULL) {
crm_err("Can't make join-%d offer to unknown node", current_join_id);
return;
}
member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
/* It is possible that a node will have been sick or starting up when the
* original offer was made. However, it will either re-announce itself in
* due course, or we can re-store the original offer on the client.
*/
crm_update_peer_join(__func__, member, controld_join_none);
join_make_offer(NULL, member, NULL);
/* If the offer isn't to the local node, make an offer to the local node as
* well, to ensure the correct value for max_generation_from.
*/
if (!controld_is_local_node(join_to)) {
member = controld_get_local_node_status();
join_make_offer(NULL, member, NULL);
}
/* This was a genuine join request; cancel any existing transition and
* invoke the scheduler.
*/
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
NULL);
count = crmd_join_phase_count(controld_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
const char *elem_l = crm_element_value(left, field);
const char *elem_r = crm_element_value(right, field);
long long int_elem_l;
long long int_elem_r;
int rc = pcmk_rc_ok;
rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL);
if (rc != pcmk_rc_ok) { // Shouldn't be possible
crm_warn("Comparing current CIB %s as -1 "
"because '%s' is not an integer", field, elem_l);
}
rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL);
if (rc != pcmk_rc_ok) { // Shouldn't be possible
crm_warn("Comparing joining node's CIB %s as -1 "
"because '%s' is not an integer", field, elem_r);
}
if (int_elem_l < int_elem_r) {
return -1;
} else if (int_elem_l > int_elem_r) {
return 1;
}
return 0;
}
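/* Illustration only (not part of the original file): with the helper above,
* do_dc_join_filter_offer() effectively compares CIB generations
* lexicographically by (admin_epoch, epoch, num_updates). For example,
* (1, 0, 99) < (1, 1, 0), because the attribute loop stops at the first
* nonzero comparison.
*/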
/* A_DC_JOIN_PROCESS_REQ */
void
do_dc_join_filter_offer(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *generation = NULL;
int cmp = 0;
int join_id = -1;
int count = 0;
gint value = 0;
gboolean ack_nack_bool = TRUE;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
const char *join_version = crm_element_value(join_ack->msg,
PCMK_XA_CRM_FEATURE_SET);
pcmk__node_status_t *join_node = NULL;
if (join_from == NULL) {
crm_err("Ignoring invalid join request without node name");
return;
}
join_node = pcmk__get_node(0, join_from, NULL,
pcmk__node_search_cluster_member);
crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
if (join_id != current_join_id) {
crm_debug("Ignoring join-%d request from %s because we are on join-%d",
join_id, join_from, current_join_id);
check_join_state(cur_state, __func__);
return;
}
generation = join_ack->xml;
if (max_generation_xml != NULL && generation != NULL) {
int lpc = 0;
const char *attributes[] = {
PCMK_XA_ADMIN_EPOCH,
PCMK_XA_EPOCH,
PCMK_XA_NUM_UPDATES,
};
/* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
* element from the join client. The "if" guard is for clarity.
*/
if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
cmp = compare_int_fields(max_generation_xml, generation,
attributes[lpc]);
}
} else { // Should always be PCMK__XE_GENERATION_TUPLE
CRM_LOG_ASSERT(false);
}
}
if (ref == NULL) {
ref = "none"; // for logging only
}
if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
crm_err("Rejecting join-%d request from node %s because we failed to "
"sync its CIB in join-%d " QB_XS " ref=%s",
join_id, join_from, value, ref);
ack_nack_bool = FALSE;
} else if (!pcmk__cluster_is_node_active(join_node)) {
if (match_down_event(join_from) != NULL) {
/* The join request was received after the node was fenced or
* otherwise shutdown in a way that we're aware of. No need to log
* an error in this rare occurrence; we know the client was recently
* shut down, and receiving a lingering in-flight request is not
* cause for alarm.
*/
crm_debug("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
} else {
crm_err("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
}
ack_nack_bool = FALSE;
} else if (generation == NULL) {
crm_err("Rejecting invalid join-%d request from node %s "
"missing CIB generation " QB_XS " ref=%s",
join_id, join_from, ref);
ack_nack_bool = FALSE;
} else if ((join_version == NULL)
|| !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
crm_err("Rejecting join-%d request from node %s because feature set %s"
" is incompatible with ours (%s) " QB_XS " ref=%s",
join_id, join_from, (join_version? join_version : "pre-3.1.0"),
CRM_FEATURE_SET, ref);
ack_nack_bool = FALSE;
} else if (max_generation_xml == NULL) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with first CIB "
"generation) due to %s schema version %s " QB_XS " ref=%s",
join_id, join_from,
((validation == NULL)? "missing" : "unknown"),
pcmk__s(validation, ""), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with first CIB "
"generation) " QB_XS " ref=%s",
join_id, join_from, ref);
max_generation_xml = pcmk__xml_copy(NULL, generation);
pcmk__str_update(&max_generation_from, join_from);
}
} else if ((cmp < 0)
|| ((cmp == 0) && controld_is_local_node(join_from))) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with better CIB "
"generation than current best from %s) due to %s "
"schema version %s " QB_XS " ref=%s",
join_id, join_from, max_generation_from,
((validation == NULL)? "missing" : "unknown"),
pcmk__s(validation, ""), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with better CIB "
"generation than current best from %s) " QB_XS " ref=%s",
join_id, join_from, max_generation_from, ref);
crm_log_xml_debug(max_generation_xml, "Old max generation");
crm_log_xml_debug(generation, "New max generation");
pcmk__xml_free(max_generation_xml);
max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
pcmk__str_update(&max_generation_from, join_from);
}
} else {
crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s",
join_id, join_from, ref);
}
if (!ack_nack_bool) {
crm_update_peer_join(__func__, join_node, controld_join_nack);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
} else {
crm_update_peer_join(__func__, join_node, controld_join_integrated);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
}
count = crmd_join_phase_count(controld_join_integrated);
crm_debug("%d node%s currently integrated in join-%d",
count, pcmk__plural_s(count), join_id);
if (check_join_state(cur_state, __func__) == FALSE) {
// Don't waste time by invoking the scheduler yet
count = crmd_join_phase_count(controld_join_welcomed);
crm_debug("Waiting on join-%d requests from %d outstanding node%s",
join_id, count, pcmk__plural_s(count));
}
}
/* A_DC_JOIN_FINALIZE */
void
do_dc_join_finalize(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
char *sync_from = NULL;
int rc = pcmk_ok;
int count_welcomed = crmd_join_phase_count(controld_join_welcomed);
int count_finalizable = crmd_join_phase_count(controld_join_integrated)
+ crmd_join_phase_count(controld_join_nack);
/* We can do this straight away, to avoid clients timing us out
* while we compute the latest CIB
*/
if (count_welcomed != 0) {
crm_debug("Waiting on join-%d requests from %d outstanding node%s "
"before finalizing join", current_join_id, count_welcomed,
pcmk__plural_s(count_welcomed));
crmd_join_phase_log(LOG_DEBUG);
/* crmd_fsa_stall(FALSE); Needed? */
return;
} else if (count_finalizable == 0) {
crm_debug("Finalization not needed for join-%d at the current time",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
check_join_state(controld_globals.fsa_state, __func__);
return;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
if ((max_generation_from == NULL)
|| controld_is_local_node(max_generation_from)) {
controld_set_fsa_input_flags(R_HAVE_CIB);
}
if (!controld_globals.transition_graph->complete) {
crm_warn("Delaying join-%d finalization while transition in progress",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
crmd_fsa_stall(FALSE);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
// Send our CIB out to everyone
sync_from = pcmk__str_copy(controld_globals.cluster->priv->node_name);
} else {
// Ask for the agreed best CIB
sync_from = pcmk__str_copy(max_generation_from);
}
crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB %s.%s.%s "
"with schema %s and feature set %s from %s)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable),
crm_element_value(max_generation_xml, PCMK_XA_ADMIN_EPOCH),
crm_element_value(max_generation_xml, PCMK_XA_EPOCH),
crm_element_value(max_generation_xml, PCMK_XA_NUM_UPDATES),
crm_element_value(max_generation_xml, PCMK_XA_VALIDATE_WITH),
crm_element_value(max_generation_xml, PCMK_XA_CRM_FEATURE_SET),
sync_from);
crmd_join_phase_log(LOG_DEBUG);
rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
sync_from, NULL, cib_none);
fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
}
void
free_max_generation(void)
{
free(max_generation_from);
max_generation_from = NULL;
pcmk__xml_free(max_generation_xml);
max_generation_xml = NULL;
}
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
CRM_LOG_ASSERT(-EPERM != rc);
if (rc != pcmk_ok) {
const char *sync_from = (const char *) user_data;
do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
"Could not sync CIB from %s in join-%d: %s",
sync_from, current_join_id, pcmk_strerror(rc));
if (rc != -pcmk_err_old_data) {
record_failed_sync_node(sync_from, current_join_id);
}
/* restart the whole join process */
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
__func__);
} else if (!AM_I_DC) {
crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
} else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
"(%s)", current_join_id,
fsa_state2string(controld_globals.fsa_state));
} else {
controld_set_fsa_input_flags(R_HAVE_CIB);
/* make sure dc_uuid is re-set to us */
if (!check_join_state(controld_globals.fsa_state, __func__)) {
int count_finalizable = 0;
count_finalizable = crmd_join_phase_count(controld_join_integrated)
+ crmd_join_phase_count(controld_join_nack);
crm_debug("Notifying %d node%s of join-%d results",
count_finalizable, pcmk__plural_s(count_finalizable),
current_join_id);
g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL);
}
}
}
static void
join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data)
{
const char *node = user_data;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL; // for register_fsa_error() macro
crm_crit("join-%d node history update (via CIB call %d) for node %s "
"failed: %s",
current_join_id, call_id, node, pcmk_strerror(rc));
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
crm_debug("join-%d node history update (via CIB call %d) for node %s "
"complete",
current_join_id, call_id, node);
check_join_state(controld_globals.fsa_state, __func__);
}
/* A_DC_JOIN_PROCESS_ACK */
void
do_dc_join_ack(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int join_id = -1;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
pcmk__node_status_t *peer = NULL;
enum controld_join_phase phase = controld_join_none;
- enum controld_section_e section = controld_section_lrm;
+ const bool unlocked_only = pcmk_is_set(controld_globals.flags,
+ controld_shutdown_lock_enabled);
char *xpath = NULL;
xmlNode *state = join_ack->xml;
xmlNode *execd_state = NULL;
cib_t *cib = controld_globals.cib_conn;
int rc = pcmk_ok;
// Sanity checks
if (join_from == NULL) {
crm_warn("Ignoring message received without node identification");
goto done;
}
if (op == NULL) {
crm_warn("Ignoring message received from %s without task", join_from);
goto done;
}
if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
op, join_from, CRM_OP_JOIN_CONFIRM);
goto done;
}
if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
crm_warn("Ignoring join confirmation from %s without valid join ID",
join_from);
goto done;
}
peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
phase = controld_get_join_phase(peer);
if (phase != controld_join_finalized) {
crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
"(currently %s not %s)",
join_id, join_from, join_phase_text(phase),
join_phase_text(controld_join_finalized));
goto done;
}
if (join_id != current_join_id) {
crm_err("Rejecting join-%d confirmation from %s "
"because currently on join-%d",
join_id, join_from, current_join_id);
crm_update_peer_join(__func__, peer, controld_join_nack);
goto done;
}
crm_update_peer_join(__func__, peer, controld_join_confirmed);
/* Update CIB with node's current executor state. A new transition will be
* triggered later, when the CIB manager notifies us of the change.
*
* The delete and modify requests are part of an atomic transaction.
*/
rc = cib->cmds->init_transaction(cib);
if (rc != pcmk_ok) {
goto done;
}
// Delete relevant parts of node's current executor state from CIB
- if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
- section = controld_section_lrm_unlocked;
- }
- controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
+ controld_node_history_deletion_strings(join_from, unlocked_only, &xpath,
+ NULL);
rc = cib->cmds->remove(cib, xpath, NULL,
cib_xpath|cib_multiple|cib_transaction);
if (rc != pcmk_ok) {
goto done;
}
// Update CIB with node's latest known executor state
if (controld_is_local_node(join_from)) {
// Use the latest possible state if processing our own join ack
execd_state = controld_query_executor_state();
if (execd_state != NULL) {
crm_debug("Updating local node history for join-%d from query "
"result",
current_join_id);
state = execd_state;
} else {
crm_warn("Updating local node history from join-%d confirmation "
"because query failed",
current_join_id);
}
} else {
crm_debug("Updating node history for %s from join-%d confirmation",
join_from, current_join_id);
}
rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
cib_can_create|cib_transaction);
pcmk__xml_free(execd_state);
if (rc != pcmk_ok) {
goto done;
}
// Commit the transaction
rc = cib->cmds->end_transaction(cib, true, cib_none);
fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);
if (rc > 0) {
// join_from will be freed after callback
join_from = NULL;
rc = pcmk_ok;
}
done:
if (rc != pcmk_ok) {
crm_crit("join-%d node history update for node %s failed: %s",
current_join_id, join_from, pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free(join_from);
free(xpath);
}
void
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *acknak = NULL;
xmlNode *tmp1 = NULL;
pcmk__node_status_t *join_node = value;
const char *join_to = join_node->name;
enum controld_join_phase phase = controld_get_join_phase(join_node);
bool integrated = false;
switch (phase) {
case controld_join_integrated:
integrated = true;
break;
case controld_join_nack:
break;
default:
crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
"for join-%d",
join_to, join_phase_text(phase), current_join_id);
return;
}
/* Update the <node> element with the node's name and UUID, in case they
* weren't known before
*/
crm_trace("Updating node name and UUID in CIB for %s", join_to);
tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node));
crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
pcmk__xml_free(tmp1);
join_node = pcmk__get_node(0, join_to, NULL,
pcmk__node_search_cluster_member);
if (!pcmk__cluster_is_node_active(join_node)) {
/*
* NACK'ing nodes that the membership layer doesn't know about yet
* simply creates more churn
*
* Better to leave them waiting and let the join restart when
* the new membership event comes in
*
* All other NACKs (due to versions etc) should still be processed
*/
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
return;
}
// Acknowledge or nack node's join request
crm_debug("%sing join-%d request from %s",
integrated? "Acknowledg" : "Nack", current_join_id, join_to);
acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);
if (integrated) {
// No change needed for a nacked node
crm_update_peer_join(__func__, join_node, controld_join_finalized);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
/* Iterate through the remote peer cache and add information on which
* node hosts each to the ACK message. This keeps new controllers in
* sync with what has already happened.
*/
if (pcmk__cluster_num_remote_nodes() > 0) {
GHashTableIter iter;
pcmk__node_status_t *node = NULL;
xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);
g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *remote = NULL;
if (!node->conn_host) {
continue;
}
remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
pcmk__xe_set_props(remote,
PCMK_XA_ID, node->name,
PCMK__XA_NODE_STATE, node->state,
PCMK__XA_CONNECTION_HOST, node->conn_host,
NULL);
}
}
}
pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak);
pcmk__xml_free(acknak);
return;
}
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
static unsigned long long highest_seq = 0;
if (controld_globals.membership_id != controld_globals.peer_seq) {
crm_debug("join-%d: Membership changed from %llu to %llu "
QB_XS " highest=%llu state=%s for=%s",
current_join_id, controld_globals.membership_id,
controld_globals.peer_seq, highest_seq,
fsa_state2string(cur_state), source);
if (highest_seq < controld_globals.peer_seq) {
/* Don't spam the FSA with duplicates */
highest_seq = controld_globals.peer_seq;
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
} else if (cur_state == S_INTEGRATION) {
if (crmd_join_phase_count(controld_join_welcomed) == 0) {
int count = crmd_join_phase_count(controld_join_integrated);
crm_debug("join-%d: Integration of %d peer%s complete "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
return TRUE;
}
} else if (cur_state == S_FINALIZE_JOIN) {
if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
crm_debug("join-%d: Delaying finalization until we have CIB "
QB_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
return TRUE;
} else if (crmd_join_phase_count(controld_join_welcomed) != 0) {
int count = crmd_join_phase_count(controld_join_welcomed);
crm_debug("join-%d: Still waiting on %d welcomed node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(controld_join_integrated) != 0) {
int count = crmd_join_phase_count(controld_join_integrated);
crm_debug("join-%d: Still waiting on %d integrated node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else if (crmd_join_phase_count(controld_join_finalized) != 0) {
int count = crmd_join_phase_count(controld_join_finalized);
crm_debug("join-%d: Still waiting on %d finalized node%s "
QB_XS " state=%s for=%s",
current_join_id, count, pcmk__plural_s(count),
fsa_state2string(cur_state), source);
crmd_join_phase_log(LOG_DEBUG);
} else {
crm_debug("join-%d: Complete " QB_XS " state=%s for=%s",
current_join_id, fsa_state2string(cur_state), source);
register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
return TRUE;
}
}
return FALSE;
}
void
do_dc_join_final(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Ensuring DC, quorum and node attributes are up-to-date");
crm_update_quorum(pcmk__cluster_has_quorum(), TRUE);
}
int crmd_join_phase_count(enum controld_join_phase phase)
{
int count = 0;
pcmk__node_status_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, pcmk__peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
if (controld_get_join_phase(peer) == phase) {
count++;
}
}
return count;
}
void crmd_join_phase_log(int level)
{
pcmk__node_status_t *peer;
GHashTableIter iter;
g_hash_table_iter_init(&iter, pcmk__peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->name,
join_phase_text(controld_get_join_phase(peer)));
}
}
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 1cc4ae008b..1c52477689 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1493 +1,1472 @@
/*
* Copyright 2013-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>
#include <libxml/xpath.h> // xmlXPathObject, etc.
#include <pacemaker-controld.h>
#define REMOTE_LRMD_RA "remote"
/* The max start timeout before cmd retry */
#define MAX_START_TIMEOUT_MS 10000
#define cmd_set_flags(cmd, flags_to_set) do { \
(cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define cmd_clear_flags(cmd, flags_to_clear) do { \
(cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
enum remote_cmd_status {
cmd_reported_success = (1 << 0),
cmd_cancel = (1 << 1),
};
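/* Illustration only (not part of the original file): how the flag macros
* above are typically used with this enum. "cmd" stands for any
* remote_ra_cmd_t pointer.
*
*     cmd_set_flags(cmd, cmd_cancel);   // mark an in-flight command cancelled
*     if (pcmk_is_set(cmd->status, cmd_cancel)) {
*         // skip reporting results for cancelled commands
*     }
*     cmd_clear_flags(cmd, cmd_reported_success);
*/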
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
int monitor_timeout_id;
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
pcmk__action_result_t result;
int call_id;
time_t start_time;
uint32_t status;
} remote_ra_cmd_t;
#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
enum remote_status {
expect_takeover = (1 << 0),
takeover_complete = (1 << 1),
remote_active = (1 << 2),
/* Maintenance mode is difficult to determine from the controller's context,
* so we have it signalled back with the transition from the scheduler.
*/
remote_in_maint = (1 << 3),
/* Similarly for whether we are controlling a guest node or a remote
* node: a meta-attribute is already in the transition, and since the
* situation doesn't change over time, we can record the information at
* resource start for later use, when the attributes aren't at hand.
*/
controlling_guest = (1 << 4),
};
typedef struct remote_ra_data_s {
crm_trigger_t *work;
remote_ra_cmd_t *cur_cmd;
GList *cmds;
GList *recurring_cmds;
uint32_t status;
} remote_ra_data_t;
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
static void
free_cmd(gpointer user_data)
{
remote_ra_cmd_t *cmd = user_data;
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
}
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
}
if (cmd->takeover_timeout_id) {
g_source_remove(cmd->takeover_timeout_id);
}
free(cmd->owner);
free(cmd->rsc_id);
free(cmd->action);
free(cmd->userdata);
pcmk__reset_result(&(cmd->result));
lrmd_key_value_freeall(cmd->params);
free(cmd);
}
static int
generate_callid(void)
{
static int remote_ra_callid = 0;
remote_ra_callid++;
if (remote_ra_callid <= 0) {
remote_ra_callid = 1;
}
return remote_ra_callid;
}
static gboolean
recurring_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->interval_id = 0;
connection_rsc = controld_get_executor_state(cmd->rsc_id, false);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
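/*!
 * \internal
 * \brief Timer callback for when a command's start delay has passed
 *
 * Clear the delay timer and trigger the command queue to be processed.
 *
 * \param[in,out] data Delayed command
 *
 * \return FALSE (to indicate the timer should not be rescheduled)
 */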
static gboolean
start_delay_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->delay_id = 0;
connection_rsc = controld_get_executor_state(cmd->rsc_id, false);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
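/*!
 * \internal
 * \brief Check whether a remote node's transient attributes should be purged
 *
 * Purging is skipped only when the remote daemon has been running since
 * before its connection host was noticed as lost (with 20s of fuzz), meaning
 * the node itself likely stayed up and its attributes are still valid. On
 * any error condition, default to purging.
 *
 * \param[in] node Remote node to check
 *
 * \return true if the node's transient attributes should be purged
 */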
static bool
should_purge_attributes(pcmk__node_status_t *node)
{
pcmk__node_status_t *conn_node = NULL;
lrm_state_t *connection_rsc = NULL;
if ((node->conn_host == NULL) || (node->name == NULL)) {
return true;
}
/* Get the node that was hosting the remote connection resource from the
* peer cache. That's the one we really care about here.
*/
conn_node = pcmk__get_node(0, node->conn_host, NULL,
pcmk__node_search_cluster_member);
if (conn_node == NULL) {
return true;
}
/* Check the uptime of connection_rsc. If it hasn't been running long
* enough, set purge=true. "Long enough" means it started running earlier
* than the timestamp when we noticed it went away in the first place.
*/
connection_rsc = controld_get_executor_state(node->name, false);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
time_t uptime = lrmd__uptime(lrm);
time_t now = time(NULL);
/* Add 20s of fuzziness to give corosync a while to notice the remote
* host is gone. On various error conditions (failure to get uptime,
* peer_lost isn't set) we default to purging.
*/
if (uptime > 0 &&
conn_node->peer_lost > 0 &&
uptime + 20 >= now - conn_node->peer_lost) {
return false;
}
}
return true;
}
-static enum controld_section_e
-section_to_delete(bool purge)
-{
- if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
- if (purge) {
- return controld_section_all_unlocked;
- } else {
- return controld_section_lrm_unlocked;
- }
- } else {
- if (purge) {
- return controld_section_all;
- } else {
- return controld_section_lrm;
- }
- }
-}
-
static void
purge_remote_node_attrs(int call_opt, pcmk__node_status_t *node)
{
- bool purge = should_purge_attributes(node);
- enum controld_section_e section = section_to_delete(purge);
+ const bool unlocked_only = pcmk_is_set(controld_globals.flags,
+ controld_shutdown_lock_enabled);
- /* Purge node from attrd's memory */
- if (purge) {
- update_attrd_remote_node_removed(node->name, NULL);
+ // Purge node's transient attributes (from attribute manager and CIB)
+ if (should_purge_attributes(node)) {
+ controld_purge_node_attrs(node->name, true);
}
- controld_delete_node_state(node->name, section, call_opt);
+ controld_delete_node_history(node->name, unlocked_only, call_opt);
}
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node joining
*
* \param[in] node_name Name of newly integrated pacemaker_remote node
*/
static void
remote_node_up(const char *node_name)
{
int call_opt;
xmlNode *update, *state;
pcmk__node_status_t *node = NULL;
lrm_state_t *connection_rsc = NULL;
CRM_CHECK(node_name != NULL, return);
crm_info("Announcing Pacemaker Remote node %s", node_name);
call_opt = crmd_cib_smart_opt();
/* Delete node's CRM_OP_PROBED attribute. Deleting any attribute ensures
* that the attribute manager learns the node is remote. Deletion of this
* specific attribute is a holdover from when it had special meaning.
*
* @COMPAT Find another way to tell attrd that the node is remote, without
* risking deletion or overwrite of an arbitrary attribute. Then work on
* deprecating CRM_OP_PROBED.
*/
update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
/* Ensure node is in the remote peer cache with member status */
node = pcmk__cluster_lookup_remote_node(node_name);
CRM_CHECK((node != NULL) && (node->name != NULL), return);
purge_remote_node_attrs(call_opt, node);
pcmk__update_peer_state(__func__, node, PCMK_VALUE_MEMBER, 0);
/* Apply any start state that we were given from the environment on the
* remote node.
*/
connection_rsc = controld_get_executor_state(node->name, false);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
const char *start_state = lrmd__node_start_state(lrm);
if (start_state) {
set_join_state(start_state, node->name, node->xml_id, true);
}
}
/* pacemaker_remote nodes don't participate in the membership layer,
* so cluster nodes don't automatically get notified when they come and go.
* We send a cluster message to the DC, and update the CIB node state entry,
* so the DC will get it sooner (via message) or later (via CIB refresh),
* and any other interested parties can query the CIB.
*/
broadcast_remote_state_message(node_name, true);
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
state = create_node_state_update(node, controld_node_update_cluster, update,
__func__);
/* Clear the PCMK__XA_NODE_FENCED flag in the node state. If the node ever
* needs to be fenced, this flag will allow various actions to determine
* whether the fencing has happened yet.
*/
crm_xml_add(state, PCMK__XA_NODE_FENCED, "0");
/* TODO: If the remote connection drops, and this (async) CIB update either
* failed or has not yet completed, later actions could mistakenly think the
* node has already been fenced (if the PCMK__XA_NODE_FENCED attribute was
* previously set, because it won't have been cleared). This could prevent
* actual fencing or allow recurring monitor failures to be cleared too
* soon. Ideally, we wouldn't rely on the CIB for the fenced status.
*/
controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
pcmk__xml_free(update);
}
enum down_opts {
DOWN_KEEP_LRM,
DOWN_ERASE_LRM
};
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node leaving
*
* \param[in] node_name Name of lost node
* \param[in] opts Whether to keep or erase LRM history
*/
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
xmlNode *update;
int call_opt = crmd_cib_smart_opt();
pcmk__node_status_t *node = NULL;
- /* Purge node from attrd's memory */
- update_attrd_remote_node_removed(node_name, NULL);
+ // Purge node's transient attributes (from attribute manager and CIB)
+ controld_purge_node_attrs(node_name, true);
- /* Normally, only node attributes should be erased, and the resource history
- * should be kept until the node comes back up. However, after a successful
- * fence, we want to clear the history as well, so we don't think resources
- * are still running on the node.
+ /* Normally, the resource history should be kept until the node comes back
+ * up. However, after a successful fence, clear the history so we don't
+ * think resources are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
- controld_delete_node_state(node_name, controld_section_all, call_opt);
- } else {
- controld_delete_node_state(node_name, controld_section_attrs, call_opt);
+ controld_delete_node_history(node_name, false, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
node = pcmk__cluster_lookup_remote_node(node_name);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, PCMK__VALUE_LOST, 0);
/* Notify DC */
broadcast_remote_state_message(node_name, false);
/* Update CIB node state */
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
create_node_state_update(node, controld_node_update_cluster, update,
__func__);
controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
pcmk__xml_free(update);
}
/*!
* \internal
* \brief Handle effects of a remote RA command on node state
*
* \param[in] cmd Completed remote RA command
*/
static void
check_remote_node_state(const remote_ra_cmd_t *cmd)
{
/* Only successful actions can change node state */
if (!pcmk__result_ok(&(cmd->result))) {
return;
}
if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
remote_node_up(cmd->rsc_id);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_casei)) {
/* After a successful migration, we don't need to do remote_node_up()
* because the DC already knows the node is up, and we don't want to
* clear LRM history etc. We do need to add the remote node to this
* host's remote peer cache, because (unless it happens to be DC)
* it hasn't been tracking the remote node, and other code relies on
* the cache to distinguish remote nodes from unseen cluster nodes.
*/
pcmk__node_status_t *node =
pcmk__cluster_lookup_remote_node(cmd->rsc_id);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, PCMK_VALUE_MEMBER, 0);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
lrm_state_t *lrm_state = controld_get_executor_state(cmd->rsc_id,
false);
remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
if (ra_data) {
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Stop means down if we didn't successfully migrate elsewhere */
remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
} else if (AM_I_DC == FALSE) {
/* Only the connection host and DC track node state,
* so if the connection migrated elsewhere and we aren't DC,
* un-cache the node, so we don't have stale info
*/
pcmk__cluster_forget_remote_node(cmd->rsc_id);
}
}
}
/* We don't do anything for successful monitors, which is correct for
* routine recurring monitors, and for monitors on nodes where the
* connection isn't supposed to be (the cluster will stop the connection in
* that case). However, if the initial probe finds the connection already
* active on the node where we want it, we probably should do
* remote_node_up(). Unfortunately, we can't distinguish that case here.
* Given that connections have to be initiated by the cluster, the chance of
* that should be close to zero.
*/
}
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
lrmd_event_data_t op = { 0, };
check_remote_node_state(cmd);
op.type = lrmd_event_exec_complete;
op.rsc_id = cmd->rsc_id;
op.op_type = cmd->action;
op.user_data = cmd->userdata;
op.timeout = cmd->timeout;
op.interval_ms = cmd->interval_ms;
op.t_run = cmd->start_time;
op.t_rcchange = cmd->start_time;
lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
cmd->result.exit_reason);
if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
op.t_rcchange = time(NULL);
/* This edge case should never occur, but if it did, the result would be
 * that a failure was not processed correctly. It is only remotely
 * possible because we can detect that a connection resource's TCP
 * connection has failed at any moment after start completes, while the
 * actual recurring operation is just a connectivity ping.
 *
 * Basically, we are not guaranteed that the first successful monitor op
 * and a subsequent failed monitor op won't have the same timestamp, but
 * we have to make it look like they occurred at separate times. */
if (op.t_rcchange == op.t_run) {
op.t_rcchange++;
}
}
if (cmd->params) {
lrmd_key_value_t *tmp;
op.params = pcmk__strkey_table(free, free);
for (tmp = cmd->params; tmp; tmp = tmp->next) {
pcmk__insert_dup(op.params, tmp->key, tmp->value);
}
}
op.call_id = cmd->call_id;
op.remote_nodename = cmd->owner;
lrm_op_callback(&op);
if (op.params) {
g_hash_table_destroy(op.params);
}
lrmd__reset_result(&op);
}
/*!
* \internal
* \brief Return a remote command's remaining timeout in seconds
*
* \param[in] cmd Remote command to check
*
* \return Command's remaining timeout in seconds
*/
static int
remaining_timeout_sec(const remote_ra_cmd_t *cmd)
{
return pcmk__timeout_ms2s(cmd->timeout) - (time(NULL) - cmd->start_time);
}
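/*!
 * \internal
 * \brief Timer callback to retry connecting for an in-flight start command
 *
 * Retry the remote connection with whatever timeout remains; if not enough
 * time remains, report the command as timed out.
 *
 * \param[in,out] data Executor state for the remote connection
 *
 * \return FALSE (to indicate the timer should not be rescheduled)
 */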
static gboolean
retry_start_cmd_cb(gpointer data)
{
lrm_state_t *lrm_state = data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd = NULL;
int rc = ETIME;
int remaining = 0;
if (!ra_data || !ra_data->cur_cmd) {
return FALSE;
}
cmd = ra_data->cur_cmd;
if (!pcmk__is_up_action(cmd->action)) {
return FALSE;
}
remaining = remaining_timeout_sec(cmd);
if (remaining > 0) {
rc = handle_remote_ra_start(lrm_state, cmd, remaining * 1000);
} else {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Not enough time remains to retry remote connection");
}
if (rc != pcmk_rc_ok) {
report_remote_ra_result(cmd);
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
ra_data->cur_cmd = NULL;
free_cmd(cmd);
} else {
/* wait for connection event */
}
return FALSE;
}
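/*!
 * \internal
 * \brief Timer callback for when an expected takeover did not occur in time
 *
 * Proceed with a full stop of the connection, as if no takeover had been
 * expected.
 *
 * \param[in,out] data Stop command that was waiting on the takeover
 *
 * \return FALSE (to indicate the timer should not be rescheduled)
 */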
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
crm_info("takeover event timed out for node %s", cmd->rsc_id);
cmd->takeover_timeout_id = 0;
lrm_state = controld_get_executor_state(cmd->rsc_id, false);
handle_remote_ra_stop(lrm_state, cmd);
free_cmd(cmd);
return FALSE;
}
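/*!
 * \internal
 * \brief Timer callback for when a monitor poke response was not received
 *
 * Report the monitor as timed out and drop the connection.
 *
 * \param[in,out] data Monitor command that timed out
 *
 * \return FALSE (to indicate the timer should not be rescheduled)
 */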
static gboolean
monitor_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
lrm_state = controld_get_executor_state(cmd->rsc_id, false);
crm_info("Timed out waiting for remote poke response from %s%s",
cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
cmd->monitor_timeout_id = 0;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
"Remote executor did not respond");
if (lrm_state && lrm_state->remote_ra_data) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (ra_data->cur_cmd == cmd) {
ra_data->cur_cmd = NULL;
}
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
}
report_remote_ra_result(cmd);
free_cmd(cmd);
if (lrm_state) {
// @TODO Should we move this before reporting the result above?
lrm_state_disconnect(lrm_state);
}
return FALSE;
}
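/*!
 * \internal
 * \brief Fake a successful executor event for an action that never ran
 *
 * Build an execution-complete event with an OK result, and process it as if
 * the executor had reported it.
 *
 * \param[in,out] lrm_state Executor state to process the event with
 *                          (or NULL to use the local executor state)
 * \param[in]     rsc_id    ID of resource the event is for
 * \param[in]     op_type   Action name for the event
 */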
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
lrmd_event_data_t op = { 0, };
if (lrm_state == NULL) {
/* If lrm_state is not given, assume the local executor state */
lrm_state = controld_get_executor_state(NULL, false);
}
pcmk__assert(lrm_state != NULL);
op.type = lrmd_event_exec_complete;
op.rsc_id = rsc_id;
op.op_type = op_type;
op.t_run = time(NULL);
op.t_rcchange = op.t_run;
op.call_id = generate_callid();
lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
process_lrm_event(lrm_state, &op, NULL, NULL);
}
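/*!
 * \internal
 * \brief Handle an event from the remote connection
 *
 * Handle client takeovers and unexpected disconnects, pass completed
 * executor actions up to the usual callback, and match connect, poke, and
 * disconnect events against the currently in-flight remote command.
 *
 * \param[in,out] op Event to handle
 */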
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
CRM_CHECK((op != NULL) && (op->remote_nodename != NULL), return);
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
crm_exit_str((crm_exit_t) op->rc), op->rc,
pcmk_exec_status_str(op->op_status), op->op_status);
lrm_state = controld_get_executor_state(op->remote_nodename, false);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (pcmk_is_set(ra_data->status, expect_takeover)) {
// Great, we knew this was coming
lrm_remote_clear_flags(lrm_state, expect_takeover);
lrm_remote_set_flags(lrm_state, takeover_complete);
} else {
crm_err("Disconnecting from Pacemaker Remote node %s due to "
"unexpected client takeover", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under the
 * control of mainloop. Do not free lrm_state->conn yet; it will be freed
 * in the following stop action.
 */
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* Pass all exec-complete events up, unless another node took over the
 * connection */
if (op->type == lrmd_event_exec_complete) {
if (pcmk_is_set(ra_data->status, takeover_complete)) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (!pcmk_is_set(ra_data->status, remote_active)) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name,
PCMK_ACTION_STOP);
}
return;
}
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start and migrate_from actions complete only after the connection
 * comes back to us. */
if ((op->type == lrmd_event_connect) && pcmk__is_up_action(cmd->action)) {
if (op->connection_rc < 0) {
int remaining = remaining_timeout_sec(cmd);
if ((op->connection_rc == -ENOKEY)
|| (op->connection_rc == -EKEYREJECTED)) {
// Hard error, don't retry
pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
PCMK_EXEC_ERROR,
pcmk_strerror(op->connection_rc));
} else if (remaining > 3) {
crm_trace("Rescheduling start (%ds remains before timeout)",
remaining);
pcmk__create_timer(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("Not enough time before timeout (%ds) "
"to reschedule start", remaining);
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"%s without enough time to retry",
pcmk_strerror(op->connection_rc));
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
lrm_remote_set_flags(lrm_state, remote_active);
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_poke)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time; after that, only worry about
 * failures. For this function, getting the poke back is always a success.
 * Pokes only fail if the send fails or the response times out. */
if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
cmd_set_flags(cmd, cmd_reported_success);
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* success, keep rescheduling if interval is present. */
if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = pcmk__create_timer(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_disconnect)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (pcmk_is_set(ra_data->status, remote_active) &&
!pcmk_is_set(cmd->status, cmd_cancel)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Remote connection unexpectedly dropped "
"during monitor");
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
free_cmd(cmd);
}
}
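/*!
 * \internal
 * \brief Handle a stop of the remote connection resource
 *
 * Discard pending operations (or, if the connection was taken over, the
 * resource history), disconnect, free all queued commands, and report a
 * successful result for \p cmd if one is given.
 *
 * \param[in,out] lrm_state Executor state for the remote connection
 * \param[in,out] cmd       Stop command to report a result for (may be NULL)
 */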
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
remote_ra_data_t *ra_data = NULL;
pcmk__assert(lrm_state != NULL);
ra_data = lrm_state->remote_ra_data;
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Delete pending ops whenever the remote connection is intentionally
 * stopped */
g_hash_table_remove_all(lrm_state->active_ops);
} else {
/* We no longer hold the history if this connection has been migrated;
 * however, we keep the metadata cache for future use */
lrm_state_reset_tables(lrm_state, FALSE);
}
lrm_remote_clear_flags(lrm_state, remote_active);
lrm_state_disconnect(lrm_state);
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
ra_data->cmds = NULL;
ra_data->recurring_cmds = NULL;
ra_data->cur_cmd = NULL;
if (cmd) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
}
/*!
 * \internal
 * \brief Initiate an asynchronous remote connection for a start command
 *
 * \param[in,out] lrm_state  Executor state for the remote connection
 * \param[in,out] cmd        Start command being executed
 * \param[in]     timeout_ms Connection timeout (in milliseconds), capped at
 *                           MAX_START_TIMEOUT_MS
 *
 * \return Standard Pacemaker return code
 */
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
const char *server = NULL;
lrmd_key_value_t *tmp = NULL;
int port = 0;
int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
int rc = pcmk_rc_ok;
for (tmp = cmd->params; tmp; tmp = tmp->next) {
if (pcmk__strcase_any_of(tmp->key,
PCMK_REMOTE_RA_ADDR, PCMK_REMOTE_RA_SERVER,
NULL)) {
server = tmp->value;
} else if (pcmk__str_eq(tmp->key, PCMK_REMOTE_RA_PORT,
pcmk__str_none)) {
port = atoi(tmp->value);
} else if (pcmk__str_eq(tmp->key, CRM_META "_" PCMK__META_CONTAINER,
pcmk__str_none)) {
lrm_remote_set_flags(lrm_state, controlling_guest);
}
}
rc = controld_connect_remote_executor(lrm_state, server, port,
timeout_used);
if (rc != pcmk_rc_ok) {
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Could not connect to Pacemaker Remote node %s: %s",
lrm_state->node_name, pcmk_rc_str(rc));
}
return rc;
}
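/*!
 * \internal
 * \brief Mainloop trigger handler that executes queued remote commands
 *
 * Dispatch queued commands one at a time, stopping early whenever a command
 * must wait on a timer or an asynchronous connection event.
 *
 * \param[in,out] user_data Executor state for the remote connection
 *
 * \return TRUE (to keep the trigger registered)
 */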
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
if (handle_remote_ra_start(lrm_state, cmd,
cmd->timeout) == pcmk_rc_ok) {
/* Take care of this later, when we get the async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, pcmk_strerror(rc));
}
} else {
rc = -1;
pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
PCMK_EXEC_DONE, "Remote connection inactive");
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
cmd->monitor_timeout_id = pcmk__create_timer(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) {
if (pcmk_is_set(ra_data->status, expect_takeover)) {
/* Briefly wait on stop for an expected takeover to occur. If
* the takeover does not occur during the wait, that's fine; it
* just means that the remote node's resource history will be
* cleared, which will require probing all resources on the
* remote node. If the takeover does occur successfully, then we
* can leave the status section intact.
*/
cmd->takeover_timeout_id = pcmk__create_timer((cmd->timeout/2),
connection_takeover_timeout_cb,
cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
handle_remote_ra_stop(lrm_state, cmd);
} else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) {
lrm_remote_clear_flags(lrm_state, takeover_complete);
lrm_remote_set_flags(lrm_state, expect_takeover);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
} else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Currently the only reloadable parameter is
* PCMK_REMOTE_RA_RECONNECT_INTERVAL, which is only used by the
* scheduler via the CIB, so reloads are a no-op.
*
* @COMPAT DC <2.1.0: We only need to check for "reload" in case
* we're in a rolling upgrade with a DC scheduling "reload" instead
* of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway,
* so this would work for that purpose as well.
*/
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
free_cmd(cmd);
}
return TRUE;
}
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = NULL;
if (lrm_state->remote_ra_data) {
return;
}
ra_data = pcmk__assert_alloc(1, sizeof(remote_ra_data_t));
ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
lrm_state->remote_ra_data = ra_data;
}
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (!ra_data) {
return;
}
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
mainloop_destroy_trigger(ra_data->work);
free(ra_data);
lrm_state->remote_ra_data = NULL;
}
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
return TRUE;
}
return (id != NULL) && (controld_get_executor_state(id, false) != NULL)
&& !controld_is_local_node(id);
}
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
lrmd_rsc_info_t *info = NULL;
CRM_CHECK(rsc_id != NULL, return NULL);
if (controld_get_executor_state(rsc_id, false) != NULL) {
info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t));
info->id = pcmk__str_copy(rsc_id);
info->type = pcmk__str_copy(REMOTE_LRMD_RA);
info->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_OCF);
info->provider = pcmk__str_copy("pacemaker");
}
return info;
}
static gboolean
is_remote_ra_supported_action(const char *action)
{
return pcmk__str_any_of(action,
PCMK_ACTION_START,
PCMK_ACTION_STOP,
PCMK_ACTION_MONITOR,
PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_RELOAD,
NULL);
}
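/*!
 * \internal
 * \brief Report failure for all recurring monitors in a command list
 *
 * \param[in,out] list List of remote commands to check
 *
 * \return Updated list, with the failed monitors removed and freed
 */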
static GList *
fail_all_monitor_cmds(GList * list)
{
GList *rm_list = NULL;
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms > 0)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
rm_list = g_list_append(rm_list, cmd);
}
}
for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, "Lost connection to remote executor");
crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
report_remote_ra_result(cmd);
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
/* frees only the list data, not the cmds */
g_list_free(rm_list);
return list;
}
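/*!
 * \internal
 * \brief Remove a command matching an action and interval from a list
 *
 * The first matching command (if any) is removed from the list and freed.
 *
 * \param[in,out] list        List of remote commands to check
 * \param[in]     action      Action to match
 * \param[in]     interval_ms Interval to match
 *
 * \return Updated command list
 */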
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
break;
}
cmd = NULL;
}
if (cmd) {
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
return list;
}
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_data_t *ra_data = NULL;
CRM_CHECK(rsc_id != NULL, return -EINVAL);
connection_rsc = controld_get_executor_state(rsc_id, false);
if (!connection_rsc || !connection_rsc->remote_ra_data) {
return -EINVAL;
}
ra_data = connection_rsc->remote_ra_data;
ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
interval_ms);
if (ra_data->cur_cmd &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
(pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
}
return 0;
}
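/*!
 * \internal
 * \brief Merge a newly requested monitor with a queued or in-flight duplicate
 *
 * If a monitor with the same interval already exists (recurring, queued, or
 * in flight), update its user data, reset its call ID if it has already
 * reported success, and reschedule it to run immediately if it was waiting
 * on its next interval.
 *
 * \param[in,out] ra_data     Remote connection data to search
 * \param[in]     interval_ms Interval of requested monitor
 * \param[in]     userdata    User data of requested monitor
 *
 * \return Matching existing monitor command if found, otherwise NULL
 */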
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
const char *userdata)
{
GList *gIter = NULL;
remote_ra_cmd_t *cmd = NULL;
/* A potential duplicate monitor operation could exist in three places:
 * 1. the recurring_cmds list, where the op is waiting for its next interval
 * 2. the cmds list, where the op is queued to be executed immediately
 * 3. cur_cmd, meaning the monitor op is in flight right now
 */
if (interval_ms == 0) {
return NULL;
}
if (ra_data->cur_cmd &&
!pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
(ra_data->cur_cmd->interval_ms == interval_ms)
&& pcmk__str_eq(ra_data->cur_cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
cmd = ra_data->cur_cmd;
goto handle_dup;
}
for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
return NULL;
handle_dup:
crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
cmd->rsc_id, PCMK_ACTION_MONITOR, interval_ms);
/* update the userdata */
if (userdata) {
free(cmd->userdata);
cmd->userdata = pcmk__str_copy(userdata);
}
/* if we've already reported success, generate a new call id */
if (pcmk_is_set(cmd->status, cmd_reported_success)) {
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
cmd_clear_flags(cmd, cmd_reported_success);
}
/* If interval_id is set, we are waiting for this command's next interval.
 * Instead of waiting, cancel the timer and execute the action
 * immediately */
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
cmd->interval_id = 0;
recurring_helper(cmd);
}
return cmd;
}
/*!
* \internal
* \brief Execute an action using the (internal) ocf:pacemaker:remote agent
*
* \param[in] lrm_state Executor state object for remote connection
* \param[in] rsc_id Connection resource ID
* \param[in] action Action to execute
* \param[in] userdata String to copy and pass to execution callback
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] timeout_ms Action timeout (in milliseconds)
* \param[in] start_delay_ms Delay (in milliseconds) before executing action
* \param[in,out] params Connection resource parameters
* \param[out] call_id Where to store call ID on success
*
* \return Standard Pacemaker return code
* \note This takes ownership of \p params, which should not be used or freed
* after calling this function.
*/
int
controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
const char *action, const char *userdata,
guint interval_ms, int timeout_ms,
int start_delay_ms, lrmd_key_value_t *params,
int *call_id)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_cmd_t *cmd = NULL;
remote_ra_data_t *ra_data = NULL;
CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
          && (userdata != NULL) && (call_id != NULL),
          lrmd_key_value_freeall(params); return EINVAL);
*call_id = 0;
if (!is_remote_ra_supported_action(action)) {
lrmd_key_value_freeall(params);
return EOPNOTSUPP;
}
connection_rsc = controld_get_executor_state(rsc_id, false);
if (connection_rsc == NULL) {
lrmd_key_value_freeall(params);
return ENOTCONN;
}
remote_ra_data_init(connection_rsc);
ra_data = connection_rsc->remote_ra_data;
cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
if (cmd) {
*call_id = cmd->call_id;
lrmd_key_value_freeall(params);
return pcmk_rc_ok;
}
cmd = pcmk__assert_alloc(1, sizeof(remote_ra_cmd_t));
cmd->owner = pcmk__str_copy(lrm_state->node_name);
cmd->rsc_id = pcmk__str_copy(rsc_id);
cmd->action = pcmk__str_copy(action);
cmd->userdata = pcmk__str_copy(userdata);
cmd->interval_ms = interval_ms;
cmd->timeout = timeout_ms;
cmd->start_delay = start_delay_ms;
cmd->params = params;
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
if (cmd->start_delay) {
cmd->delay_id = pcmk__create_timer(cmd->start_delay, start_delay_helper, cmd);
}
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
*call_id = cmd->call_id;
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Immediately fail all monitors of a remote node, if proxied here
*
* \param[in] node_name Name of pacemaker_remote node
*/
void
remote_ra_fail(const char *node_name)
{
lrm_state_t *lrm_state = NULL;
CRM_CHECK(node_name != NULL, return);
lrm_state = controld_get_executor_state(node_name, false);
if (lrm_state && lrm_state_is_connected(lrm_state)) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
}
/* A guest node fencing implied by host fencing looks like:
*
* <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
* on_node="lxc1" on_node_uuid="lxc1">
* <attributes CRM_meta_on_node="lxc1" CRM_meta_on_node_uuid="lxc1"
* CRM_meta_stonith_action="off" crm_feature_set="3.0.12"/>
* <downed>
* <node id="lxc1"/>
* </downed>
* </pseudo_event>
*/
#define XPATH_PSEUDO_FENCE "/" PCMK__XE_PSEUDO_EVENT \
"[@" PCMK_XA_OPERATION "='stonith']/" PCMK__XE_DOWNED "/" PCMK_XE_NODE
/*!
* \internal
* \brief Check a pseudo-action for Pacemaker Remote node side effects
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_pseudo(xmlNode *xml)
{
xmlXPathObject *search = pcmk__xpath_search(xml->doc, XPATH_PSEUDO_FENCE);
if (pcmk__xpath_num_results(search) == 1) {
xmlNode *result = pcmk__xpath_result(search, 0);
/* Normally, we handle the necessary side effects of a guest node stop
* action when reporting the remote agent's result. However, if the stop
* is implied due to fencing, it will be a fencing pseudo-event, and
* there won't be a result to report. Handle that case here.
*
* This will result in a duplicate call to remote_node_down() if the
* guest stop was real instead of implied, but that shouldn't hurt.
*
* There is still one corner case that isn't handled: if a guest node
* isn't running any resources when its host is fenced, it will appear
* to be cleanly stopped, so there will be no pseudo-fence, and our
* peer cache state will be incorrect unless and until the guest is
* recovered.
*/
if (result) {
const char *remote = pcmk__xe_id(result);
if (remote) {
remote_node_down(remote, DOWN_ERASE_LRM);
}
}
}
xmlXPathFreeObject(search);
}
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
xmlNode *update, *state;
int call_opt;
pcmk__node_status_t *node = NULL;
call_opt = crmd_cib_smart_opt();
node = pcmk__cluster_lookup_remote_node(lrm_state->node_name);
CRM_CHECK(node != NULL, return);
update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
state = create_node_state_update(node, controld_node_update_none, update,
__func__);
crm_xml_add(state, PCMK__XA_NODE_IN_MAINTENANCE, (maintenance? "1" : "0"));
if (controld_update_cib(PCMK_XE_STATUS, update, call_opt,
NULL) == pcmk_rc_ok) {
/* TODO: still not 100% sure that async update will succeed ... */
if (maintenance) {
lrm_remote_set_flags(lrm_state, remote_in_maint);
} else {
lrm_remote_clear_flags(lrm_state, remote_in_maint);
}
}
pcmk__xml_free(update);
}
#define XPATH_PSEUDO_MAINTENANCE "//" PCMK__XE_PSEUDO_EVENT \
"[@" PCMK_XA_OPERATION "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \
PCMK__XE_MAINTENANCE
/*!
* \internal
* \brief Check a pseudo-action holding updates for maintenance state
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
xmlXPathObject *search = pcmk__xpath_search(xml->doc,
XPATH_PSEUDO_MAINTENANCE);
if (pcmk__xpath_num_results(search) == 1) {
xmlNode *node;
int cnt = 0, cnt_remote = 0;
for (node = pcmk__xe_first_child(pcmk__xpath_result(search, 0),
PCMK_XE_NODE, NULL, NULL);
node != NULL; node = pcmk__xe_next(node, PCMK_XE_NODE)) {
lrm_state_t *lrm_state = NULL;
const char *id = pcmk__xe_id(node);
cnt++;
if (id == NULL) {
continue; // Shouldn't be possible
}
lrm_state = controld_get_executor_state(id, false);
if (lrm_state && lrm_state->remote_ra_data &&
pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
const char *in_maint_s = NULL;
int in_maint;
cnt_remote++;
in_maint_s = crm_element_value(node,
PCMK__XA_NODE_IN_MAINTENANCE);
pcmk__scan_min_int(in_maint_s, &in_maint, 0);
remote_ra_maintenance(lrm_state, in_maint);
}
}
crm_trace("Action holds %d nodes (%d remotes found) adjusting "
PCMK_OPT_MAINTENANCE_MODE,
cnt, cnt_remote);
}
xmlXPathFreeObject(search);
}
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, remote_in_maint);
}
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, controlling_guest);
}
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
index e6338882e7..262e0d1f39 100644
--- a/daemons/controld/controld_utils.h
+++ b/daemons/controld/controld_utils.h
@@ -1,87 +1,87 @@
/*
* Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef CRMD_UTILS__H
#define CRMD_UTILS__H
#include <stdint.h> // UINT32_C(), uint32_t
#include <glib.h> // gboolean
#include <libxml/tree.h> // xmlNode
#include <crm/crm.h>
#include <crm/cluster.h> // enum controld_join_phase
#include <crm/cluster/internal.h> // pcmk__node_status_t
#include <crm/common/xml.h>
# define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
//! Flags determining how the controller updates node information in the CIB
enum controld_node_update_flags {
//! This flag has no effect
controld_node_update_none = UINT32_C(0),
//! Skip loading the node list from the cluster layer
controld_node_update_quick = (UINT32_C(1) << 0),
//! Update \c PCMK__XA_IN_CCM with the time the node became a cluster member
controld_node_update_cluster = (UINT32_C(1) << 1),
//! Update \c PCMK_XA_CRMD with the time the node joined the CPG
controld_node_update_peer = (UINT32_C(1) << 2),
//! Update \c PCMK__XA_JOIN with the node's join state
controld_node_update_join = (UINT32_C(1) << 3),
//! Update \c PCMK_XA_EXPECTED with the node's expected join state
controld_node_update_expected = (UINT32_C(1) << 4),
//! Convenience alias to update all of the attributes mentioned above
controld_node_update_all = controld_node_update_cluster
|controld_node_update_peer
|controld_node_update_join
|controld_node_update_expected,
};
crm_exit_t crmd_exit(crm_exit_t exit_code);
_Noreturn void crmd_fast_exit(crm_exit_t exit_code);
void controld_shutdown_schedulerd_ipc(void);
void controld_stop_sched_timer(void);
void controld_free_sched_timer(void);
void controld_expect_sched_reply(char *ref);
void fsa_dump_actions(uint64_t action, const char *text);
void fsa_dump_inputs(int log_level, const char *text, long long input_register);
gboolean update_dc(xmlNode * msg);
void crm_update_peer_join(const char *source, pcmk__node_status_t *node,
enum controld_join_phase phase);
xmlNode *create_node_state_update(pcmk__node_status_t *node, uint32_t flags,
xmlNode *parent, const char *source);
void populate_cib_nodes(uint32_t flags, const char *source);
void crm_update_quorum(gboolean quorum, gboolean force_update);
void controld_close_attrd_ipc(void);
void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
void update_attrd_list(GList *attrs, uint32_t opts);
-void update_attrd_remote_node_removed(const char *host, const char *user_name);
+void controld_purge_node_attrs(const char *node_name, bool from_cache);
void update_attrd_clear_failures(const char *host, const char *rsc,
const char *op, const char *interval_spec,
gboolean is_remote_node);
int crmd_join_phase_count(enum controld_join_phase phase);
void crmd_join_phase_log(int level);
void crmd_peer_down(pcmk__node_status_t *peer, bool full);
bool feature_set_compatible(const char *dc_version, const char *join_version);
bool controld_is_local_node(const char *name);
pcmk__node_status_t *controld_get_local_node_status(void);
const char *get_node_id(xmlNode *lrm_rsc_op);
#endif
