diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index 2de37a7cb6..95c9e1366f 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -1,679 +1,685 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // PRIu32 #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static int last_cib_op_done = 0; +static void write_attribute(attribute_t *a, bool ignore_delay); + static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *cib = user_data; cib->cmds->signoff(cib); if (attrd_shutting_down(false)) { crm_info("Disconnected from the CIB manager"); } else { // @TODO This should trigger a reconnect, not a shutdown crm_crit("Lost connection to the CIB manager, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } static void attrd_cib_updated_cb(const char *event, xmlNode *msg) { const xmlNode *patchset = NULL; const char *client_name = NULL; if (attrd_shutting_down(true)) { return; } if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { return; } if (cib__element_in_patchset(patchset, XML_CIB_TAG_ALERTS)) { mainloop_set_trigger(attrd_config_read); } if (!attrd_election_won()) { // Don't write attributes if we're not the writer return; } client_name = crm_element_value(msg, F_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { // The CIB is still accurate return; } if (cib__element_in_patchset(patchset, XML_CIB_TAG_NODES) || cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS)) { /* An unsafe client modified the nodes or status section. Write * transient attributes to ensure they're up-to-date in the CIB. */ if (client_name == NULL) { client_name = crm_element_value(msg, F_CIB_CLIENTID); } crm_notice("Updating all attributes after %s event triggered by %s", event, pcmk__s(client_name, "(unidentified client)")); attrd_write_attributes(attrd_write_all); } } int attrd_cib_connect(int max_retry) { static int attempts = 0; int rc = -ENOTCONN; the_cib = cib_new(); if (the_cib == NULL) { return -ENOTCONN; } do { if (attempts > 0) { sleep(attempts); } attempts++; crm_debug("Connection attempt %d to the CIB manager", attempts); rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command); } while ((rc != pcmk_ok) && (attempts < max_retry)); if (rc != pcmk_ok) { crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); goto cleanup; } crm_debug("Connected to the CIB manager after %d attempts", attempts); rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); if (rc != pcmk_ok) { crm_err("Could not set disconnection callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); goto cleanup; } return pcmk_ok; cleanup: cib__clean_up_connection(&the_cib); return -ENOTCONN; } void attrd_cib_disconnect(void) { CRM_CHECK(the_cib != NULL, return); the_cib->cmds->del_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); cib__clean_up_connection(&the_cib); } static void attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { do_crm_log_unlikely(((rc != pcmk_ok)? LOG_NOTICE : LOG_DEBUG), "Cleared transient attributes: %s " CRM_XS " xpath=%s rc=%d", pcmk_strerror(rc), (char *) user_data, rc); } #define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS /*! * \internal * \brief Wipe all transient attributes for this node from the CIB * * Clear any previous transient node attributes from the CIB. This is * normally done by the DC's controller when this node leaves the cluster, but * this handles the case where the node restarted so quickly that the * cluster layer didn't notice. * * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned), * ideally we'd skip this and sync our attributes from the writer. * However, currently we reject any values for us that the writer has, in * attrd_peer_update(). */ static void attrd_erase_attrs(void) { int call_id = 0; char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", xpath); call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, "attrd_erase_cb", attrd_erase_cb, free); } /*! * \internal * \brief Prepare the CIB after cluster is connected */ void attrd_cib_init(void) { // We have no attribute values in memory, wipe the CIB to match attrd_erase_attrs(); // Set a trigger for reading the CIB (for the alerts section) attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); // Always read the CIB at start-up mainloop_set_trigger(attrd_config_read); } static gboolean attribute_timer_cb(gpointer data) { attribute_t *a = data; crm_trace("Dampen interval expired for %s", a->id); attrd_write_or_elect_attribute(a); return FALSE; } static void attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { int level = LOG_ERR; GHashTableIter iter; const char *peer = NULL; attribute_value_t *v = NULL; char *name = user_data; attribute_t *a = g_hash_table_lookup(attributes, name); if(a == NULL) { crm_info("Attribute %s no longer exists", name); return; } a->update = 0; if (rc == pcmk_ok && call_id < 0) { rc = call_id; } switch (rc) { case pcmk_ok: level = LOG_INFO; last_cib_op_done = call_id; if (a->timer && !a->timeout_ms) { // Remove temporary dampening for failed writes mainloop_timer_del(a->timer); a->timer = NULL; } break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ level = LOG_WARNING; break; } do_crm_log(level, "CIB update %d result for %s: %s " CRM_XS " rc=%d", call_id, a->id, pcmk_strerror(rc), rc); g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { do_crm_log(level, "* %s[%s]=%s", a->id, peer, v->requested); free(v->requested); v->requested = NULL; if (rc != pcmk_ok) { a->changed = true; /* Attempt write out again */ } } if (a->changed && attrd_election_won()) { if (rc == pcmk_ok) { /* We deferred a write of a new update because this update was in * progress. Write out the new value without additional delay. */ - attrd_write_attribute(a, false); + write_attribute(a, false); /* We're re-attempting a write because the original failed; delay * the next attempt so we don't potentially flood the CIB manager * and logs with a zillion attempts per second. * * @TODO We could elect a new writer instead. However, we'd have to * somehow downgrade our vote, and we'd still need something like this * if all peers similarly fail to write this attribute (which may * indicate a corrupted attribute entry rather than a CIB issue). */ } else if (a->timer) { // Attribute has a dampening value, so use that as delay if (!mainloop_timer_running(a->timer)) { crm_trace("Delayed re-attempted write for %s by %s", name, pcmk__readable_interval(a->timeout_ms)); mainloop_timer_start(a->timer); } } else { /* Set a temporary dampening of 2 seconds (timer will continue * to exist until the attribute's dampening gets set or the * write succeeds). */ a->timer = attrd_add_timer(a->id, 2000, a); mainloop_timer_start(a->timer); } } } /*! * \internal * \brief Add a set-attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] attr_id ID of attribute to update * \param[in] node_id ID of node for which to update attribute value * \param[in] set_id ID of attribute set * \param[in] value New value for attribute * * \return Standard Pacemaker return code */ static int add_set_attr_update(const attribute_t *attr, const char *attr_id, const char *node_id, const char *set_id, const char *value) { xmlNode *update = create_xml_node(NULL, XML_CIB_TAG_STATE); xmlNode *child = update; int rc = ENOMEM; if (child == NULL) { goto done; } crm_xml_add(child, XML_ATTR_ID, node_id); child = create_xml_node(child, XML_TAG_TRANSIENT_NODEATTRS); if (child == NULL) { goto done; } crm_xml_add(child, XML_ATTR_ID, node_id); child = create_xml_node(child, attr->set_type); if (child == NULL) { goto done; } crm_xml_add(child, XML_ATTR_ID, set_id); child = create_xml_node(child, XML_CIB_TAG_NVPAIR); if (child == NULL) { goto done; } crm_xml_add(child, XML_ATTR_ID, attr_id); crm_xml_add(child, XML_NVPAIR_ATTR_NAME, attr->id); crm_xml_add(child, XML_NVPAIR_ATTR_VALUE, value); rc = the_cib->cmds->modify(the_cib, XML_CIB_TAG_STATUS, update, cib_can_create|cib_transaction); rc = pcmk_legacy2rc(rc); done: free_xml(update); return rc; } /*! * \internal * \brief Add an unset-attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] attr_id ID of attribute to update * \param[in] node_id ID of node for which to update attribute value * \param[in] set_id ID of attribute set * * \return Standard Pacemaker return code */ static int add_unset_attr_update(const attribute_t *attr, const char *attr_id, const char *node_id, const char *set_id) { char *xpath = crm_strdup_printf("/" XML_CIB_TAG_STATUS "/" XML_CIB_TAG_STATE "[@" XML_ATTR_ID "='%s']" "/" XML_TAG_TRANSIENT_NODEATTRS "[@" XML_ATTR_ID "='%s']" "/%s[@" XML_ATTR_ID "='%s']" "/" XML_CIB_TAG_NVPAIR "[@" XML_ATTR_ID "='%s' " "and @" XML_NVPAIR_ATTR_NAME "='%s']", node_id, node_id, attr->set_type, set_id, attr_id, attr->id); int rc = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath|cib_transaction); free(xpath); return pcmk_legacy2rc(rc); } /*! * \internal * \brief Add an attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] value New value for attribute * \param[in] node_id ID of node for which to update attribute value * * \return Standard Pacemaker return code */ static int add_attr_update(const attribute_t *attr, const char *value, const char *node_id) { char *set_id = NULL; char *attr_id = NULL; int rc = pcmk_rc_ok; if (attr->set_id != NULL) { pcmk__str_update(&set_id, attr->set_id); } else { set_id = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, node_id); } crm_xml_sanitize_id(set_id); if (attr->uuid != NULL) { pcmk__str_update(&attr_id, attr->uuid); } else { attr_id = crm_strdup_printf("%s-%s", set_id, attr->id); } crm_xml_sanitize_id(attr_id); if (value != NULL) { rc = add_set_attr_update(attr, attr_id, node_id, set_id, value); } else { rc = add_unset_attr_update(attr, attr_id, node_id, set_id); } free(set_id); free(attr_id); return rc; } static void send_alert_attributes_value(attribute_t *a, GHashTable *t) { int rc = 0; attribute_value_t *at = NULL; GHashTableIter vIter; g_hash_table_iter_init(&vIter, t); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { rc = attrd_send_attribute_alert(at->nodename, at->nodeid, a->id, at->current); crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d", a->id, at->nodename, at->current, at->nodeid, rc); } } static void set_alert_attribute_value(GHashTable *t, attribute_value_t *v) { attribute_value_t *a_v = NULL; a_v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(a_v != NULL); a_v->nodeid = v->nodeid; a_v->nodename = strdup(v->nodename); pcmk__str_update(&a_v->current, v->current); g_hash_table_replace(t, a_v->nodename, a_v); } mainloop_timer_t * attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr) { return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr); } -void -attrd_write_attribute(attribute_t *a, bool ignore_delay) +/*! + * \internal + * \brief Write an attribute's values to the CIB if appropriate + * + * \param[in,out] a Attribute to write + * \param[in] ignore_delay If true, write attribute now regardless of any + * configured delay + */ +static void +write_attribute(attribute_t *a, bool ignore_delay) { int private_updates = 0, cib_updates = 0; attribute_value_t *v = NULL; GHashTableIter iter; GHashTable *alert_attribute_value = NULL; int rc = pcmk_ok; if (a == NULL) { return; } /* If this attribute will be written to the CIB ... */ if (!stand_alone && !a->is_private) { /* Defer the write if now's not a good time */ if (a->update && (a->update < last_cib_op_done)) { - crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); + crm_info("Write out of '%s' continuing: update %d considered lost", + a->id, a->update); a->update = 0; // Don't log this message again } else if (a->update) { - crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); + crm_info("Write out of '%s' delayed: update %d in progress", + a->id, a->update); goto done; } else if (mainloop_timer_running(a->timer)) { if (ignore_delay) { - /* 'refresh' forces a write of the current value of all attributes - * Cancel any existing timers, we're writing it NOW - */ mainloop_timer_stop(a->timer); - crm_debug("Write out of '%s': timer is running but ignore delay", a->id); + crm_debug("Overriding '%s' write delay", a->id); } else { - crm_info("Write out of '%s' delayed: timer is running", a->id); + crm_info("Delaying write of '%s'", a->id); goto done; } } // Initiate a transaction for all the peer value updates CRM_CHECK(the_cib != NULL, goto done); the_cib->cmds->set_user(the_cib, a->user); rc = the_cib->cmds->init_transaction(the_cib); if (rc != pcmk_ok) { crm_err("Failed to write %s (id %s, set %s): Could not initiate " "CIB transaction", a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); goto done; } } /* Attribute will be written shortly, so clear changed flag */ a->changed = false; /* We will check all peers' uuids shortly, so initialize this to false */ a->unknown_peer_uuids = false; /* Attribute will be written shortly, so clear forced write flag */ a->force_write = FALSE; /* Make the table for the attribute trap */ - alert_attribute_value = pcmk__strikey_table(NULL, attrd_free_attribute_value); + alert_attribute_value = pcmk__strikey_table(NULL, + attrd_free_attribute_value); /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); - while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) { - crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { + crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, + CRM_GET_PEER_ANY); /* If the value's peer info does not correspond to a peer, ignore it */ if (peer == NULL) { crm_notice("Cannot update %s[%s]=%s because peer not known", a->id, v->nodename, v->current); continue; } /* If we're just learning the peer's node id, remember it */ if (peer->id && (v->nodeid == 0)) { crm_trace("Learned ID %u for node %s", peer->id, v->nodename); v->nodeid = peer->id; } /* If this is a private attribute, no update needs to be sent */ if (stand_alone || a->is_private) { private_updates++; continue; } /* If the peer is found, but its uuid is unknown, defer write */ if (peer->uuid == NULL) { a->unknown_peer_uuids = true; crm_notice("Cannot update %s[%s]=%s because peer UUID not known " "(will retry if learned)", a->id, v->nodename, v->current); continue; } // Update this value as part of the CIB transaction we're building rc = add_attr_update(a, v->current, peer->uuid); if (rc != pcmk_rc_ok) { crm_err("Failed to update %s[%s]=%s (peer known as %s, UUID %s, " "ID %" PRIu32 "/%" PRIu32 "): %s", a->id, v->nodename, v->current, peer->uname, peer->uuid, peer->id, v->nodeid, pcmk_rc_str(rc)); continue; } crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID " "%" PRIu32 "/%" PRIu32 ")", a->id, v->nodename, v->current, peer->uname, peer->uuid, peer->id, v->nodeid); cib_updates++; /* Preservation of the attribute to transmit alert */ set_alert_attribute_value(alert_attribute_value, v); free(v->requested); v->requested = NULL; if (v->current) { v->requested = strdup(v->current); } } if (private_updates) { crm_info("Processed %d private change%s for %s, id=%s, set=%s", private_updates, pcmk__plural_s(private_updates), a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); } if (cib_updates > 0) { char *id = NULL; // Commit transaction a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none); crm_info("Sent CIB request %d with %d change%s for %s (id %s, set %s)", a->update, cib_updates, pcmk__plural_s(cib_updates), a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); pcmk__str_update(&id, a->id); if (the_cib->cmds->register_callback_full(the_cib, a->update, CIB_OP_TIMEOUT_S, FALSE, id, "attrd_cib_callback", attrd_cib_callback, free)) { // Transmit alert of the attribute send_alert_attributes_value(a, alert_attribute_value); } } done: // Discard transaction (if any) if (the_cib != NULL) { the_cib->cmds->end_transaction(the_cib, false, cib_none); the_cib->cmds->set_user(the_cib, NULL); } if (alert_attribute_value != NULL) { g_hash_table_destroy(alert_attribute_value); } } /*! * \internal * \brief Write out attributes * * \param[in] options Group of enum attrd_write_options */ void attrd_write_attributes(uint32_t options) { GHashTableIter iter; attribute_t *a = NULL; crm_debug("Writing out %s attributes", pcmk_is_set(options, attrd_write_all)? "all" : "changed"); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { - if (pcmk_is_set(options, attrd_write_skip_shutdown) - && pcmk__str_eq(a->id, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { - continue; - } - if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) { // Try writing this attribute again, in case peer ID was learned a->changed = true; } else if (a->force_write) { /* If the force_write flag is set, write the attribute. */ a->changed = true; } if (pcmk_is_set(options, attrd_write_all) || a->changed) { bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay); if (a->force_write) { // Always ignore delay when forced write flag is set ignore_delay = true; } - attrd_write_attribute(a, ignore_delay); + write_attribute(a, ignore_delay); } else { crm_trace("Skipping unchanged attribute %s", a->id); } } } void attrd_write_or_elect_attribute(attribute_t *a) { if (attrd_election_won()) { - attrd_write_attribute(a, false); + write_attribute(a, false); } else { attrd_start_election_if_needed(); } } diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c index a95cd44cbd..62310ed1d8 100644 --- a/daemons/attrd/attrd_elections.c +++ b/daemons/attrd/attrd_elections.c @@ -1,197 +1,189 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include "pacemaker-attrd.h" static char *peer_writer = NULL; static election_t *writer = NULL; static gboolean attrd_election_cb(gpointer user_data) { attrd_declare_winner(); if (attrd_shutting_down(true)) { /* This node is shutting down or about to, meaning its attributes will * be removed (and may have already been removed from the CIB by a * controller). Don't sync or write its attributes in this case. */ return G_SOURCE_REMOVE; } /* Update the peers after an election */ attrd_peer_sync(NULL, NULL); /* After winning an election, update the CIB with the values of all * attributes as the winner knows them. - * - * However, do not write out any "shutdown" attributes. A node that is - * shutting down will have all its transient attributes removed from the CIB - * when its controller exits, and from the attribute manager's memory (on - * remaining nodes) when its attribute manager exits; if an election is won - * between when those two things happen, we don't want to write the shutdown - * attribute back out, which would cause the node to immediately shut down - * the next time it rejoins. */ - attrd_write_attributes(attrd_write_all|attrd_write_skip_shutdown); + attrd_write_attributes(attrd_write_all); return G_SOURCE_REMOVE; } void attrd_election_init(void) { writer = election_init(T_ATTRD, attrd_cluster->uname, 120000, attrd_election_cb); } void attrd_election_fini(void) { election_fini(writer); } void attrd_start_election_if_needed(void) { if ((peer_writer == NULL) && (election_state(writer) != election_in_progress) && !attrd_shutting_down(false)) { crm_info("Starting an election to determine the writer"); election_vote(writer); } } bool attrd_election_won(void) { return (election_state(writer) == election_won); } void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml) { enum election_result rc = 0; enum election_result previous = election_state(writer); crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname); // Don't become writer if we're shutting down rc = election_count_vote(writer, xml, !attrd_shutting_down(false)); switch(rc) { case election_start: crm_debug("Unsetting writer (was %s) and starting new election", peer_writer? peer_writer : "unset"); free(peer_writer); peer_writer = NULL; election_vote(writer); break; case election_lost: /* The election API should really distinguish between "we just lost * to this peer" and "we already lost previously, and we are * discarding this vote for some reason", but it doesn't. * * In the first case, we want to tentatively set the peer writer to * this peer, even though another peer may eventually win (which we * will learn via attrd_check_for_new_writer()), so * attrd_start_election_if_needed() doesn't start a new election. * * Approximate a test for that case as best as possible. */ if ((peer_writer == NULL) || (previous != election_lost)) { pcmk__str_update(&peer_writer, peer->uname); crm_debug("Election lost, presuming %s is writer for now", peer_writer); } break; case election_in_progress: election_check(writer); break; default: crm_info("Ignoring election op from %s due to error", peer->uname); break; } } bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml) { int peer_state = 0; crm_element_value_int(xml, PCMK__XA_ATTR_WRITER, &peer_state); if (peer_state == election_won) { if ((election_state(writer) == election_won) && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { crm_notice("Detected another attribute writer (%s), starting new election", peer->uname); election_vote(writer); } else if (!pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) { crm_notice("Recorded new attribute writer: %s (was %s)", peer->uname, (peer_writer? peer_writer : "unset")); pcmk__str_update(&peer_writer, peer->uname); } } return (peer_state == election_won); } void attrd_declare_winner(void) { crm_notice("Recorded local node as attribute writer (was %s)", (peer_writer? peer_writer : "unset")); pcmk__str_update(&peer_writer, attrd_cluster->uname); } void attrd_remove_voter(const crm_node_t *peer) { election_remove(writer, peer->uname); if (peer_writer && pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) { free(peer_writer); peer_writer = NULL; crm_notice("Lost attribute writer %s", peer->uname); /* Clear any election dampening in effect. Otherwise, if the lost writer * had just won, the election could fizzle out with no new writer. */ election_clear_dampening(writer); /* If the writer received attribute updates during its shutdown, it will * not have written them to the CIB. Ensure we get a new writer so they * are written out. This means that every node that sees the writer * leave will start a new election, but that's better than losing * attributes. */ attrd_start_election_if_needed(); /* If an election is in progress, we need to call election_check(), in case * this lost peer is the only one that hasn't voted, otherwise the election * would be pending until it's timed out. */ } else if (election_state(writer) == election_in_progress) { crm_debug("Checking election status upon loss of voter %s", peer->uname); election_check(writer); } } void attrd_xml_add_writer(xmlNode *xml) { crm_xml_add_int(xml, PCMK__XA_ATTR_WRITER, election_state(writer)); } diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index e3c369b5bc..b8929a7f75 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -1,223 +1,221 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #ifndef PACEMAKER_ATTRD__H # define PACEMAKER_ATTRD__H #include #include #include #include #include #include #include /* * Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used * with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned. * * With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every * peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will * also set a private attribute for itself with its version, so any attrd can * determine the minimum version supported by all peers. * * Protocol Pacemaker Significant changes * -------- --------- ------------------- * 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only), * PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH, * PCMK__ATTRD_CMD_FLUSH, PCMK__ATTRD_CMD_SYNC, * PCMK__ATTRD_CMD_SYNC_RESPONSE * 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_PATTERN), * PCMK__ATTRD_CMD_QUERY * 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH, * PCMK__ATTRD_CMD_UPDATE_DELAY * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes * 4 2.1.5 Multiple attributes can be updated in a single IPC * message * 5 2.1.5 Peers can request confirmation of a sent message */ #define ATTRD_PROTOCOL_VERSION "5" #define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) #define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) #define attrd_send_ack(client, id, flags) \ pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) void attrd_init_mainloop(void); void attrd_run_mainloop(void); void attrd_set_requesting_shutdown(void); void attrd_clear_requesting_shutdown(void); void attrd_free_waitlist(void); bool attrd_shutting_down(bool if_requested); void attrd_shutdown(int nsig); void attrd_init_ipc(void); void attrd_ipc_fini(void); int attrd_cib_connect(int max_retry); void attrd_cib_disconnect(void); void attrd_cib_init(void); bool attrd_value_needs_expansion(const char *value); int attrd_expand_value(const char *value, const char *old_value); /* regular expression to clear failures of all resources */ #define ATTRD_RE_CLEAR_ALL \ "^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-" /* regular expression to clear failure of all operations for one resource * (format takes resource name) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s(#.+_[0-9]+)?$" /* regular expression to clear failure of one operation for one resource * (format takes resource name, operation name, and interval) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s(#%s_%u)?$" int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, guint interval_ms); extern cib_t *the_cib; extern crm_exit_t attrd_exit_status; /* Alerts */ extern lrmd_t *the_lrmd; extern crm_trigger_t *attrd_config_read; void attrd_lrmd_disconnect(void); gboolean attrd_read_options(gpointer user_data); int attrd_send_attribute_alert(const char *node, int nodeid, const char *attr, const char *value); // Elections void attrd_election_init(void); void attrd_election_fini(void); void attrd_start_election_if_needed(void); bool attrd_election_won(void); void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml); bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml); void attrd_declare_winner(void); void attrd_remove_voter(const crm_node_t *peer); void attrd_xml_add_writer(xmlNode *xml); typedef struct attribute_s { char *uuid; /* TODO: Remove if at all possible */ char *id; char *set_id; char *set_type; GHashTable *values; int update; int timeout_ms; /* TODO: refactor these three as a bitmask */ bool changed; /* whether attribute value has changed since last write */ bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ gboolean is_private; /* whether to keep this attribute out of the CIB */ mainloop_timer_t *timer; char *user; gboolean force_write; /* Flag for updating attribute by ignoring delay */ } attribute_t; typedef struct attribute_value_s { uint32_t nodeid; gboolean is_remote; char *nodename; char *current; char *requested; gboolean seen; } attribute_value_t; extern crm_cluster_t *attrd_cluster; extern GHashTable *attributes; extern GHashTable *peer_protocol_vers; #define CIB_OP_TIMEOUT_S 120 int attrd_cluster_connect(void); void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, bool filter); void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); void attrd_peer_remove(const char *host, bool uncache, const char *source); void attrd_peer_clear_failure(pcmk__request_t *request); void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml); void attrd_broadcast_protocol(void); xmlNode *attrd_client_peer_remove(pcmk__request_t *request); xmlNode *attrd_client_clear_failure(pcmk__request_t *request); xmlNode *attrd_client_update(pcmk__request_t *request); xmlNode *attrd_client_refresh(pcmk__request_t *request); xmlNode *attrd_client_query(pcmk__request_t *request); gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, const attribute_value_t *v, bool force_write); void attrd_clear_value_seen(void); void attrd_free_attribute(gpointer data); void attrd_free_attribute_value(gpointer data); attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); enum attrd_write_options { attrd_write_changed = 0, attrd_write_all = (1 << 0), attrd_write_no_delay = (1 << 1), - attrd_write_skip_shutdown = (1 << 2), }; -void attrd_write_attribute(attribute_t *a, bool ignore_delay); void attrd_write_attributes(uint32_t options); void attrd_write_or_elect_attribute(attribute_t *a); extern int minimum_protocol_version; void attrd_remove_peer_protocol_ver(const char *host); void attrd_update_minimum_protocol_ver(const char *host, const char *value); mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); void attrd_unregister_handlers(void); void attrd_handle_request(pcmk__request_t *request); enum attrd_sync_point { attrd_sync_point_local, attrd_sync_point_cluster, }; typedef int (*attrd_confirmation_action_fn)(xmlNode *); void attrd_add_client_to_waitlist(pcmk__request_t *request); void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); int attrd_cluster_sync_point_update(xmlNode *xml); void attrd_do_not_expect_from_peer(const char *host); void attrd_do_not_wait_for_client(pcmk__client_t *client); void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); void attrd_free_confirmations(void); void attrd_handle_confirmation(int callid, const char *host); void attrd_remove_client_from_waitlist(pcmk__client_t *client); const char *attrd_request_sync_point(xmlNode *xml); bool attrd_request_has_sync_point(xmlNode *xml); void attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest); extern gboolean stand_alone; #endif /* PACEMAKER_ATTRD__H */ diff --git a/devel/Makefile.am b/devel/Makefile.am index a463875253..4b970aeec2 100644 --- a/devel/Makefile.am +++ b/devel/Makefile.am @@ -1,309 +1,333 @@ # # Copyright 2020-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk include $(top_srcdir)/mk/release.mk # Coccinelle is a tool that takes special patch-like files (called semantic patches) and # applies them throughout a source tree. This is useful when refactoring, changing APIs, # catching dangerous or incorrect code, and other similar tasks. It's not especially # easy to write a semantic patch but most users should only be concerned about running # the target and inspecting the results. # # Documentation (including examples, which are the most useful): # https://coccinelle.gitlabpages.inria.fr/website/docs/ # # Run the "make cocci" target to just output what would be done, or "make cocci-inplace" # to apply the changes to the source tree. # # COCCI_FILES may be set on the command line, if you want to test just a single file # while it's under development. Otherwise, it is a list of all the files that are ready # to be run. # # ref-passed-variables-inited.cocci seems to be returning some false positives around # GHashTableIters, so it is disabled for the moment. COCCI_FILES ?= coccinelle/string-any-of.cocci \ coccinelle/string-empty.cocci \ coccinelle/string-null-matches.cocci \ coccinelle/use-func.cocci dist_noinst_SCRIPTS = coccinelle/test/testrunner.sh EXTRA_DIST = README \ gdbhelpers \ $(COCCI_FILES) \ coccinelle/ref-passed-variables-inited.cocci \ coccinelle/rename-fn.cocci \ coccinelle/test/ref-passed-variables-inited.input.c \ coccinelle/test/ref-passed-variables-inited.output # Any file in this list is allowed to use any of the pcmk__ internal functions. # Coccinelle can use any transformation that depends on "internal" to rewrite # code to use the internal functions. MAY_USE_INTERNAL_FILES = $(shell find .. -path "../lib/*.c" -o -path "../lib/*private.h" -o -path "../tools/*.c" -o -path "../daemons/*.c" -o -path '../include/pcmki/*h' -o -name '*internal.h') # And then any file in this list is public API, which may not use internal # functions. Thus, only those transformations that do not depend on "internal" # may be applied. OTHER_FILES = $(shell find ../include -name '*h' -a \! -name '*internal.h' -a \! -path '../include/pcmki/*') .PHONY: cocci cocci: -for cf in $(COCCI_FILES); do \ for f in $(MAY_USE_INTERNAL_FILES); do \ spatch $(_SPATCH_FLAGS) -D internal --very-quiet --local-includes --preprocess --sp-file $$cf $$f; \ done ; \ for f in $(OTHER_FILES); do \ spatch $(_SPATCH_FLAGS) --very-quiet --local-includes --preprocess --sp-file $$cf $$f; \ done ; \ done .PHONY: cocci-inplace cocci-inplace: $(MAKE) $(AM_MAKEFLAGS) _SPATCH_FLAGS=--in-place cocci .PHONY: cocci-test cocci-test: for f in coccinelle/test/*.c; do \ coccinelle/test/testrunner.sh $$f; \ done # # Static analysis # ## clang # See scan-build(1) for possible checkers (leave empty to use default set) CLANG_checkers ?= .PHONY: clang clang: OUT=$$(cd $(top_builddir) \ && scan-build $(CLANG_checkers:%=-enable-checker %) \ $(MAKE) $(AM_MAKEFLAGS) CFLAGS="-std=c99 $(CFLAGS)" \ clean all 2>&1); \ REPORT=$$(echo "$$OUT" \ | sed -n -e "s/.*'scan-view \(.*\)'.*/\1/p"); \ [ -z "$$REPORT" ] && echo "$$OUT" || scan-view "$$REPORT" ## coverity # Aggressiveness (low, medium, or high) COVLEVEL ?= low # Generated outputs COVERITY_DIR = $(abs_top_builddir)/coverity-$(TAG) COVTAR = $(abs_top_builddir)/$(PACKAGE)-coverity-$(TAG).tgz COVEMACS = $(abs_top_builddir)/$(TAG).coverity COVHTML = $(COVERITY_DIR)/output/errors # Coverity outputs are phony so they get rebuilt every invocation .PHONY: $(COVERITY_DIR) $(COVERITY_DIR): coverity-clean $(MAKE) $(AM_MAKEFLAGS) -C $(top_builddir) init core-clean $(AM_V_GEN)cd $(top_builddir) \ && cov-build --dir "$@" $(MAKE) $(AM_MAKEFLAGS) core # Public coverity instance .PHONY: $(COVTAR) $(COVTAR): $(COVERITY_DIR) $(AM_V_GEN)tar czf "$@" --transform="s@.*$(TAG)@cov-int@" "$<" .PHONY: coverity coverity: $(COVTAR) @echo "Now go to https://scan.coverity.com/users/sign_in and upload:" @echo " $(COVTAR)" @echo "then make clean at the top level" # Licensed coverity instance # # The prerequisites are a little hacky; rather than actually required, some # of them are designed so that things execute in the proper order (which is # not the same as GNU make's order-only prerequisites). .PHONY: coverity-analyze coverity-analyze: $(COVERITY_DIR) @echo "" @echo "Analyzing (waiting for coverity license if necessary) ..." cd $(top_builddir) && cov-analyze --dir "$<" --wait-for-license \ --security --aggressiveness-level "$(COVLEVEL)" .PHONY: $(COVEMACS) $(COVEMACS): coverity-analyze $(AM_V_GEN)cd $(top_builddir) \ && cov-format-errors --dir "$(COVERITY_DIR)" --emacs-style > "$@" .PHONY: $(COVHTML) $(COVHTML): $(COVEMACS) $(AM_V_GEN)cd $(top_builddir) \ && cov-format-errors --dir "$(COVERITY_DIR)" --html-output "$@" .PHONY: coverity-corp coverity-corp: $(COVHTML) $(MAKE) $(AM_MAKEFLAGS) -C $(top_builddir) core-clean @echo "Done. See:" @echo " file://$(COVHTML)/index.html" @echo "When no longer needed, make coverity-clean" # Remove all outputs regardless of tag .PHONY: coverity-clean coverity-clean: -rm -rf "$(abs_builddir)"/coverity-* \ "$(abs_builddir)"/$(PACKAGE)-coverity-*.tgz \ "$(abs_builddir)"/*.coverity ## cppcheck # Use CPPCHECK_ARGS to pass extra cppcheck options, e.g.: # --enable={warning,style,performance,portability,information,all} # --inconclusive --std=posix # -DBUILD_PUBLIC_LIBPACEMAKER -DDEFAULT_CONCURRENT_FENCING_TRUE CPPCHECK_ARGS ?= CPPCHECK_DIRS = replace lib daemons tools CPPCHECK_OUT = $(abs_top_builddir)/cppcheck.out .PHONY: cppcheck cppcheck: cppcheck $(CPPCHECK_ARGS) -I $(top_srcdir)/include \ --output-file=$(CPPCHECK_OUT) \ --max-configs=30 --inline-suppr -q \ --library=posix --library=gnu --library=gtk \ $(GLIB_CFLAGS) -D__GNUC__ \ $(foreach dir,$(CPPCHECK_DIRS),$(top_srcdir)/$(dir)) @echo "Done: See $(CPPCHECK_OUT)" @echo "When no longer needed, make cppcheck-clean" .PHONY: cppcheck-clean cppcheck-clean: -rm -f "$(CPPCHECK_OUT)" # # Coverage/profiling # COVERAGE_DIR = $(top_builddir)/coverage # Check coverage of unit tests .PHONY: coverage coverage: coverage-partial-clean cd $(top_builddir) \ && $(MAKE) $(AM_MAKEFLAGS) \ && lcov --no-external --exclude='*_test.c' -c -i -d . \ -o pacemaker_base.info \ && $(MAKE) $(AM_MAKEFLAGS) check \ && lcov --no-external --exclude='*_test.c' -c -d . \ -o pacemaker_test.info \ && lcov -a pacemaker_base.info -a pacemaker_test.info \ -o pacemaker_total.info \ && lcov --remove pacemaker_total.info -o pacemaker_filtered.info\ "$(abs_top_builddir)/tools/*" \ "$(abs_top_builddir)/daemons/*/*" \ "$(abs_top_builddir)/replace/*" \ "$(abs_top_builddir)/lib/gnu/*" genhtml $(top_builddir)/pacemaker_filtered.info -o $(COVERAGE_DIR) -s -t "Pacemaker code coverage" # Check coverage of CLI regression tests .PHONY: coverage-cts coverage-cts: coverage-partial-clean cd $(top_builddir) \ && $(MAKE) $(AM_MAKEFLAGS) \ && lcov --no-external -c -i -d tools -o pacemaker_base.info \ && cts/cts-cli \ && lcov --no-external -c -d tools -o pacemaker_test.info \ && lcov -a pacemaker_base.info -a pacemaker_test.info \ -o pacemaker_total.info genhtml $(top_builddir)/pacemaker_total.info -o $(COVERAGE_DIR) -s # Remove coverage-related files that aren't needed across runs .PHONY: coverage-partial-clean coverage-partial-clean: -rm -f $(top_builddir)/pacemaker_*.info -rm -rf $(COVERAGE_DIR) -find $(top_builddir) -name "*.gcda" -exec rm -f \{\} \; # This target removes all coverage-related files. It is only to be run when # done with coverage analysis and you are ready to go back to normal development, # starting with re-running ./configure. It is not to be run in between # "make coverage" runs. # # In particular, the *.gcno files are generated when the source is built. # Removing those files will break "make coverage" until the whole source tree # has been built and the *.gcno files generated again. .PHONY: coverage-clean coverage-clean: coverage-partial-clean -find $(top_builddir) -name "*.gcno" -exec rm -f \{\} \; # # indent cannot cope with all our exceptions and needs heavy manual editing # # indent target: Limit indent to these directories INDENT_DIRS ?= . # indent target: Extra options to pass to indent INDENT_OPTS ?= INDENT_IGNORE_PATHS = daemons/controld/controld_fsa.h \ lib/gnu/* INDENT_PACEMAKER_STYLE = --blank-lines-after-declarations \ --blank-lines-after-procedures \ --braces-after-func-def-line \ --braces-on-if-line \ --braces-on-struct-decl-line \ --break-before-boolean-operator \ --case-brace-indentation4 \ --case-indentation4 \ --comment-indentation0 \ --continuation-indentation4 \ --continue-at-parentheses \ --cuddle-do-while \ --cuddle-else \ --declaration-comment-column0 \ --declaration-indentation1 \ --else-endif-column0 \ --honour-newlines \ --indent-label0 \ --indent-level4 \ --line-comments-indentation0 \ --line-length80 \ --no-blank-lines-after-commas \ --no-comment-delimiters-on-blank-lines \ --no-space-after-function-call-names \ --no-space-after-parentheses \ --no-tabs \ --preprocessor-indentation2 \ --procnames-start-lines \ --space-after-cast \ --start-left-side-of-comments \ --swallow-optional-blank-lines \ --tab-size8 .PHONY: indent indent: VERSION_CONTROL=none \ find $(INDENT_DIRS) -type f -name "*.[ch]" \ $(INDENT_IGNORE_PATHS:%= ! -path '%') \ -exec indent $(INDENT_PACEMAKER_STYLE) $(INDENT_OPTS) \{\} \; +# +# Check whether copyrights have been updated appropriately +# (Set COMMIT to desired commit or commit range to check, defaulting to HEAD, +# or set it empty to check uncommitted changes) +# +YEAR = $(shell date +%Y) +MODIFIED_FILES = $(shell case "$(COMMIT)" in \ + [0-9a-f]*$(rparen) \ + git diff-tree --no-commit-id \ + --name-only "$(COMMIT)" -r ;; \ + *$(rparen) \ + cd "$(top_srcdir)"; \ + git ls-files --modified ;; \ + esac) + +.PHONY: copyright +copyright: + @cd "$(top_srcdir)" && for file in $(MODIFIED_FILES); do \ + if ! grep 'opyright .*$(YEAR).* Pacemaker' "$$file" \ + >/dev/null 2>&1; then \ + echo "$$file"; \ + fi; \ + done + # # Scratch file for ad-hoc testing # EXTRA_PROGRAMS = scratch nodist_scratch_SOURCES = scratch.c scratch_LDADD = $(top_builddir)/lib/common/libcrmcommon.la .PHONY: clean-local clean-local: coverage-clean coverity-clean cppcheck-clean -rm -f $(EXTRA_PROGRAMS)