diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index 4695ca816b..ef205e6e13 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -1,619 +1,620 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" extern crm_exit_t attrd_exit_status; static xmlNode * attrd_confirmation(int callid) { xmlNode *node = create_xml_node(NULL, __func__); crm_xml_add(node, F_TYPE, T_ATTRD); crm_xml_add(node, F_ORIG, get_local_node_name()); crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); return node; } static void attrd_peer_message(crm_node_t *peer, xmlNode *xml) { const char *election_op = crm_element_value(xml, F_CRM_TASK); if (election_op) { attrd_handle_election_op(peer, xml); return; } if (attrd_shutting_down()) { /* If we're shutting down, we want to continue responding to election * ops as long as we're a cluster member (because our vote may be * needed). Ignore all other messages. */ return; } else { pcmk__request_t request = { .ipc_client = NULL, .ipc_id = 0, .ipc_flags = 0, .peer = peer->uname, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); CRM_CHECK(request.op != NULL, return); attrd_handle_request(&request); /* Having finished handling the request, check to see if the originating * peer requested confirmation. If so, send that confirmation back now. */ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { int callid = 0; xmlNode *reply = NULL; /* Add the confirmation ID for the message we are confirming to the * response so the originating peer knows what they're a confirmation * for. */ crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); reply = attrd_confirmation(callid); /* And then send the confirmation back to the originating peer. This * ends up right back in this same function (attrd_peer_message) on the * peer where it will have to do something with a PCMK__XA_CONFIRM type * message. */ crm_debug("Sending %s a confirmation", peer->uname); attrd_send_message(peer, reply, false); free_xml(reply); } pcmk__reset_request(&request); } } static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); } if (xml == NULL) { crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); } else { crm_node_t *peer = crm_get_peer(nodeid, from); attrd_peer_message(peer, xml); } free_xml(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down()) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to cluster layer, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } /*! * \internal * \brief Override an attribute sync with a local value * * Broadcast the local node's value for an attribute that's different from the * value provided in a peer's attribute synchronization response. This ensures a * node's values for itself take precedence and all peers are kept in sync. * * \param[in] a Attribute entry to override * * \return Local instance of attribute value */ static attribute_value_t * broadcast_local_value(const attribute_t *a) { attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname); xmlNode *sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); attrd_add_value_xml(sync, a, v, false); attrd_send_message(NULL, sync, false); free_xml(sync); return v; } /*! * \internal * \brief Ensure a Pacemaker Remote node is in the correct peer cache * * \param[in] node_name Name of Pacemaker Remote node to check */ static void cache_remote_node(const char *node_name) { /* If we previously assumed this node was an unseen cluster node, * remove its entry from the cluster peer cache. */ crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name); if (dup && (dup->uuid == NULL)) { reap_crm_member(0, node_name); } // Ensure node is in the remote peer cache CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); } #define state_text(state) pcmk__s((state), "in unknown state") /*! * \internal * \brief Return host's hash table entry (creating one if needed) * * \param[in,out] values Hash table of values * \param[in] host Name of peer to look up * \param[in] xml XML describing the attribute * * \return Pointer to new or existing hash table entry */ static attribute_value_t * attrd_lookup_or_create_value(GHashTable *values, const char *host, const xmlNode *xml) { attribute_value_t *v = g_hash_table_lookup(values, host); int is_remote = 0; crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); if (is_remote) { cache_remote_node(host); } if (v == NULL) { v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(v != NULL); pcmk__str_update(&v->nodename, host); v->is_remote = is_remote; g_hash_table_replace(values, v->nodename, v); } return(v); } static void attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) { bool gone = false; bool is_remote = pcmk_is_set(peer->flags, crm_remote_node); switch (kind) { case crm_status_uname: crm_debug("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state)); break; case crm_status_processes: if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { gone = true; } crm_debug("Node %s is %s a peer", peer->uname, (gone? "no longer" : "now")); break; case crm_status_nstate: crm_debug("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state), state_text(data)); if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if (attrd_election_won() && !pcmk_is_set(peer->flags, crm_remote_node)) { attrd_peer_sync(peer, NULL); } } else { // Remove all attribute values associated with lost nodes attrd_peer_remove(peer->uname, false, "loss"); gone = true; } break; } // Remove votes from cluster nodes that leave, in case election in progress if (gone && !is_remote) { attrd_remove_voter(peer); attrd_remove_peer_protocol_ver(peer->uname); attrd_do_not_expect_from_peer(peer->uname); // Ensure remote nodes that come up are in the remote node cache } else if (!gone && is_remote) { cache_remote_node(peer->uname); } } static void record_peer_nodeid(attribute_value_t *v, const char *host) { crm_node_t *known_peer = crm_get_peer(v->nodeid, host); crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); if (attrd_election_won()) { attrd_write_attributes(false, false); } } static void update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, const char *attr, const char *value, const char *host, bool filter, int is_force_write) { attribute_value_t *v = NULL; v = attrd_lookup_or_create_value(a->values, host, xml); if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei) && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", attr, host, v->current, value, peer->uname); v = broadcast_local_value(a); } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) { - crm_notice("Setting %s[%s]: %s -> %s " + crm_notice("Setting %s[%s]%s%s: %s -> %s " CRM_XS " from %s with %s write delay", - attr, host, pcmk__s(v->current, "(unset)"), + attr, host, a->set_type ? " in " : "", + pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"), pcmk__s(value, "(unset)"), peer->uname, (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); pcmk__str_update(&v->current, value); a->changed = true; if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei) && pcmk__str_eq(attr, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { attrd_set_requesting_shutdown(); } else { attrd_clear_requesting_shutdown(); } } // Write out new value or start dampening timer if (a->timeout_ms && a->timer) { crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr); mainloop_timer_start(a->timer); } else { attrd_write_or_elect_attribute(a); } } else { if (is_force_write == 1 && a->timeout_ms && a->timer) { /* Save forced writing and set change flag. */ /* The actual attribute is written by Writer after election. */ crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)", attr, host, peer->uname, value); a->force_write = TRUE; } else { crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); } } /* Set the seen flag for attribute processing held only in the own node. */ v->seen = TRUE; /* If this is a cluster node whose node ID we are learning, remember it */ if ((v->nodeid == 0) && (v->is_remote == FALSE) && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { record_peer_nodeid(v, host); } } static void attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) { attribute_t *a = NULL; const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); int is_force_write = 0; if (attr == NULL) { crm_warn("Could not update attribute: peer did not specify name"); return; } crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); a = attrd_populate_attribute(xml, attr); if (a == NULL) { return; } if (host == NULL) { // If no host was specified, update all hosts GHashTableIter vIter; crm_debug("Setting %s for all hosts to %s", attr, value); xml_remove_prop(xml, PCMK__XA_ATTR_NODE_ID); g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); } } else { // Update attribute value for the given host update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); } /* If this is a message from some attrd instance broadcasting its protocol * version, check to see if it's a new minimum version. */ if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { attrd_update_minimum_protocol_ver(peer->uname, value); } } static void broadcast_unseen_local_values(void) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei)) { if (sync == NULL) { sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); } attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer); } } } if (sync != NULL) { crm_debug("Broadcasting local-only values"); attrd_send_message(NULL, sync, false); free_xml(sync); } } int attrd_cluster_connect(void) { attrd_cluster = pcmk_cluster_new(); attrd_cluster->destroy = attrd_cpg_destroy; attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; crm_set_status_callback(&attrd_peer_change_cb); if (crm_cluster_connect(attrd_cluster) == FALSE) { crm_err("Cluster connection failed"); return -ENOTCONN; } return pcmk_ok; } void attrd_peer_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); const char *op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); const char *interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); guint interval_ms = crm_parse_interval_spec(interval_spec); char *attr = NULL; GHashTableIter iter; regex_t regex; crm_node_t *peer = crm_get_peer(0, request->peer); if (attrd_failure_regex(®ex, rsc, op, interval_ms) != pcmk_ok) { crm_info("Ignoring invalid request to clear failures for %s", pcmk__s(rsc, "all resources")); return; } crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Make sure value is not set, so we delete */ if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) { crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL); } g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { if (regexec(®ex, attr, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", attr, pcmk__s(rsc, "all resources")); crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); attrd_peer_update(peer, xml, host, false); } } regfree(®ex); } /*! * \internal * \brief Load attributes from a peer sync response * * \param[in] peer Peer that sent clear request * \param[in] peer_won Whether peer is the attribute writer * \param[in,out] xml Request XML */ void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml) { crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", peer->uname); if (peer_won) { /* Initialize the "seen" flag for all attributes to cleared, so we can * detect attributes that local node has but the writer doesn't. */ attrd_clear_value_seen(); } // Process each attribute update in the sync response for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { attrd_peer_update(peer, child, crm_element_value(child, PCMK__XA_ATTR_NODE_NAME), true); } if (peer_won) { /* If any attributes are still not marked as seen, the writer doesn't * know about them, so send all peers an update with them. */ broadcast_unseen_local_values(); } } /*! * \internal * \brief Remove all attributes and optionally peer cache entries for a node * * \param[in] host Name of node to purge * \param[in] uncache If true, remove node from peer caches * \param[in] source Who requested removal (only used for logging) */ void attrd_peer_remove(const char *host, bool uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; CRM_CHECK(host != NULL, return); crm_notice("Removing all %s attributes for peer %s", host, source); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { if(g_hash_table_remove(a->values, host)) { crm_debug("Removed %s[%s] for peer %s", a->id, host, source); } } if (uncache) { crm_remote_peer_cache_remove(host); reap_crm_member(0, host); } } void attrd_peer_sync(crm_node_t *peer, xmlNode *xml) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); attrd_add_value_xml(sync, a, v, false); } } crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); attrd_send_message(peer, sync, false); free_xml(sync); } void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, bool filter) { bool handle_sync_point = false; if (xml_has_children(xml)) { for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { attrd_copy_xml_attributes(xml, child); attrd_peer_update_one(peer, child, filter); if (attrd_request_has_sync_point(child)) { handle_sync_point = true; } } } else { attrd_peer_update_one(peer, xml, filter); if (attrd_request_has_sync_point(xml)) { handle_sync_point = true; } } /* If the update XML specified that the client wanted to wait for a sync * point, process that now. */ if (handle_sync_point) { crm_trace("Hit local sync point for attribute update"); attrd_ack_waitlist_clients(attrd_sync_point_local, xml); } } diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c index 69367716ba..d59ddd5e5e 100644 --- a/daemons/attrd/attrd_sync.c +++ b/daemons/attrd/attrd_sync.c @@ -1,577 +1,577 @@ /* - * Copyright 2022 the Pacemaker project contributors + * Copyright 2022-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include "pacemaker-attrd.h" /* A hash table storing clients that are waiting on a sync point to be reached. * The key is waitlist_client - just a plain int. The obvious key would be * the IPC client's ID, but this is not guaranteed to be unique. A single client * could be waiting on a sync point for multiple attributes at the same time. * * It is not expected that this hash table will ever be especially large. */ static GHashTable *waitlist = NULL; static int waitlist_client = 0; struct waitlist_node { /* What kind of sync point does this node describe? */ enum attrd_sync_point sync_point; /* Information required to construct and send a reply to the client. */ char *client_id; uint32_t ipc_id; uint32_t flags; }; /* A hash table storing information on in-progress IPC requests that are awaiting * confirmations. These requests are currently being processed by peer attrds and * we are waiting to receive confirmation messages from each peer indicating that * processing is complete. * * Multiple requests could be waiting on confirmations at the same time. * * The key is the unique callid for the IPC request, and the value is a * confirmation_action struct. */ static GHashTable *expected_confirmations = NULL; /*! * \internal * \brief A structure describing a single IPC request that is awaiting confirmations */ struct confirmation_action { /*! * \brief A list of peer attrds that we are waiting to receive confirmation * messages from * * This list is dynamic - as confirmations arrive from peer attrds, they will * be removed from this list. When the list is empty, all peers have processed * the request and the associated confirmation action will be taken. */ GList *respondents; /*! * \brief A timer that will be used to remove the client should it time out * before receiving all confirmations */ mainloop_timer_t *timer; /*! * \brief A function to run when all confirmations have been received */ attrd_confirmation_action_fn fn; /*! * \brief Information required to construct and send a reply to the client */ char *client_id; uint32_t ipc_id; uint32_t flags; /*! * \brief The XML request containing the callid associated with this action */ void *xml; }; static void next_key(void) { do { waitlist_client++; if (waitlist_client < 0) { waitlist_client = 1; } } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client))); } static void free_waitlist_node(gpointer data) { struct waitlist_node *wl = (struct waitlist_node *) data; free(wl->client_id); free(wl); } static const char * sync_point_str(enum attrd_sync_point sync_point) { if (sync_point == attrd_sync_point_local) { return PCMK__VALUE_LOCAL; } else if (sync_point == attrd_sync_point_cluster) { return PCMK__VALUE_CLUSTER; } else { return "unknown"; } } /*! * \internal * \brief Add a client to the attrd waitlist * * Typically, a client receives an ACK for its XML IPC request immediately. However, * some clients want to wait until their request has been processed and taken effect. * This is called a sync point. Any client placed on this waitlist will have its * ACK message delayed until either its requested sync point is hit, or until it * times out. * * The XML IPC request must specify the type of sync point it wants to wait for. * * \param[in,out] request The request describing the client to place on the waitlist. */ void attrd_add_client_to_waitlist(pcmk__request_t *request) { const char *sync_point = attrd_request_sync_point(request->xml); struct waitlist_node *wl = NULL; if (sync_point == NULL) { return; } if (waitlist == NULL) { waitlist = pcmk__intkey_table(free_waitlist_node); } wl = calloc(sizeof(struct waitlist_node), 1); CRM_ASSERT(wl != NULL); wl->client_id = strdup(request->ipc_client->id); CRM_ASSERT(wl->client_id); if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) { wl->sync_point = attrd_sync_point_local; } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) { wl->sync_point = attrd_sync_point_cluster; } else { free_waitlist_node(wl); return; } wl->ipc_id = request->ipc_id; wl->flags = request->flags; next_key(); pcmk__intkey_table_insert(waitlist, waitlist_client, wl); crm_trace("Added client %s to waitlist for %s sync point", wl->client_id, sync_point_str(wl->sync_point)); crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); /* And then add the key to the request XML so we can uniquely identify * it when it comes time to issue the ACK. */ crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); } /*! * \internal * \brief Free all memory associated with the waitlist. This is most typically * used when attrd shuts down. */ void attrd_free_waitlist(void) { if (waitlist == NULL) { return; } g_hash_table_destroy(waitlist); waitlist = NULL; } /*! * \internal * \brief Unconditionally remove a client from the waitlist, such as when the client * node disconnects from the cluster * * \param[in] client The client to remove */ void attrd_remove_client_from_waitlist(pcmk__client_t *client) { GHashTableIter iter; gpointer value; if (waitlist == NULL) { return; } g_hash_table_iter_init(&iter, waitlist); while (g_hash_table_iter_next(&iter, NULL, &value)) { struct waitlist_node *wl = (struct waitlist_node *) value; if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) { g_hash_table_iter_remove(&iter); crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); } } } /*! * \internal * \brief Send an IPC ACK message to all awaiting clients * * This function will search the waitlist for all clients that are currently awaiting * an ACK indicating their attrd operation is complete. Only those clients with a * matching sync point type and callid from their original XML IPC request will be * ACKed. Once they have received an ACK, they will be removed from the waitlist. * * \param[in] sync_point What kind of sync point have we hit? * \param[in] xml The original XML IPC request. */ void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) { int callid; gpointer value; if (waitlist == NULL) { return; } if (crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid) == -1) { crm_warn("Could not get callid from request XML"); return; } value = pcmk__intkey_table_lookup(waitlist, callid); if (value != NULL) { struct waitlist_node *wl = (struct waitlist_node *) value; pcmk__client_t *client = NULL; if (wl->sync_point != sync_point) { return; } - crm_trace("Alerting client %s for reached %s sync point", - wl->client_id, sync_point_str(wl->sync_point)); + crm_notice("Alerting client %s for reached %s sync point", + wl->client_id, sync_point_str(wl->sync_point)); client = pcmk__find_client_by_id(wl->client_id); if (client == NULL) { return; } attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response); /* And then remove the client so it doesn't get alerted again. */ pcmk__intkey_table_remove(waitlist, callid); crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); } } /*! * \internal * \brief Action to take when a cluster sync point is hit for a * PCMK__ATTRD_CMD_UPDATE* message. * * \param[in] xml The request that should be passed along to * attrd_ack_waitlist_clients. This should be the original * IPC request containing the callid for this update message. */ int attrd_cluster_sync_point_update(xmlNode *xml) { crm_trace("Hit cluster sync point for attribute update"); attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml); return pcmk_rc_ok; } /*! * \internal * \brief Return the sync point attribute for an IPC request * * This function will check both the top-level element of \p xml for a sync * point attribute, as well as all of its \p op children, if any. The latter * is useful for newer versions of attrd that can put multiple IPC requests * into a single message. * * \param[in] xml An XML IPC request * * \note It is assumed that if one child element has a sync point attribute, * all will have a sync point attribute and they will all be the same * sync point. No other configuration is supported. * * \return The sync point attribute of \p xml, or NULL if none. */ const char * attrd_request_sync_point(xmlNode *xml) { if (xml_has_children(xml)) { xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); if (child) { return crm_element_value(child, PCMK__XA_ATTR_SYNC_POINT); } else { return NULL; } } else { return crm_element_value(xml, PCMK__XA_ATTR_SYNC_POINT); } } /*! * \internal * \brief Does an IPC request contain any sync point attribute? * * \param[in] xml An XML IPC request * * \return true if there's a sync point attribute, false otherwise */ bool attrd_request_has_sync_point(xmlNode *xml) { return attrd_request_sync_point(xml) != NULL; } static void free_action(gpointer data) { struct confirmation_action *action = (struct confirmation_action *) data; g_list_free_full(action->respondents, free); mainloop_timer_del(action->timer); free_xml(action->xml); free(action->client_id); free(action); } /* Remove an IPC request from the expected_confirmations table if the peer attrds * don't respond before the timeout is hit. We set the timeout to 15s. The exact * number isn't critical - we just want to make sure that the table eventually gets * cleared of things that didn't complete. */ static gboolean confirmation_timeout_cb(gpointer data) { struct confirmation_action *action = (struct confirmation_action *) data; GHashTableIter iter; gpointer value; if (expected_confirmations == NULL) { return G_SOURCE_REMOVE; } g_hash_table_iter_init(&iter, expected_confirmations); while (g_hash_table_iter_next(&iter, NULL, &value)) { if (value == action) { pcmk__client_t *client = pcmk__find_client_by_id(action->client_id); if (client == NULL) { return G_SOURCE_REMOVE; } crm_trace("Timed out waiting for confirmations for client %s", client->id); pcmk__ipc_send_ack(client, action->ipc_id, action->flags | crm_ipc_client_response, "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT); g_hash_table_iter_remove(&iter); crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); break; } } return G_SOURCE_REMOVE; } /*! * \internal * \brief When a peer disconnects from the cluster, no longer wait for its confirmation * for any IPC action. If this peer is the last one being waited on, this will * trigger the confirmation action. * * \param[in] host The disconnecting peer attrd's uname */ void attrd_do_not_expect_from_peer(const char *host) { GList *keys = NULL; if (expected_confirmations == NULL) { return; } keys = g_hash_table_get_keys(expected_confirmations); crm_trace("Removing peer %s from expected confirmations", host); for (GList *node = keys; node != NULL; node = node->next) { int callid = *(int *) node->data; attrd_handle_confirmation(callid, host); } g_list_free(keys); } /*! * \internal * \brief When a client disconnects from the cluster, no longer wait on confirmations * for it. Because the peer attrds may still be processing the original IPC * message, they may still send us confirmations. However, we will take no * action on them. * * \param[in] client The disconnecting client */ void attrd_do_not_wait_for_client(pcmk__client_t *client) { GHashTableIter iter; gpointer value; if (expected_confirmations == NULL) { return; } g_hash_table_iter_init(&iter, expected_confirmations); while (g_hash_table_iter_next(&iter, NULL, &value)) { struct confirmation_action *action = (struct confirmation_action *) value; if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) { crm_trace("Removing client %s from expected confirmations", client->id); g_hash_table_iter_remove(&iter); crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); break; } } } /*! * \internal * \brief Register some action to be taken when IPC request confirmations are * received * * When this function is called, a list of all peer attrds that support confirming * requests is generated. As confirmations from these peer attrds are received, * they are removed from this list. When the list is empty, the registered action * will be called. * * \note This function should always be called before attrd_send_message is called * to broadcast to the peers to ensure that we know what replies we are * waiting on. Otherwise, it is possible the peer could finish and confirm * before we know to expect it. * * \param[in] request The request that is awaiting confirmations * \param[in] fn A function to be run after all confirmations are received */ void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn) { struct confirmation_action *action = NULL; GHashTableIter iter; gpointer host, ver; GList *respondents = NULL; int callid; if (expected_confirmations == NULL) { expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action); } if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { crm_err("Could not get callid from xml"); return; } if (pcmk__intkey_table_lookup(expected_confirmations, callid)) { crm_err("Already waiting on confirmations for call id %d", callid); return; } g_hash_table_iter_init(&iter, peer_protocol_vers); while (g_hash_table_iter_next(&iter, &host, &ver)) { if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) { char *s = strdup((char *) host); CRM_ASSERT(s != NULL); respondents = g_list_prepend(respondents, s); } } action = calloc(1, sizeof(struct confirmation_action)); CRM_ASSERT(action != NULL); action->respondents = respondents; action->fn = fn; action->xml = copy_xml(request->xml); action->client_id = strdup(request->ipc_client->id); CRM_ASSERT(action->client_id != NULL); action->ipc_id = request->ipc_id; action->flags = request->flags; action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action); mainloop_timer_start(action->timer); pcmk__intkey_table_insert(expected_confirmations, callid, action); crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents)); crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); } void attrd_free_confirmations(void) { if (expected_confirmations != NULL) { g_hash_table_destroy(expected_confirmations); expected_confirmations = NULL; } } /*! * \internal * \brief Process a confirmation message from a peer attrd * * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is * received from a peer attrd. If this is the last confirmation we are waiting * on for a given operation, the registered action will be called. * * \param[in] callid The unique callid for the XML IPC request * \param[in] host The confirming peer attrd's uname */ void attrd_handle_confirmation(int callid, const char *host) { struct confirmation_action *action = NULL; GList *node = NULL; if (expected_confirmations == NULL) { return; } action = pcmk__intkey_table_lookup(expected_confirmations, callid); if (action == NULL) { return; } node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp); if (node == NULL) { return; } action->respondents = g_list_remove(action->respondents, node->data); crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents)); if (action->respondents == NULL) { action->fn(action->xml); pcmk__intkey_table_remove(expected_confirmations, callid); crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); } }