diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c index a808caeb8e..1ee0397d8b 100644 --- a/daemons/attrd/attrd_attributes.c +++ b/daemons/attrd/attrd_attributes.c @@ -1,284 +1,284 @@ /* * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static attribute_t * attrd_create_attribute(xmlNode *xml) { int is_private = 0; long long dampen = 0; const char *name = pcmk__xe_get(xml, PCMK__XA_ATTR_NAME); const char *set_type = pcmk__xe_get(xml, PCMK__XA_ATTR_SET_TYPE); const char *dampen_s = pcmk__xe_get(xml, PCMK__XA_ATTR_DAMPENING); attribute_t *a = NULL; if (set_type == NULL) { set_type = PCMK_XE_INSTANCE_ATTRIBUTES; } /* Set type is meaningful only when writing to the CIB. Private * attributes are not written. */ pcmk__xe_get_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &is_private); if (!is_private && !pcmk__str_any_of(set_type, PCMK_XE_INSTANCE_ATTRIBUTES, PCMK_XE_UTILIZATION, NULL)) { crm_warn("Ignoring attribute %s with invalid set type %s", pcmk__s(name, "(unidentified)"), set_type); return NULL; } a = pcmk__assert_alloc(1, sizeof(attribute_t)); a->id = pcmk__str_copy(name); a->set_type = pcmk__str_copy(set_type); a->set_id = pcmk__xe_get_copy(xml, PCMK__XA_ATTR_SET); a->user = pcmk__xe_get_copy(xml, PCMK__XA_ATTR_USER); a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value); if (is_private) { attrd_set_attr_flags(a, attrd_attr_is_private); } if (dampen_s != NULL) { dampen = crm_get_msec(dampen_s); } if (dampen > 0) { a->timeout_ms = (int) QB_MIN(dampen, INT_MAX); a->timer = attrd_add_timer(a->id, a->timeout_ms, a); } else if (dampen < 0) { crm_warn("Ignoring invalid delay %s for attribute %s", dampen_s, a->id); } crm_trace("Created attribute %s with %s write delay and %s CIB user", a->id, ((dampen > 0)? pcmk__readable_interval(a->timeout_ms) : "no"), pcmk__s(a->user, "default")); g_hash_table_replace(attributes, a->id, a); return a; } static int attrd_update_dampening(attribute_t *a, xmlNode *xml, const char *attr) { const char *dvalue = pcmk__xe_get(xml, PCMK__XA_ATTR_DAMPENING); long long dampen = 0; if (dvalue == NULL) { crm_warn("Could not update %s: peer did not specify value for delay", attr); return EINVAL; } dampen = crm_get_msec(dvalue); if (dampen < 0) { crm_warn("Could not update %s: invalid delay value %dms (%s)", attr, dampen, dvalue); return EINVAL; } if (a->timeout_ms != dampen) { mainloop_timer_del(a->timer); a->timeout_ms = (int) QB_MIN(dampen, INT_MAX); if (dampen > 0) { a->timer = attrd_add_timer(attr, a->timeout_ms, a); crm_info("Update attribute %s delay to %dms (%s)", attr, dampen, dvalue); } else { a->timer = NULL; crm_info("Update attribute %s to remove delay", attr); } /* If dampening changed, do an immediate write-out, * otherwise repeated dampening changes would prevent write-outs */ attrd_write_or_elect_attribute(a); } return pcmk_rc_ok; } GHashTable *attributes = NULL; /*! * \internal * \brief Create an XML representation of an attribute for use in peer messages * * \param[in,out] parent Create attribute XML as child element of this * \param[in] a Attribute to represent * \param[in] v Attribute value to represent * \param[in] force_write If true, value should be written even if unchanged * * \return XML representation of attribute */ xmlNode * attrd_add_value_xml(xmlNode *parent, const attribute_t *a, const attribute_value_t *v, bool force_write) { xmlNode *xml = pcmk__xe_create(parent, __func__); crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id); crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type); crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id); crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); crm_xml_add(xml, PCMK__XA_ATTR_HOST, v->nodename); /* @COMPAT Prior to 2.1.10 and 3.0.1, the node's cluster ID was added * instead of its XML ID. For Corosync and Pacemaker Remote nodes, those are * the same, but if we ever support node XML IDs that differ from their * cluster IDs, we will have to drop support for rolling upgrades from * versions before those. */ crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, attrd_get_node_xml_id(v->nodename)); crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); - crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, - pcmk__timeout_ms2s(a->timeout_ms)); - crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, - pcmk_is_set(a->flags, attrd_attr_is_private)); - crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, - pcmk_is_set(v->flags, attrd_value_remote)); - crm_xml_add_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, force_write); + pcmk__xe_set_int(xml, PCMK__XA_ATTR_DAMPENING, + pcmk__timeout_ms2s(a->timeout_ms)); + pcmk__xe_set_int(xml, PCMK__XA_ATTR_IS_PRIVATE, + pcmk_is_set(a->flags, attrd_attr_is_private)); + pcmk__xe_set_int(xml, PCMK__XA_ATTR_IS_REMOTE, + pcmk_is_set(v->flags, attrd_value_remote)); + pcmk__xe_set_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, force_write); return xml; } void attrd_clear_value_seen(void) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a; attribute_value_t *v = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { attrd_clear_value_flags(v, attrd_value_from_peer); } } } attribute_t * attrd_populate_attribute(xmlNode *xml, const char *attr) { attribute_t *a = NULL; bool update_both = false; const char *op = pcmk__xe_get(xml, PCMK_XA_TASK); // NULL because PCMK__ATTRD_CMD_SYNC_RESPONSE has no PCMK_XA_TASK update_both = pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH, pcmk__str_null_matches); // Look up or create attribute entry a = g_hash_table_lookup(attributes, attr); if (a == NULL) { if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_none)) { a = attrd_create_attribute(xml); if (a == NULL) { return NULL; } } else { crm_warn("Could not update %s: attribute not found", attr); return NULL; } } // Update attribute dampening if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { int rc = attrd_update_dampening(a, xml, attr); if (rc != pcmk_rc_ok || !update_both) { return NULL; } } return a; } /*! * \internal * \brief Get the XML ID used to write out an attribute set * * \param[in] attr Attribute to get set ID for * \param[in] node_state_id XML ID of node state that attribute value is for * * \return Newly allocated string with XML ID to use for \p attr set */ char * attrd_set_id(const attribute_t *attr, const char *node_state_id) { char *set_id = NULL; pcmk__assert((attr != NULL) && (node_state_id != NULL)); if (pcmk__str_empty(attr->set_id)) { /* @COMPAT This should really take the set type into account. Currently * we use the same XML ID for transient attributes and utilization * attributes. It doesn't cause problems because the status section is * not limited by the schema in any way, but it's still unfortunate. * For backward compatibility reasons, we can't change this. */ set_id = crm_strdup_printf("%s-%s", PCMK_XE_STATUS, node_state_id); } else { /* @COMPAT When the user specifies a set ID for an attribute, it is the * same for every node. That is less than ideal, but again, the schema * doesn't enforce anything for the status section. We couldn't change * it without allowing the set ID to vary per value rather than per * attribute, which would break backward compatibility, pose design * challenges, and potentially cause problems in rolling upgrades. */ set_id = pcmk__str_copy(attr->set_id); } pcmk__xml_sanitize_id(set_id); return set_id; } /*! * \internal * \brief Get the XML ID used to write out an attribute value * * \param[in] attr Attribute to get value XML ID for * \param[in] node_state_id UUID of node that attribute value is for * * \return Newly allocated string with XML ID of \p attr value */ char * attrd_nvpair_id(const attribute_t *attr, const char *node_state_id) { char *nvpair_id = NULL; if (attr->set_id != NULL) { nvpair_id = crm_strdup_printf("%s-%s", attr->set_id, attr->id); } else { nvpair_id = crm_strdup_printf(PCMK_XE_STATUS "-%s-%s", node_state_id, attr->id); } pcmk__xml_sanitize_id(nvpair_id); return nvpair_id; } diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index 960c736fdd..f9102abc8b 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -1,615 +1,615 @@ /* * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static xmlNode * attrd_confirmation(int callid) { xmlNode *node = pcmk__xe_create(NULL, __func__); crm_xml_add(node, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(node, PCMK__XA_SRC, pcmk__cluster_local_node_name()); crm_xml_add(node, PCMK_XA_TASK, PCMK__ATTRD_CMD_CONFIRM); - crm_xml_add_int(node, PCMK__XA_CALL_ID, callid); + pcmk__xe_set_int(node, PCMK__XA_CALL_ID, callid); return node; } static void attrd_peer_message(pcmk__node_status_t *peer, xmlNode *xml) { const char *election_op = pcmk__xe_get(xml, PCMK__XA_CRM_TASK); if (election_op) { attrd_handle_election_op(peer, xml); return; } if (attrd_shutting_down(false)) { /* If we're shutting down, we want to continue responding to election * ops as long as we're a cluster member (because our vote may be * needed). Ignore all other messages. */ return; } else { pcmk__request_t request = { .ipc_client = NULL, .ipc_id = 0, .ipc_flags = 0, .peer = peer->name, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = pcmk__xe_get_copy(request.xml, PCMK_XA_TASK); CRM_CHECK(request.op != NULL, return); attrd_handle_request(&request); /* Having finished handling the request, check to see if the originating * peer requested confirmation. If so, send that confirmation back now. */ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { int callid = 0; xmlNode *reply = NULL; /* Add the confirmation ID for the message we are confirming to the * response so the originating peer knows what they're a confirmation * for. */ pcmk__xe_get_int(xml, PCMK__XA_CALL_ID, &callid); reply = attrd_confirmation(callid); /* And then send the confirmation back to the originating peer. This * ends up right back in this same function (attrd_peer_message) on the * peer where it will have to do something with a PCMK__XA_CONFIRM type * message. */ crm_debug("Sending %s a confirmation", peer->name); attrd_send_message(peer, reply, false); pcmk__xml_free(reply); } pcmk__reset_request(&request); } } static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Bad message received from %s[%u]: '%.120s'", from, nodeid, data); } else { attrd_peer_message(pcmk__get_node(nodeid, from, NULL, pcmk__node_search_cluster_member), xml); } pcmk__xml_free(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down(false)) { crm_info("Disconnected from Corosync process group"); } else { crm_crit("Lost connection to Corosync process group, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } /*! * \internal * \brief Broadcast an update for a single attribute value * * \param[in] a Attribute to broadcast * \param[in] v Attribute value to broadcast */ void attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v) { xmlNode *op = pcmk__xe_create(NULL, PCMK_XE_OP); crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); attrd_add_value_xml(op, a, v, false); attrd_send_message(NULL, op, false); pcmk__xml_free(op); } #define state_text(state) pcmk__s((state), "in unknown state") static void attrd_peer_change_cb(enum pcmk__node_update kind, pcmk__node_status_t *peer, const void *data) { bool gone = false; bool is_remote = pcmk_is_set(peer->flags, pcmk__node_status_remote); switch (kind) { case pcmk__node_update_name: crm_debug("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), peer->name, state_text(peer->state)); break; case pcmk__node_update_processes: if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { gone = true; } crm_debug("Node %s is %s a peer", peer->name, (gone? "no longer" : "now")); break; case pcmk__node_update_state: crm_debug("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), peer->name, state_text(peer->state), state_text(data)); if (pcmk__str_eq(peer->state, PCMK_VALUE_MEMBER, pcmk__str_none)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if (attrd_election_won() && !pcmk_is_set(peer->flags, pcmk__node_status_remote)) { attrd_peer_sync(peer); } } else { // Remove all attribute values associated with lost nodes attrd_peer_remove(peer->name, false, "loss"); gone = true; } break; } // Remove votes from cluster nodes that leave, in case election in progress if (gone && !is_remote) { attrd_remove_voter(peer); attrd_remove_peer_protocol_ver(peer->name); attrd_do_not_expect_from_peer(peer->name); } } #define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)") #define readable_peer(p) \ (((p) == NULL)? "all peers" : pcmk__s((p)->name, "unknown peer")) static void update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, const xmlNode *xml, const char *attr, const char *value, const char *host, bool filter) { int is_remote = 0; bool changed = false; attribute_value_t *v = NULL; const char *prev_xml_id = NULL; const char *node_xml_id = pcmk__xe_get(xml, PCMK__XA_ATTR_HOST_ID); // Create entry for value if not already existing v = g_hash_table_lookup(a->values, host); if (v == NULL) { v = pcmk__assert_alloc(1, sizeof(attribute_value_t)); v->nodename = pcmk__str_copy(host); g_hash_table_replace(a->values, v->nodename, v); } /* If update doesn't contain the node XML ID, fall back to any previously * known value (for logging) */ prev_xml_id = attrd_get_node_xml_id(v->nodename); if (node_xml_id == NULL) { node_xml_id = prev_xml_id; } // If value is for a Pacemaker Remote node, remember that pcmk__xe_get_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); if (is_remote) { attrd_set_value_flags(v, attrd_value_remote); pcmk__assert(pcmk__cluster_lookup_remote_node(host) != NULL); } // Check whether the value changed changed = !pcmk__str_eq(v->current, value, pcmk__str_casei); if (changed && filter && pcmk__str_eq(host, attrd_cluster->priv->node_name, pcmk__str_casei)) { /* Broadcast the local value for an attribute that differs from the * value provided in a peer's attribute synchronization response. This * ensures a node's values for itself take precedence and all peers are * kept in sync. */ v = g_hash_table_lookup(a->values, attrd_cluster->priv->node_name); crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", attr, host, readable_value(v), value, peer->name); attrd_broadcast_value(a, v); } else if (changed) { crm_notice("Setting %s[%s]%s%s: %s -> %s " QB_XS " from %s with %s write delay and node XML ID %s", attr, host, a->set_type ? " in " : "", pcmk__s(a->set_type, ""), readable_value(v), pcmk__s(value, "(unset)"), peer->name, (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms), pcmk__s(node_xml_id, "unknown")); pcmk__str_update(&v->current, value); attrd_set_attr_flags(a, attrd_attr_changed); if (pcmk__str_eq(host, attrd_cluster->priv->node_name, pcmk__str_casei) && pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) { if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { attrd_set_requesting_shutdown(); } else { attrd_clear_requesting_shutdown(); } } // Write out new value or start dampening timer if (a->timeout_ms && a->timer) { crm_trace("Delaying write of %s %s for dampening", attr, pcmk__readable_interval(a->timeout_ms)); mainloop_timer_start(a->timer); } else { attrd_write_or_elect_attribute(a); } } else { int is_force_write = 0; pcmk__xe_get_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, &is_force_write); if (is_force_write == 1 && a->timeout_ms && a->timer) { /* Save forced writing and set change flag. */ /* The actual attribute is written by Writer after election. */ crm_trace("%s[%s] from %s is unchanged (%s), forcing write", attr, host, peer->name, pcmk__s(value, "unset")); attrd_set_attr_flags(a, attrd_attr_force_write); } else { crm_trace("%s[%s] from %s is unchanged (%s)", attr, host, peer->name, pcmk__s(value, "unset")); } } // This allows us to later detect local values that peer doesn't know about attrd_set_value_flags(v, attrd_value_from_peer); // Remember node's XML ID if we're just learning it if ((node_xml_id != NULL) && !pcmk__str_eq(node_xml_id, prev_xml_id, pcmk__str_none)) { crm_trace("Learned %s[%s] node XML ID is %s (was %s)", a->id, v->nodename, node_xml_id, pcmk__s(prev_xml_id, "unknown")); attrd_set_node_xml_id(v->nodename, node_xml_id); if (attrd_election_won()) { // In case we couldn't write a value missing the XML ID before attrd_write_attributes(attrd_write_changed); } } } static void attrd_peer_update_one(const pcmk__node_status_t *peer, xmlNode *xml, bool filter) { attribute_t *a = NULL; const char *attr = pcmk__xe_get(xml, PCMK__XA_ATTR_NAME); const char *value = pcmk__xe_get(xml, PCMK__XA_ATTR_VALUE); const char *host = pcmk__xe_get(xml, PCMK__XA_ATTR_HOST); if (attr == NULL) { crm_warn("Could not update attribute: peer did not specify name"); return; } a = attrd_populate_attribute(xml, attr); if (a == NULL) { return; } if (host == NULL) { // If no host was specified, update all hosts GHashTableIter vIter; crm_debug("Setting %s for all hosts to %s", attr, value); pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_HOST_ID); g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { update_attr_on_host(a, peer, xml, attr, value, host, filter); } } else { // Update attribute value for the given host update_attr_on_host(a, peer, xml, attr, value, host, filter); } /* If this is a message from some attrd instance broadcasting its protocol * version, check to see if it's a new minimum version. */ if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { attrd_update_minimum_protocol_ver(peer->name, value); } } static void broadcast_unseen_local_values(void) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { if (!pcmk_is_set(v->flags, attrd_value_from_peer) && pcmk__str_eq(v->nodename, attrd_cluster->priv->node_name, pcmk__str_casei)) { crm_trace("* %s[%s]='%s' is local-only", a->id, v->nodename, readable_value(v)); if (sync == NULL) { sync = pcmk__xe_create(NULL, __func__); crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); } attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer); } } } if (sync != NULL) { crm_debug("Broadcasting local-only values"); attrd_send_message(NULL, sync, false); pcmk__xml_free(sync); } } int attrd_cluster_connect(void) { int rc = pcmk_rc_ok; attrd_cluster = pcmk_cluster_new(); pcmk_cluster_set_destroy_fn(attrd_cluster, attrd_cpg_destroy); pcmk_cpg_set_deliver_fn(attrd_cluster, attrd_cpg_dispatch); pcmk_cpg_set_confchg_fn(attrd_cluster, pcmk__cpg_confchg_cb); pcmk__cluster_set_status_callback(&attrd_peer_change_cb); rc = pcmk_cluster_connect(attrd_cluster); rc = pcmk_rc2legacy(rc); if (rc != pcmk_ok) { crm_err("Cluster connection failed"); return rc; } return pcmk_ok; } void attrd_peer_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc = pcmk__xe_get(xml, PCMK__XA_ATTR_RESOURCE); const char *host = pcmk__xe_get(xml, PCMK__XA_ATTR_HOST); const char *op = pcmk__xe_get(xml, PCMK__XA_ATTR_CLEAR_OPERATION); const char *interval_spec = pcmk__xe_get(xml, PCMK__XA_ATTR_CLEAR_INTERVAL); guint interval_ms = 0U; char *attr = NULL; GHashTableIter iter; regex_t regex; pcmk__node_status_t *peer = pcmk__get_node(0, request->peer, NULL, pcmk__node_search_cluster_member); pcmk_parse_interval_spec(interval_spec, &interval_ms); if (attrd_failure_regex(®ex, rsc, op, interval_ms) != pcmk_ok) { crm_info("Ignoring invalid request to clear failures for %s", pcmk__s(rsc, "all resources")); return; } crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Make sure value is not set, so we delete */ pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { if (regexec(®ex, attr, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", attr, pcmk__s(rsc, "all resources")); crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); attrd_peer_update(peer, xml, host, false); } } regfree(®ex); } /*! * \internal * \brief Load attributes from a peer sync response * * \param[in] peer Peer that sent sync response * \param[in] peer_won Whether peer is the attribute writer * \param[in,out] xml Request XML */ void attrd_peer_sync_response(const pcmk__node_status_t *peer, bool peer_won, xmlNode *xml) { crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", peer->name); if (peer_won) { /* Initialize the "seen" flag for all attributes to cleared, so we can * detect attributes that local node has but the writer doesn't. */ attrd_clear_value_seen(); } // Process each attribute update in the sync response for (xmlNode *child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL; child = pcmk__xe_next(child, NULL)) { attrd_peer_update(peer, child, pcmk__xe_get(child, PCMK__XA_ATTR_HOST), true); } if (peer_won) { /* If any attributes are still not marked as seen, the writer doesn't * know about them, so send all peers an update with them. */ broadcast_unseen_local_values(); } } /*! * \internal * \brief Remove all attributes and optionally peer cache entries for a node * * \param[in] host Name of node to purge * \param[in] uncache If true, remove node from peer caches * \param[in] source Who requested removal (only used for logging) */ void attrd_peer_remove(const char *host, bool uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; CRM_CHECK(host != NULL, return); crm_notice("Removing all %s attributes for node %s " QB_XS " %s reaping node from cache", host, source, (uncache? "and" : "without")); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { if(g_hash_table_remove(a->values, host)) { crm_debug("Removed %s[%s] for peer %s", a->id, host, source); } } if (uncache) { pcmk__purge_node_from_cache(host, 0); attrd_forget_node_xml_id(host); } } /*! * \internal * \brief Send all known attributes and values to a peer * * \param[in] peer Peer to send sync to (if NULL, broadcast to all peers) */ void attrd_peer_sync(pcmk__node_status_t *peer) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = pcmk__xe_create(NULL, __func__); crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s]='%s' to %s", a->id, v->nodename, readable_value(v), readable_peer(peer)); attrd_add_value_xml(sync, a, v, false); } } crm_debug("Syncing values to %s", readable_peer(peer)); attrd_send_message(peer, sync, false); pcmk__xml_free(sync); } void attrd_peer_update(const pcmk__node_status_t *peer, xmlNode *xml, const char *host, bool filter) { bool handle_sync_point = false; CRM_CHECK((peer != NULL) && (xml != NULL), return); if (xml->children != NULL) { for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL); child != NULL; child = pcmk__xe_next(child, PCMK_XE_OP)) { pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite); attrd_peer_update_one(peer, child, filter); if (attrd_request_has_sync_point(child)) { handle_sync_point = true; } } } else { attrd_peer_update_one(peer, xml, filter); if (attrd_request_has_sync_point(xml)) { handle_sync_point = true; } } /* If the update XML specified that the client wanted to wait for a sync * point, process that now. */ if (handle_sync_point) { crm_trace("Hit local sync point for attribute update"); attrd_ack_waitlist_clients(attrd_sync_point_local, xml); } } diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c index 87b79dcf6b..dc5aea8c8f 100644 --- a/daemons/attrd/attrd_elections.c +++ b/daemons/attrd/attrd_elections.c @@ -1,176 +1,176 @@ /* * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include "pacemaker-attrd.h" static char *peer_writer = NULL; static void attrd_election_cb(pcmk_cluster_t *cluster) { attrd_declare_winner(); /* Update the peers after an election */ attrd_peer_sync(NULL); /* After winning an election, update the CIB with the values of all * attributes as the winner knows them. */ attrd_write_attributes(attrd_write_all); } void attrd_election_init(void) { election_init(attrd_cluster, attrd_election_cb); } void attrd_start_election_if_needed(void) { if ((peer_writer == NULL) && (election_state(attrd_cluster) != election_in_progress) && !attrd_shutting_down(false)) { crm_info("Starting an election to determine the writer"); election_vote(attrd_cluster); } } bool attrd_election_won(void) { return (election_state(attrd_cluster) == election_won); } void attrd_handle_election_op(const pcmk__node_status_t *peer, xmlNode *xml) { enum election_result rc = 0; enum election_result previous = election_state(attrd_cluster); crm_xml_add(xml, PCMK__XA_SRC, peer->name); // Don't become writer if we're shutting down rc = election_count_vote(attrd_cluster, xml, !attrd_shutting_down(false)); switch(rc) { case election_start: crm_debug("Unsetting writer (was %s) and starting new election", peer_writer? peer_writer : "unset"); free(peer_writer); peer_writer = NULL; election_vote(attrd_cluster); break; case election_lost: /* The election API should really distinguish between "we just lost * to this peer" and "we already lost previously, and we are * discarding this vote for some reason", but it doesn't. * * In the first case, we want to tentatively set the peer writer to * this peer, even though another peer may eventually win (which we * will learn via attrd_check_for_new_writer()), so * attrd_start_election_if_needed() doesn't start a new election. * * Approximate a test for that case as best as possible. */ if ((peer_writer == NULL) || (previous != election_lost)) { pcmk__str_update(&peer_writer, peer->name); crm_debug("Election lost, presuming %s is writer for now", peer_writer); } break; case election_in_progress: election_check(attrd_cluster); break; default: crm_info("Ignoring election op from %s due to error", peer->name); break; } } bool attrd_check_for_new_writer(const pcmk__node_status_t *peer, const xmlNode *xml) { int peer_state = 0; pcmk__xe_get_int(xml, PCMK__XA_ATTR_WRITER, &peer_state); if (peer_state == election_won) { if ((election_state(attrd_cluster) == election_won) && !pcmk__str_eq(peer->name, attrd_cluster->priv->node_name, pcmk__str_casei)) { crm_notice("Detected another attribute writer (%s), starting new " "election", peer->name); election_vote(attrd_cluster); } else if (!pcmk__str_eq(peer->name, peer_writer, pcmk__str_casei)) { crm_notice("Recorded new attribute writer: %s (was %s)", peer->name, pcmk__s(peer_writer, "unset")); pcmk__str_update(&peer_writer, peer->name); } } return (peer_state == election_won); } void attrd_declare_winner(void) { crm_notice("Recorded local node as attribute writer (was %s)", (peer_writer? peer_writer : "unset")); pcmk__str_update(&peer_writer, attrd_cluster->priv->node_name); } void attrd_remove_voter(const pcmk__node_status_t *peer) { election_remove(attrd_cluster, peer->name); if ((peer_writer != NULL) && pcmk__str_eq(peer->name, peer_writer, pcmk__str_casei)) { free(peer_writer); peer_writer = NULL; crm_notice("Lost attribute writer %s", peer->name); /* Clear any election dampening in effect. Otherwise, if the lost writer * had just won, the election could fizzle out with no new writer. */ election_clear_dampening(attrd_cluster); /* If the writer received attribute updates during its shutdown, it will * not have written them to the CIB. Ensure we get a new writer so they * are written out. This means that every node that sees the writer * leave will start a new election, but that's better than losing * attributes. */ attrd_start_election_if_needed(); /* If an election is in progress, we need to call election_check(), in case * this lost peer is the only one that hasn't voted, otherwise the election * would be pending until it's timed out. */ } else if (election_state(attrd_cluster) == election_in_progress) { crm_debug("Checking election status upon loss of voter %s", peer->name); election_check(attrd_cluster); } } void attrd_xml_add_writer(xmlNode *xml) { - crm_xml_add_int(xml, PCMK__XA_ATTR_WRITER, election_state(attrd_cluster)); + pcmk__xe_set_int(xml, PCMK__XA_ATTR_WRITER, election_state(attrd_cluster)); } diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c index f88abf90c4..75db2933ae 100644 --- a/daemons/attrd/attrd_ipc.c +++ b/daemons/attrd/attrd_ipc.c @@ -1,629 +1,629 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // PRIu32 #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static qb_ipcs_service_t *ipcs = NULL; /*! * \internal * \brief Build the XML reply to a client query * * \param[in] attr Name of requested attribute * \param[in] host Name of requested host (or NULL for all hosts) * * \return New XML reply * \note Caller is responsible for freeing the resulting XML */ static xmlNode *build_query_reply(const char *attr, const char *host) { xmlNode *reply = pcmk__xe_create(NULL, __func__); attribute_t *a; crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(reply, PCMK__XA_SUBT, PCMK__ATTRD_CMD_QUERY); crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); /* If desired attribute exists, add its value(s) to the reply */ a = g_hash_table_lookup(attributes, attr); if (a) { attribute_value_t *v; xmlNode *host_value; crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr); /* Allow caller to use "localhost" to refer to local node */ if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) { host = attrd_cluster->priv->node_name; crm_trace("Mapped localhost to %s", host); } /* If a specific node was requested, add its value */ if (host) { v = g_hash_table_lookup(a->values, host); host_value = pcmk__xe_create(reply, PCMK_XE_NODE); crm_xml_add(host_value, PCMK__XA_ATTR_HOST, host); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, (v? v->current : NULL)); /* Otherwise, add all nodes' values */ } else { GHashTableIter iter; g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { host_value = pcmk__xe_create(reply, PCMK_XE_NODE); crm_xml_add(host_value, PCMK__XA_ATTR_HOST, v->nodename); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current); } } } return reply; } xmlNode * attrd_client_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc, *op, *interval_spec; if (minimum_protocol_version >= 2) { /* Propagate to all peers (including ourselves). * This ends up at attrd_peer_message(). */ attrd_send_message(NULL, xml, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } rsc = pcmk__xe_get(xml, PCMK__XA_ATTR_RESOURCE); op = pcmk__xe_get(xml, PCMK__XA_ATTR_CLEAR_OPERATION); interval_spec = pcmk__xe_get(xml, PCMK__XA_ATTR_CLEAR_INTERVAL); /* Map this to an update */ crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Add regular expression matching desired attributes */ if (rsc) { char *pattern; if (op == NULL) { pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); } else { guint interval_ms = 0U; pcmk_parse_interval_spec(interval_spec, &interval_ms); pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms); } crm_xml_add(xml, PCMK__XA_ATTR_REGEX, pattern); free(pattern); } else { crm_xml_add(xml, PCMK__XA_ATTR_REGEX, ATTRD_RE_CLEAR_ALL); } /* Make sure attribute and value are not set, so we delete via regex */ pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_NAME); pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE); return attrd_client_update(request); } xmlNode * attrd_client_peer_remove(pcmk__request_t *request) { xmlNode *xml = request->xml; // Host and ID are not used in combination, rather host has precedence const char *host = pcmk__xe_get(xml, PCMK__XA_ATTR_HOST); char *host_alloc = NULL; attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); if (host == NULL) { int nodeid = 0; pcmk__xe_get_int(xml, PCMK__XA_ATTR_HOST_ID, &nodeid); if (nodeid > 0) { pcmk__node_status_t *node = NULL; char *host_alloc = NULL; node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_cluster_member); if ((node != NULL) && (node->name != NULL)) { // Use cached name if available host = node->name; } else { // Otherwise ask cluster layer host_alloc = pcmk__cluster_node_name(nodeid); host = host_alloc; } crm_xml_add(xml, PCMK__XA_ATTR_HOST, host); } } if (host) { crm_info("Client %s is requesting all values for %s be removed", pcmk__client_name(request->ipc_client), host); attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ free(host_alloc); } else { crm_info("Ignoring request by client %s to remove all peer values without specifying peer", pcmk__client_name(request->ipc_client)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } xmlNode * attrd_client_query(pcmk__request_t *request) { xmlNode *query = request->xml; xmlNode *reply = NULL; const char *attr = NULL; crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client)); /* Request must specify attribute name to query */ attr = pcmk__xe_get(query, PCMK__XA_ATTR_NAME); if (attr == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Ignoring malformed query from %s (no attribute name given)", pcmk__client_name(request->ipc_client)); return NULL; } /* Build the XML reply */ reply = build_query_reply(attr, pcmk__xe_get(query, PCMK__XA_ATTR_HOST)); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Could not respond to query from %s: could not create XML reply", pcmk__client_name(request->ipc_client)); return NULL; } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } request->ipc_client->request_id = 0; return reply; } xmlNode * attrd_client_refresh(pcmk__request_t *request) { crm_info("Updating all attributes"); attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); attrd_write_attributes(attrd_write_all|attrd_write_no_delay); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static void handle_missing_host(xmlNode *xml) { if (pcmk__xe_get(xml, PCMK__XA_ATTR_HOST) == NULL) { crm_trace("Inferring local node %s with XML ID %s", attrd_cluster->priv->node_name, attrd_cluster->priv->node_xml_id); crm_xml_add(xml, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name); crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, attrd_cluster->priv->node_xml_id); } } /* Convert a single IPC message with a regex into one with multiple children, one * for each regex match. */ static int expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex) { if (attr == NULL && regex) { bool matched = false; GHashTableIter aIter; regex_t r_patt; crm_debug("Setting %s to %s", regex, value); if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) { return EINVAL; } g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { int status = regexec(&r_patt, attr, 0, NULL, 0); if (status == 0) { xmlNode *child = pcmk__xe_create(xml, PCMK_XE_OP); crm_trace("Matched %s with %s", attr, regex); matched = true; /* Copy all the non-conflicting attributes from the parent over, * but remove the regex and replace it with the name. */ pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite); pcmk__xe_remove_attr(child, PCMK__XA_ATTR_REGEX); crm_xml_add(child, PCMK__XA_ATTR_NAME, attr); } } regfree(&r_patt); /* Return a code if we never matched anything. This should not be treated * as an error. It indicates there was a regex, and it was a valid regex, * but simply did not match anything and the caller should not continue * doing any regex-related processing. */ if (!matched) { return pcmk_rc_op_unsatisfied; } } else if (attr == NULL) { return pcmk_rc_bad_nvpair; } return pcmk_rc_ok; } static int handle_regexes(pcmk__request_t *request) { xmlNode *xml = request->xml; int rc = pcmk_rc_ok; const char *attr = pcmk__xe_get(xml, PCMK__XA_ATTR_NAME); const char *value = pcmk__xe_get(xml, PCMK__XA_ATTR_VALUE); const char *regex = pcmk__xe_get(xml, PCMK__XA_ATTR_REGEX); rc = expand_regexes(xml, attr, value, regex); if (rc == EINVAL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Bad regex '%s' for update from client %s", regex, pcmk__client_name(request->ipc_client)); } else if (rc == pcmk_rc_bad_nvpair) { crm_err("Update request did not specify attribute or regular expression"); pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Client %s update request did not specify attribute or regular expression", pcmk__client_name(request->ipc_client)); } return rc; } static int handle_value_expansion(const char **value, xmlNode *xml, const char *op, const char *attr) { attribute_t *a = g_hash_table_lookup(attributes, attr); if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { return EINVAL; } if (*value && attrd_value_needs_expansion(*value)) { int int_value; attribute_value_t *v = NULL; if (a) { const char *host = pcmk__xe_get(xml, PCMK__XA_ATTR_HOST); v = g_hash_table_lookup(a->values, host); } int_value = attrd_expand_value(*value, (v? v->current : NULL)); crm_info("Expanded %s=%s to %d", attr, *value, int_value); - crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value); + pcmk__xe_set_int(xml, PCMK__XA_ATTR_VALUE, int_value); /* Replacing the value frees the previous memory, so re-query it */ *value = pcmk__xe_get(xml, PCMK__XA_ATTR_VALUE); } return pcmk_rc_ok; } static void send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) { if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { /* The client is waiting on the cluster-wide sync point. In this case, * the response ACK is not sent until this attrd broadcasts the update * and receives its own confirmation back from all peers. */ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ } else { /* The client is either waiting on the local sync point or was not * waiting on any sync point at all. For the local sync point, the * response ACK is sent in attrd_peer_update. For clients not * waiting on any sync point, the response ACK is sent in * handle_update_request immediately before this function was called. */ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ } } static int send_child_update(xmlNode *child, void *data) { pcmk__request_t *request = (pcmk__request_t *) data; /* Calling pcmk__set_result is handled by one of these calls to * attrd_client_update, so no need to do it again here. */ request->xml = child; attrd_client_update(request); return pcmk_rc_ok; } xmlNode * attrd_client_update(pcmk__request_t *request) { xmlNode *xml = NULL; const char *attr, *value, *regex; CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL); xml = request->xml; /* If the message has children, that means it is a message from a newer * client that supports sending multiple operations at a time. There are * two ways we can handle that. */ if (xml->children != NULL) { if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { /* First, if all peers support a certain protocol version, we can * just broadcast the big message and they'll handle it. However, * we also need to apply all the transformations in this function * to the children since they don't happen anywhere else. */ for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL); child != NULL; child = pcmk__xe_next(child, PCMK_XE_OP)) { attr = pcmk__xe_get(child, PCMK__XA_ATTR_NAME); value = pcmk__xe_get(child, PCMK__XA_ATTR_VALUE); handle_missing_host(child); if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) { pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, "Attribute %s does not exist", attr); return NULL; } } send_update_msg_to_cluster(request, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { /* Save the original xml node pointer so it can be restored after iterating * over all the children. */ xmlNode *orig_xml = request->xml; /* Second, if they do not support that protocol version, split it * up into individual messages and call attrd_client_update on * each one. */ pcmk__xe_foreach_child(xml, PCMK_XE_OP, send_child_update, request); request->xml = orig_xml; } return NULL; } attr = pcmk__xe_get(xml, PCMK__XA_ATTR_NAME); value = pcmk__xe_get(xml, PCMK__XA_ATTR_VALUE); regex = pcmk__xe_get(xml, PCMK__XA_ATTR_REGEX); if (handle_regexes(request) != pcmk_rc_ok) { /* Error handling was already dealt with in handle_regexes, so just return. */ return NULL; } else if (regex) { /* Recursively call attrd_client_update on the new message with regexes * expanded. If supported by the attribute daemon, this means that all * matches can also be handled atomically. */ return attrd_client_update(request); } handle_missing_host(xml); if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) { pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, "Attribute %s does not exist", attr); return NULL; } crm_debug("Broadcasting %s[%s]=%s%s", attr, pcmk__xe_get(xml, PCMK__XA_ATTR_HOST), value, (attrd_election_won()? " (writer)" : "")); send_update_msg_to_cluster(request, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } /*! * \internal * \brief Accept a new client IPC connection * * \param[in,out] c New connection * \param[in] uid Client user id * \param[in] gid Client group id * * \return pcmk_ok on success, -errno otherwise */ static int32_t attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("New client connection %p", c); if (attrd_shutting_down(false)) { crm_info("Ignoring new connection from pid %d during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return pcmk_ok; } /*! * \internal * \brief Destroy a client IPC connection * * \param[in] c Connection to destroy * * \return FALSE (i.e. do not re-run this callback) */ static int32_t attrd_ipc_closed(qb_ipcs_connection_t *c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { crm_trace("Ignoring request to clean up unknown connection %p", c); } else { crm_trace("Cleaning up closed client connection %p", c); /* Remove the client from the sync point waitlist if it's present. */ attrd_remove_client_from_waitlist(client); /* And no longer wait for confirmations from any peers. */ attrd_do_not_wait_for_client(client); pcmk__free_client(client); } return FALSE; } /*! * \internal * \brief Destroy a client IPC connection * * \param[in,out] c Connection to destroy * * \note We handle a destroyed connection the same as a closed one, * but we need a separate handler because the return type is different. */ static void attrd_ipc_destroy(qb_ipcs_connection_t *c) { crm_trace("Destroying client connection %p", c); attrd_ipc_closed(c); } static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *xml = NULL; // Sanity-check, and parse XML from IPC data CRM_CHECK((c != NULL) && (client != NULL), return 0); if (data == NULL) { crm_debug("No IPC data from PID %d", pcmk__client_pid(c)); return 0; } xml = pcmk__client_data2xml(client, data, &id, &flags); if (xml == NULL) { crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c)); pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL, CRM_EX_PROTOCOL); return 0; } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = NULL, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; pcmk__assert(client->user != NULL); pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user); request.op = pcmk__xe_get_copy(request.xml, PCMK_XA_TASK); CRM_CHECK(request.op != NULL, return 0); attrd_handle_request(&request); pcmk__reset_request(&request); } pcmk__xml_free(xml); return 0; } static struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = attrd_ipc_accept, .connection_created = NULL, .msg_process = attrd_ipc_dispatch, .connection_closed = attrd_ipc_closed, .connection_destroyed = attrd_ipc_destroy }; void attrd_ipc_fini(void) { if (ipcs != NULL) { pcmk__drop_all_clients(ipcs); qb_ipcs_destroy(ipcs); ipcs = NULL; } attrd_unregister_handlers(); pcmk__client_cleanup(); } /*! * \internal * \brief Set up attrd IPC communication */ void attrd_init_ipc(void) { pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks); } diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c index 602ca6f7a1..23b8fc3177 100644 --- a/daemons/attrd/attrd_messages.c +++ b/daemons/attrd/attrd_messages.c @@ -1,348 +1,348 @@ /* * Copyright 2022-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // PRIu32 #include #include #include // pcmk__get_node() #include #include "pacemaker-attrd.h" int minimum_protocol_version = -1; static GHashTable *attrd_handlers = NULL; static bool is_sync_point_attr(xmlAttrPtr attr, void *data) { return pcmk__str_eq((const char *) attr->name, PCMK__XA_ATTR_SYNC_POINT, pcmk__str_none); } static int remove_sync_point_attribute(xmlNode *xml, void *data) { pcmk__xe_remove_matching_attrs(xml, false, is_sync_point_attr, NULL); pcmk__xe_foreach_child(xml, PCMK_XE_OP, remove_sync_point_attribute, NULL); return pcmk_rc_ok; } /* Sync points on a multi-update IPC message to an attrd too old to support * multi-update messages won't work. Strip the sync point attribute off here * so we don't pretend to support this situation and instead ACK the client * immediately. */ static void remove_unsupported_sync_points(pcmk__request_t *request) { if (request->xml->children != NULL && !ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version) && attrd_request_has_sync_point(request->xml)) { crm_warn("Ignoring sync point in request from %s because not all nodes support it", pcmk__request_origin(request)); remove_sync_point_attribute(request->xml, NULL); } } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown request type '%s' (bug?)", request->op); return NULL; } static xmlNode * handle_clear_failure_request(pcmk__request_t *request) { if (request->peer != NULL) { /* It is not currently possible to receive this as a peer command, * but will be, if we one day enable propagating this operation. */ attrd_peer_clear_failure(request); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } else { remove_unsupported_sync_points(request); if (attrd_request_has_sync_point(request->xml)) { /* If this client supplied a sync point it wants to wait for, add it to * the wait list. Clients on this list will not receive an ACK until * their sync point is hit which will result in the client stalled there * until it receives a response. * * All other clients will receive the expected response as normal. */ attrd_add_client_to_waitlist(request); } else { /* If the client doesn't want to wait for a sync point, go ahead and send * the ACK immediately. Otherwise, we'll send the ACK when the appropriate * sync point is reached. */ attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); } return attrd_client_clear_failure(request); } } static xmlNode * handle_confirm_request(pcmk__request_t *request) { if (request->peer != NULL) { int callid; crm_debug("Received confirmation from %s", request->peer); if (pcmk__xe_get_int(request->xml, PCMK__XA_CALL_ID, &callid) != pcmk_rc_ok) { pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Could not get callid from XML"); } else { attrd_handle_confirmation(callid, request->peer); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } else { return handle_unknown_request(request); } } static xmlNode * handle_query_request(pcmk__request_t *request) { if (request->peer != NULL) { return handle_unknown_request(request); } else { return attrd_client_query(request); } } static xmlNode * handle_remove_request(pcmk__request_t *request) { if (request->peer != NULL) { const char *host = pcmk__xe_get(request->xml, PCMK__XA_ATTR_HOST); bool reap = false; if (pcmk__xe_get_bool_attr(request->xml, PCMK__XA_REAP, &reap) != pcmk_rc_ok) { reap = true; // Default to true for backward compatibility } attrd_peer_remove(host, reap, request->peer); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } else { return attrd_client_peer_remove(request); } } static xmlNode * handle_refresh_request(pcmk__request_t *request) { if (request->peer != NULL) { return handle_unknown_request(request); } else { return attrd_client_refresh(request); } } static xmlNode * handle_sync_response_request(pcmk__request_t *request) { if (request->ipc_client != NULL) { return handle_unknown_request(request); } else { if (request->peer != NULL) { pcmk__node_status_t *peer = pcmk__get_node(0, request->peer, NULL, pcmk__node_search_cluster_member); bool peer_won = attrd_check_for_new_writer(peer, request->xml); if (!pcmk__str_eq(peer->name, attrd_cluster->priv->node_name, pcmk__str_casei)) { attrd_peer_sync_response(peer, peer_won, request->xml); } } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } } static xmlNode * handle_update_request(pcmk__request_t *request) { if (request->peer != NULL) { const char *host = pcmk__xe_get(request->xml, PCMK__XA_ATTR_HOST); pcmk__node_status_t *peer = pcmk__get_node(0, request->peer, NULL, pcmk__node_search_cluster_member); attrd_peer_update(peer, request->xml, host, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } else { remove_unsupported_sync_points(request); if (attrd_request_has_sync_point(request->xml)) { /* If this client supplied a sync point it wants to wait for, add it to * the wait list. Clients on this list will not receive an ACK until * their sync point is hit which will result in the client stalled there * until it receives a response. * * All other clients will receive the expected response as normal. */ attrd_add_client_to_waitlist(request); } else { /* If the client doesn't want to wait for a sync point, go ahead and send * the ACK immediately. Otherwise, we'll send the ACK when the appropriate * sync point is reached. * * In the normal case, attrd_client_update can be called recursively which * makes where to send the ACK tricky. Doing it here ensures the client * only ever receives one. */ attrd_send_ack(request->ipc_client, request->ipc_id, request->flags|crm_ipc_client_response); } return attrd_client_update(request); } } static void attrd_register_handlers(void) { pcmk__server_command_t handlers[] = { { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request }, { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request }, { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request }, { PCMK__ATTRD_CMD_QUERY, handle_query_request }, { PCMK__ATTRD_CMD_REFRESH, handle_refresh_request }, { PCMK__ATTRD_CMD_SYNC_RESPONSE, handle_sync_response_request }, { PCMK__ATTRD_CMD_UPDATE, handle_update_request }, { PCMK__ATTRD_CMD_UPDATE_DELAY, handle_update_request }, { PCMK__ATTRD_CMD_UPDATE_BOTH, handle_update_request }, { NULL, handle_unknown_request }, }; attrd_handlers = pcmk__register_handlers(handlers); } void attrd_unregister_handlers(void) { if (attrd_handlers != NULL) { g_hash_table_destroy(attrd_handlers); attrd_handlers = NULL; } } void attrd_handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; char *log_msg = NULL; const char *reason = NULL; if (attrd_handlers == NULL) { attrd_register_handlers(); } reply = pcmk__process_request(request, attrd_handlers); if (reply != NULL) { crm_log_xml_trace(reply, "Reply"); if (request->ipc_client != NULL) { pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); } else { crm_err("Not sending CPG reply to client"); } pcmk__xml_free(reply); } reason = request->result.exit_reason; log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", pcmk__s(reason, ""), (reason == NULL)? "" : ")"); if (!pcmk__result_ok(&request->result)) { crm_warn("%s", log_msg); } else { crm_debug("%s", log_msg); } free(log_msg); pcmk__reset_request(request); } /*! \internal \brief Broadcast private attribute for local node with protocol version */ void attrd_broadcast_protocol(void) { xmlNode *attrd_op = pcmk__xe_create(NULL, __func__); crm_xml_add(attrd_op, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(attrd_op, PCMK__XA_SRC, crm_system_name); crm_xml_add(attrd_op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL); crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION); - crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1); + pcmk__xe_set_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1); crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name); crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST_ID, attrd_cluster->priv->node_xml_id); crm_debug("Broadcasting attrd protocol version %s for node %s", ATTRD_PROTOCOL_VERSION, attrd_cluster->priv->node_name); attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */ pcmk__xml_free(attrd_op); } gboolean attrd_send_message(pcmk__node_status_t *node, xmlNode *data, bool confirm) { const char *op = pcmk__xe_get(data, PCMK_XA_TASK); crm_xml_add(data, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); /* Request a confirmation from the destination peer node (which could * be all if node is NULL) that the message has been received and * acted upon. */ if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); } attrd_xml_add_writer(data); return pcmk__cluster_send_message(node, pcmk_ipc_attrd, data); } diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c index d82c15350d..cabbb79f5e 100644 --- a/daemons/attrd/attrd_sync.c +++ b/daemons/attrd/attrd_sync.c @@ -1,572 +1,572 @@ /* * Copyright 2022-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include "pacemaker-attrd.h" /* A hash table storing clients that are waiting on a sync point to be reached. * The key is waitlist_client - just a plain int. The obvious key would be * the IPC client's ID, but this is not guaranteed to be unique. A single client * could be waiting on a sync point for multiple attributes at the same time. * * It is not expected that this hash table will ever be especially large. */ static GHashTable *waitlist = NULL; static int waitlist_client = 0; struct waitlist_node { /* What kind of sync point does this node describe? */ enum attrd_sync_point sync_point; /* Information required to construct and send a reply to the client. */ char *client_id; uint32_t ipc_id; uint32_t flags; }; /* A hash table storing information on in-progress IPC requests that are awaiting * confirmations. These requests are currently being processed by peer attrds and * we are waiting to receive confirmation messages from each peer indicating that * processing is complete. * * Multiple requests could be waiting on confirmations at the same time. * * The key is the unique callid for the IPC request, and the value is a * confirmation_action struct. */ static GHashTable *expected_confirmations = NULL; /*! * \internal * \brief A structure describing a single IPC request that is awaiting confirmations */ struct confirmation_action { /*! * \brief A list of peer attrds that we are waiting to receive confirmation * messages from * * This list is dynamic - as confirmations arrive from peer attrds, they will * be removed from this list. When the list is empty, all peers have processed * the request and the associated confirmation action will be taken. */ GList *respondents; /*! * \brief A timer that will be used to remove the client should it time out * before receiving all confirmations */ mainloop_timer_t *timer; /*! * \brief A function to run when all confirmations have been received */ attrd_confirmation_action_fn fn; /*! * \brief Information required to construct and send a reply to the client */ char *client_id; uint32_t ipc_id; uint32_t flags; /*! * \brief The XML request containing the callid associated with this action */ void *xml; }; static void next_key(void) { do { waitlist_client++; if (waitlist_client < 0) { waitlist_client = 1; } } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client))); } static void free_waitlist_node(gpointer data) { struct waitlist_node *wl = (struct waitlist_node *) data; free(wl->client_id); free(wl); } static const char * sync_point_str(enum attrd_sync_point sync_point) { if (sync_point == attrd_sync_point_local) { return PCMK__VALUE_LOCAL; } else if (sync_point == attrd_sync_point_cluster) { return PCMK__VALUE_CLUSTER; } else { return PCMK_VALUE_UNKNOWN; } } /*! * \internal * \brief Add a client to the attrd waitlist * * Typically, a client receives an ACK for its XML IPC request immediately. However, * some clients want to wait until their request has been processed and taken effect. * This is called a sync point. Any client placed on this waitlist will have its * ACK message delayed until either its requested sync point is hit, or until it * times out. * * The XML IPC request must specify the type of sync point it wants to wait for. * * \param[in,out] request The request describing the client to place on the waitlist. */ void attrd_add_client_to_waitlist(pcmk__request_t *request) { const char *sync_point = attrd_request_sync_point(request->xml); struct waitlist_node *wl = NULL; if (sync_point == NULL) { return; } if (waitlist == NULL) { waitlist = pcmk__intkey_table(free_waitlist_node); } wl = pcmk__assert_alloc(1, sizeof(struct waitlist_node)); if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) { wl->sync_point = attrd_sync_point_local; } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) { wl->sync_point = attrd_sync_point_cluster; } else { free_waitlist_node(wl); return; } wl->client_id = pcmk__str_copy(request->ipc_client->id); wl->ipc_id = request->ipc_id; wl->flags = request->flags; next_key(); pcmk__intkey_table_insert(waitlist, waitlist_client, wl); crm_trace("Added client %s to waitlist for %s sync point", wl->client_id, sync_point_str(wl->sync_point)); crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); /* And then add the key to the request XML so we can uniquely identify * it when it comes time to issue the ACK. */ - crm_xml_add_int(request->xml, PCMK__XA_CALL_ID, waitlist_client); + pcmk__xe_set_int(request->xml, PCMK__XA_CALL_ID, waitlist_client); } /*! * \internal * \brief Free all memory associated with the waitlist. This is most typically * used when attrd shuts down. */ void attrd_free_waitlist(void) { if (waitlist == NULL) { return; } g_hash_table_destroy(waitlist); waitlist = NULL; } /*! * \internal * \brief Unconditionally remove a client from the waitlist, such as when the client * node disconnects from the cluster * * \param[in] client The client to remove */ void attrd_remove_client_from_waitlist(pcmk__client_t *client) { GHashTableIter iter; gpointer value; if (waitlist == NULL) { return; } g_hash_table_iter_init(&iter, waitlist); while (g_hash_table_iter_next(&iter, NULL, &value)) { struct waitlist_node *wl = (struct waitlist_node *) value; if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) { g_hash_table_iter_remove(&iter); crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); } } } /*! * \internal * \brief Send an IPC ACK message to all awaiting clients * * This function will search the waitlist for all clients that are currently awaiting * an ACK indicating their attrd operation is complete. Only those clients with a * matching sync point type and callid from their original XML IPC request will be * ACKed. Once they have received an ACK, they will be removed from the waitlist. * * \param[in] sync_point What kind of sync point have we hit? * \param[in] xml The original XML IPC request. */ void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) { int callid; gpointer value; if (waitlist == NULL) { return; } if (pcmk__xe_get_int(xml, PCMK__XA_CALL_ID, &callid) != pcmk_rc_ok) { crm_warn("Could not get callid from request XML"); return; } value = pcmk__intkey_table_lookup(waitlist, callid); if (value != NULL) { struct waitlist_node *wl = (struct waitlist_node *) value; pcmk__client_t *client = NULL; if (wl->sync_point != sync_point) { return; } crm_notice("Alerting client %s for reached %s sync point", wl->client_id, sync_point_str(wl->sync_point)); client = pcmk__find_client_by_id(wl->client_id); if (client == NULL) { return; } attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response); /* And then remove the client so it doesn't get alerted again. */ pcmk__intkey_table_remove(waitlist, callid); crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); } } /*! * \internal * \brief Action to take when a cluster sync point is hit for a * PCMK__ATTRD_CMD_UPDATE* message. * * \param[in] xml The request that should be passed along to * attrd_ack_waitlist_clients. This should be the original * IPC request containing the callid for this update message. */ int attrd_cluster_sync_point_update(xmlNode *xml) { crm_trace("Hit cluster sync point for attribute update"); attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml); return pcmk_rc_ok; } /*! * \internal * \brief Return the sync point attribute for an IPC request * * This function will check both the top-level element of \p xml for a sync * point attribute, as well as all of its \p op children, if any. The latter * is useful for newer versions of attrd that can put multiple IPC requests * into a single message. * * \param[in] xml An XML IPC request * * \note It is assumed that if one child element has a sync point attribute, * all will have a sync point attribute and they will all be the same * sync point. No other configuration is supported. * * \return The sync point attribute of \p xml, or NULL if none. */ const char * attrd_request_sync_point(xmlNode *xml) { CRM_CHECK(xml != NULL, return NULL); if (xml->children != NULL) { xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); if (child) { return pcmk__xe_get(child, PCMK__XA_ATTR_SYNC_POINT); } else { return NULL; } } else { return pcmk__xe_get(xml, PCMK__XA_ATTR_SYNC_POINT); } } /*! * \internal * \brief Does an IPC request contain any sync point attribute? * * \param[in] xml An XML IPC request * * \return true if there's a sync point attribute, false otherwise */ bool attrd_request_has_sync_point(xmlNode *xml) { return attrd_request_sync_point(xml) != NULL; } static void free_action(gpointer data) { struct confirmation_action *action = (struct confirmation_action *) data; g_list_free_full(action->respondents, free); mainloop_timer_del(action->timer); pcmk__xml_free(action->xml); free(action->client_id); free(action); } /* Remove an IPC request from the expected_confirmations table if the peer attrds * don't respond before the timeout is hit. We set the timeout to 15s. The exact * number isn't critical - we just want to make sure that the table eventually gets * cleared of things that didn't complete. */ static gboolean confirmation_timeout_cb(gpointer data) { struct confirmation_action *action = (struct confirmation_action *) data; GHashTableIter iter; gpointer value; if (expected_confirmations == NULL) { return G_SOURCE_REMOVE; } g_hash_table_iter_init(&iter, expected_confirmations); while (g_hash_table_iter_next(&iter, NULL, &value)) { if (value == action) { pcmk__client_t *client = pcmk__find_client_by_id(action->client_id); if (client == NULL) { return G_SOURCE_REMOVE; } crm_trace("Timed out waiting for confirmations for client %s", client->id); pcmk__ipc_send_ack(client, action->ipc_id, action->flags|crm_ipc_client_response, PCMK__XE_ACK, ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT); g_hash_table_iter_remove(&iter); crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); break; } } return G_SOURCE_REMOVE; } /*! * \internal * \brief When a peer disconnects from the cluster, no longer wait for its confirmation * for any IPC action. If this peer is the last one being waited on, this will * trigger the confirmation action. * * \param[in] host The disconnecting peer attrd's uname */ void attrd_do_not_expect_from_peer(const char *host) { GList *keys = NULL; if (expected_confirmations == NULL) { return; } keys = g_hash_table_get_keys(expected_confirmations); crm_trace("Removing peer %s from expected confirmations", host); for (GList *node = keys; node != NULL; node = node->next) { int callid = *(int *) node->data; attrd_handle_confirmation(callid, host); } g_list_free(keys); } /*! * \internal * \brief When a client disconnects from the cluster, no longer wait on confirmations * for it. Because the peer attrds may still be processing the original IPC * message, they may still send us confirmations. However, we will take no * action on them. * * \param[in] client The disconnecting client */ void attrd_do_not_wait_for_client(pcmk__client_t *client) { GHashTableIter iter; gpointer value; if (expected_confirmations == NULL) { return; } g_hash_table_iter_init(&iter, expected_confirmations); while (g_hash_table_iter_next(&iter, NULL, &value)) { struct confirmation_action *action = (struct confirmation_action *) value; if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) { crm_trace("Removing client %s from expected confirmations", client->id); g_hash_table_iter_remove(&iter); crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); break; } } } /*! * \internal * \brief Register some action to be taken when IPC request confirmations are * received * * When this function is called, a list of all peer attrds that support confirming * requests is generated. As confirmations from these peer attrds are received, * they are removed from this list. When the list is empty, the registered action * will be called. * * \note This function should always be called before attrd_send_message is called * to broadcast to the peers to ensure that we know what replies we are * waiting on. Otherwise, it is possible the peer could finish and confirm * before we know to expect it. * * \param[in] request The request that is awaiting confirmations * \param[in] fn A function to be run after all confirmations are received */ void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn) { struct confirmation_action *action = NULL; GHashTableIter iter; gpointer host, ver; GList *respondents = NULL; int callid; if (expected_confirmations == NULL) { expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action); } if (pcmk__xe_get_int(request->xml, PCMK__XA_CALL_ID, &callid) != pcmk_rc_ok) { crm_err("Could not get callid from xml"); return; } if (pcmk__intkey_table_lookup(expected_confirmations, callid)) { crm_err("Already waiting on confirmations for call id %d", callid); return; } g_hash_table_iter_init(&iter, peer_protocol_vers); while (g_hash_table_iter_next(&iter, &host, &ver)) { if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) { respondents = g_list_prepend(respondents, pcmk__str_copy((char *) host)); } } action = pcmk__assert_alloc(1, sizeof(struct confirmation_action)); action->respondents = respondents; action->fn = fn; action->xml = pcmk__xml_copy(NULL, request->xml); action->client_id = pcmk__str_copy(request->ipc_client->id); action->ipc_id = request->ipc_id; action->flags = request->flags; action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action); mainloop_timer_start(action->timer); pcmk__intkey_table_insert(expected_confirmations, callid, action); crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents)); crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); } void attrd_free_confirmations(void) { if (expected_confirmations != NULL) { g_hash_table_destroy(expected_confirmations); expected_confirmations = NULL; } } /*! * \internal * \brief Process a confirmation message from a peer attrd * * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is * received from a peer attrd. If this is the last confirmation we are waiting * on for a given operation, the registered action will be called. * * \param[in] callid The unique callid for the XML IPC request * \param[in] host The confirming peer attrd's uname */ void attrd_handle_confirmation(int callid, const char *host) { struct confirmation_action *action = NULL; GList *node = NULL; if (expected_confirmations == NULL) { return; } action = pcmk__intkey_table_lookup(expected_confirmations, callid); if (action == NULL) { return; } node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp); if (node == NULL) { return; } action->respondents = g_list_remove(action->respondents, node->data); crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents)); if (action->respondents == NULL) { action->fn(action->xml); pcmk__intkey_table_remove(expected_confirmations, callid); crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); } } diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c index 6e0e1dd916..e274b6089d 100644 --- a/daemons/based/based_callbacks.c +++ b/daemons/based/based_callbacks.c @@ -1,1389 +1,1389 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // uint32_t, uint64_t, UINT64_C() #include #include #include // PRIu64 #include #include #include // xmlXPathObject, etc. #include #include #include #include #include #include #define EXIT_ESCALATION_MS 10000 qb_ipcs_service_t *ipcs_ro = NULL; qb_ipcs_service_t *ipcs_rw = NULL; qb_ipcs_service_t *ipcs_shm = NULL; static int cib_process_command(xmlNode *request, const cib__operation_t *operation, cib__op_fn_t op_function, xmlNode **reply, xmlNode **cib_diff, bool privileged); static gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged); static int32_t cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (cib_shutdown_flag) { crm_info("Ignoring new IPC client [%d] during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } static int32_t cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size) { pcmk__client_t *client = pcmk__find_client(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, TRUE); } static int32_t cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size) { pcmk__client_t *client = pcmk__find_client(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, FALSE); } /* Error code means? */ static int32_t cib_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); pcmk__free_client(client); return 0; } static void cib_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); cib_ipc_closed(c); if (cib_shutdown_flag) { cib_shutdown(0); } } struct qb_ipcs_service_handlers ipc_ro_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = NULL, .msg_process = cib_ipc_dispatch_ro, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = NULL, .msg_process = cib_ipc_dispatch_rw, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; /*! * \internal * \brief Create reply XML for a CIB request * * \param[in] op CIB operation type * \param[in] call_id CIB call ID * \param[in] client_id CIB client ID * \param[in] call_options Group of enum cib_call_options flags * \param[in] rc Request return code * \param[in] call_data Request output data * * \return Reply XML (guaranteed not to be \c NULL) * * \note The caller is responsible for freeing the return value using * \p pcmk__xml_free(). */ static xmlNode * create_cib_reply(const char *op, const char *call_id, const char *client_id, uint32_t call_options, int rc, xmlNode *call_data) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_CIB_REPLY); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(reply, PCMK__XA_CIB_OP, op); crm_xml_add(reply, PCMK__XA_CIB_CALLID, call_id); crm_xml_add(reply, PCMK__XA_CIB_CLIENTID, client_id); - crm_xml_add_int(reply, PCMK__XA_CIB_CALLOPT, call_options); - crm_xml_add_int(reply, PCMK__XA_CIB_RC, rc); + pcmk__xe_set_int(reply, PCMK__XA_CIB_CALLOPT, call_options); + pcmk__xe_set_int(reply, PCMK__XA_CIB_RC, rc); if (call_data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_CIB_CALLDATA); crm_trace("Attaching reply output"); pcmk__xml_copy(wrapper, call_data); } crm_log_xml_explicit(reply, "cib:reply"); return reply; } static void do_local_notify(const xmlNode *notify_src, const char *client_id, bool sync_reply, bool from_peer) { int msg_id = 0; int rc = pcmk_rc_ok; pcmk__client_t *client_obj = NULL; uint32_t flags = crm_ipc_server_event; CRM_CHECK((notify_src != NULL) && (client_id != NULL), return); pcmk__xe_get_int(notify_src, PCMK__XA_CIB_CALLID, &msg_id); client_obj = pcmk__find_client_by_id(client_id); if (client_obj == NULL) { crm_debug("Could not notify client %s%s %s of call %d result: " "client no longer exists", client_id, (from_peer? " (originator of delegated request)" : ""), (sync_reply? "synchronously" : "asynchronously"), msg_id); return; } if (sync_reply) { flags = crm_ipc_flags_none; if (client_obj->ipcs != NULL) { msg_id = client_obj->request_id; client_obj->request_id = 0; } } switch (PCMK__CLIENT_TYPE(client_obj)) { case pcmk__client_ipc: rc = pcmk__ipc_send_xml(client_obj, msg_id, notify_src, flags); break; case pcmk__client_tls: case pcmk__client_tcp: rc = pcmk__remote_send_xml(client_obj->remote, notify_src); break; default: rc = EPROTONOSUPPORT; break; } if (rc == pcmk_rc_ok) { crm_trace("Notified %s client %s%s %s of call %d result", pcmk__client_type_str(PCMK__CLIENT_TYPE(client_obj)), pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : ""), (sync_reply? "synchronously" : "asynchronously"), msg_id); } else { crm_warn("Could not notify %s client %s%s %s of call %d result: %s", pcmk__client_type_str(PCMK__CLIENT_TYPE(client_obj)), pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : ""), (sync_reply? "synchronously" : "asynchronously"), msg_id, pcmk_rc_str(rc)); } } void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, pcmk__client_t *cib_client, gboolean privileged) { const char *op = pcmk__xe_get(op_request, PCMK__XA_CIB_OP); uint32_t call_options = cib_none; int rc = pcmk_rc_ok; rc = pcmk__xe_get_flags(op_request, PCMK__XA_CIB_CALLOPT, &call_options, cib_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc)); } /* Requests with cib_transaction set should not be sent to based directly * (outside of a commit-transaction request) */ if (pcmk_is_set(call_options, cib_transaction)) { return; } if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { if (flags & crm_ipc_client_response) { xmlNode *ack = pcmk__xe_create(NULL, __func__); crm_xml_add(ack, PCMK__XA_CIB_OP, CRM_OP_REGISTER); crm_xml_add(ack, PCMK__XA_CIB_CLIENTID, cib_client->id); pcmk__ipc_send_xml(cib_client, id, ack, flags); cib_client->request_id = 0; pcmk__xml_free(ack); } return; } else if (pcmk__str_eq(op, PCMK__VALUE_CIB_NOTIFY, pcmk__str_none)) { /* Update the notify filters for this client */ int on_off = 0; crm_exit_t status = CRM_EX_OK; uint64_t bit = UINT64_C(0); const char *type = pcmk__xe_get(op_request, PCMK__XA_CIB_NOTIFY_TYPE); pcmk__xe_get_int(op_request, PCMK__XA_CIB_NOTIFY_ACTIVATE, &on_off); crm_debug("Setting %s callbacks %s for client %s", type, (on_off? "on" : "off"), pcmk__client_name(cib_client)); if (pcmk__str_eq(type, PCMK__VALUE_CIB_POST_NOTIFY, pcmk__str_none)) { bit = cib_notify_post; } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_PRE_NOTIFY, pcmk__str_none)) { bit = cib_notify_pre; } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_UPDATE_CONFIRMATION, pcmk__str_none)) { bit = cib_notify_confirm; } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_DIFF_NOTIFY, pcmk__str_none)) { bit = cib_notify_diff; } else { status = CRM_EX_INVALID_PARAM; } if (bit != 0) { if (on_off) { pcmk__set_client_flags(cib_client, bit); } else { pcmk__clear_client_flags(cib_client, bit); } } pcmk__ipc_send_ack(cib_client, id, flags, PCMK__XE_ACK, NULL, status); return; } cib_process_request(op_request, privileged, cib_client); } int32_t cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged) { uint32_t id = 0; uint32_t flags = 0; uint32_t call_options = cib_none; pcmk__client_t *cib_client = pcmk__find_client(c); xmlNode *op_request = pcmk__client_data2xml(cib_client, data, &id, &flags); if (op_request) { int rc = pcmk_rc_ok; rc = pcmk__xe_get_flags(op_request, PCMK__XA_CIB_CALLOPT, &call_options, cib_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc)); } } if (op_request == NULL) { crm_trace("Invalid message from %p", c); pcmk__ipc_send_ack(cib_client, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL); return 0; } else if(cib_client == NULL) { crm_trace("Invalid client %p", c); return 0; } if (pcmk_is_set(call_options, cib_sync_call)) { CRM_LOG_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */ cib_client->request_id = id; /* Reply only to the last one */ } if (cib_client->name == NULL) { const char *value = pcmk__xe_get(op_request, PCMK__XA_CIB_CLIENTNAME); if (value == NULL) { cib_client->name = pcmk__itoa(cib_client->pid); } else { cib_client->name = pcmk__str_copy(value); if (pcmk__parse_server(value) != pcmk_ipc_unknown) { pcmk__set_client_flags(cib_client, cib_is_daemon); } } } /* Allow cluster daemons more leeway before being evicted */ if (pcmk_is_set(cib_client->flags, cib_is_daemon)) { const char *qmax = cib_config_lookup(PCMK_OPT_CLUSTER_IPC_LIMIT); pcmk__set_client_queue_max(cib_client, qmax); } crm_xml_add(op_request, PCMK__XA_CIB_CLIENTID, cib_client->id); crm_xml_add(op_request, PCMK__XA_CIB_CLIENTNAME, cib_client->name); CRM_LOG_ASSERT(cib_client->user != NULL); pcmk__update_acl_user(op_request, PCMK__XA_CIB_USER, cib_client->user); cib_common_callback_worker(id, flags, op_request, cib_client, privileged); pcmk__xml_free(op_request); return 0; } static uint64_t ping_seq = 0; static char *ping_digest = NULL; static bool ping_modified_since = FALSE; static gboolean cib_digester_cb(gpointer data) { if (based_is_primary) { char buffer[32]; xmlNode *ping = pcmk__xe_create(NULL, PCMK__XE_PING); ping_seq++; free(ping_digest); ping_digest = NULL; ping_modified_since = FALSE; snprintf(buffer, 32, "%" PRIu64, ping_seq); crm_trace("Requesting peer digests (%s)", buffer); crm_xml_add(ping, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(ping, PCMK__XA_CIB_OP, CRM_OP_PING); crm_xml_add(ping, PCMK__XA_CIB_PING_ID, buffer); crm_xml_add(ping, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); pcmk__cluster_send_message(NULL, pcmk_ipc_based, ping); pcmk__xml_free(ping); } return FALSE; } static void process_ping_reply(xmlNode *reply) { uint64_t seq = 0; const char *host = pcmk__xe_get(reply, PCMK__XA_SRC); xmlNode *wrapper = pcmk__xe_first_child(reply, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *pong = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); const char *seq_s = pcmk__xe_get(pong, PCMK__XA_CIB_PING_ID); const char *digest = pcmk__xe_get(pong, PCMK__XA_DIGEST); if (seq_s == NULL) { crm_debug("Ignoring ping reply with no " PCMK__XA_CIB_PING_ID); return; } else { long long seq_ll; int rc = pcmk__scan_ll(seq_s, &seq_ll, 0LL); if (rc != pcmk_rc_ok) { crm_debug("Ignoring ping reply with invalid " PCMK__XA_CIB_PING_ID " '%s': %s", seq_s, pcmk_rc_str(rc)); return; } seq = (uint64_t) seq_ll; } if(digest == NULL) { crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host); } else if(seq != ping_seq) { crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host); } else if(ping_modified_since) { crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host); } else { if(ping_digest == NULL) { crm_trace("Calculating new digest"); ping_digest = pcmk__digest_xml(the_cib, true); } crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest); if (!pcmk__str_eq(ping_digest, digest, pcmk__str_casei)) { xmlNode *wrapper = pcmk__xe_first_child(pong, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *remote_cib = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); const char *admin_epoch_s = NULL; const char *epoch_s = NULL; const char *num_updates_s = NULL; if (remote_cib != NULL) { admin_epoch_s = pcmk__xe_get(remote_cib, PCMK_XA_ADMIN_EPOCH); epoch_s = pcmk__xe_get(remote_cib, PCMK_XA_EPOCH); num_updates_s = pcmk__xe_get(remote_cib, PCMK_XA_NUM_UPDATES); } crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p", pcmk__xe_get(the_cib, PCMK_XA_ADMIN_EPOCH), pcmk__xe_get(the_cib, PCMK_XA_EPOCH), pcmk__xe_get(the_cib, PCMK_XA_NUM_UPDATES), ping_digest, host, pcmk__s(admin_epoch_s, "_"), pcmk__s(epoch_s, "_"), pcmk__s(num_updates_s, "_"), digest, remote_cib); if(remote_cib && remote_cib->children) { // Additional debug pcmk__xml_mark_changes(the_cib, remote_cib); pcmk__log_xml_changes(LOG_INFO, remote_cib); crm_trace("End of differences"); } pcmk__xml_free(remote_cib); sync_our_cib(reply, FALSE); } } } static void parse_local_options(const pcmk__client_t *cib_client, const cib__operation_t *operation, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { // Process locally and notify local client *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; if (pcmk_is_set(operation->flags, cib__op_attr_local)) { /* Always process locally if cib__op_attr_local is set. * * @COMPAT: Currently host is ignored. At a compatibility break, throw * an error (from cib_process_request() or earlier) if host is not NULL or * OUR_NODENAME. */ crm_trace("Processing always-local %s op from client %s", op, pcmk__client_name(cib_client)); if (!pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei|pcmk__str_null_matches)) { crm_warn("Operation '%s' is always local but its target host is " "set to '%s'", op, host); } return; } if (pcmk_is_set(operation->flags, cib__op_attr_modifies) || !pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei|pcmk__str_null_matches)) { // Forward modifying and non-local requests via cluster *process = FALSE; *needs_reply = FALSE; *local_notify = FALSE; *needs_forward = TRUE; crm_trace("%s op from %s needs to be forwarded to %s", op, pcmk__client_name(cib_client), pcmk__s(host, "all nodes")); return; } if (stand_alone) { crm_trace("Processing %s op from client %s (stand-alone)", op, pcmk__client_name(cib_client)); } else { crm_trace("Processing %saddressed %s op from client %s", ((host != NULL)? "locally " : "un"), op, pcmk__client_name(cib_client)); } } static gboolean parse_peer_options(const cib__operation_t *operation, xmlNode *request, gboolean *local_notify, gboolean *needs_reply, gboolean *process) { /* TODO: What happens when an update comes in after node A * requests the CIB from node B, but before it gets the reply (and * sends out the replace operation)? * * (This may no longer be relevant since legacy mode was dropped; need to * trace code more closely to check.) */ const char *host = NULL; const char *delegated = pcmk__xe_get(request, PCMK__XA_CIB_DELEGATED_FROM); const char *op = pcmk__xe_get(request, PCMK__XA_CIB_OP); const char *originator = pcmk__xe_get(request, PCMK__XA_SRC); const char *reply_to = pcmk__xe_get(request, PCMK__XA_CIB_ISREPLYTO); gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei); if (originator == NULL) { // Shouldn't be possible originator = "peer"; } if (pcmk__str_eq(op, PCMK__CIB_REQUEST_REPLACE, pcmk__str_none)) { // sync_our_cib() sets PCMK__XA_CIB_ISREPLYTO if (reply_to) { delegated = reply_to; } goto skip_is_reply; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SYNC_TO_ALL, pcmk__str_none)) { // Nothing to do } else if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { process_ping_reply(request); return FALSE; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_UPGRADE, pcmk__str_none)) { /* Only the DC (node with the oldest software) should process * this operation if PCMK__XA_CIB_SCHEMA_MAX is unset. * * If the DC is happy it will then send out another * PCMK__CIB_REQUEST_UPGRADE which will tell all nodes to do the actual * upgrade. * * Except this time PCMK__XA_CIB_SCHEMA_MAX will be set which puts a * limit on how far newer nodes will go */ const char *max = pcmk__xe_get(request, PCMK__XA_CIB_SCHEMA_MAX); const char *upgrade_rc = pcmk__xe_get(request, PCMK__XA_CIB_UPGRADE_RC); crm_trace("Parsing upgrade %s for %s with max=%s and upgrade_rc=%s", (is_reply? "reply" : "request"), (based_is_primary? "primary" : "secondary"), pcmk__s(max, "none"), pcmk__s(upgrade_rc, "none")); if (upgrade_rc != NULL) { // Our upgrade request was rejected by DC, notify clients of result crm_xml_add(request, PCMK__XA_CIB_RC, upgrade_rc); } else if ((max == NULL) && based_is_primary) { /* We are the DC, check if this upgrade is allowed */ goto skip_is_reply; } else if(max) { /* Ok, go ahead and upgrade to 'max' */ goto skip_is_reply; } else { // Ignore broadcast client requests when we're not primary return FALSE; } } else if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) { crm_info("Detected legacy %s global update from %s", op, originator); send_sync_request(NULL); return FALSE; } else if (is_reply && pcmk_is_set(operation->flags, cib__op_attr_modifies)) { crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator); return FALSE; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { *local_notify = FALSE; if (reply_to == NULL) { *process = TRUE; } else { // Not possible? crm_debug("Ignoring shutdown request from %s because reply_to=%s", originator, reply_to); } return *process; } if (is_reply) { crm_trace("Will notify local clients for %s reply from %s", op, originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } skip_is_reply: *process = TRUE; *needs_reply = FALSE; *local_notify = pcmk__str_eq(delegated, OUR_NODENAME, pcmk__str_casei); host = pcmk__xe_get(request, PCMK__XA_CIB_HOST); if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { crm_trace("Processing %s request sent to us from %s", op, originator); *needs_reply = TRUE; return TRUE; } else if (host != NULL) { crm_trace("Ignoring %s request intended for CIB manager on %s", op, host); return FALSE; } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { *needs_reply = TRUE; } crm_trace("Processing %s request broadcast by %s call %s on %s " "(local clients will%s be notified)", op, pcmk__s(pcmk__xe_get(request, PCMK__XA_CIB_CLIENTNAME), "client"), pcmk__s(pcmk__xe_get(request, PCMK__XA_CIB_CALLID), "without ID"), originator, (*local_notify? "" : "not")); return TRUE; } /*! * \internal * \brief Forward a CIB request to the appropriate target host(s) * * \param[in] request CIB request to forward */ static void forward_request(xmlNode *request) { const char *op = pcmk__xe_get(request, PCMK__XA_CIB_OP); const char *section = pcmk__xe_get(request, PCMK__XA_CIB_SECTION); const char *host = pcmk__xe_get(request, PCMK__XA_CIB_HOST); const char *originator = pcmk__xe_get(request, PCMK__XA_SRC); const char *client_name = pcmk__xe_get(request, PCMK__XA_CIB_CLIENTNAME); const char *call_id = pcmk__xe_get(request, PCMK__XA_CIB_CALLID); pcmk__node_status_t *peer = NULL; int log_level = LOG_INFO; if (pcmk__str_eq(op, PCMK__CIB_REQUEST_NOOP, pcmk__str_none)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", pcmk__s(op, "invalid"), pcmk__s(section, "all"), pcmk__s(host, "all"), pcmk__s(originator, "local"), pcmk__s(client_name, "unspecified"), pcmk__s(call_id, "unspecified")); crm_xml_add(request, PCMK__XA_CIB_DELEGATED_FROM, OUR_NODENAME); if (host != NULL) { peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster_member); } pcmk__cluster_send_message(peer, pcmk_ipc_based, request); // Return the request to its original state pcmk__xe_remove_attr(request, PCMK__XA_CIB_DELEGATED_FROM); } static void send_peer_reply(xmlNode *msg, const char *originator) { const pcmk__node_status_t *node = NULL; if ((msg == NULL) || (originator == NULL)) { return; } // Send reply via cluster to originating node node = pcmk__get_node(0, originator, NULL, pcmk__node_search_cluster_member); crm_trace("Sending request result to %s only", originator); crm_xml_add(msg, PCMK__XA_CIB_ISREPLYTO, originator); pcmk__cluster_send_message(node, pcmk_ipc_based, msg); } /*! * \internal * \brief Handle an IPC or CPG message containing a request * * \param[in,out] request Request XML * \param[in] privileged Whether privileged commands may be run * (see cib_server_ops[] definition) * \param[in] cib_client IPC client that sent request (or NULL if CPG) * * \return Legacy Pacemaker return code */ int cib_process_request(xmlNode *request, gboolean privileged, const pcmk__client_t *cib_client) { // @TODO: Break into multiple smaller functions uint32_t call_options = cib_none; gboolean process = TRUE; // Whether to process request locally now gboolean is_update = TRUE; // Whether request would modify CIB gboolean needs_reply = TRUE; // Whether to build a reply gboolean local_notify = FALSE; // Whether to notify (local) requester gboolean needs_forward = FALSE; // Whether to forward request somewhere else xmlNode *op_reply = NULL; xmlNode *result_diff = NULL; int rc = pcmk_ok; const char *op = pcmk__xe_get(request, PCMK__XA_CIB_OP); const char *originator = pcmk__xe_get(request, PCMK__XA_SRC); const char *host = pcmk__xe_get(request, PCMK__XA_CIB_HOST); const char *call_id = pcmk__xe_get(request, PCMK__XA_CIB_CALLID); const char *client_id = pcmk__xe_get(request, PCMK__XA_CIB_CLIENTID); const char *client_name = pcmk__xe_get(request, PCMK__XA_CIB_CLIENTNAME); const char *reply_to = pcmk__xe_get(request, PCMK__XA_CIB_ISREPLYTO); const cib__operation_t *operation = NULL; cib__op_fn_t op_function = NULL; rc = pcmk__xe_get_flags(request, PCMK__XA_CIB_CALLOPT, &call_options, cib_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc)); } if ((host != NULL) && (*host == '\0')) { host = NULL; } if (cib_client == NULL) { crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)", op, pcmk__s(client_name, "client"), call_id, originator, pcmk__s(host, "all"), reply_to); } else { crm_xml_add(request, PCMK__XA_SRC, OUR_NODENAME); crm_trace("Processing local %s operation from %s/%s intended for %s", op, pcmk__s(client_name, "client"), call_id, pcmk__s(host, "all")); } rc = cib__get_operation(op, &operation); rc = pcmk_rc2legacy(rc); if (rc != pcmk_ok) { /* TODO: construct error reply? */ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); return rc; } op_function = based_get_op_function(operation); if (op_function == NULL) { crm_err("Operation %s not supported by CIB manager", op); return -EOPNOTSUPP; } if (cib_client != NULL) { parse_local_options(cib_client, operation, host, op, &local_notify, &needs_reply, &process, &needs_forward); } else if (!parse_peer_options(operation, request, &local_notify, &needs_reply, &process)) { return rc; } if (pcmk_is_set(call_options, cib_transaction)) { /* All requests in a transaction are processed locally against a working * CIB copy, and we don't notify for individual requests because the * entire transaction is atomic. * * We still call the option parser functions above, for the sake of log * messages and checking whether we're the target for peer requests. */ process = TRUE; needs_reply = FALSE; local_notify = FALSE; needs_forward = FALSE; } is_update = pcmk_is_set(operation->flags, cib__op_attr_modifies); if (pcmk_is_set(call_options, cib_discard_reply)) { /* If the request will modify the CIB, and we are in legacy mode, we * need to build a reply so we can broadcast a diff, even if the * requester doesn't want one. */ needs_reply = FALSE; local_notify = FALSE; crm_trace("Client is not interested in the reply"); } if (needs_forward) { forward_request(request); return rc; } if (cib_status != pcmk_ok) { rc = cib_status; crm_err("Ignoring request because cluster configuration is invalid " "(please repair and restart): %s", pcmk_strerror(rc)); op_reply = create_cib_reply(op, call_id, client_id, call_options, rc, the_cib); } else if (process) { time_t finished = 0; time_t now = time(NULL); int level = LOG_INFO; const char *section = pcmk__xe_get(request, PCMK__XA_CIB_SECTION); const char *admin_epoch_s = NULL; const char *epoch_s = NULL; const char *num_updates_s = NULL; rc = cib_process_command(request, operation, op_function, &op_reply, &result_diff, privileged); if (!is_update) { level = LOG_TRACE; } else if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) { switch (rc) { case pcmk_ok: level = LOG_INFO; break; case -pcmk_err_old_data: case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: level = LOG_TRACE; break; default: level = LOG_ERR; } } else if (rc != pcmk_ok) { level = LOG_WARNING; } if (the_cib != NULL) { admin_epoch_s = pcmk__xe_get(the_cib, PCMK_XA_ADMIN_EPOCH); epoch_s = pcmk__xe_get(the_cib, PCMK_XA_EPOCH); num_updates_s = pcmk__xe_get(the_cib, PCMK_XA_NUM_UPDATES); } do_crm_log(level, "Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)", op, section ? section : "'all'", pcmk_strerror(rc), rc, originator ? originator : "local", pcmk__s(client_name, "client"), call_id, pcmk__s(admin_epoch_s, "0"), pcmk__s(epoch_s, "0"), pcmk__s(num_updates_s, "0")); finished = time(NULL); if ((finished - now) > 3) { crm_trace("%s operation took %lds to complete", op, (long)(finished - now)); crm_write_blackbox(0, NULL); } if (op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_xml_err(request, "null reply"); needs_reply = FALSE; local_notify = FALSE; } } if (is_update) { crm_trace("Completed pre-sync update from %s/%s/%s%s", originator ? originator : "local", pcmk__s(client_name, "client"), call_id, local_notify?" with local notification":""); } else if (!needs_reply || stand_alone) { // This was a non-originating secondary update crm_trace("Completed update as secondary"); } else if ((cib_client == NULL) && !pcmk_is_set(call_options, cib_discard_reply)) { if (is_update == FALSE || result_diff == NULL) { crm_trace("Request not broadcast: R/O call"); } else if (rc != pcmk_ok) { crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc)); } else { crm_trace("Directing reply to %s", originator); } send_peer_reply(op_reply, originator); } if (local_notify && client_id) { crm_trace("Performing local %ssync notification for %s", (pcmk_is_set(call_options, cib_sync_call)? "" : "a"), client_id); if (process == FALSE) { do_local_notify(request, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); } else { do_local_notify(op_reply, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); } } pcmk__xml_free(op_reply); pcmk__xml_free(result_diff); return rc; } /*! * \internal * \brief Get a CIB operation's input from the request XML * * \param[in] request CIB request XML * \param[in] type CIB operation type * \param[out] section Where to store CIB section name * * \return Input XML for CIB operation * * \note If not \c NULL, the return value is a non-const pointer to part of * \p request. The caller should not free it directly. */ static xmlNode * prepare_input(const xmlNode *request, enum cib__op_type type, const char **section) { xmlNode *wrapper = pcmk__xe_first_child(request, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *input = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); if (type == cib__op_apply_patch) { *section = NULL; } else { *section = pcmk__xe_get(request, PCMK__XA_CIB_SECTION); } // Grab the specified section if ((*section != NULL) && pcmk__xe_is(input, PCMK_XE_CIB)) { input = pcmk_find_cib_element(input, *section); } return input; } #define XPATH_CONFIG_CHANGE \ "//" PCMK_XE_CHANGE \ "[contains(@" PCMK_XA_PATH ",'/" PCMK_XE_CRM_CONFIG "/')]" static bool contains_config_change(xmlNode *diff) { bool changed = false; if (diff) { xmlXPathObject *xpathObj = pcmk__xpath_search(diff->doc, XPATH_CONFIG_CHANGE); if (pcmk__xpath_num_results(xpathObj) > 0) { changed = true; } xmlXPathFreeObject(xpathObj); } return changed; } static int cib_process_command(xmlNode *request, const cib__operation_t *operation, cib__op_fn_t op_function, xmlNode **reply, xmlNode **cib_diff, bool privileged) { xmlNode *input = NULL; xmlNode *output = NULL; xmlNode *result_cib = NULL; uint32_t call_options = cib_none; const char *op = NULL; const char *section = NULL; const char *call_id = pcmk__xe_get(request, PCMK__XA_CIB_CALLID); const char *client_id = pcmk__xe_get(request, PCMK__XA_CIB_CLIENTID); const char *client_name = pcmk__xe_get(request, PCMK__XA_CIB_CLIENTNAME); const char *originator = pcmk__xe_get(request, PCMK__XA_SRC); int rc = pcmk_ok; bool config_changed = false; bool manage_counters = true; static mainloop_timer_t *digest_timer = NULL; pcmk__assert(cib_status == pcmk_ok); if(digest_timer == NULL) { digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL); } *reply = NULL; *cib_diff = NULL; /* Start processing the request... */ op = pcmk__xe_get(request, PCMK__XA_CIB_OP); rc = pcmk__xe_get_flags(request, PCMK__XA_CIB_CALLOPT, &call_options, cib_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc)); } if (!privileged && pcmk_is_set(operation->flags, cib__op_attr_privileged)) { rc = -EACCES; crm_trace("Failed due to lack of privileges: %s", pcmk_strerror(rc)); goto done; } input = prepare_input(request, operation->type, §ion); if (!pcmk_is_set(operation->flags, cib__op_attr_modifies)) { rc = cib_perform_op(NULL, op, call_options, op_function, true, section, request, input, false, &config_changed, &the_cib, &result_cib, NULL, &output); CRM_CHECK(result_cib == NULL, pcmk__xml_free(result_cib)); goto done; } /* @COMPAT: Handle a valid write action (legacy) * * @TODO: Re-evaluate whether this is all truly legacy. The cib_force_diff * portion is. However, PCMK__XA_CIB_UPDATE may be set by a sync operation * even in non-legacy mode, and manage_counters tells xml_create_patchset() * whether to update version/epoch info. */ if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) { manage_counters = false; cib__set_call_options(call_options, "call", cib_force_diff); crm_trace("Global update detected"); CRM_LOG_ASSERT(pcmk__str_any_of(op, PCMK__CIB_REQUEST_APPLY_PATCH, PCMK__CIB_REQUEST_REPLACE, NULL)); } ping_modified_since = TRUE; // result_cib must not be modified after cib_perform_op() returns rc = cib_perform_op(NULL, op, call_options, op_function, false, section, request, input, manage_counters, &config_changed, &the_cib, &result_cib, cib_diff, &output); /* Always write to disk for successful ops with the flag set. This also * negates the need to detect ordering changes. */ if ((rc == pcmk_ok) && pcmk_is_set(operation->flags, cib__op_attr_writes_through)) { config_changed = true; } if ((rc == pcmk_ok) && !pcmk_any_flags_set(call_options, cib_dryrun|cib_transaction)) { if (result_cib != the_cib) { if (pcmk_is_set(operation->flags, cib__op_attr_writes_through)) { config_changed = true; } crm_trace("Activating %s->%s%s", pcmk__xe_get(the_cib, PCMK_XA_NUM_UPDATES), pcmk__xe_get(result_cib, PCMK_XA_NUM_UPDATES), (config_changed? " changed" : "")); rc = activateCibXml(result_cib, config_changed, op); if (rc != pcmk_ok) { crm_err("Failed to activate new CIB: %s", pcmk_strerror(rc)); } } if ((rc == pcmk_ok) && contains_config_change(*cib_diff)) { cib_read_config(config_hash, result_cib); } /* @COMPAT Nodes older than feature set 3.19.0 don't support * transactions. In a mixed-version cluster with nodes <3.19.0, we must * sync the updated CIB, so that the older nodes receive the changes. * Any node that has already applied the transaction will ignore the * synced CIB. * * To ensure the updated CIB is synced from only one node, we sync it * from the originator. */ if ((operation->type == cib__op_commit_transact) && pcmk__str_eq(originator, OUR_NODENAME, pcmk__str_casei) && compare_version(pcmk__xe_get(the_cib, PCMK_XA_CRM_FEATURE_SET), "3.19.0") < 0) { sync_our_cib(request, TRUE); } mainloop_timer_stop(digest_timer); mainloop_timer_start(digest_timer); } else if (rc == -pcmk_err_schema_validation) { pcmk__assert(result_cib != the_cib); if (output != NULL) { crm_log_xml_info(output, "cib:output"); pcmk__xml_free(output); } output = result_cib; } else { crm_trace("Not activating %d %d %s", rc, pcmk_is_set(call_options, cib_dryrun), pcmk__xe_get(result_cib, PCMK_XA_NUM_UPDATES)); if (result_cib != the_cib) { pcmk__xml_free(result_cib); } } if (!pcmk_any_flags_set(call_options, cib_dryrun|cib_inhibit_notify|cib_transaction)) { crm_trace("Sending notifications %d", pcmk_is_set(call_options, cib_dryrun)); cib_diff_notify(op, rc, call_id, client_id, client_name, originator, input, *cib_diff); } pcmk__log_xml_patchset(LOG_TRACE, *cib_diff); done: if (!pcmk_is_set(call_options, cib_discard_reply)) { *reply = create_cib_reply(op, call_id, client_id, call_options, rc, output); } if (output != the_cib) { pcmk__xml_free(output); } crm_trace("done"); return rc; } void cib_peer_callback(xmlNode * msg, void *private_data) { const char *reason = NULL; const char *originator = pcmk__xe_get(msg, PCMK__XA_SRC); if (pcmk__peer_cache == NULL) { reason = "membership not established"; goto bail; } if (pcmk__xe_get(msg, PCMK__XA_CIB_CLIENTNAME) == NULL) { crm_xml_add(msg, PCMK__XA_CIB_CLIENTNAME, originator); } /* crm_log_xml_trace(msg, "Peer[inbound]"); */ cib_process_request(msg, TRUE, NULL); return; bail: if (reason) { const char *op = pcmk__xe_get(msg, PCMK__XA_CIB_OP); crm_warn("Discarding %s message from %s: %s", op, originator, reason); } } static gboolean cib_force_exit(gpointer data) { crm_notice("Exiting immediately after %s without shutdown acknowledgment", pcmk__readable_interval(EXIT_ESCALATION_MS)); terminate_cib(CRM_EX_ERROR); return FALSE; } static void disconnect_remote_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *a_client = value; crm_err("Can't disconnect client %s: Not implemented", pcmk__client_name(a_client)); } static void initiate_exit(void) { int active = 0; xmlNode *leaving = NULL; active = pcmk__cluster_num_active_nodes(); if (active < 2) { // This is the last active node crm_info("Exiting without sending shutdown request (no active peers)"); terminate_cib(CRM_EX_OK); return; } crm_info("Sending shutdown request to %d peers", active); leaving = pcmk__xe_create(NULL, PCMK__XE_EXIT_NOTIFICATION); crm_xml_add(leaving, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(leaving, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_SHUTDOWN); pcmk__cluster_send_message(NULL, pcmk_ipc_based, leaving); pcmk__xml_free(leaving); pcmk__create_timer(EXIT_ESCALATION_MS, cib_force_exit, NULL); } void cib_shutdown(int nsig) { struct qb_ipcs_stats srv_stats; if (cib_shutdown_flag == FALSE) { int disconnects = 0; qb_ipcs_connection_t *c = NULL; cib_shutdown_flag = TRUE; c = qb_ipcs_connection_first_get(ipcs_rw); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_rw, last); crm_debug("Disconnecting r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_ro); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_ro, last); crm_debug("Disconnecting r/o client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_shm); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_shm, last); crm_debug("Disconnecting non-blocking r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } disconnects += pcmk__ipc_client_count(); crm_debug("Disconnecting %d remote clients", pcmk__ipc_client_count()); pcmk__foreach_ipc_client(disconnect_remote_client, NULL); crm_info("Disconnected %d clients", disconnects); } qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE); if (pcmk__ipc_client_count() == 0) { crm_info("All clients disconnected (%d)", srv_stats.active_connections); initiate_exit(); } else { crm_info("Waiting on %d clients to disconnect (%d)", pcmk__ipc_client_count(), srv_stats.active_connections); } } extern int remote_fd; extern int remote_tls_fd; /*! * \internal * \brief Close remote sockets, free the global CIB and quit * * \param[in] exit_status What exit status to use (if -1, use CRM_EX_OK, but * skip disconnecting from the cluster layer) */ void terminate_cib(int exit_status) { if (remote_fd > 0) { close(remote_fd); remote_fd = 0; } if (remote_tls_fd > 0) { close(remote_tls_fd); remote_tls_fd = 0; } uninitializeCib(); // Exit immediately on error if (exit_status > CRM_EX_OK) { pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(exit_status); return; } if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) { /* Quit via returning from the main loop. If exit_status has the special * value -1, we skip the disconnect here, and it will be done when the * main loop returns (this allows the peer status callback to avoid * messing with the peer caches). */ if (exit_status == CRM_EX_OK) { pcmk_cluster_disconnect(crm_cluster); } g_main_loop_quit(mainloop); return; } /* Exit cleanly. Even the peer status callback can disconnect here, because * we're not returning control to the caller. */ pcmk_cluster_disconnect(crm_cluster); pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(CRM_EX_OK); } diff --git a/daemons/based/based_io.c b/daemons/based/based_io.c index 7f2a959b1d..26decf52eb 100644 --- a/daemons/based/based_io.c +++ b/daemons/based/based_io.c @@ -1,422 +1,422 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include crm_trigger_t *cib_writer = NULL; int write_cib_contents(gpointer p); static void cib_rename(const char *old) { int new_fd; char *new = crm_strdup_printf("%s/cib.auto.XXXXXX", cib_root); umask(S_IWGRP | S_IWOTH | S_IROTH); new_fd = mkstemp(new); if ((new_fd < 0) || (rename(old, new) < 0)) { crm_err("Couldn't archive unusable file %s (disabling disk writes and continuing)", old); cib_writes_enabled = FALSE; } else { crm_err("Archived unusable file %s as %s", old, new); } if (new_fd > 0) { close(new_fd); } free(new); } /* * It is the callers responsibility to free the output of this function */ static xmlNode * retrieveCib(const char *filename, const char *sigfile) { xmlNode *root = NULL; int rc = cib_file_read_and_verify(filename, sigfile, &root); if (rc == pcmk_ok) { crm_info("Loaded CIB from %s (with digest %s)", filename, sigfile); } else { crm_warn("Continuing but NOT using CIB from %s (with digest %s): %s", filename, sigfile, pcmk_strerror(rc)); if (rc == -pcmk_err_cib_modified) { // Archive the original files so the contents are not lost cib_rename(filename); cib_rename(sigfile); } } return root; } static int cib_archive_filter(const struct dirent * a) { int rc = 0; // Looking for regular files starting with "cib-" and not ending in .sig struct stat s; char *a_path = crm_strdup_printf("%s/%s", cib_root, a->d_name); if(stat(a_path, &s) != 0) { rc = errno; crm_trace("%s - stat failed: %s (%d)", a->d_name, pcmk_rc_str(rc), rc); rc = 0; } else if (!S_ISREG(s.st_mode)) { crm_trace("%s - wrong type (%#o)", a->d_name, (unsigned int) (s.st_mode & S_IFMT)); } else if(strstr(a->d_name, "cib-") != a->d_name) { crm_trace("%s - wrong prefix", a->d_name); } else if (pcmk__ends_with_ext(a->d_name, ".sig")) { crm_trace("%s - wrong suffix", a->d_name); } else { crm_debug("%s - candidate", a->d_name); rc = 1; } free(a_path); return rc; } static int cib_archive_sort(const struct dirent ** a, const struct dirent **b) { /* Order by creation date - most recently created file first */ int rc = 0; struct stat buf; time_t a_age = 0; time_t b_age = 0; char *a_path = crm_strdup_printf("%s/%s", cib_root, a[0]->d_name); char *b_path = crm_strdup_printf("%s/%s", cib_root, b[0]->d_name); if(stat(a_path, &buf) == 0) { a_age = buf.st_ctime; } if(stat(b_path, &buf) == 0) { b_age = buf.st_ctime; } free(a_path); free(b_path); if(a_age > b_age) { rc = 1; } else if(a_age < b_age) { rc = -1; } crm_trace("%s (%lu) vs. %s (%lu) : %d", a[0]->d_name, (unsigned long)a_age, b[0]->d_name, (unsigned long)b_age, rc); return rc; } xmlNode * readCibXmlFile(const char *dir, const char *file, gboolean discard_status) { struct dirent **namelist = NULL; int lpc = 0; char *sigfile = NULL; char *sigfilepath = NULL; char *filename = NULL; const char *name = NULL; const char *value = NULL; const char *use_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); xmlNode *root = NULL; xmlNode *status = NULL; sigfile = crm_strdup_printf("%s.sig", file); if (pcmk__daemon_can_write(dir, file) == FALSE || pcmk__daemon_can_write(dir, sigfile) == FALSE) { cib_status = -EACCES; return NULL; } filename = crm_strdup_printf("%s/%s", dir, file); sigfilepath = crm_strdup_printf("%s/%s", dir, sigfile); free(sigfile); cib_status = pcmk_ok; root = retrieveCib(filename, sigfilepath); free(filename); free(sigfilepath); if (root == NULL) { lpc = scandir(cib_root, &namelist, cib_archive_filter, cib_archive_sort); if (lpc < 0) { crm_err("Could not check for CIB backups in %s: %s", cib_root, pcmk_rc_str(errno)); } } while (root == NULL && lpc > 1) { int rc = pcmk_ok; lpc--; filename = crm_strdup_printf("%s/%s", cib_root, namelist[lpc]->d_name); sigfile = crm_strdup_printf("%s.sig", filename); rc = cib_file_read_and_verify(filename, sigfile, &root); if (rc == pcmk_ok) { crm_notice("Loaded CIB from last valid backup %s (with digest %s)", filename, sigfile); } else { crm_warn("Not using next most recent CIB backup from %s " "(with digest %s): %s", filename, sigfile, pcmk_strerror(rc)); } free(namelist[lpc]); free(filename); free(sigfile); } free(namelist); if (root == NULL) { root = createEmptyCib(0); crm_warn("Continuing with an empty configuration"); } if (cib_writes_enabled && (use_valgrind != NULL) && (crm_is_true(use_valgrind) || (strstr(use_valgrind, PCMK__SERVER_BASED) != NULL))) { cib_writes_enabled = FALSE; crm_err("*** Disabling disk writes to avoid confusing Valgrind ***"); } status = pcmk__xe_first_child(root, PCMK_XE_STATUS, NULL, NULL); if (discard_status && status != NULL) { // Strip out the PCMK_XE_STATUS section if there is one pcmk__xml_free(status); status = NULL; } if (status == NULL) { pcmk__xe_create(root, PCMK_XE_STATUS); } /* Do this before schema validation happens */ /* fill in some defaults */ value = pcmk__xe_get(root, PCMK_XA_ADMIN_EPOCH); if (value == NULL) { // Not possible with schema validation enabled crm_warn("Defaulting missing " PCMK_XA_ADMIN_EPOCH " to 0, but " "cluster may get confused about which node's configuration " "is most recent"); - crm_xml_add_int(root, PCMK_XA_ADMIN_EPOCH, 0); + pcmk__xe_set_int(root, PCMK_XA_ADMIN_EPOCH, 0); } name = PCMK_XA_EPOCH; value = pcmk__xe_get(root, name); if (value == NULL) { - crm_xml_add_int(root, name, 0); + pcmk__xe_set_int(root, name, 0); } name = PCMK_XA_NUM_UPDATES; value = pcmk__xe_get(root, name); if (value == NULL) { - crm_xml_add_int(root, name, 0); + pcmk__xe_set_int(root, name, 0); } // Unset (DC should set appropriate value) pcmk__xe_remove_attr(root, PCMK_XA_DC_UUID); if (discard_status) { crm_log_xml_trace(root, "[on-disk]"); } if (!pcmk__configured_schema_validates(root)) { cib_status = -pcmk_err_schema_validation; } return root; } gboolean uninitializeCib(void) { xmlNode *tmp_cib = the_cib; if (tmp_cib == NULL) { return FALSE; } the_cib = NULL; pcmk__xml_free(tmp_cib); return TRUE; } /* * This method will free the old CIB pointer on success and the new one * on failure. */ int activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op) { if (new_cib) { xmlNode *saved_cib = the_cib; pcmk__assert(new_cib != saved_cib); the_cib = new_cib; pcmk__xml_free(saved_cib); if (cib_writes_enabled && cib_status == pcmk_ok && to_disk) { crm_debug("Triggering CIB write for %s op", op); mainloop_set_trigger(cib_writer); } return pcmk_ok; } crm_err("Ignoring invalid CIB"); if (the_cib) { crm_warn("Reverting to last known CIB"); } else { crm_crit("Could not write out new CIB and no saved version to revert to"); } return -ENODATA; } static void cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { const char *errmsg = "Could not write CIB to disk"; if ((exitcode != 0) && cib_writes_enabled) { cib_writes_enabled = FALSE; errmsg = "Disabling CIB disk writes after failure"; } if ((signo == 0) && (exitcode == 0)) { crm_trace("Disk write [%d] succeeded", (int) pid); } else if (signo == 0) { crm_err("%s: process %d exited %d", errmsg, (int) pid, exitcode); } else { crm_err("%s: process %d terminated with signal %d (%s)%s", errmsg, (int) pid, signo, strsignal(signo), (core? " and dumped core" : "")); } mainloop_trigger_complete(cib_writer); } int write_cib_contents(gpointer p) { int exit_rc = pcmk_ok; xmlNode *cib_local = NULL; /* Make a copy of the CIB to write (possibly in a forked child) */ if (p) { /* Synchronous write out */ cib_local = pcmk__xml_copy(NULL, p); } else { int pid = 0; int bb_state = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0); /* Turn it off before the fork() to avoid: * - 2 processes writing to the same shared mem * - the child needing to disable it * (which would close it from underneath the parent) * This way, the shared mem files are already closed */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); pid = fork(); if (pid < 0) { crm_err("Disabling disk writes after fork failure: %s", pcmk_rc_str(errno)); cib_writes_enabled = FALSE; return FALSE; } if (pid) { /* Parent */ mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete); if (bb_state == QB_LOG_STATE_ENABLED) { /* Re-enable now that it it safe */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); } return -1; /* -1 means 'still work to do' */ } /* Asynchronous write-out after a fork() */ /* In theory, we can scribble on the_cib here and not affect the parent, * but let's be safe anyway. */ cib_local = pcmk__xml_copy(NULL, the_cib); } /* Write the CIB */ exit_rc = cib_file_write_with_digest(cib_local, cib_root, "cib.xml"); /* A nonzero exit code will cause further writes to be disabled */ pcmk__xml_free(cib_local); if (p == NULL) { crm_exit_t exit_code = CRM_EX_OK; switch (exit_rc) { case pcmk_ok: exit_code = CRM_EX_OK; break; case pcmk_err_cib_modified: exit_code = CRM_EX_DIGEST; // Existing CIB doesn't match digest break; case pcmk_err_cib_backup: // Existing CIB couldn't be backed up case pcmk_err_cib_save: // New CIB couldn't be saved exit_code = CRM_EX_CANTCREAT; break; default: exit_code = CRM_EX_ERROR; break; } /* Use _exit() because exit() could affect the parent adversely */ pcmk_common_cleanup(); _exit(exit_code); } return exit_rc; } diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c index 85957b5ab6..e6791d0e5b 100644 --- a/daemons/based/based_messages.c +++ b/daemons/based/based_messages.c @@ -1,528 +1,528 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Maximum number of diffs to ignore while waiting for a resync */ #define MAX_DIFF_RETRY 5 bool based_is_primary = false; xmlNode *the_cib = NULL; int cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *host = pcmk__xe_get(req, PCMK__XA_SRC); *answer = NULL; if (pcmk__xe_get(req, PCMK__XA_CIB_ISREPLYTO) == NULL) { crm_info("Peer %s is requesting to shut down", host); return pcmk_ok; } if (cib_shutdown_flag == FALSE) { crm_err("Peer %s mistakenly thinks we wanted to shut down", host); return -EINVAL; } crm_info("Exiting after %s acknowledged our shutdown request", host); terminate_cib(CRM_EX_OK); return pcmk_ok; } // @COMPAT: Remove when PCMK__CIB_REQUEST_NOOP is removed int cib_process_noop(const char *op, int options, const char *section, xmlNode *req, xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, xmlNode **answer) { crm_trace("Processing \"%s\" event", op); *answer = NULL; return pcmk_ok; } int cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); // @COMPAT Pacemaker Remote clients <3.0.0 may send this if (pcmk__str_eq(op, PCMK__CIB_REQUEST_IS_PRIMARY, pcmk__str_none)) { if (based_is_primary) { result = pcmk_ok; } else { result = -EPERM; } return result; } if (pcmk__str_eq(op, PCMK__CIB_REQUEST_PRIMARY, pcmk__str_none)) { if (!based_is_primary) { crm_info("We are now in R/W mode"); based_is_primary = true; } else { crm_debug("We are still in R/W mode"); } } else if (based_is_primary) { crm_info("We are now in R/O mode"); based_is_primary = false; } return result; } /* Set to 1 when a sync is requested, incremented when a diff is ignored, * reset to 0 when a sync is received */ static int sync_in_progress = 0; void send_sync_request(const char *host) { xmlNode *sync_me = pcmk__xe_create(NULL, "sync-me"); pcmk__node_status_t *peer = NULL; crm_info("Requesting re-sync from %s", (host? host : "all peers")); sync_in_progress = 1; crm_xml_add(sync_me, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(sync_me, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_SYNC_TO_ONE); crm_xml_add(sync_me, PCMK__XA_CIB_DELEGATED_FROM, OUR_NODENAME); if (host != NULL) { peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster_member); } pcmk__cluster_send_message(peer, pcmk_ipc_based, sync_me); pcmk__xml_free(sync_me); } int cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *host = pcmk__xe_get(req, PCMK__XA_SRC); const char *seq = pcmk__xe_get(req, PCMK__XA_CIB_PING_ID); char *digest = pcmk__digest_xml(the_cib, true); xmlNode *wrapper = NULL; crm_trace("Processing \"%s\" event %s from %s", op, seq, host); *answer = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE); crm_xml_add(*answer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); crm_xml_add(*answer, PCMK__XA_DIGEST, digest); crm_xml_add(*answer, PCMK__XA_CIB_PING_ID, seq); wrapper = pcmk__xe_create(*answer, PCMK__XE_CIB_CALLDATA); if (the_cib != NULL) { pcmk__if_tracing( { /* Append additional detail so the receiver can log the * differences */ pcmk__xml_copy(wrapper, the_cib); }, { // Always include at least the version details const char *name = (const char *) the_cib->name; xmlNode *shallow = pcmk__xe_create(wrapper, name); pcmk__xe_copy_attrs(shallow, the_cib, pcmk__xaf_none); } ); } crm_info("Reporting our current digest to %s: %s for %s.%s.%s", host, digest, pcmk__xe_get(existing_cib, PCMK_XA_ADMIN_EPOCH), pcmk__xe_get(existing_cib, PCMK_XA_EPOCH), pcmk__xe_get(existing_cib, PCMK_XA_NUM_UPDATES)); free(digest); return pcmk_ok; } int cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { return sync_our_cib(req, TRUE); } int cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = pcmk_ok; *answer = NULL; if (pcmk__xe_get(req, PCMK__XA_CIB_SCHEMA_MAX) != NULL) { /* The originator of an upgrade request sends it to the DC, without * PCMK__XA_CIB_SCHEMA_MAX. If an upgrade is needed, the DC * re-broadcasts the request with PCMK__XA_CIB_SCHEMA_MAX, and each node * performs the upgrade (and notifies its local clients) here. */ return cib_process_upgrade( op, options, section, req, input, existing_cib, result_cib, answer); } else { xmlNode *scratch = pcmk__xml_copy(NULL, existing_cib); const char *host = pcmk__xe_get(req, PCMK__XA_SRC); const char *original_schema = NULL; const char *new_schema = NULL; const char *client_id = pcmk__xe_get(req, PCMK__XA_CIB_CLIENTID); const char *call_opts = pcmk__xe_get(req, PCMK__XA_CIB_CALLOPT); const char *call_id = pcmk__xe_get(req, PCMK__XA_CIB_CALLID); crm_trace("Processing \"%s\" event", op); original_schema = pcmk__xe_get(existing_cib, PCMK_XA_VALIDATE_WITH); if (original_schema == NULL) { crm_info("Rejecting upgrade request from %s: No " PCMK_XA_VALIDATE_WITH, host); return -pcmk_err_cib_corrupt; } rc = pcmk__update_schema(&scratch, NULL, true, true); rc = pcmk_rc2legacy(rc); new_schema = pcmk__xe_get(scratch, PCMK_XA_VALIDATE_WITH); if (pcmk__cmp_schemas_by_name(new_schema, original_schema) > 0) { xmlNode *up = pcmk__xe_create(NULL, __func__); rc = pcmk_ok; crm_notice("Upgrade request from %s verified", host); crm_xml_add(up, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(up, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_UPGRADE); crm_xml_add(up, PCMK__XA_CIB_SCHEMA_MAX, new_schema); crm_xml_add(up, PCMK__XA_CIB_DELEGATED_FROM, host); crm_xml_add(up, PCMK__XA_CIB_CLIENTID, client_id); crm_xml_add(up, PCMK__XA_CIB_CALLOPT, call_opts); crm_xml_add(up, PCMK__XA_CIB_CALLID, call_id); pcmk__cluster_send_message(NULL, pcmk_ipc_based, up); pcmk__xml_free(up); } else if(rc == pcmk_ok) { rc = -pcmk_err_schema_unchanged; } if (rc != pcmk_ok) { // Notify originating peer so it can notify its local clients pcmk__node_status_t *origin = NULL; origin = pcmk__search_node_caches(0, host, NULL, pcmk__node_search_cluster_member); crm_info("Rejecting upgrade request from %s: %s " QB_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc, (origin? origin->name : "lost")); if (origin) { xmlNode *up = pcmk__xe_create(NULL, __func__); crm_xml_add(up, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(up, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_UPGRADE); crm_xml_add(up, PCMK__XA_CIB_DELEGATED_FROM, host); crm_xml_add(up, PCMK__XA_CIB_ISREPLYTO, host); crm_xml_add(up, PCMK__XA_CIB_CLIENTID, client_id); crm_xml_add(up, PCMK__XA_CIB_CALLOPT, call_opts); crm_xml_add(up, PCMK__XA_CIB_CALLID, call_id); - crm_xml_add_int(up, PCMK__XA_CIB_UPGRADE_RC, rc); + pcmk__xe_set_int(up, PCMK__XA_CIB_UPGRADE_RC, rc); if (!pcmk__cluster_send_message(origin, pcmk_ipc_based, up)) { crm_warn("Could not send CIB upgrade result to %s", host); } pcmk__xml_free(up); } } pcmk__xml_free(scratch); } return rc; } int cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { return sync_our_cib(req, FALSE); } int cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = pcmk_ok; if (sync_in_progress > MAX_DIFF_RETRY) { /* Don't ignore diffs forever; the last request may have been lost. * If the diff fails, we'll ask for another full resync. */ sync_in_progress = 0; } // The primary instance should never ignore a diff if (sync_in_progress && !based_is_primary) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; cib_diff_version_details(input, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); sync_in_progress++; crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates); return -pcmk_err_diff_resync; } rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer); crm_trace("result: %s (%d), %s", pcmk_strerror(rc), rc, (based_is_primary? "primary": "secondary")); if ((rc == -pcmk_err_diff_resync) && !based_is_primary) { pcmk__xml_free(*result_cib); *result_cib = NULL; send_sync_request(NULL); } else if (rc == -pcmk_err_diff_resync) { rc = -pcmk_err_diff_failed; if (options & cib_force_diff) { crm_warn("Not requesting full refresh in R/W mode"); } } return rc; } int cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer); if ((rc == pcmk_ok) && pcmk__xe_is(input, PCMK_XE_CIB)) { sync_in_progress = 0; } return rc; } /* @COMPAT: Remove when PCMK__CIB_REQUEST_ABS_DELETE is removed * (At least external client code <3.0.0 can send it) */ int cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { return -EINVAL; } static xmlNode * cib_msg_copy(xmlNode *msg) { static const char *field_list[] = { PCMK__XA_T, PCMK__XA_CIB_CLIENTID, PCMK__XA_CIB_CALLOPT, PCMK__XA_CIB_CALLID, PCMK__XA_CIB_OP, PCMK__XA_CIB_ISREPLYTO, PCMK__XA_CIB_SECTION, PCMK__XA_CIB_HOST, PCMK__XA_CIB_RC, PCMK__XA_CIB_DELEGATED_FROM, PCMK__XA_CIB_UPDATE, PCMK__XA_CIB_CLIENTNAME, PCMK__XA_CIB_USER, PCMK__XA_CIB_NOTIFY_TYPE, PCMK__XA_CIB_NOTIFY_ACTIVATE, }; xmlNode *copy = pcmk__xe_create(NULL, PCMK__XE_COPY); for (int lpc = 0; lpc < PCMK__NELEM(field_list); lpc++) { const char *field = field_list[lpc]; const char *value = pcmk__xe_get(msg, field); if (value != NULL) { crm_xml_add(copy, field, value); } } return copy; } int sync_our_cib(xmlNode * request, gboolean all) { int result = pcmk_ok; char *digest = NULL; const char *host = pcmk__xe_get(request, PCMK__XA_SRC); const char *op = pcmk__xe_get(request, PCMK__XA_CIB_OP); pcmk__node_status_t *peer = NULL; xmlNode *replace_request = NULL; xmlNode *wrapper = NULL; CRM_CHECK(the_cib != NULL, return -EINVAL); CRM_CHECK(all || (host != NULL), return -EINVAL); crm_debug("Syncing CIB to %s", all ? "all peers" : host); replace_request = cib_msg_copy(request); if (host != NULL) { crm_xml_add(replace_request, PCMK__XA_CIB_ISREPLYTO, host); } if (all) { pcmk__xe_remove_attr(replace_request, PCMK__XA_CIB_HOST); } crm_xml_add(replace_request, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_REPLACE); // @TODO Keep for tracing, or drop? crm_xml_add(replace_request, PCMK__XA_ORIGINAL_CIB_OP, op); pcmk__xe_set_bool_attr(replace_request, PCMK__XA_CIB_UPDATE, true); crm_xml_add(replace_request, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); digest = pcmk__digest_xml(the_cib, true); crm_xml_add(replace_request, PCMK__XA_DIGEST, digest); wrapper = pcmk__xe_create(replace_request, PCMK__XE_CIB_CALLDATA); pcmk__xml_copy(wrapper, the_cib); if (!all) { peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster_member); } if (!pcmk__cluster_send_message(peer, pcmk_ipc_based, replace_request)) { result = -ENOTCONN; } pcmk__xml_free(replace_request); free(digest); return result; } int cib_process_commit_transaction(const char *op, int options, const char *section, xmlNode *req, xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, xmlNode **answer) { /* On success, our caller will activate *result_cib locally, trigger a * replace notification if appropriate, and sync *result_cib to all nodes. * On failure, our caller will free *result_cib. */ int rc = pcmk_rc_ok; const char *client_id = pcmk__xe_get(req, PCMK__XA_CIB_CLIENTID); const char *origin = pcmk__xe_get(req, PCMK__XA_SRC); pcmk__client_t *client = pcmk__find_client_by_id(client_id); rc = based_commit_transaction(input, client, origin, result_cib); if (rc != pcmk_rc_ok) { char *source = based_transaction_source_str(client, origin); crm_err("Could not commit transaction for %s: %s", source, pcmk_rc_str(rc)); free(source); } return pcmk_rc2legacy(rc); } int cib_process_schemas(const char *op, int options, const char *section, xmlNode *req, xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, xmlNode **answer) { xmlNode *wrapper = NULL; xmlNode *data = NULL; const char *after_ver = NULL; GList *schemas = NULL; GList *already_included = NULL; *answer = pcmk__xe_create(NULL, PCMK__XA_SCHEMAS); wrapper = pcmk__xe_first_child(req, PCMK__XE_CIB_CALLDATA, NULL, NULL); data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); if (data == NULL) { crm_warn("No data specified in request"); return -EPROTO; } after_ver = pcmk__xe_get(data, PCMK_XA_VERSION); if (after_ver == NULL) { crm_warn("No version specified in request"); return -EPROTO; } /* The client requested all schemas after the latest one we know about, which * means the client is fully up-to-date. Return a properly formatted reply * with no schemas. */ if (pcmk__str_eq(after_ver, pcmk__highest_schema_name(), pcmk__str_none)) { return pcmk_ok; } schemas = pcmk__schema_files_later_than(after_ver); for (GList *iter = schemas; iter != NULL; iter = iter->next) { pcmk__build_schema_xml_node(*answer, iter->data, &already_included); } g_list_free_full(schemas, free); g_list_free_full(already_included, free); return pcmk_ok; } diff --git a/daemons/based/based_notify.c b/daemons/based/based_notify.c index 93248a08cc..7a164b0945 100644 --- a/daemons/based/based_notify.c +++ b/daemons/based/based_notify.c @@ -1,198 +1,198 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include // PRIx64 #include #include #include #include #include #include #include #include #include #include #include struct cib_notification_s { const xmlNode *msg; struct iovec *iov; int32_t iov_size; }; static void cib_notify_send_one(gpointer key, gpointer value, gpointer user_data) { const char *type = NULL; gboolean do_send = FALSE; int rc = pcmk_rc_ok; pcmk__client_t *client = value; struct cib_notification_s *update = user_data; if (client->ipcs == NULL && client->remote == NULL) { crm_warn("Skipping client with NULL channel"); return; } type = pcmk__xe_get(update->msg, PCMK__XA_SUBT); CRM_LOG_ASSERT(type != NULL); if (pcmk_is_set(client->flags, cib_notify_diff) && pcmk__str_eq(type, PCMK__VALUE_CIB_DIFF_NOTIFY, pcmk__str_none)) { do_send = TRUE; } else if (pcmk_is_set(client->flags, cib_notify_confirm) && pcmk__str_eq(type, PCMK__VALUE_CIB_UPDATE_CONFIRMATION, pcmk__str_none)) { do_send = TRUE; } else if (pcmk_is_set(client->flags, cib_notify_pre) && pcmk__str_eq(type, PCMK__VALUE_CIB_PRE_NOTIFY, pcmk__str_none)) { do_send = TRUE; } else if (pcmk_is_set(client->flags, cib_notify_post) && pcmk__str_eq(type, PCMK__VALUE_CIB_POST_NOTIFY, pcmk__str_none)) { do_send = TRUE; } if (do_send) { switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: rc = pcmk__ipc_send_iov(client, update->iov, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("Could not notify client %s: %s " QB_XS " id=%s", pcmk__client_name(client), pcmk_rc_str(rc), client->id); } break; case pcmk__client_tls: case pcmk__client_tcp: crm_debug("Sent %s notification to client %s (id %s)", type, pcmk__client_name(client), client->id); pcmk__remote_send_xml(client->remote, update->msg); break; default: crm_err("Unknown transport for client %s " QB_XS " flags=%#016" PRIx64, pcmk__client_name(client), client->flags); } } } static void cib_notify_send(const xmlNode *xml) { struct iovec *iov; struct cib_notification_s update; ssize_t bytes = 0; int rc = pcmk__ipc_prepare_iov(0, xml, 0, &iov, &bytes); if (rc == pcmk_rc_ok) { update.msg = xml; update.iov = iov; update.iov_size = bytes; pcmk__foreach_ipc_client(cib_notify_send_one, &update); } else { crm_notice("Could not notify clients: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); } pcmk_free_ipc_event(iov); } void cib_diff_notify(const char *op, int result, const char *call_id, const char *client_id, const char *client_name, const char *origin, xmlNode *update, xmlNode *diff) { int add_updates = 0; int add_epoch = 0; int add_admin_epoch = 0; int del_updates = 0; int del_epoch = 0; int del_admin_epoch = 0; uint8_t log_level = LOG_TRACE; xmlNode *update_msg = NULL; xmlNode *wrapper = NULL; if (diff == NULL) { return; } if (result != pcmk_ok) { log_level = LOG_WARNING; } cib_diff_version_details(diff, &add_admin_epoch, &add_epoch, &add_updates, &del_admin_epoch, &del_epoch, &del_updates); if ((add_admin_epoch != del_admin_epoch) || (add_epoch != del_epoch) || (add_updates != del_updates)) { do_crm_log(log_level, "Updated CIB generation %d.%d.%d to %d.%d.%d from client " "%s%s%s (%s) (%s)", del_admin_epoch, del_epoch, del_updates, add_admin_epoch, add_epoch, add_updates, client_name, ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""), pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); } else if ((add_admin_epoch != 0) || (add_epoch != 0) || (add_updates != 0)) { do_crm_log(log_level, "Local-only change to CIB generation %d.%d.%d from client " "%s%s%s (%s) (%s)", add_admin_epoch, add_epoch, add_updates, client_name, ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""), pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); } update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_CIB_NOTIFY); crm_xml_add(update_msg, PCMK__XA_SUBT, PCMK__VALUE_CIB_DIFF_NOTIFY); crm_xml_add(update_msg, PCMK__XA_CIB_OP, op); crm_xml_add(update_msg, PCMK__XA_CIB_CLIENTID, client_id); crm_xml_add(update_msg, PCMK__XA_CIB_CLIENTNAME, client_name); crm_xml_add(update_msg, PCMK__XA_CIB_CALLID, call_id); crm_xml_add(update_msg, PCMK__XA_SRC, origin); - crm_xml_add_int(update_msg, PCMK__XA_CIB_RC, result); + pcmk__xe_set_int(update_msg, PCMK__XA_CIB_RC, result); wrapper = pcmk__xe_create(update_msg, PCMK__XE_CIB_UPDATE_RESULT); pcmk__xml_copy(wrapper, diff); crm_log_xml_trace(update_msg, "diff-notify"); cib_notify_send(update_msg); pcmk__xml_free(update_msg); } diff --git a/daemons/based/based_remote.c b/daemons/based/based_remote.c index 0747fd7b9a..620a222ab1 100644 --- a/daemons/based/based_remote.c +++ b/daemons/based/based_remote.c @@ -1,661 +1,661 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include // PRIx64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-based.h" #include #include #include #if HAVE_SECURITY_PAM_APPL_H # include # define HAVE_PAM 1 #elif HAVE_PAM_PAM_APPL_H # include # define HAVE_PAM 1 #endif static pcmk__tls_t *tls = NULL; extern int remote_tls_fd; extern gboolean cib_shutdown_flag; int init_remote_listener(int port, gboolean encrypted); void cib_remote_connection_destroy(gpointer user_data); // @TODO This is rather short for someone to type their password #define REMOTE_AUTH_TIMEOUT 10000 int num_clients; static bool authenticate_user(const char *user, const char *passwd); static int cib_remote_listen(gpointer data); static int cib_remote_msg(gpointer data); static void remote_connection_destroy(gpointer user_data) { crm_info("No longer listening for remote connections"); return; } int init_remote_listener(int port, gboolean encrypted) { int rc; int *ssock = NULL; struct sockaddr_in saddr; int optval; static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { .dispatch = cib_remote_listen, .destroy = remote_connection_destroy, }; if (port <= 0) { /* don't start it */ return 0; } if (encrypted) { bool use_cert = pcmk__x509_enabled(); crm_notice("Starting TLS listener on port %d", port); rc = pcmk__init_tls(&tls, true, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_ANON); if (rc != pcmk_rc_ok) { return -1; } } else { crm_warn("Starting plain-text listener on port %d", port); } #ifndef HAVE_PAM crm_warn("This build does not support remote administrators " "because PAM support is not available"); #endif /* create server socket */ ssock = pcmk__assert_alloc(1, sizeof(int)); *ssock = socket(AF_INET, SOCK_STREAM, 0); if (*ssock == -1) { crm_err("Listener socket creation failed: %s", pcmk_rc_str(errno)); free(ssock); return -1; } /* reuse address */ optval = 1; rc = setsockopt(*ssock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (rc < 0) { crm_err("Local address reuse not allowed on listener socket: %s", pcmk_rc_str(errno)); } /* bind server socket */ memset(&saddr, '\0', sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = INADDR_ANY; saddr.sin_port = htons(port); if (bind(*ssock, (struct sockaddr *)&saddr, sizeof(saddr)) == -1) { crm_err("Cannot bind to listener socket: %s", pcmk_rc_str(errno)); close(*ssock); free(ssock); return -2; } if (listen(*ssock, 10) == -1) { crm_err("Cannot listen on socket: %s", pcmk_rc_str(errno)); close(*ssock); free(ssock); return -3; } mainloop_add_fd("cib-remote", G_PRIORITY_DEFAULT, *ssock, ssock, &remote_listen_fd_callbacks); crm_debug("Started listener on port %d", port); return *ssock; } static int check_group_membership(const char *usr, const char *grp) { int index = 0; struct passwd *pwd = NULL; struct group *group = NULL; pwd = getpwnam(usr); if (pwd == NULL) { crm_notice("Rejecting remote client: '%s' is not a valid user", usr); return FALSE; } group = getgrgid(pwd->pw_gid); if (group != NULL && pcmk__str_eq(grp, group->gr_name, pcmk__str_none)) { return TRUE; } group = getgrnam(grp); if (group == NULL) { crm_err("Rejecting remote client: '%s' is not a valid group", grp); return FALSE; } while (TRUE) { char *member = group->gr_mem[index++]; if (member == NULL) { break; } else if (pcmk__str_eq(usr, member, pcmk__str_none)) { return TRUE; } } crm_notice("Rejecting remote client: User '%s' is not a member of " "group '%s'", usr, grp); return FALSE; } static gboolean cib_remote_auth(xmlNode * login) { const char *user = NULL; const char *pass = NULL; const char *tmp = NULL; if (login == NULL) { return FALSE; } if (!pcmk__xe_is(login, PCMK__XE_CIB_COMMAND)) { crm_warn("Rejecting remote client: Unrecognizable message " "(element '%s' not '" PCMK__XE_CIB_COMMAND "')", login->name); crm_log_xml_debug(login, "bad"); return FALSE; } tmp = pcmk__xe_get(login, PCMK_XA_OP); if (!pcmk__str_eq(tmp, "authenticate", pcmk__str_casei)) { crm_warn("Rejecting remote client: Unrecognizable message " "(operation '%s' not 'authenticate')", tmp); crm_log_xml_debug(login, "bad"); return FALSE; } user = pcmk__xe_get(login, PCMK_XA_USER); pass = pcmk__xe_get(login, PCMK__XA_PASSWORD); if (!user || !pass) { crm_warn("Rejecting remote client: No %s given", ((user == NULL)? "username" : "password")); crm_log_xml_debug(login, "bad"); return FALSE; } crm_log_xml_debug(login, "auth"); return check_group_membership(user, CRM_DAEMON_GROUP) && authenticate_user(user, pass); } static gboolean remote_auth_timeout_cb(gpointer data) { pcmk__client_t *client = data; client->remote->auth_timeout = 0; if (pcmk_is_set(client->flags, pcmk__client_authenticated)) { return FALSE; } mainloop_del_fd(client->remote->source); crm_err("Remote client authentication timed out"); return FALSE; } static int cib_remote_listen(gpointer data) { int csock = -1; unsigned laddr; struct sockaddr_storage addr; char ipstr[INET6_ADDRSTRLEN]; int ssock = *(int *)data; int rc; pcmk__client_t *new_client = NULL; static struct mainloop_fd_callbacks remote_client_fd_callbacks = { .dispatch = cib_remote_msg, .destroy = cib_remote_connection_destroy, }; /* accept the connection */ laddr = sizeof(addr); memset(&addr, 0, sizeof(addr)); csock = accept(ssock, (struct sockaddr *)&addr, &laddr); if (csock == -1) { crm_warn("Could not accept remote connection: %s", pcmk_rc_str(errno)); return TRUE; } pcmk__sockaddr2str(&addr, ipstr); rc = pcmk__set_nonblocking(csock); if (rc != pcmk_rc_ok) { crm_warn("Dropping remote connection from %s because " "it could not be set to non-blocking: %s", ipstr, pcmk_rc_str(rc)); close(csock); return TRUE; } num_clients++; new_client = pcmk__new_unauth_client(NULL); new_client->remote = pcmk__assert_alloc(1, sizeof(pcmk__remote_t)); if (ssock == remote_tls_fd) { pcmk__set_client_flags(new_client, pcmk__client_tls); /* create gnutls session for the server socket */ new_client->remote->tls_session = pcmk__new_tls_session(tls, csock); if (new_client->remote->tls_session == NULL) { close(csock); return TRUE; } } else { pcmk__set_client_flags(new_client, pcmk__client_tcp); new_client->remote->tcp_socket = csock; } // Require the client to authenticate within this time new_client->remote->auth_timeout = pcmk__create_timer(REMOTE_AUTH_TIMEOUT, remote_auth_timeout_cb, new_client); crm_info("%s connection from %s pending authentication for client %s", ((ssock == remote_tls_fd)? "Encrypted" : "Clear-text"), ipstr, new_client->id); new_client->remote->source = mainloop_add_fd("cib-remote-client", G_PRIORITY_DEFAULT, csock, new_client, &remote_client_fd_callbacks); return TRUE; } void cib_remote_connection_destroy(gpointer user_data) { pcmk__client_t *client = user_data; int csock = -1; if (client == NULL) { return; } crm_trace("Cleaning up after client %s disconnect", pcmk__client_name(client)); num_clients--; crm_trace("Num unfree'd clients: %d", num_clients); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_tcp: csock = client->remote->tcp_socket; break; case pcmk__client_tls: if (client->remote->tls_session) { csock = pcmk__tls_get_client_sock(client->remote); if (pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { gnutls_bye(client->remote->tls_session, GNUTLS_SHUT_WR); } gnutls_deinit(client->remote->tls_session); client->remote->tls_session = NULL; } break; default: crm_warn("Unknown transport for client %s " QB_XS " flags=%#016" PRIx64, pcmk__client_name(client), client->flags); } if (csock >= 0) { close(csock); } pcmk__free_client(client); crm_trace("Freed the cib client"); if (cib_shutdown_flag) { cib_shutdown(0); } return; } static void cib_handle_remote_msg(pcmk__client_t *client, xmlNode *command) { if (!pcmk__xe_is(command, PCMK__XE_CIB_COMMAND)) { crm_log_xml_trace(command, "bad"); return; } if (client->name == NULL) { client->name = pcmk__str_copy(client->id); } /* unset dangerous options */ pcmk__xe_remove_attr(command, PCMK__XA_SRC); pcmk__xe_remove_attr(command, PCMK__XA_CIB_HOST); pcmk__xe_remove_attr(command, PCMK__XA_CIB_UPDATE); crm_xml_add(command, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(command, PCMK__XA_CIB_CLIENTID, client->id); crm_xml_add(command, PCMK__XA_CIB_CLIENTNAME, client->name); crm_xml_add(command, PCMK__XA_CIB_USER, client->user); if (pcmk__xe_get(command, PCMK__XA_CIB_CALLID) == NULL) { char *call_uuid = crm_generate_uuid(); /* fix the command */ crm_xml_add(command, PCMK__XA_CIB_CALLID, call_uuid); free(call_uuid); } if (pcmk__xe_get(command, PCMK__XA_CIB_CALLOPT) == NULL) { - crm_xml_add_int(command, PCMK__XA_CIB_CALLOPT, 0); + pcmk__xe_set_int(command, PCMK__XA_CIB_CALLOPT, 0); } crm_log_xml_trace(command, "Remote command: "); cib_common_callback_worker(0, 0, command, client, TRUE); } static int cib_remote_msg(gpointer data) { xmlNode *command = NULL; pcmk__client_t *client = data; int rc; const char *client_name = pcmk__client_name(client); crm_trace("Remote %s message received for client %s", pcmk__client_type_str(PCMK__CLIENT_TYPE(client)), client_name); if ((PCMK__CLIENT_TYPE(client) == pcmk__client_tls) && !pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { int rc = pcmk__read_handshake_data(client); if (rc == EAGAIN) { /* No more data is available at the moment. Just return for now; * we'll get invoked again once the client sends more. */ return 0; } else if (rc != pcmk_rc_ok) { return -1; } crm_debug("Completed TLS handshake with remote client %s", client_name); pcmk__set_client_flags(client, pcmk__client_tls_handshake_complete); if (client->remote->auth_timeout) { g_source_remove(client->remote->auth_timeout); } /* Now that the handshake is done, see if any client TLS certificate is * close to its expiration date and log if so. If a TLS certificate is not * in use, this function will just return so we don't need to check for the * session type here. */ pcmk__tls_check_cert_expiration(client->remote->tls_session); // Require the client to authenticate within this time client->remote->auth_timeout = pcmk__create_timer(REMOTE_AUTH_TIMEOUT, remote_auth_timeout_cb, client); return 0; } rc = pcmk__read_available_remote_data(client->remote); switch (rc) { case pcmk_rc_ok: break; case EAGAIN: /* We haven't read the whole message yet */ return 0; default: /* Error */ crm_trace("Error reading from remote client: %s", pcmk_rc_str(rc)); return -1; } /* must pass auth before we will process anything else */ if (!pcmk_is_set(client->flags, pcmk__client_authenticated)) { xmlNode *reg; const char *user = NULL; command = pcmk__remote_message_xml(client->remote); if (cib_remote_auth(command) == FALSE) { pcmk__xml_free(command); return -1; } pcmk__set_client_flags(client, pcmk__client_authenticated); g_source_remove(client->remote->auth_timeout); client->remote->auth_timeout = 0; client->name = pcmk__xe_get_copy(command, PCMK_XA_NAME); user = pcmk__xe_get(command, PCMK_XA_USER); if (user) { client->user = pcmk__str_copy(user); } crm_notice("Remote connection accepted for authenticated user %s " QB_XS " client %s", pcmk__s(user, ""), client_name); /* send ACK */ reg = pcmk__xe_create(NULL, PCMK__XE_CIB_RESULT); crm_xml_add(reg, PCMK__XA_CIB_OP, CRM_OP_REGISTER); crm_xml_add(reg, PCMK__XA_CIB_CLIENTID, client->id); pcmk__remote_send_xml(client->remote, reg); pcmk__xml_free(reg); pcmk__xml_free(command); } command = pcmk__remote_message_xml(client->remote); if (command != NULL) { crm_trace("Remote message received from client %s", client_name); cib_handle_remote_msg(client, command); pcmk__xml_free(command); } return 0; } #ifdef HAVE_PAM /*! * \internal * \brief Pass remote user's password to PAM * * \param[in] num_msg Number of entries in \p msg * \param[in] msg Array of PAM messages * \param[out] response Where to set response to PAM * \param[in] data User data (the password string) * * \return PAM return code (PAM_BUF_ERR for memory errors, PAM_CONV_ERR for all * other errors, or PAM_SUCCESS on success) * \note See pam_conv(3) for more explanation */ static int construct_pam_passwd(int num_msg, const struct pam_message **msg, struct pam_response **response, void *data) { /* In theory, multiple messages are allowed, but due to OS compatibility * issues, PAM implementations are recommended to only send one message at a * time. We can require that here for simplicity. */ CRM_CHECK((num_msg == 1) && (msg != NULL) && (response != NULL) && (data != NULL), return PAM_CONV_ERR); switch (msg[0]->msg_style) { case PAM_PROMPT_ECHO_OFF: case PAM_PROMPT_ECHO_ON: // Password requested break; case PAM_TEXT_INFO: crm_info("PAM: %s", msg[0]->msg); data = NULL; break; case PAM_ERROR_MSG: /* In theory we should show msg[0]->msg, but that might * contain the password, which we don't want in the logs */ crm_err("PAM reported an error"); data = NULL; break; default: crm_warn("Ignoring PAM message of unrecognized type %d", msg[0]->msg_style); return PAM_CONV_ERR; } *response = calloc(1, sizeof(struct pam_response)); if (*response == NULL) { return PAM_BUF_ERR; } (*response)->resp_retcode = 0; (*response)->resp = pcmk__str_copy((const char *) data); // Caller will free return PAM_SUCCESS; } #endif /*! * \internal * \brief Verify the username and password passed for a remote CIB connection * * \param[in] user Username passed for remote CIB connection * \param[in] passwd Password passed for remote CIB connection * * \return \c true if the username and password are accepted, otherwise \c false * \note This function rejects all credentials when built without PAM support. */ static bool authenticate_user(const char *user, const char *passwd) { #ifdef HAVE_PAM int rc = 0; bool pass = false; const void *p_user = NULL; struct pam_conv p_conv; struct pam_handle *pam_h = NULL; static const char *pam_name = NULL; if (pam_name == NULL) { pam_name = getenv("CIB_pam_service"); if (pam_name == NULL) { pam_name = "login"; } } p_conv.conv = construct_pam_passwd; p_conv.appdata_ptr = (void *) passwd; rc = pam_start(pam_name, user, &p_conv, &pam_h); if (rc != PAM_SUCCESS) { crm_warn("Rejecting remote client for user %s " "because PAM initialization failed: %s", user, pam_strerror(pam_h, rc)); goto bail; } // Check user credentials rc = pam_authenticate(pam_h, PAM_SILENT); if (rc != PAM_SUCCESS) { crm_notice("Access for remote user %s denied: %s", user, pam_strerror(pam_h, rc)); goto bail; } /* Get the authenticated user name (PAM modules can map the original name to * something else). Since the CIB manager runs as the daemon user (not * root), that is the only user that can be successfully authenticated. */ rc = pam_get_item(pam_h, PAM_USER, &p_user); if (rc != PAM_SUCCESS) { crm_warn("Rejecting remote client for user %s " "because PAM failed to return final user name: %s", user, pam_strerror(pam_h, rc)); goto bail; } if (p_user == NULL) { crm_warn("Rejecting remote client for user %s " "because PAM returned no final user name", user); goto bail; } // @TODO Why do we require these to match? if (!pcmk__str_eq(p_user, user, pcmk__str_none)) { crm_warn("Rejecting remote client for user %s " "because PAM returned different final user name %s", user, p_user); goto bail; } // Check user account restrictions (expiration, etc.) rc = pam_acct_mgmt(pam_h, PAM_SILENT); if (rc != PAM_SUCCESS) { crm_notice("Access for remote user %s denied: %s", user, pam_strerror(pam_h, rc)); goto bail; } pass = true; bail: pam_end(pam_h, rc); return pass; #else // @TODO Implement for non-PAM environments crm_warn("Rejecting remote user %s because this build does not have " "PAM support", user); return false; #endif } diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c index 20b6d61328..def5108ed0 100644 --- a/daemons/controld/controld_cib.c +++ b/daemons/controld/controld_cib.c @@ -1,1060 +1,1060 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* sleep */ #include #include #include #include #include // Call ID of the most recent in-progress CIB resource update (or 0 if none) static int pending_rsc_update = 0; /*! * \internal * \brief Respond to a dropped CIB connection * * \param[in] user_data CIB connection that dropped */ static void handle_cib_disconnect(gpointer user_data) { CRM_LOG_ASSERT(user_data == controld_globals.cib_conn); controld_trigger_fsa(); controld_globals.cib_conn->state = cib_disconnected; if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { // @TODO This should trigger a reconnect, not a shutdown crm_crit("Lost connection to the CIB manager, shutting down"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_CIB_CONNECTED); } else { // Expected crm_info("Disconnected from the CIB manager"); } } static void do_cib_updated(const char *event, xmlNode * msg) { const xmlNode *patchset = NULL; const char *client_name = NULL; crm_debug("Received CIB diff notification: DC=%s", pcmk__btoa(AM_I_DC)); if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { return; } if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_ALERTS) || pcmk__cib_element_in_patchset(patchset, PCMK_XE_CRM_CONFIG)) { controld_trigger_config(); } if (!AM_I_DC) { // We're not in control of the join sequence return; } client_name = pcmk__xe_get(msg, PCMK__XA_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { // The CIB is still accurate return; } if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_NODES) || pcmk__cib_element_in_patchset(patchset, PCMK_XE_STATUS)) { /* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS * section. Ensure the node list is up-to-date, and start the join * process again so we get everyone's current resource history. */ if (client_name == NULL) { client_name = pcmk__xe_get(msg, PCMK__XA_CIB_CLIENTID); } crm_notice("Populating nodes and starting an election after %s event " "triggered by %s", event, pcmk__s(client_name, "(unidentified client)")); populate_cib_nodes(node_update_quick|node_update_all, __func__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } void controld_disconnect_cib_manager(void) { cib_t *cib_conn = controld_globals.cib_conn; pcmk__assert(cib_conn != NULL); crm_debug("Disconnecting from the CIB manager"); controld_clear_fsa_input_flags(R_CIB_CONNECTED); cib_conn->cmds->del_notify_callback(cib_conn, PCMK__VALUE_CIB_DIFF_NOTIFY, do_cib_updated); cib_free_callbacks(cib_conn); if (cib_conn->state != cib_disconnected) { cib_conn->cmds->set_secondary(cib_conn, cib_discard_reply); cib_conn->cmds->signoff(cib_conn); } } /* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static int cib_retries = 0; cib_t *cib_conn = controld_globals.cib_conn; void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; int rc = pcmk_ok; pcmk__assert(cib_conn != NULL); if (pcmk_is_set(action, A_CIB_STOP)) { if ((cib_conn->state != cib_disconnected) && (pending_rsc_update != 0)) { crm_info("Waiting for resource update %d to complete", pending_rsc_update); crmd_fsa_stall(FALSE); return; } controld_disconnect_cib_manager(); } if (!pcmk_is_set(action, A_CIB_START)) { return; } if (cur_state == S_STOPPING) { crm_err("Ignoring request to connect to the CIB manager after " "shutdown"); return; } rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command_nonblocking); if (rc != pcmk_ok) { // A short wait that usually avoids stalling the FSA sleep(1); rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command_nonblocking); } if (rc != pcmk_ok) { crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); } else if (cib_conn->cmds->set_connection_dnotify(cib_conn, dnotify_fn) != pcmk_ok) { crm_err("Could not set dnotify callback"); } else if (cib_conn->cmds->add_notify_callback(cib_conn, PCMK__VALUE_CIB_DIFF_NOTIFY, update_cb) != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); } else { controld_set_fsa_input_flags(R_CIB_CONNECTED); cib_retries = 0; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { cib_retries++; if (cib_retries < 30) { crm_warn("Couldn't complete CIB registration %d times... " "pause and retry", cib_retries); controld_start_wait_timer(); crmd_fsa_stall(FALSE); } else { crm_err("Could not complete CIB registration %d times... " "hard error", cib_retries); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } } #define MIN_CIB_OP_TIMEOUT (30) /*! * \internal * \brief Get the timeout (in seconds) that should be used with CIB operations * * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout * environment variable, or 10 seconds times one more than the number of * nodes in the cluster. */ unsigned int cib_op_timeout(void) { unsigned int calculated_timeout = 10U * (pcmk__cluster_num_active_nodes() + pcmk__cluster_num_remote_nodes() + 1U); calculated_timeout = QB_MAX(calculated_timeout, MIN_CIB_OP_TIMEOUT); crm_trace("Calculated timeout: %s", pcmk__readable_interval(calculated_timeout * 1000)); if (controld_globals.cib_conn) { controld_globals.cib_conn->call_timeout = calculated_timeout; } return calculated_timeout; } /*! * \internal * \brief Get CIB call options to use local scope if primary is unavailable * * \return CIB call options */ int crmd_cib_smart_opt(void) { int call_opt = cib_none; if ((controld_globals.fsa_state == S_ELECTION) || (controld_globals.fsa_state == S_PENDING)) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(controld_globals.fsa_state)); cib__set_call_options(call_opt, "update", cib_none); } return call_opt; } static void cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *desc = user_data; if (rc == 0) { crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); } else { crm_warn("Deletion of %s (via CIB call %d) failed: %s " QB_XS " rc=%d", desc, call_id, pcmk_strerror(rc), rc); } } // Searches for various portions of PCMK__XE_NODE_STATE to delete // Match a particular node's PCMK__XE_NODE_STATE (takes node name 1x) #define XPATH_NODE_STATE "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" // Node's lrm section (name 1x) #define XPATH_NODE_LRM XPATH_NODE_STATE "/" PCMK__XE_LRM /* Node's PCMK__XE_LRM_RSC_OP entries and PCMK__XE_LRM_RESOURCE entries without * unexpired lock * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x) */ #define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" PCMK__XE_LRM_RSC_OP \ "|" XPATH_NODE_STATE \ "//" PCMK__XE_LRM_RESOURCE \ "[not(@" PCMK_OPT_SHUTDOWN_LOCK ") " \ "or " PCMK_OPT_SHUTDOWN_LOCK "<%lld]" // Node's PCMK__XE_TRANSIENT_ATTRIBUTES section (name 1x) #define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" PCMK__XE_TRANSIENT_ATTRIBUTES // Everything under PCMK__XE_NODE_STATE (name 1x) #define XPATH_NODE_ALL XPATH_NODE_STATE "/*" /* Unlocked history + transient attributes * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x, name 1x) */ #define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS /*! * \internal * \brief Get the XPath and description of a node state section to be deleted * * \param[in] uname Desired node * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to be deleted * \param[out] xpath Where to store XPath of \p section * \param[out] desc If not \c NULL, where to store description of \p section */ void controld_node_state_deletion_strings(const char *uname, enum controld_section_e section, char **xpath, char **desc) { const char *desc_pre = NULL; // Shutdown locks that started before this time are expired long long expire = (long long) time(NULL) - controld_globals.shutdown_lock_limit; switch (section) { case controld_section_lrm: *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); desc_pre = "resource history"; break; case controld_section_lrm_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, uname, uname, expire); desc_pre = "resource history (other than shutdown locks)"; break; case controld_section_attrs: *xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); desc_pre = "transient attributes"; break; case controld_section_all: *xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); desc_pre = "all state"; break; case controld_section_all_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, uname, uname, expire, uname); desc_pre = "all state (other than shutdown locks)"; break; default: // We called this function incorrectly pcmk__assert(false); break; } if (desc != NULL) { *desc = crm_strdup_printf("%s for node %s", desc_pre, uname); } } /*! * \internal * \brief Delete subsection of a node's CIB \c PCMK__XE_NODE_STATE * * \param[in] uname Desired node * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to delete * \param[in] options CIB call options to use */ void controld_delete_node_state(const char *uname, enum controld_section_e section, int options) { cib_t *cib = controld_globals.cib_conn; char *xpath = NULL; char *desc = NULL; int cib_rc = pcmk_ok; pcmk__assert((uname != NULL) && (cib != NULL)); controld_node_state_deletion_strings(uname, section, &xpath, &desc); cib__set_call_options(options, "node state deletion", cib_xpath|cib_multiple); cib_rc = cib->cmds->remove(cib, xpath, NULL, options); fsa_register_cib_callback(cib_rc, desc, cib_delete_callback); crm_info("Deleting %s (via CIB call %d) " QB_XS " xpath=%s", desc, cib_rc, xpath); // CIB library handles freeing desc free(xpath); } // Takes node name and resource ID #define XPATH_RESOURCE_HISTORY "//" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_UNAME "='%s']/" \ PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \ "/" PCMK__XE_LRM_RESOURCE \ "[@" PCMK_XA_ID "='%s']" // @TODO could add "and @PCMK_OPT_SHUTDOWN_LOCK" to limit to locks /*! * \internal * \brief Clear resource history from CIB for a given resource and node * * \param[in] rsc_id ID of resource to be cleared * \param[in] node Node whose resource history should be cleared * \param[in] user_name ACL user name to use * \param[in] call_options CIB call options * * \return Standard Pacemaker return code */ int controld_delete_resource_history(const char *rsc_id, const char *node, const char *user_name, int call_options) { char *desc = NULL; char *xpath = NULL; int rc = pcmk_rc_ok; cib_t *cib = controld_globals.cib_conn; CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); if (cib == NULL) { crm_err("Unable to clear %s: no CIB connection", desc); free(desc); return ENOTCONN; } // Ask CIB to delete the entry xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); cib->cmds->set_user(cib, user_name); rc = cib->cmds->remove(cib, xpath, NULL, call_options|cib_xpath); cib->cmds->set_user(cib, NULL); if (rc < 0) { rc = pcmk_legacy2rc(rc); crm_err("Could not delete resource status of %s on %s%s%s: %s " QB_XS " rc=%d", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : ""), pcmk_rc_str(rc), rc); free(desc); free(xpath); return rc; } if (pcmk_is_set(call_options, cib_sync_call)) { if (pcmk_is_set(call_options, cib_dryrun)) { crm_debug("Deletion of %s would succeed", desc); } else { crm_debug("Deletion of %s succeeded", desc); } free(desc); } else { crm_info("Clearing %s (via CIB call %d) " QB_XS " xpath=%s", desc, rc, xpath); fsa_register_cib_callback(rc, desc, cib_delete_callback); // CIB library handles freeing desc } free(xpath); return pcmk_rc_ok; } /*! * \internal * \brief Build XML and string of parameters meeting some criteria, for digest * * \param[in] op Executor event with parameter table to use * \param[in] metadata Parsed meta-data for executed resource agent * \param[in] param_type Flag used for selection criteria * \param[out] result Will be set to newly created XML with selected * parameters as attributes * * \return Newly allocated space-separated string of parameter names * \note Selection criteria varies by param_type: for the restart digest, we * want parameters that are *not* marked reloadable (OCF 1.1) or that * *are* marked unique (pre-1.1), for both string and XML results; for the * secure digest, we want parameters that *are* marked private for the * string, but parameters that are *not* marked private for the XML. * \note It is the caller's responsibility to free the string return value with * \p g_string_free() and the XML result with \p pcmk__xml_free(). */ static GString * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, enum ra_param_flags_e param_type, xmlNode **result) { GString *list = NULL; *result = pcmk__xe_create(NULL, PCMK_XE_PARAMETERS); /* Consider all parameters only except private ones to be consistent with * what scheduler does with calculate_secure_digest(). */ if (param_type == ra_param_private && compare_version(controld_globals.dc_version, "3.16.0") >= 0) { g_hash_table_foreach(op->params, hash2field, *result); pcmk__filter_op_for_digest(*result); } for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept_for_list = false; bool accept_for_xml = false; switch (param_type) { case ra_param_reloadable: accept_for_list = !pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_unique: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_private: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = !accept_for_list; break; } if (accept_for_list) { crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); if (list == NULL) { // We will later search for " WORD ", so start list with a space pcmk__add_word(&list, 256, " "); } pcmk__add_word(&list, 0, param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (accept_for_xml) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(*result, param->rap_name, v); } } else { crm_trace("Removing attr %s from the xml result", param->rap_name); pcmk__xe_remove_attr(*result, param->rap_name); } } if (list != NULL) { // We will later search for " WORD ", so end list with a space pcmk__add_word(&list, 0, " "); } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) { /* Add parameters not marked reloadable to the PCMK__XA_OP_FORCE_RESTART * list */ list = build_parameter_list(op, metadata, ra_param_reloadable, &restart); } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) { /* @COMPAT pre-OCF-1.1 resource agents * * Before OCF 1.1, Pacemaker abused "unique=0" to indicate * reloadability. Add any parameters with unique="1" to the * PCMK__XA_OP_FORCE_RESTART list. */ list = build_parameter_list(op, metadata, ra_param_unique, &restart); } else { // Resource does not support agent reloads return; } digest = pcmk__digest_operation(restart); /* Add PCMK__XA_OP_FORCE_RESTART and PCMK__XA_OP_RESTART_DIGEST to indicate * the resource supports reload, no matter if it actually supports any * reloadable parameters */ crm_xml_add(update, PCMK__XA_OP_FORCE_RESTART, (list == NULL)? "" : (const char *) list->str); crm_xml_add(update, PCMK__XA_OP_RESTART_DIGEST, digest); if ((list != NULL) && (list->len > 0)) { crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); } else { crm_trace("%s: %s", op->rsc_id, digest); } if (list != NULL) { g_string_free(list, TRUE); } pcmk__xml_free(restart); free(digest); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* To keep PCMK__XA_OP_SECURE_PARAMS short, we want it to contain the secure * parameters but PCMK__XA_OP_SECURE_DIGEST to be based on the insecure ones */ list = build_parameter_list(op, metadata, ra_param_private, &secure); if (list != NULL) { digest = pcmk__digest_operation(secure); crm_xml_add(update, PCMK__XA_OP_SECURE_PARAMS, (const char *) list->str); crm_xml_add(update, PCMK__XA_OP_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); g_string_free(list, TRUE); } else { crm_trace("%s: no secure parameters", op->rsc_id); } pcmk__xml_free(secure); free(digest); } /*! * \internal * \brief Create XML for a resource history entry * * \param[in] func Function name of caller * \param[in,out] parent XML to add entry to * \param[in] rsc Affected resource * \param[in,out] op Action to add an entry for (or NULL to do nothing) * \param[in] node_name Node where action occurred */ void controld_add_resource_history_xml_as(const char *func, xmlNode *parent, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *node_name) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return; } target_rc = rsc_op_expected_rc(op); caller_version = g_hash_table_lookup(op->params, PCMK_XA_CRM_FEATURE_SET); CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, controld_globals.cluster->priv->node_name, func); if (xml_op == NULL) { return; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return; } lrm_state = controld_get_executor_state(node_name, false); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return; } /* Ideally the metadata is cached, and the agent is just a fallback. * * @TODO Go through all callers and ensure they get metadata asynchronously * first. */ metadata = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_agent |controld_metadata_from_cache); if (metadata == NULL) { return; } crm_trace("Including additional digests for %s:%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return; } /*! * \internal * \brief Record an action as pending in the CIB, if appropriate * * \param[in] node_name Node where the action is pending * \param[in] rsc Resource that action is for * \param[in,out] op Pending action * * \return true if action was recorded in CIB, otherwise false */ bool controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL), return false); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return false; } // Check action's PCMK_META_RECORD_PENDING meta-attribute (defaults to true) record_pending = crm_meta_value(op->params, PCMK_META_RECORD_PENDING); if ((record_pending != NULL) && !crm_is_true(record_pending)) { pcmk__warn_once(pcmk__wo_record_pending, "The " PCMK_META_RECORD_PENDING " option (for example, " "for the %s resource's %s operation) is deprecated and " "will be removed in a future release", rsc->id, op->op_type); return false; } op->call_id = -1; op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); crm_debug("Recording pending %s-interval %s for %s on %s in the CIB", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, node_name); controld_update_resource_history(node_name, rsc, op, 0); return true; } static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource history update completed (call=%d rc=%d)", call_id, rc); break; default: if (call_id > 0) { crm_warn("Resource history update %d failed: %s " QB_XS " rc=%d", call_id, pcmk_strerror(rc), rc); } else { crm_warn("Resource history update failed: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); } } if (call_id == pending_rsc_update) { pending_rsc_update = 0; controld_trigger_fsa(); } } /* Only successful stops, and probes that found the resource inactive, get locks * recorded in the history. This ensures the resource stays locked to the node * until it is active there again after the node comes back up. */ static bool should_preserve_lock(lrmd_event_data_t *op) { if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { return false; } if (!strcmp(op->op_type, PCMK_ACTION_STOP) && (op->rc == PCMK_OCF_OK)) { return true; } if (!strcmp(op->op_type, PCMK_ACTION_MONITOR) && (op->rc == PCMK_OCF_NOT_RUNNING)) { return true; } return false; } /*! * \internal * \brief Request a CIB update * * \param[in] section Section of CIB to update * \param[in] data New XML of CIB section to update * \param[in] options CIB call options * \param[in] callback If not \c NULL, set this as the operation callback * * \return Standard Pacemaker return code * * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is * stored in \p pending_rsc_update on success. */ int controld_update_cib(const char *section, xmlNode *data, int options, void (*callback)(xmlNode *, int, int, xmlNode *, void *)) { cib_t *cib = controld_globals.cib_conn; int cib_rc = -ENOTCONN; pcmk__assert(data != NULL); if (cib != NULL) { cib_rc = cib->cmds->modify(cib, section, data, options); if (cib_rc >= 0) { crm_debug("Submitted CIB update %d for %s section", cib_rc, section); } } if (callback == NULL) { if (cib_rc < 0) { crm_err("Failed to update CIB %s section: %s", section, pcmk_rc_str(pcmk_legacy2rc(cib_rc))); } } else { if ((cib_rc >= 0) && (callback == cib_rsc_callback)) { /* Checking for a particular callback is a little hacky, but it * didn't seem worth adding an output argument for cib_rc for just * one use case. */ pending_rsc_update = cib_rc; } fsa_register_cib_callback(cib_rc, NULL, callback); } return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc); } /*! * \internal * \brief Update resource history entry in CIB * * \param[in] node_name Node where action occurred * \param[in] rsc Resource that action is for * \param[in,out] op Action to record * \param[in] lock_time If nonzero, when resource was locked to node * * \note On success, the CIB update's call ID will be stored in * pending_rsc_update. */ void controld_update_resource_history(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time) { xmlNode *update = NULL; xmlNode *xml = NULL; int call_opt = crmd_cib_smart_opt(); const char *node_id = NULL; const char *container = NULL; CRM_CHECK((node_name != NULL) && (op != NULL), return); if (rsc == NULL) { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); return; } // update = pcmk__xe_create(NULL, PCMK_XE_STATUS); // xml = pcmk__xe_create(update, PCMK__XE_NODE_STATE); if (controld_is_local_node(node_name)) { node_id = controld_globals.our_uuid; } else { node_id = node_name; pcmk__xe_set_bool_attr(xml, PCMK_XA_REMOTE_NODE, true); } crm_xml_add(xml, PCMK_XA_ID, node_id); crm_xml_add(xml, PCMK_XA_UNAME, node_name); crm_xml_add(xml, PCMK_XA_CRM_DEBUG_ORIGIN, __func__); // xml = pcmk__xe_create(xml, PCMK__XE_LRM); crm_xml_add(xml, PCMK_XA_ID, node_id); // xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCES); // xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCE); crm_xml_add(xml, PCMK_XA_ID, op->rsc_id); crm_xml_add(xml, PCMK_XA_CLASS, rsc->standard); crm_xml_add(xml, PCMK_XA_PROVIDER, rsc->provider); crm_xml_add(xml, PCMK_XA_TYPE, rsc->type); if (lock_time != 0) { /* Actions on a locked resource should either preserve the lock by * recording it with the action result, or clear it. */ if (!should_preserve_lock(op)) { lock_time = 0; } pcmk__xe_set_time(xml, PCMK_OPT_SHUTDOWN_LOCK, lock_time); } if (op->params != NULL) { container = g_hash_table_lookup(op->params, CRM_META "_" PCMK__META_CONTAINER); if (container != NULL) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(xml, PCMK__META_CONTAINER, container); } } // (possibly more than one) controld_add_resource_history_xml(xml, rsc, op, node_name); /* Update CIB asynchronously. Even if it fails, the resource state should be * discovered during the next election. Worst case, the node is wrongly * fenced for running a resource it isn't. */ crm_log_xml_trace(update, __func__); controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback); pcmk__xml_free(update); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] op Operation whose history should be deleted */ void controld_delete_action_history(const lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = pcmk__xe_create(NULL, PCMK__XE_LRM_RSC_OP); - crm_xml_add_int(xml_top, PCMK__XA_CALL_ID, op->call_id); + pcmk__xe_set_int(xml_top, PCMK__XA_CALL_ID, op->call_id); crm_xml_add(xml_top, PCMK__XA_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, PCMK_XA_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, PCMK_XE_STATUS, xml_top, cib_none); crm_log_xml_trace(xml_top, "op:cancel"); pcmk__xml_free(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" \ "/" PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \ "/" PCMK__XE_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']" \ "/" PCMK__XE_LRM_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY "[@" PCMK_XA_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" PCMK_XA_ID "='%s' and @" PCMK__XA_CALL_ID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" PCMK_XA_ID "='%s' and @" PCMK__XA_OPERATION_KEY "='%s']" /*! * \internal * \brief Delete a last_failure resource history entry from the CIB * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] action If specified, delete only if this was failed action * \param[in] interval_ms If \p action is specified, it has this interval */ void controld_cib_delete_last_failure(const char *rsc_id, const char *node, const char *action, guint interval_ms) { char *xpath = NULL; char *last_failure_key = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL), return); // Generate XPath to match desired entry last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0); if (action == NULL) { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, last_failure_key); } else { char *action_key = pcmk__op_key(rsc_id, action, interval_ms); xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id, last_failure_key, action_key); free(action_key); } free(last_failure_key); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } /*! * \internal * \brief Delete resource history entry from the CIB, given operation key * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] call_id If specified, delete entry only if it has this call ID */ void controld_delete_action_history_by_key(const char *rsc_id, const char *node, const char *key, int call_id) { char *xpath = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return); if (call_id > 0) { xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key, call_id); } else { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key); } controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c index 78c0c4b570..0393f03867 100644 --- a/daemons/controld/controld_execd_state.c +++ b/daemons/controld/controld_execd_state.c @@ -1,828 +1,828 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include static GHashTable *lrm_state_table = NULL; extern GHashTable *proxy_table; int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); static void free_rsc_info(gpointer value) { lrmd_rsc_info_t *rsc_info = value; lrmd_free_rsc_info(rsc_info); } static void free_deletion_op(gpointer value) { struct pending_deletion_op_s *op = value; free(op->rsc); delete_ha_msg_input(op->input); free(op); } static void free_recurring_op(gpointer value) { active_op_t *op = value; free(op->user_data); free(op->rsc_id); free(op->op_type); free(op->op_key); if (op->params) { g_hash_table_destroy(op->params); } free(op); } static gboolean fail_pending_op(gpointer key, gpointer value, gpointer user_data) { lrmd_event_data_t event = { 0, }; lrm_state_t *lrm_state = user_data; active_op_t *op = value; crm_trace("Pre-emptively failing " PCMK__OP_FMT " on %s (call=%s, %s)", op->rsc_id, op->op_type, op->interval_ms, lrm_state->node_name, (char*)key, op->user_data); event.type = lrmd_event_exec_complete; event.rsc_id = op->rsc_id; event.op_type = op->op_type; event.user_data = op->user_data; event.timeout = 0; event.interval_ms = op->interval_ms; lrmd__set_result(&event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED, "Action was pending when executor connection was dropped"); event.t_run = op->start_time; event.t_rcchange = op->start_time; event.call_id = op->call_id; event.remote_nodename = lrm_state->node_name; event.params = op->params; process_lrm_event(lrm_state, &event, op, NULL); lrmd__reset_result(&event); return TRUE; } gboolean lrm_state_is_local(lrm_state_t *lrm_state) { return (lrm_state != NULL) && controld_is_local_node(lrm_state->node_name); } /*! * \internal * \brief Create executor state entry for a node and add it to the state table * * \param[in] node_name Node to create entry for * * \return Newly allocated executor state object initialized for \p node_name */ static lrm_state_t * lrm_state_create(const char *node_name) { lrm_state_t *state = NULL; if (!node_name) { crm_err("No node name given for lrm state object"); return NULL; } state = pcmk__assert_alloc(1, sizeof(lrm_state_t)); state->node_name = pcmk__str_copy(node_name); state->rsc_info_cache = pcmk__strkey_table(NULL, free_rsc_info); state->deletion_ops = pcmk__strkey_table(free, free_deletion_op); state->active_ops = pcmk__strkey_table(free, free_recurring_op); state->resource_history = pcmk__strkey_table(NULL, history_free); state->metadata_cache = metadata_cache_new(); g_hash_table_insert(lrm_state_table, (char *)state->node_name, state); return state; } static gboolean remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) { remote_proxy_t *proxy = value; const char *node_name = user_data; if (pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) { return TRUE; } return FALSE; } static remote_proxy_t * find_connected_proxy_by_node(const char * node_name) { GHashTableIter gIter; remote_proxy_t *proxy = NULL; CRM_CHECK(proxy_table != NULL, return NULL); g_hash_table_iter_init(&gIter, proxy_table); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) { if (proxy->source && pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) { return proxy; } } return NULL; } static void remote_proxy_disconnect_by_node(const char * node_name) { remote_proxy_t *proxy = NULL; CRM_CHECK(proxy_table != NULL, return); while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) { /* mainloop_del_ipc_client() eventually calls remote_proxy_disconnected() * , which removes the entry from proxy_table. * Do not do this in a g_hash_table_iter_next() loop. */ if (proxy->source) { mainloop_del_ipc_client(proxy->source); } } return; } static void internal_lrm_state_destroy(gpointer data) { lrm_state_t *lrm_state = data; if (!lrm_state) { return; } /* Rather than directly remove the recorded proxy entries from proxy_table, * make sure any connected proxies get disconnected. So that * remote_proxy_disconnected() will be called and as well remove the * entries from proxy_table. */ remote_proxy_disconnect_by_node(lrm_state->node_name); crm_trace("Destroying proxy table %s with %u members", lrm_state->node_name, g_hash_table_size(proxy_table)); // Just in case there's still any leftovers in proxy_table g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name); remote_ra_cleanup(lrm_state); lrmd_api_delete(lrm_state->conn); if (lrm_state->rsc_info_cache) { crm_trace("Destroying rsc info cache with %u members", g_hash_table_size(lrm_state->rsc_info_cache)); g_hash_table_destroy(lrm_state->rsc_info_cache); } if (lrm_state->resource_history) { crm_trace("Destroying history op cache with %u members", g_hash_table_size(lrm_state->resource_history)); g_hash_table_destroy(lrm_state->resource_history); } if (lrm_state->deletion_ops) { crm_trace("Destroying deletion op cache with %u members", g_hash_table_size(lrm_state->deletion_ops)); g_hash_table_destroy(lrm_state->deletion_ops); } if (lrm_state->active_ops != NULL) { crm_trace("Destroying pending op cache with %u members", g_hash_table_size(lrm_state->active_ops)); g_hash_table_destroy(lrm_state->active_ops); } metadata_cache_free(lrm_state->metadata_cache); free((char *)lrm_state->node_name); free(lrm_state); } void lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata) { if (lrm_state->resource_history) { crm_trace("Resetting resource history cache with %u members", g_hash_table_size(lrm_state->resource_history)); g_hash_table_remove_all(lrm_state->resource_history); } if (lrm_state->deletion_ops) { crm_trace("Resetting deletion operations cache with %u members", g_hash_table_size(lrm_state->deletion_ops)); g_hash_table_remove_all(lrm_state->deletion_ops); } if (lrm_state->active_ops != NULL) { crm_trace("Resetting active operations cache with %u members", g_hash_table_size(lrm_state->active_ops)); g_hash_table_remove_all(lrm_state->active_ops); } if (lrm_state->rsc_info_cache) { crm_trace("Resetting resource information cache with %u members", g_hash_table_size(lrm_state->rsc_info_cache)); g_hash_table_remove_all(lrm_state->rsc_info_cache); } if (reset_metadata) { metadata_cache_reset(lrm_state->metadata_cache); } } gboolean lrm_state_init_local(void) { if (lrm_state_table) { return TRUE; } lrm_state_table = pcmk__strikey_table(NULL, internal_lrm_state_destroy); if (!lrm_state_table) { return FALSE; } proxy_table = pcmk__strikey_table(NULL, remote_proxy_free); if (!proxy_table) { g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; return FALSE; } return TRUE; } void lrm_state_destroy_all(void) { if (lrm_state_table) { crm_trace("Destroying state table with %u members", g_hash_table_size(lrm_state_table)); g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; } if(proxy_table) { crm_trace("Destroying proxy table with %u members", g_hash_table_size(proxy_table)); g_hash_table_destroy(proxy_table); proxy_table = NULL; } } /*! * \internal * \brief Get executor state object * * \param[in] node_name Get executor state for this node (local node if NULL) * \param[in] create If true, create executor state if it doesn't exist * * \return Executor state object for \p node_name */ lrm_state_t * controld_get_executor_state(const char *node_name, bool create) { lrm_state_t *state = NULL; if ((node_name == NULL) && (controld_globals.cluster != NULL)) { node_name = controld_globals.cluster->priv->node_name; } if ((node_name == NULL) || (lrm_state_table == NULL)) { return NULL; } state = g_hash_table_lookup(lrm_state_table, node_name); if ((state == NULL) && create) { state = lrm_state_create(node_name); } return state; } /* @TODO the lone caller just needs to iterate over the values, so replace this * with a g_hash_table_foreach() wrapper instead */ GList * lrm_state_get_list(void) { if (lrm_state_table == NULL) { return NULL; } return g_hash_table_get_values(lrm_state_table); } void lrm_state_disconnect_only(lrm_state_t * lrm_state) { int removed = 0; if (!lrm_state->conn) { return; } crm_trace("Disconnecting %s", lrm_state->node_name); remote_proxy_disconnect_by_node(lrm_state->node_name); ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn); if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { removed = g_hash_table_foreach_remove(lrm_state->active_ops, fail_pending_op, lrm_state); crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name); } } void lrm_state_disconnect(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return; } lrm_state_disconnect_only(lrm_state); lrmd_api_delete(lrm_state->conn); lrm_state->conn = NULL; } int lrm_state_is_connected(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return FALSE; } return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn); } int lrm_state_poke_connection(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return -ENOTCONN; } return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn); } // \return Standard Pacemaker return code int controld_connect_local_executor(lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; if (lrm_state->conn == NULL) { lrmd_t *api = NULL; rc = lrmd__new(&api, NULL, NULL, 0); if (rc != pcmk_rc_ok) { return rc; } api->cmds->set_callback(api, lrm_op_callback); lrm_state->conn = api; } rc = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { lrm_state->num_lrm_register_fails = 0; } else { lrm_state->num_lrm_register_fails++; } return rc; } static remote_proxy_t * crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel) { struct ipc_client_callbacks proxy_callbacks = { .dispatch = remote_proxy_dispatch, .destroy = remote_proxy_disconnected }; remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name, session_id, channel); return proxy; } gboolean crmd_is_proxy_session(const char *session) { return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE; } void crmd_proxy_send(const char *session, xmlNode *msg) { remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); lrm_state_t *lrm_state = NULL; if (!proxy) { return; } crm_log_xml_trace(msg, "to-proxy"); lrm_state = controld_get_executor_state(proxy->node_name, false); if (lrm_state) { crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name); remote_proxy_relay_event(proxy, msg); } } static void crmd_proxy_dispatch(const char *session, xmlNode *msg) { crm_trace("Processing proxied IPC message from session %s", session); crm_log_xml_trace(msg, "controller[inbound]"); crm_xml_add(msg, PCMK__XA_CRM_SYS_FROM, session); if (controld_authorize_ipc_message(msg, NULL, session)) { route_message(C_IPC_MESSAGE, msg); } controld_trigger_fsa(); } static void remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { crm_err("Query resulted in an error: %s", pcmk_strerror(rc)); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); } } else { lrmd_t * lrmd = (lrmd_t *)user_data; crm_time_t *now = crm_time_new(NULL); GHashTable *config_hash = pcmk__strkey_table(free, free); pcmk_rule_input_t rule_input = { .now = now, }; crm_debug("Call %d : Parsing CIB options", call_id); pcmk_unpack_nvpair_blocks(output, PCMK_XE_CLUSTER_PROPERTY_SET, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, &rule_input, config_hash, NULL); /* Now send it to the remote peer */ lrmd__validate_remote_settings(lrmd, config_hash); g_hash_table_destroy(config_hash); crm_time_free(now); } } static void crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) { lrm_state_t *lrm_state = userdata; const char *session = pcmk__xe_get(msg, PCMK__XA_LRMD_IPC_SESSION); remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); const char *op = pcmk__xe_get(msg, PCMK__XA_LRMD_IPC_OP); if (pcmk__str_eq(op, LRMD_IPC_OP_NEW, pcmk__str_casei)) { const char *channel = pcmk__xe_get(msg, PCMK__XA_LRMD_IPC_SERVER); proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel); if (!remote_ra_controlling_guest(lrm_state)) { if (proxy != NULL) { cib_t *cib_conn = controld_globals.cib_conn; /* Look up PCMK_OPT_STONITH_WATCHDOG_TIMEOUT and send to the * remote peer for validation */ int rc = cib_conn->cmds->query(cib_conn, PCMK_XE_CRM_CONFIG, NULL, cib_none); cib_conn->cmds->register_callback_full(cib_conn, rc, 10, FALSE, lrmd, "remote_config_check", remote_config_check, NULL); } } else { crm_debug("Skipping remote_config_check for guest-nodes"); } } else if (pcmk__str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ, pcmk__str_casei)) { char *now_s = NULL; crm_notice("%s requested shutdown of its remote connection", lrm_state->node_name); if (!remote_ra_is_in_maintenance(lrm_state)) { now_s = pcmk__ttoa(time(NULL)); update_attrd(lrm_state->node_name, PCMK__NODE_ATTR_SHUTDOWN, now_s, NULL, TRUE); free(now_s); remote_proxy_ack_shutdown(lrmd); crm_warn("Reconnection attempts to %s may result in failures that must be cleared", lrm_state->node_name); } else { remote_proxy_nack_shutdown(lrmd); crm_notice("Remote resource for %s is not managed so no ordered shutdown happening", lrm_state->node_name); } return; } else if (pcmk__str_eq(op, LRMD_IPC_OP_REQUEST, pcmk__str_casei) && proxy && proxy->is_local) { /* This is for the controller, which we are, so don't try * to send to ourselves over IPC -- do it directly. */ uint32_t flags = 0U; int rc = pcmk_rc_ok; xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_LRMD_IPC_MSG, NULL, NULL); xmlNode *request = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); CRM_CHECK(request != NULL, return); CRM_CHECK(lrm_state->node_name, return); crm_xml_add(request, PCMK_XE_ACL_ROLE, "pacemaker-remote"); pcmk__update_acl_user(request, PCMK__XA_LRMD_IPC_USER, lrm_state->node_name); /* Pacemaker Remote nodes don't know their own names (as known to the * cluster). When getting a node info request with no name or ID, add * the name, so we don't return info for ourselves instead of the * Pacemaker Remote node. */ if (pcmk__str_eq(pcmk__xe_get(request, PCMK__XA_CRM_TASK), CRM_OP_NODE_INFO, pcmk__str_none)) { int node_id = 0; pcmk__xe_get_int(request, PCMK_XA_ID, &node_id); if ((node_id <= 0) && (pcmk__xe_get(request, PCMK_XA_UNAME) == NULL)) { crm_xml_add(request, PCMK_XA_UNAME, lrm_state->node_name); } } crmd_proxy_dispatch(session, request); rc = pcmk__xe_get_flags(msg, PCMK__XA_LRMD_IPC_MSG_FLAGS, &flags, 0U); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse controller flags from remote request: %s", pcmk_rc_str(rc)); } if (pcmk_is_set(flags, crm_ipc_client_response)) { int msg_id = 0; xmlNode *op_reply = pcmk__xe_create(NULL, PCMK__XE_ACK); crm_xml_add(op_reply, PCMK_XA_FUNCTION, __func__); - crm_xml_add_int(op_reply, PCMK__XA_LINE, __LINE__); + pcmk__xe_set_int(op_reply, PCMK__XA_LINE, __LINE__); pcmk__xe_get_int(msg, PCMK__XA_LRMD_IPC_MSG_ID, &msg_id); remote_proxy_relay_response(proxy, op_reply, msg_id); pcmk__xml_free(op_reply); } } else { remote_proxy_cb(lrmd, lrm_state->node_name, msg); } } // \return Standard Pacemaker return code int controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server, int port, int timeout_ms) { int rc = pcmk_rc_ok; if (lrm_state->conn == NULL) { lrmd_t *api = NULL; rc = lrmd__new(&api, lrm_state->node_name, server, port); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%s failed: %s " QB_XS " rc=%d", server, port, pcmk_rc_str(rc), rc); return rc; } lrm_state->conn = api; api->cmds->set_callback(api, remote_lrm_op_callback); lrmd_internal_set_proxy_callback(api, lrm_state, crmd_remote_proxy_cb); } crm_trace("Initiating remote connection to %s:%d with timeout %dms", server, port, timeout_ms); rc = ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, lrm_state->node_name, timeout_ms); if (rc == pcmk_ok) { lrm_state->num_lrm_register_fails = 0; } else { lrm_state->num_lrm_register_fails++; // Ignored for remote connections } return pcmk_legacy2rc(rc); } int lrm_state_get_metadata(lrm_state_t * lrm_state, const char *class, const char *provider, const char *agent, char **output, enum lrmd_call_options options) { lrmd_key_value_t *params = NULL; if (!lrm_state->conn) { return -ENOTCONN; } /* Add the node name to the environment, as is done with normal resource * action calls. Meta-data calls shouldn't need it, but some agents are * written with an ocf_local_nodename call at the beginning regardless of * action. Without the environment variable, the agent would try to contact * the controller to get the node name -- but the controller would be * blocking on the synchronous meta-data call. * * At this point, we have to assume that agents are unlikely to make other * calls that require the controller, such as crm_node --quorum or * --cluster-id. * * @TODO Make meta-data calls asynchronous. (This will be part of a larger * project to make meta-data calls via the executor rather than directly.) */ params = lrmd_key_value_add(params, CRM_META "_" PCMK__META_ON_NODE, lrm_state->node_name); return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn, class, provider, agent, output, options, params); } int lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action, guint interval_ms) { if (!lrm_state->conn) { return -ENOTCONN; } /* Figure out a way to make this async? * NOTICE: Currently it's synced and directly acknowledged in do_lrm_invoke(). */ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms); } return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id, action, interval_ms); } lrmd_rsc_info_t * lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc = NULL; if (!lrm_state->conn) { return NULL; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_get_rsc_info(lrm_state, rsc_id); } rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id); if (rsc == NULL) { /* only contact the lrmd if we don't already have a cached rsc info */ rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options); if (rsc == NULL) { return NULL; } /* cache the result */ g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc); } return lrmd_copy_rsc_info(rsc); } /*! * \internal * \brief Initiate a resource agent action * * \param[in,out] lrm_state Executor state object * \param[in] rsc_id ID of resource for action * \param[in] action Action to execute * \param[in] userdata String to copy and pass to execution callback * \param[in] interval_ms Action interval (in milliseconds) * \param[in] timeout_ms Action timeout (in milliseconds) * \param[in] start_delay_ms Delay (in ms) before initiating action * \param[in] parameters Hash table of resource parameters * \param[out] call_id Where to store call ID on success * * \return Standard Pacemaker return code */ int controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout_ms, int start_delay_ms, GHashTable *parameters, int *call_id) { int rc = pcmk_rc_ok; lrmd_key_value_t *params = NULL; if (lrm_state->conn == NULL) { return ENOTCONN; } // Convert parameters from hash table to list if (parameters != NULL) { const char *key = NULL; const char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, parameters); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { params = lrmd_key_value_add(params, key, value); } } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { rc = controld_execute_remote_agent(lrm_state, rsc_id, action, userdata, interval_ms, timeout_ms, start_delay_ms, params, call_id); } else { rc = ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn, rsc_id, action, userdata, interval_ms, timeout_ms, start_delay_ms, lrmd_opt_notify_changes_only, params); if (rc < 0) { rc = pcmk_legacy2rc(rc); } else { *call_id = rc; rc = pcmk_rc_ok; } } return rc; } int lrm_state_register_rsc(lrm_state_t * lrm_state, const char *rsc_id, const char *class, const char *provider, const char *agent, enum lrmd_call_options options) { lrmd_t *conn = (lrmd_t *) lrm_state->conn; if (conn == NULL) { return -ENOTCONN; } if (is_remote_lrmd_ra(agent, provider, NULL)) { return controld_get_executor_state(rsc_id, true)? pcmk_ok : -EINVAL; } /* @TODO Implement an asynchronous version of this (currently a blocking * call to the lrmd). */ return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider, agent, options); } int lrm_state_unregister_rsc(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { if (!lrm_state->conn) { return -ENOTCONN; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { g_hash_table_remove(lrm_state_table, rsc_id); return pcmk_ok; } g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id); /* @TODO Optimize this ... this function is a blocking round trip from * client to daemon. The controld_execd_state.c code path that uses this * function should always treat it as an async operation. The executor API * should make an async version available. */ return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options); } diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c index 91f9b11113..f7441442ae 100644 --- a/daemons/controld/controld_join_client.c +++ b/daemons/controld/controld_join_client.c @@ -1,373 +1,373 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); /*! * \internal * \brief Remember if DC is shutting down as we join * * If we're joining while the current DC is shutting down, update its expected * state, so we don't fence it if we become the new DC. (We weren't a peer * when it broadcast its shutdown request.) * * \param[in] msg A join message from the DC */ static void update_dc_expected(const xmlNode *msg) { if ((controld_globals.dc_name != NULL) && pcmk__xe_attr_is_true(msg, PCMK__XA_DC_LEAVING)) { pcmk__node_status_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, pcmk__node_search_cluster_member); pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); } } /* A_CL_JOIN_QUERY */ /* is there a DC out there? */ void do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *req = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL, CRM_SYSTEM_DC, CRM_OP_JOIN_ANNOUNCE, NULL); sleep(1); // Give the cluster layer time to propagate to the DC update_dc(NULL); /* Unset any existing value so that the result is not discarded */ crm_debug("Querying for a DC"); pcmk__cluster_send_message(NULL, pcmk_ipc_controld, req); pcmk__xml_free(req); } /* A_CL_JOIN_ANNOUNCE */ /* this is kind of a workaround for the fact that we may not be around or * are otherwise unable to reply when the DC sends out A_DC_JOIN_OFFER_ALL */ void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* don't announce if we're in one of these states */ if (cur_state != S_PENDING) { crm_warn("Not announcing cluster join because in state %s", fsa_state2string(cur_state)); return; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) { /* send as a broadcast */ xmlNode *req = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL, CRM_SYSTEM_DC, CRM_OP_JOIN_ANNOUNCE, NULL); crm_debug("Announcing availability"); update_dc(NULL); pcmk__cluster_send_message(NULL, pcmk_ipc_controld, req); pcmk__xml_free(req); } else { /* Delay announce until we have finished local startup */ crm_warn("Delaying announce of cluster join until local startup is complete"); return; } } static int query_call_id = 0; /* A_CL_JOIN_REQUEST */ /* aka. accept the welcome offer */ void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { cib_t *cib_conn = controld_globals.cib_conn; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *welcome_from; const char *join_id; CRM_CHECK(input != NULL, return); welcome_from = pcmk__xe_get(input->msg, PCMK__XA_SRC); join_id = pcmk__xe_get(input->msg, PCMK__XA_JOIN_ID); crm_trace("Accepting cluster join offer from node %s " QB_XS " join-%s", welcome_from, pcmk__xe_get(input->msg, PCMK__XA_JOIN_ID)); /* we only ever want the last one */ if (query_call_id > 0) { crm_trace("Cancelling previous join query: %d", query_call_id); remove_cib_op_callback(query_call_id, FALSE); query_call_id = 0; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding cluster join offer from node %s (expected %s)", welcome_from, controld_globals.dc_name); return; } update_dc_expected(input->msg); query_call_id = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_no_children); fsa_register_cib_callback(query_call_id, pcmk__str_copy(join_id), join_query_callback); crm_trace("Registered join query callback: %d", query_call_id); controld_set_fsa_action_flags(A_DC_TIMER_STOP); controld_trigger_fsa(); } void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *join_id = user_data; xmlNode *generation = pcmk__xe_create(NULL, PCMK__XE_GENERATION_TUPLE); CRM_LOG_ASSERT(join_id != NULL); if (query_call_id != call_id) { crm_trace("Query %d superseded", call_id); goto done; } query_call_id = 0; if(rc != pcmk_ok || output == NULL) { crm_err("Could not retrieve version details for join-%s: %s (%d)", join_id, pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); } else if (controld_globals.dc_name == NULL) { crm_debug("Membership is in flux, not continuing join-%s", join_id); } else { xmlNode *join_request = NULL; const pcmk__node_status_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, pcmk__node_search_cluster_member); crm_debug("Respond to join offer join-%s from %s", join_id, controld_globals.dc_name); pcmk__xe_copy_attrs(generation, output, pcmk__xaf_none); join_request = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, controld_globals.dc_name, CRM_SYSTEM_DC, CRM_OP_JOIN_REQUEST, generation); crm_xml_add(join_request, PCMK__XA_JOIN_ID, join_id); crm_xml_add(join_request, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); pcmk__cluster_send_message(dc_node, pcmk_ipc_controld, join_request); pcmk__xml_free(join_request); } done: pcmk__xml_free(generation); } void set_join_state(const char *start_state, const char *node_name, const char *node_uuid, bool remote) { if (pcmk__str_eq(start_state, PCMK_VALUE_STANDBY, pcmk__str_casei)) { crm_notice("Forcing node %s to join in %s state per configured " "environment", node_name, start_state); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_sync_call, PCMK_XE_NODES, node_uuid, NULL, NULL, NULL, PCMK_NODE_ATTR_STANDBY, PCMK_VALUE_TRUE, NULL, (remote? PCMK_VALUE_REMOTE : NULL)); } else if (pcmk__str_eq(start_state, PCMK_VALUE_ONLINE, pcmk__str_casei)) { crm_notice("Forcing node %s to join in %s state per configured " "environment", node_name, start_state); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_sync_call, PCMK_XE_NODES, node_uuid, NULL, NULL, NULL, PCMK_NODE_ATTR_STANDBY, PCMK_VALUE_FALSE, NULL, (remote? PCMK_VALUE_REMOTE : NULL)); } else if (pcmk__str_eq(start_state, PCMK_VALUE_DEFAULT, pcmk__str_casei)) { crm_debug("Not forcing a starting state on node %s", node_name); } else { crm_warn("Unrecognized start state '%s', using " "'" PCMK_VALUE_DEFAULT "' (%s)", start_state, node_name); } } static int update_conn_host_cache(xmlNode *node, void *userdata) { const char *remote = pcmk__xe_get(node, PCMK_XA_ID); const char *conn_host = pcmk__xe_get(node, PCMK__XA_CONNECTION_HOST); const char *state = pcmk__xe_get(node, PCMK__XA_NODE_STATE); pcmk__node_status_t *remote_peer = pcmk__cluster_lookup_remote_node(remote); if (remote_peer == NULL) { return pcmk_rc_ok; } if (conn_host != NULL) { pcmk__str_update(&remote_peer->conn_host, conn_host); } if (state != NULL) { pcmk__update_peer_state(__func__, remote_peer, state, 0); } return pcmk_rc_ok; } /* A_CL_JOIN_RESULT */ /* aka. this is notification that we have (or have not) been accepted */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *tmp1 = NULL; gboolean was_nack = TRUE; static gboolean first_join = TRUE; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); int join_id = -1; const char *op = pcmk__xe_get(input->msg, PCMK__XA_CRM_TASK); const char *welcome_from = pcmk__xe_get(input->msg, PCMK__XA_SRC); if (!pcmk__str_eq(op, CRM_OP_JOIN_ACKNAK, pcmk__str_casei)) { crm_trace("Ignoring op=%s message", op); return; } /* calculate if it was an ack or a nack */ if (pcmk__xe_attr_is_true(input->msg, CRM_OP_JOIN_ACKNAK)) { was_nack = FALSE; } pcmk__xe_get_int(input->msg, PCMK__XA_JOIN_ID, &join_id); if (was_nack) { crm_err("Shutting down because cluster join with leader %s failed " QB_XS " join-%d NACK'd", welcome_from, join_id); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); controld_set_fsa_input_flags(R_STAYDOWN); return; } if (!AM_I_DC && controld_is_local_node(welcome_from)) { crm_warn("Discarding our own welcome - we're no longer the DC"); return; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding %s from node %s (expected from %s)", op, welcome_from, controld_globals.dc_name); return; } update_dc_expected(input->msg); /* record the node's feature set as a transient attribute */ update_attrd(controld_globals.cluster->priv->node_name, CRM_ATTR_FEATURE_SET, CRM_FEATURE_SET, NULL, FALSE); /* send our status section to the DC */ tmp1 = controld_query_executor_state(); if (tmp1 != NULL) { xmlNode *remotes = NULL; xmlNode *join_confirm = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, controld_globals.dc_name, CRM_SYSTEM_DC, CRM_OP_JOIN_CONFIRM, tmp1); const pcmk__node_status_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, pcmk__node_search_cluster_member); - crm_xml_add_int(join_confirm, PCMK__XA_JOIN_ID, join_id); + pcmk__xe_set_int(join_confirm, PCMK__XA_JOIN_ID, join_id); crm_debug("Confirming join-%d: sending local operation history to %s", join_id, controld_globals.dc_name); /* * If this is the node's first join since the controller started on it, * set its initial state (standby or member) according to the user's * preference. * * We do not clear the LRM history here. Even if the DC failed to do it * when we last left, removing them here creates a race condition if the * controller is being recovered. Instead of a list of active resources * from the executor, we may end up with a blank status section. If we * are _NOT_ lucky, we will probe for the "wrong" instance of anonymous * clones and end up with multiple active instances on the machine. */ if (first_join && !pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { first_join = FALSE; if (start_state) { set_join_state(start_state, controld_globals.cluster->priv->node_name, controld_globals.our_uuid, false); } } pcmk__cluster_send_message(dc_node, pcmk_ipc_controld, join_confirm); pcmk__xml_free(join_confirm); if (AM_I_DC == FALSE) { register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __func__); } pcmk__xml_free(tmp1); /* Update the remote node cache with information about which node * is hosting the connection. */ remotes = pcmk__xe_first_child(input->msg, PCMK_XE_NODES, NULL, NULL); if (remotes != NULL) { pcmk__xe_foreach_child(remotes, PCMK_XE_NODE, update_conn_host_cache, NULL); } } else { crm_err("Could not confirm join-%d with %s: Local operation history " "failed", join_id, controld_globals.dc_name); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index c142632116..07c9b978fd 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -1,1097 +1,1097 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // PRIu32 #include // bool, true, false #include // NULL #include // free(), etc. #include // gboolean, etc. #include // xmlNode #include #include #include #include static char *max_generation_from = NULL; static xmlNodePtr max_generation_xml = NULL; /*! * \internal * \brief Nodes from which a CIB sync has failed since the peer joined * * This table is of the form (node_name -> join_id). \p node_name is * the name of a client node from which a CIB \p sync_from() call has failed in * \p do_dc_join_finalize() since the client joined the cluster as a peer. * \p join_id is the ID of the join round in which the \p sync_from() failed, * and is intended for use in nack log messages. */ static GHashTable *failed_sync_nodes = NULL; void finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); /* Numeric counter used to identify join rounds (an unsigned int would be * appropriate, except we get and set it in XML as int) */ static int current_join_id = 0; /*! * \internal * \brief Get log-friendly string equivalent of a controller group join phase * * \param[in] phase Join phase * * \return Log-friendly string equivalent of \p phase */ static const char * join_phase_text(enum controld_join_phase phase) { switch (phase) { case controld_join_nack: return "nack"; case controld_join_none: return "none"; case controld_join_welcomed: return "welcomed"; case controld_join_integrated: return "integrated"; case controld_join_finalized: return "finalized"; case controld_join_confirmed: return "confirmed"; default: return "invalid"; } } /*! * \internal * \brief Destroy the hash table containing failed sync nodes */ void controld_destroy_failed_sync_table(void) { if (failed_sync_nodes != NULL) { g_hash_table_destroy(failed_sync_nodes); failed_sync_nodes = NULL; } } /*! * \internal * \brief Remove a node from the failed sync nodes table if present * * \param[in] node_name Node name to remove */ void controld_remove_failed_sync_node(const char *node_name) { if (failed_sync_nodes != NULL) { g_hash_table_remove(failed_sync_nodes, (gchar *) node_name); } } /*! * \internal * \brief Add to a hash table a node whose CIB failed to sync * * \param[in] node_name Name of node whose CIB failed to sync * \param[in] join_id Join round when the failure occurred */ static void record_failed_sync_node(const char *node_name, gint join_id) { if (failed_sync_nodes == NULL) { failed_sync_nodes = pcmk__strikey_table(g_free, NULL); } /* If the node is already in the table then we failed to nack it during the * filter offer step */ CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name), GINT_TO_POINTER(join_id))); } /*! * \internal * \brief Look up a node name in the failed sync table * * \param[in] node_name Name of node to look up * \param[out] join_id Where to store the join ID of when the sync failed * * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the * node name was found, or \p pcmk_rc_node_unknown otherwise. * \note \p *join_id is set to -1 if the node is not found. */ static int lookup_failed_sync_node(const char *node_name, gint *join_id) { *join_id = -1; if (failed_sync_nodes != NULL) { gpointer result = g_hash_table_lookup(failed_sync_nodes, (gchar *) node_name); if (result != NULL) { *join_id = GPOINTER_TO_INT(result); return pcmk_rc_ok; } } return pcmk_rc_node_unknown; } void crm_update_peer_join(const char *source, pcmk__node_status_t *node, enum controld_join_phase phase) { enum controld_join_phase last = controld_get_join_phase(node); CRM_CHECK(node != NULL, return); /* Remote nodes do not participate in joins */ if (pcmk_is_set(node->flags, pcmk__node_status_remote)) { return; } if (phase == last) { crm_trace("Node %s join-%d phase is still %s " QB_XS " nodeid=%" PRIu32 " source=%s", node->name, current_join_id, join_phase_text(last), node->cluster_layer_id, source); return; } if ((phase <= controld_join_none) || (phase == (last + 1))) { struct controld_node_status_data *data = NULL; if (node->user_data == NULL) { node->user_data = pcmk__assert_alloc(1, sizeof(struct controld_node_status_data)); } data = node->user_data; data->join_phase = phase; crm_trace("Node %s join-%d phase is now %s (was %s) " QB_XS " nodeid=%" PRIu32 " source=%s", node->name, current_join_id, join_phase_text(phase), join_phase_text(last), node->cluster_layer_id, source); return; } crm_warn("Rejecting join-%d phase update for node %s because can't go from " "%s to %s " QB_XS " nodeid=%" PRIu32 " source=%s", current_join_id, node->name, join_phase_text(last), join_phase_text(phase), node->cluster_layer_id, source); } static void start_join_round(void) { GHashTableIter iter; pcmk__node_status_t *peer = NULL; crm_debug("Starting new join round join-%d", current_join_id); g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { crm_update_peer_join(__func__, peer, controld_join_none); } if (max_generation_from != NULL) { free(max_generation_from); max_generation_from = NULL; } if (max_generation_xml != NULL) { pcmk__xml_free(max_generation_xml); max_generation_xml = NULL; } controld_clear_fsa_input_flags(R_HAVE_CIB); } /*! * \internal * \brief Create a join message from the DC * * \param[in] join_op Join operation name * \param[in] host_to Recipient of message */ static xmlNode * create_dc_message(const char *join_op, const char *host_to) { xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_DC, host_to, CRM_SYSTEM_CRMD, join_op, NULL); /* Identify which election this is a part of */ - crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id); + pcmk__xe_set_int(msg, PCMK__XA_JOIN_ID, current_join_id); /* Add a field specifying whether the DC is shutting down. This keeps the * joining node from fencing the old DC if it becomes the new DC. */ pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING, pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)); return msg; } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { /* @TODO We don't use user_data except to distinguish one particular call * from others. Make this clearer. */ xmlNode *offer = NULL; pcmk__node_status_t *member = (pcmk__node_status_t *) value; pcmk__assert(member != NULL); if (!pcmk__cluster_is_node_active(member)) { crm_info("Not making join-%d offer to inactive node %s", current_join_id, pcmk__s(member->name, "with unknown name")); if ((member->expected == NULL) && pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) { /* You would think this unsafe, but in fact this plus an * active resource is what causes it to be fenced. * * Yes, this does mean that any node that dies at the same * time as the old DC and is not running resource (still) * won't be fenced. * * I'm not happy about this either. */ pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN); } return; } if (member->name == NULL) { crm_info("Not making join-%d offer to node uuid %s with unknown name", current_join_id, member->xml_id); return; } if (controld_globals.membership_id != controld_globals.peer_seq) { controld_globals.membership_id = controld_globals.peer_seq; crm_info("Making join-%d offers based on membership event %llu", current_join_id, controld_globals.peer_seq); } if (user_data != NULL) { enum controld_join_phase phase = controld_get_join_phase(member); if (phase > controld_join_none) { crm_info("Not making join-%d offer to already known node %s (%s)", current_join_id, member->name, join_phase_text(phase)); return; } } crm_update_peer_join(__func__, (pcmk__node_status_t*) member, controld_join_none); offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name); // Advertise our feature set so the joining node can bail if not compatible crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); crm_info("Sending join-%d offer to %s", current_join_id, member->name); pcmk__cluster_send_message(member, pcmk_ipc_controld, offer); pcmk__xml_free(offer); crm_update_peer_join(__func__, member, controld_join_welcomed); } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int count; /* Reset everyone's status back to down or in_ccm in the CIB. * Any nodes that are active in the CIB but not in the cluster membership * will be seen as offline by the scheduler anyway. */ current_join_id++; start_join_round(); update_dc(NULL); if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL); count = crmd_join_phase_count(controld_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { pcmk__node_status_t *member = NULL; ha_msg_input_t *welcome = NULL; int count; const char *join_to = NULL; if (msg_data->data == NULL) { crm_info("Making join-%d offers to any unconfirmed nodes " "because an unknown node joined", current_join_id); g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member); check_join_state(cur_state, __func__); return; } welcome = fsa_typed_data(fsa_dt_ha_msg); if (welcome == NULL) { // fsa_typed_data() already logged an error return; } join_to = pcmk__xe_get(welcome->msg, PCMK__XA_SRC); if (join_to == NULL) { crm_err("Can't make join-%d offer to unknown node", current_join_id); return; } member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member); /* It is possible that a node will have been sick or starting up when the * original offer was made. However, it will either re-announce itself in * due course, or we can re-store the original offer on the client. */ crm_update_peer_join(__func__, member, controld_join_none); join_make_offer(NULL, member, NULL); /* If the offer isn't to the local node, make an offer to the local node as * well, to ensure the correct value for max_generation_from. */ if (!controld_is_local_node(join_to)) { member = controld_get_local_node_status(); join_make_offer(NULL, member, NULL); } /* This was a genuine join request; cancel any existing transition and * invoke the scheduler. */ abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join", NULL); count = crmd_join_phase_count(controld_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } static int compare_int_fields(xmlNode * left, xmlNode * right, const char *field) { const char *elem_l = pcmk__xe_get(left, field); const char *elem_r = pcmk__xe_get(right, field); long long int_elem_l; long long int_elem_r; int rc = pcmk_rc_ok; rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL); if (rc != pcmk_rc_ok) { // Shouldn't be possible crm_warn("Comparing current CIB %s as -1 " "because '%s' is not an integer", field, elem_l); } rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL); if (rc != pcmk_rc_ok) { // Shouldn't be possible crm_warn("Comparing joining node's CIB %s as -1 " "because '%s' is not an integer", field, elem_r); } if (int_elem_l < int_elem_r) { return -1; } else if (int_elem_l > int_elem_r) { return 1; } return 0; } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; int count = 0; gint value = 0; gboolean ack_nack_bool = TRUE; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = pcmk__xe_get(join_ack->msg, PCMK__XA_SRC); const char *ref = pcmk__xe_get(join_ack->msg, PCMK_XA_REFERENCE); const char *join_version = pcmk__xe_get(join_ack->msg, PCMK_XA_CRM_FEATURE_SET); pcmk__node_status_t *join_node = NULL; if (join_from == NULL) { crm_err("Ignoring invalid join request without node name"); return; } join_node = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member); pcmk__xe_get_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id); if (join_id != current_join_id) { crm_debug("Ignoring join-%d request from %s because we are on join-%d", join_id, join_from, current_join_id); check_join_state(cur_state, __func__); return; } generation = join_ack->xml; if (max_generation_xml != NULL && generation != NULL) { int lpc = 0; const char *attributes[] = { PCMK_XA_ADMIN_EPOCH, PCMK_XA_EPOCH, PCMK_XA_NUM_UPDATES, }; /* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE * element from the join client. The "if" guard is for clarity. */ if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) { for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) { cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); } } else { // Should always be PCMK__XE_GENERATION_TUPLE CRM_LOG_ASSERT(false); } } if (ref == NULL) { ref = "none"; // for logging only } if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) { crm_err("Rejecting join-%d request from node %s because we failed to " "sync its CIB in join-%d " QB_XS " ref=%s", join_id, join_from, value, ref); ack_nack_bool = FALSE; } else if (!pcmk__cluster_is_node_active(join_node)) { if (match_down_event(join_from) != NULL) { /* The join request was received after the node was fenced or * otherwise shutdown in a way that we're aware of. No need to log * an error in this rare occurrence; we know the client was recently * shut down, and receiving a lingering in-flight request is not * cause for alarm. */ crm_debug("Rejecting join-%d request from inactive node %s " QB_XS " ref=%s", join_id, join_from, ref); } else { crm_err("Rejecting join-%d request from inactive node %s " QB_XS " ref=%s", join_id, join_from, ref); } ack_nack_bool = FALSE; } else if (generation == NULL) { crm_err("Rejecting invalid join-%d request from node %s " "missing CIB generation " QB_XS " ref=%s", join_id, join_from, ref); ack_nack_bool = FALSE; } else if ((join_version == NULL) || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { crm_err("Rejecting join-%d request from node %s because feature set %s" " is incompatible with ours (%s) " QB_XS " ref=%s", join_id, join_from, (join_version? join_version : "pre-3.1.0"), CRM_FEATURE_SET, ref); ack_nack_bool = FALSE; } else if (max_generation_xml == NULL) { const char *validation = pcmk__xe_get(generation, PCMK_XA_VALIDATE_WITH); if (pcmk__get_schema(validation) == NULL) { crm_err("Rejecting join-%d request from %s (with first CIB " "generation) due to %s schema version %s " QB_XS " ref=%s", join_id, join_from, ((validation == NULL)? "missing" : "unknown"), pcmk__s(validation, ""), ref); ack_nack_bool = FALSE; } else { crm_debug("Accepting join-%d request from %s (with first CIB " "generation) " QB_XS " ref=%s", join_id, join_from, ref); max_generation_xml = pcmk__xml_copy(NULL, generation); pcmk__str_update(&max_generation_from, join_from); } } else if ((cmp < 0) || ((cmp == 0) && controld_is_local_node(join_from))) { const char *validation = pcmk__xe_get(generation, PCMK_XA_VALIDATE_WITH); if (pcmk__get_schema(validation) == NULL) { crm_err("Rejecting join-%d request from %s (with better CIB " "generation than current best from %s) due to %s " "schema version %s " QB_XS " ref=%s", join_id, join_from, max_generation_from, ((validation == NULL)? "missing" : "unknown"), pcmk__s(validation, ""), ref); ack_nack_bool = FALSE; } else { crm_debug("Accepting join-%d request from %s (with better CIB " "generation than current best from %s) " QB_XS " ref=%s", join_id, join_from, max_generation_from, ref); crm_log_xml_debug(max_generation_xml, "Old max generation"); crm_log_xml_debug(generation, "New max generation"); pcmk__xml_free(max_generation_xml); max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml); pcmk__str_update(&max_generation_from, join_from); } } else { crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s", join_id, join_from, ref); } if (!ack_nack_bool) { crm_update_peer_join(__func__, join_node, controld_join_nack); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK); } else { crm_update_peer_join(__func__, join_node, controld_join_integrated); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); } count = crmd_join_phase_count(controld_join_integrated); crm_debug("%d node%s currently integrated in join-%d", count, pcmk__plural_s(count), join_id); if (check_join_state(cur_state, __func__) == FALSE) { // Don't waste time by invoking the scheduler yet count = crmd_join_phase_count(controld_join_welcomed); crm_debug("Waiting on join-%d requests from %d outstanding node%s", join_id, count, pcmk__plural_s(count)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { char *sync_from = NULL; int rc = pcmk_ok; int count_welcomed = crmd_join_phase_count(controld_join_welcomed); int count_finalizable = crmd_join_phase_count(controld_join_integrated) + crmd_join_phase_count(controld_join_nack); /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ if (count_welcomed != 0) { crm_debug("Waiting on join-%d requests from %d outstanding node%s " "before finalizing join", current_join_id, count_welcomed, pcmk__plural_s(count_welcomed)); crmd_join_phase_log(LOG_DEBUG); /* crmd_fsa_stall(FALSE); Needed? */ return; } else if (count_finalizable == 0) { crm_debug("Finalization not needed for join-%d at the current time", current_join_id); crmd_join_phase_log(LOG_DEBUG); check_join_state(controld_globals.fsa_state, __func__); return; } controld_clear_fsa_input_flags(R_HAVE_CIB); if ((max_generation_from == NULL) || controld_is_local_node(max_generation_from)) { controld_set_fsa_input_flags(R_HAVE_CIB); } if (!controld_globals.transition_graph->complete) { crm_warn("Delaying join-%d finalization while transition in progress", current_join_id); crmd_join_phase_log(LOG_DEBUG); crmd_fsa_stall(FALSE); return; } if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { // Send our CIB out to everyone sync_from = pcmk__str_copy(controld_globals.cluster->priv->node_name); } else { // Ask for the agreed best CIB sync_from = pcmk__str_copy(max_generation_from); } crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB %s.%s.%s " "with schema %s and feature set %s from %s)", current_join_id, count_finalizable, pcmk__plural_s(count_finalizable), pcmk__xe_get(max_generation_xml, PCMK_XA_ADMIN_EPOCH), pcmk__xe_get(max_generation_xml, PCMK_XA_EPOCH), pcmk__xe_get(max_generation_xml, PCMK_XA_NUM_UPDATES), pcmk__xe_get(max_generation_xml, PCMK_XA_VALIDATE_WITH), pcmk__xe_get(max_generation_xml, PCMK_XA_CRM_FEATURE_SET), sync_from); crmd_join_phase_log(LOG_DEBUG); rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn, sync_from, NULL, cib_none); fsa_register_cib_callback(rc, sync_from, finalize_sync_callback); } void free_max_generation(void) { free(max_generation_from); max_generation_from = NULL; pcmk__xml_free(max_generation_xml); max_generation_xml = NULL; } void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { CRM_LOG_ASSERT(-EPERM != rc); if (rc != pcmk_ok) { const char *sync_from = (const char *) user_data; do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR), "Could not sync CIB from %s in join-%d: %s", sync_from, current_join_id, pcmk_strerror(rc)); if (rc != -pcmk_err_old_data) { record_failed_sync_node(sync_from, current_join_id); } /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __func__); } else if (!AM_I_DC) { crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) { crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN " "(%s)", current_join_id, fsa_state2string(controld_globals.fsa_state)); } else { controld_set_fsa_input_flags(R_HAVE_CIB); /* make sure dc_uuid is re-set to us */ if (!check_join_state(controld_globals.fsa_state, __func__)) { int count_finalizable = 0; count_finalizable = crmd_join_phase_count(controld_join_integrated) + crmd_join_phase_count(controld_join_nack); crm_debug("Notifying %d node%s of join-%d results", count_finalizable, pcmk__plural_s(count_finalizable), current_join_id); g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL); } } } static void join_node_state_commit_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *node = user_data; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; // for register_fsa_error() macro crm_crit("join-%d node history update (via CIB call %d) for node %s " "failed: %s", current_join_id, call_id, node, pcmk_strerror(rc)); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } crm_debug("join-%d node history update (via CIB call %d) for node %s " "complete", current_join_id, call_id, node); check_join_state(controld_globals.fsa_state, __func__); } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int join_id = -1; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *op = pcmk__xe_get(join_ack->msg, PCMK__XA_CRM_TASK); char *join_from = pcmk__xe_get_copy(join_ack->msg, PCMK__XA_SRC); pcmk__node_status_t *peer = NULL; enum controld_join_phase phase = controld_join_none; enum controld_section_e section = controld_section_lrm; char *xpath = NULL; xmlNode *state = join_ack->xml; xmlNode *execd_state = NULL; cib_t *cib = controld_globals.cib_conn; int rc = pcmk_ok; // Sanity checks if (join_from == NULL) { crm_warn("Ignoring message received without node identification"); goto done; } if (op == NULL) { crm_warn("Ignoring message received from %s without task", join_from); goto done; } if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring '%s' message from %s while waiting for '%s'", op, join_from, CRM_OP_JOIN_CONFIRM); goto done; } if (pcmk__xe_get_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != pcmk_rc_ok) { crm_warn("Ignoring join confirmation from %s without valid join ID", join_from); goto done; } peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member); phase = controld_get_join_phase(peer); if (phase != controld_join_finalized) { crm_info("Ignoring out-of-sequence join-%d confirmation from %s " "(currently %s not %s)", join_id, join_from, join_phase_text(phase), join_phase_text(controld_join_finalized)); goto done; } if (join_id != current_join_id) { crm_err("Rejecting join-%d confirmation from %s " "because currently on join-%d", join_id, join_from, current_join_id); crm_update_peer_join(__func__, peer, controld_join_nack); goto done; } crm_update_peer_join(__func__, peer, controld_join_confirmed); /* Update CIB with node's current executor state. A new transition will be * triggered later, when the CIB manager notifies us of the change. * * The delete and modify requests are part of an atomic transaction. */ rc = cib->cmds->init_transaction(cib); if (rc != pcmk_ok) { goto done; } // Delete relevant parts of node's current executor state from CIB if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { section = controld_section_lrm_unlocked; } controld_node_state_deletion_strings(join_from, section, &xpath, NULL); rc = cib->cmds->remove(cib, xpath, NULL, cib_xpath|cib_multiple|cib_transaction); if (rc != pcmk_ok) { goto done; } // Update CIB with node's latest known executor state if (controld_is_local_node(join_from)) { // Use the latest possible state if processing our own join ack execd_state = controld_query_executor_state(); if (execd_state != NULL) { crm_debug("Updating local node history for join-%d from query " "result", current_join_id); state = execd_state; } else { crm_warn("Updating local node history from join-%d confirmation " "because query failed", current_join_id); } } else { crm_debug("Updating node history for %s from join-%d confirmation", join_from, current_join_id); } rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state, cib_can_create|cib_transaction); pcmk__xml_free(execd_state); if (rc != pcmk_ok) { goto done; } // Commit the transaction rc = cib->cmds->end_transaction(cib, true, cib_none); fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback); if (rc > 0) { // join_from will be freed after callback join_from = NULL; rc = pcmk_ok; } done: if (rc != pcmk_ok) { crm_crit("join-%d node history update for node %s failed: %s", current_join_id, join_from, pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free(join_from); free(xpath); } void finalize_join_for(gpointer key, gpointer value, gpointer user_data) { xmlNode *acknak = NULL; xmlNode *tmp1 = NULL; pcmk__node_status_t *join_node = value; const char *join_to = join_node->name; enum controld_join_phase phase = controld_get_join_phase(join_node); bool integrated = false; switch (phase) { case controld_join_integrated: integrated = true; break; case controld_join_nack: break; default: crm_trace("Not updating non-integrated and non-nacked node %s (%s) " "for join-%d", join_to, join_phase_text(phase), current_join_id); return; } /* Update the element with the node's name and UUID, in case they * weren't known before */ crm_trace("Updating node name and UUID in CIB for %s", join_to); tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE); crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node)); crm_xml_add(tmp1, PCMK_XA_UNAME, join_to); fsa_cib_anon_update(PCMK_XE_NODES, tmp1); pcmk__xml_free(tmp1); join_node = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member); if (!pcmk__cluster_is_node_active(join_node)) { /* * NACK'ing nodes that the membership layer doesn't know about yet * simply creates more churn * * Better to leave them waiting and let the join restart when * the new membership event comes in * * All other NACKs (due to versions etc) should still be processed */ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING); return; } // Acknowledge or nack node's join request crm_debug("%sing join-%d request from %s", integrated? "Acknowledg" : "Nack", current_join_id, join_to); acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated); if (integrated) { // No change needed for a nacked node crm_update_peer_join(__func__, join_node, controld_join_finalized); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); /* Iterate through the remote peer cache and add information on which * node hosts each to the ACK message. This keeps new controllers in * sync with what has already happened. */ if (pcmk__cluster_num_remote_nodes() > 0) { GHashTableIter iter; pcmk__node_status_t *node = NULL; xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES); g_hash_table_iter_init(&iter, pcmk__remote_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *remote = NULL; if (!node->conn_host) { continue; } remote = pcmk__xe_create(remotes, PCMK_XE_NODE); pcmk__xe_set_props(remote, PCMK_XA_ID, node->name, PCMK__XA_NODE_STATE, node->state, PCMK__XA_CONNECTION_HOST, node->conn_host, NULL); } } } pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak); pcmk__xml_free(acknak); return; } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { static unsigned long long highest_seq = 0; if (controld_globals.membership_id != controld_globals.peer_seq) { crm_debug("join-%d: Membership changed from %llu to %llu " QB_XS " highest=%llu state=%s for=%s", current_join_id, controld_globals.membership_id, controld_globals.peer_seq, highest_seq, fsa_state2string(cur_state), source); if (highest_seq < controld_globals.peer_seq) { /* Don't spam the FSA with duplicates */ highest_seq = controld_globals.peer_seq; register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } else if (cur_state == S_INTEGRATION) { if (crmd_join_phase_count(controld_join_welcomed) == 0) { int count = crmd_join_phase_count(controld_join_integrated); crm_debug("join-%d: Integration of %d peer%s complete " QB_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { crm_debug("join-%d: Delaying finalization until we have CIB " QB_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); return TRUE; } else if (crmd_join_phase_count(controld_join_welcomed) != 0) { int count = crmd_join_phase_count(controld_join_welcomed); crm_debug("join-%d: Still waiting on %d welcomed node%s " QB_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(controld_join_integrated) != 0) { int count = crmd_join_phase_count(controld_join_integrated); crm_debug("join-%d: Still waiting on %d integrated node%s " QB_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(controld_join_finalized) != 0) { int count = crmd_join_phase_count(controld_join_finalized); crm_debug("join-%d: Still waiting on %d finalized node%s " QB_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else { crm_debug("join-%d: Complete " QB_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); return TRUE; } } return FALSE; } void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); crm_update_quorum(pcmk__cluster_has_quorum(), TRUE); } int crmd_join_phase_count(enum controld_join_phase phase) { int count = 0; pcmk__node_status_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { if (controld_get_join_phase(peer) == phase) { count++; } } return count; } void crmd_join_phase_log(int level) { pcmk__node_status_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->name, join_phase_text(controld_get_join_phase(peer))); } } diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c index b17f781660..4faf849731 100644 --- a/daemons/controld/controld_membership.c +++ b/daemons/controld/controld_membership.c @@ -1,468 +1,468 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include #include void post_cache_update(int instance); extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); static void reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) { pcmk__node_status_t *node = value; if (pcmk__cluster_is_node_active(node)) { return; } crm_update_peer_join(__func__, node, controld_join_none); if ((node != NULL) && (node->name != NULL)) { if (controld_is_local_node(node->name)) { crm_err("We're not part of the cluster anymore"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); } else if (!AM_I_DC && pcmk__str_eq(node->name, controld_globals.dc_name, pcmk__str_casei)) { crm_warn("Our DC node (%s) left the cluster", node->name); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } if ((controld_globals.fsa_state == S_INTEGRATION) || (controld_globals.fsa_state == S_FINALIZE_JOIN)) { check_join_state(controld_globals.fsa_state, __func__); } if ((node != NULL) && (node->xml_id != NULL)) { fail_incompletable_actions(controld_globals.transition_graph, node->xml_id); } } void post_cache_update(int instance) { xmlNode *no_op = NULL; controld_globals.peer_seq = instance; crm_debug("Updated cache after membership event %d.", instance); g_hash_table_foreach(pcmk__peer_cache, reap_dead_nodes, NULL); controld_set_fsa_input_flags(R_MEMBERSHIP); if (AM_I_DC) { populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer | node_update_expected, __func__); } /* * If we lost nodes, we should re-check the election status * Safe to call outside of an election */ controld_set_fsa_action_flags(A_ELECTION_CHECK); controld_trigger_fsa(); /* Membership changed, remind everyone we're here. * This will aid detection of duplicate DCs */ no_op = pcmk__new_request(pcmk_ipc_controld, (AM_I_DC? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD), NULL, CRM_SYSTEM_CRMD, CRM_OP_NOOP, NULL); pcmk__cluster_send_message(NULL, pcmk_ipc_controld, no_op); pcmk__xml_free(no_op); } static void crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_trace("Node update %d complete", call_id); } else if(call_id < pcmk_ok) { crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else { crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /*! * \internal * \brief Create an XML node state tag with updates * * \param[in,out] node Node whose state will be used for update * \param[in] flags Bitmask of node_update_flags indicating what to update * \param[in,out] parent XML node to contain update (or NULL) * \param[in] source Who requested the update (only used for logging) * * \return Pointer to created node state tag */ xmlNode * create_node_state_update(pcmk__node_status_t *node, int flags, xmlNode *parent, const char *source) { // @TODO Ensure all callers handle NULL returns const char *value = NULL; xmlNode *node_state; if (!node->state) { crm_info("Node update for %s cancelled: no state, not seen yet", node->name); return NULL; } node_state = pcmk__xe_create(parent, PCMK__XE_NODE_STATE); if (pcmk_is_set(node->flags, pcmk__node_status_remote)) { pcmk__xe_set_bool_attr(node_state, PCMK_XA_REMOTE_NODE, true); } if (crm_xml_add(node_state, PCMK_XA_ID, pcmk__cluster_get_xml_id(node)) == NULL) { crm_info("Node update for %s cancelled: no ID", node->name); pcmk__xml_free(node_state); return NULL; } crm_xml_add(node_state, PCMK_XA_UNAME, node->name); if ((flags & node_update_cluster) && node->state) { if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) { // A value 0 means the node is not a cluster member. pcmk__xe_set_ll(node_state, PCMK__XA_IN_CCM, node->when_member); } else { pcmk__xe_set_bool_attr(node_state, PCMK__XA_IN_CCM, pcmk__str_eq(node->state, PCMK_VALUE_MEMBER, pcmk__str_none)); } } if (!pcmk_is_set(node->flags, pcmk__node_status_remote)) { if (flags & node_update_peer) { if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) { // A value 0 means the peer is offline in CPG. pcmk__xe_set_ll(node_state, PCMK_XA_CRMD, node->when_online); } else { // @COMPAT DCs < 2.1.7 use online/offline rather than timestamp value = PCMK_VALUE_OFFLINE; if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { value = PCMK_VALUE_ONLINE; } crm_xml_add(node_state, PCMK_XA_CRMD, value); } } if (flags & node_update_join) { if (controld_get_join_phase(node) <= controld_join_none) { value = CRMD_JOINSTATE_DOWN; } else { value = CRMD_JOINSTATE_MEMBER; } crm_xml_add(node_state, PCMK__XA_JOIN, value); } if (flags & node_update_expected) { crm_xml_add(node_state, PCMK_XA_EXPECTED, node->expected); } } crm_xml_add(node_state, PCMK_XA_CRM_DEBUG_ORIGIN, source); return node_state; } static void remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *node_uuid = user_data; do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)", node_uuid, pcmk_strerror(rc), rc); } static void search_conflicting_node_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *new_node_uuid = user_data; xmlNode *node_xml = NULL; if (rc != pcmk_ok) { if (rc != -ENXIO) { crm_notice("Searching conflicting nodes for %s failed: %s (%d)", new_node_uuid, pcmk_strerror(rc), rc); } return; } else if (output == NULL) { return; } if (pcmk__xe_is(output, PCMK_XE_NODE)) { node_xml = output; } else { node_xml = pcmk__xe_first_child(output, PCMK_XE_NODE, NULL, NULL); } for (; node_xml != NULL; node_xml = pcmk__xe_next(node_xml, PCMK_XE_NODE)) { const char *node_uuid = NULL; const char *node_uname = NULL; GHashTableIter iter; pcmk__node_status_t *node = NULL; gboolean known = FALSE; node_uuid = pcmk__xe_get(node_xml, PCMK_XA_ID); node_uname = pcmk__xe_get(node_xml, PCMK_XA_UNAME); if (node_uuid == NULL || node_uname == NULL) { continue; } g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if ((node != NULL) && pcmk__str_eq(node->xml_id, node_uuid, pcmk__str_casei) && pcmk__str_eq(node->name, node_uname, pcmk__str_casei)) { known = TRUE; break; } } if (known == FALSE) { cib_t *cib_conn = controld_globals.cib_conn; int delete_call_id = 0; xmlNode *node_state_xml = NULL; crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s", node_uuid, node_uname, new_node_uuid); delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_NODES, node_xml, cib_none); fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid), remove_conflicting_node_callback); node_state_xml = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE); crm_xml_add(node_state_xml, PCMK_XA_ID, node_uuid); crm_xml_add(node_state_xml, PCMK_XA_UNAME, node_uname); delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_STATUS, node_state_xml, cib_none); fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid), remove_conflicting_node_callback); pcmk__xml_free(node_state_xml); } } } static void node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if(call_id < pcmk_ok) { crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id); crm_log_xml_debug(msg, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else if(rc < pcmk_ok) { crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void populate_cib_nodes(enum node_update_flags flags, const char *source) { cib_t *cib_conn = controld_globals.cib_conn; int call_id = 0; gboolean from_hashtable = TRUE; xmlNode *node_list = pcmk__xe_create(NULL, PCMK_XE_NODES); #if SUPPORT_COROSYNC if (!pcmk_is_set(flags, node_update_quick) && (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)) { from_hashtable = pcmk__corosync_add_nodes(node_list); } #endif if (from_hashtable) { GHashTableIter iter; pcmk__node_status_t *node = NULL; GString *xpath = NULL; g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *new_node = NULL; if ((node->xml_id != NULL) && (node->name != NULL)) { crm_trace("Creating node entry for %s/%s", node->name, node->xml_id); if (xpath == NULL) { xpath = g_string_sized_new(512); } else { g_string_truncate(xpath, 0); } /* We need both to be valid */ new_node = pcmk__xe_create(node_list, PCMK_XE_NODE); crm_xml_add(new_node, PCMK_XA_ID, node->xml_id); crm_xml_add(new_node, PCMK_XA_UNAME, node->name); /* Search and remove unknown nodes with the conflicting uname from CIB */ pcmk__g_strcat(xpath, "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_NODES "/" PCMK_XE_NODE "[@" PCMK_XA_UNAME "='", node->name, "']" "[@" PCMK_XA_ID "!='", node->xml_id, "']", NULL); call_id = cib_conn->cmds->query(cib_conn, (const char *) xpath->str, NULL, cib_xpath); fsa_register_cib_callback(call_id, pcmk__str_copy(node->xml_id), search_conflicting_node_callback); } } if (xpath != NULL) { g_string_free(xpath, TRUE); } } crm_trace("Populating section from %s", from_hashtable ? "hashtable" : "cluster"); if ((controld_update_cib(PCMK_XE_NODES, node_list, cib_none, node_list_update_callback) == pcmk_rc_ok) && (pcmk__peer_cache != NULL) && AM_I_DC) { /* * There is no need to update the local CIB with our values if * we've not seen valid membership data */ GHashTableIter iter; pcmk__node_status_t *node = NULL; pcmk__xml_free(node_list); node_list = pcmk__xe_create(NULL, PCMK_XE_STATUS); g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { create_node_state_update(node, flags, node_list, source); } if (pcmk__remote_peer_cache != NULL) { g_hash_table_iter_init(&iter, pcmk__remote_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { create_node_state_update(node, flags, node_list, source); } } controld_update_cib(PCMK_XE_STATUS, node_list, cib_none, crmd_node_update_complete); } pcmk__xml_free(node_list); } static void cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_trace("Quorum update %d complete", call_id); } else { crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void crm_update_quorum(gboolean quorum, gboolean force_update) { bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum); if (quorum) { controld_set_global_flags(controld_ever_had_quorum); } else if (pcmk_all_flags_set(controld_globals.flags, controld_ever_had_quorum |controld_no_quorum_panic)) { pcmk__panic("Quorum lost"); } if (AM_I_DC && ((has_quorum && !quorum) || (!has_quorum && quorum) || force_update)) { xmlNode *update = NULL; update = pcmk__xe_create(NULL, PCMK_XE_CIB); - crm_xml_add_int(update, PCMK_XA_HAVE_QUORUM, quorum); + pcmk__xe_set_int(update, PCMK_XA_HAVE_QUORUM, quorum); crm_xml_add(update, PCMK_XA_DC_UUID, controld_globals.our_uuid); crm_debug("Updating quorum status to %s", pcmk__btoa(quorum)); controld_update_cib(PCMK_XE_CIB, update, cib_none, cib_quorum_update_complete); pcmk__xml_free(update); /* Quorum changes usually cause a new transition via other activity: * quorum gained via a node joining will abort via the node join, * and quorum lost via a node leaving will usually abort via resource * activity and/or fencing. * * However, it is possible that nothing else causes a transition (e.g. * someone forces quorum via corosync-cmaptcl, or quorum is lost due to * a node in standby shutting down cleanly), so here ensure a new * transition is triggered. */ if (quorum) { /* If quorum was gained, abort after a short delay, in case multiple * nodes are joining around the same time, so the one that brings us * to quorum doesn't cause all the remaining ones to be fenced. */ abort_after_delay(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Quorum gained", 5000); } else { abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Quorum lost", NULL); } } if (quorum) { controld_set_global_flags(controld_has_quorum); } else { controld_clear_global_flags(controld_has_quorum); } } diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c index 88f7ac1489..b575bf1db3 100644 --- a/daemons/controld/controld_schedulerd.c +++ b/daemons/controld/controld_schedulerd.c @@ -1,518 +1,518 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* pid_t, sleep, ssize_t */ #include #include #include #include #include #include #include #include // xmlXPathObject, etc. #include static void handle_disconnect(void); static pcmk_ipc_api_t *schedulerd_api = NULL; /*! * \internal * \brief Close any scheduler connection and free associated memory */ void controld_shutdown_schedulerd_ipc(void) { controld_clear_fsa_input_flags(R_PE_REQUIRED); pcmk_disconnect_ipc(schedulerd_api); handle_disconnect(); pcmk_free_ipc_api(schedulerd_api); schedulerd_api = NULL; } /*! * \internal * \brief Save CIB query result to file, raising FSA error * * \param[in] msg Ignored * \param[in] call_id Call ID of CIB query * \param[in] rc Return code of CIB query * \param[in] output Result of CIB query * \param[in] user_data Unique identifier for filename * * \note This is intended to be called after a scheduler connection fails. */ static void save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { char *filename = crm_strdup_printf(PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2", id); if (pcmk__xml_write_file(output, filename, true) != pcmk_rc_ok) { crm_err("Could not save Cluster Information Base to %s after scheduler crash", filename); } else { crm_notice("Saved Cluster Information Base to %s after scheduler crash", filename); } free(filename); } } /*! * \internal * \brief Respond to scheduler connection failure */ static void handle_disconnect(void) { // If we aren't connected to the scheduler, we can't expect a reply controld_expect_sched_reply(NULL); if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); crm_crit("Lost connection to the scheduler " QB_XS " CIB will be saved to " PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2", uuid_str); /* * The scheduler died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn, NULL, NULL, cib_none); fsa_register_cib_callback(rc, uuid_str, save_cib_contents); } controld_clear_fsa_input_flags(R_PE_CONNECTED); controld_trigger_fsa(); return; } static void handle_reply(pcmk_schedulerd_api_reply_t *reply) { const char *msg_ref = NULL; if (!AM_I_DC) { return; } msg_ref = reply->data.graph.reference; if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC); } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, pcmk__str_none)) { ha_msg_input_t fsa_input; xmlNode *crm_data_node; controld_stop_sched_timer(); /* do_te_invoke (which will eventually process the fsa_input we are constructing * here) requires that fsa_input.xml be non-NULL. That will only happen if * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function * gave us the values we need, we just need to put them into XML. * * The name of the top level element here is irrelevant. Nothing checks it. */ fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply"); crm_xml_add(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref); crm_xml_add(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN, reply->data.graph.input); crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML); pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph); register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); pcmk__xml_free(fsa_input.msg); } else { crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref); } } static void scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { pcmk_schedulerd_api_reply_t *reply = event_data; switch (event_type) { case pcmk_ipc_event_disconnect: handle_disconnect(); break; case pcmk_ipc_event_reply: handle_reply(reply); break; default: break; } } static bool new_schedulerd_ipc_connection(void) { int rc; controld_set_fsa_input_flags(R_PE_REQUIRED); if (schedulerd_api == NULL) { rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd); if (rc != pcmk_rc_ok) { crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); return false; } } pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL); rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3); if (rc != pcmk_rc_ok) { crm_err("Error connecting to %s: %s", pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc)); return false; } controld_set_fsa_input_flags(R_PE_CONNECTED); return true; } static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); /* A_PE_START, A_PE_STOP, O_PE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (pcmk_is_set(action, A_PE_STOP)) { controld_clear_fsa_input_flags(R_PE_REQUIRED); pcmk_disconnect_ipc(schedulerd_api); handle_disconnect(); } if (pcmk_is_set(action, A_PE_START) && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { if (cur_state == S_STOPPING) { crm_info("Ignoring request to connect to scheduler while shutting down"); } else if (!new_schedulerd_ipc_connection()) { crm_warn("Could not connect to scheduler"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } static int fsa_pe_query = 0; static mainloop_timer_t *controld_sched_timer = NULL; // @TODO Make this a configurable cluster option if there's demand for it #define SCHED_TIMEOUT_MS (120000) /*! * \internal * \brief Handle a timeout waiting for scheduler reply * * \param[in] user_data Ignored * * \return FALSE (indicating that timer should not be restarted) */ static gboolean controld_sched_timeout(gpointer user_data) { if (AM_I_DC) { /* If this node is the DC but can't communicate with the scheduler, just * exit (and likely get fenced) so this node doesn't interfere with any * further DC elections. * * @TODO We could try something less drastic first, like disconnecting * and reconnecting to the scheduler, but something is likely going * seriously wrong, so perhaps it's better to just fail as quickly as * possible. */ crmd_exit(CRM_EX_FATAL); } return FALSE; } void controld_stop_sched_timer(void) { if ((controld_sched_timer != NULL) && (controld_globals.fsa_pe_ref != NULL)) { crm_trace("Stopping timer for scheduler reply %s", controld_globals.fsa_pe_ref); } mainloop_timer_stop(controld_sched_timer); } /*! * \internal * \brief Set the scheduler request currently being waited on * * \param[in] ref Request to expect reply to (or NULL for none) * * \note This function takes ownership of \p ref. */ void controld_expect_sched_reply(char *ref) { if (ref) { if (controld_sched_timer == NULL) { controld_sched_timer = mainloop_timer_add("scheduler_reply_timer", SCHED_TIMEOUT_MS, FALSE, controld_sched_timeout, NULL); } mainloop_timer_start(controld_sched_timer); } else { controld_stop_sched_timer(); } free(controld_globals.fsa_pe_ref); controld_globals.fsa_pe_ref = ref; } /*! * \internal * \brief Free the scheduler reply timer */ void controld_free_sched_timer(void) { if (controld_sched_timer != NULL) { mainloop_timer_del(controld_sched_timer); controld_sched_timer = NULL; } } /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { cib_t *cib_conn = controld_globals.cib_conn; if (AM_I_DC == FALSE) { crm_err("Not invoking scheduler because not DC: %s", fsa_action2string(action)); return; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the scheduler"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the scheduler to connect"); crmd_fsa_stall(FALSE); controld_set_fsa_action_flags(A_PE_START); controld_trigger_fsa(); } return; } if (cur_state != S_POLICY_ENGINE) { crm_notice("Not invoking scheduler because in state %s", fsa_state2string(cur_state)); return; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_none); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(controld_globals.fsa_state)); controld_expect_sched_reply(NULL); fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback); } static void force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) { int max = 0; int lpc = 0; const char *xpath_base = NULL; char *xpath_string = NULL; xmlXPathObject *xpathObj = NULL; xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG); if (xpath_base == NULL) { crm_err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)"); return; } xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']", xpath_base, PCMK_XE_CLUSTER_PROPERTY_SET, attr_name); xpathObj = pcmk__xpath_search(xml->doc, xpath_string); max = pcmk__xpath_num_results(xpathObj); free(xpath_string); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = pcmk__xpath_result(xpathObj, lpc); if (match == NULL) { continue; } crm_trace("Forcing %s/%s = %s", pcmk__xe_id(match), attr_name, attr_value); crm_xml_add(match, PCMK_XA_VALUE, attr_value); } if(max == 0) { xmlNode *configuration = NULL; xmlNode *crm_config = NULL; xmlNode *cluster_property_set = NULL; crm_trace("Creating %s-%s for %s=%s", PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name, attr_name, attr_value); configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL, NULL); if (configuration == NULL) { configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION); } crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG, NULL, NULL); if (crm_config == NULL) { crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG); } cluster_property_set = pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET, NULL, NULL); if (cluster_property_set == NULL) { cluster_property_set = pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET); crm_xml_add(cluster_property_set, PCMK_XA_ID, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS); } xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR); pcmk__xe_set_id(xml, "%s-%s", PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name); crm_xml_add(xml, PCMK_XA_NAME, attr_name); crm_xml_add(xml, PCMK_XA_VALUE, attr_value); } xmlXPathFreeObject(xpathObj); } static void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *ref = NULL; pid_t watchdog = pcmk__locate_sbd(); if (rc != pcmk_ok) { crm_err("Could not retrieve the Cluster Information Base: %s " QB_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (!AM_I_DC || !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { crm_debug("No need to invoke the scheduler anymore"); return; } else if (controld_globals.fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding scheduler request in state: %s", fsa_state2string(controld_globals.fsa_state)); return; /* this callback counts as 1 */ } else if (num_cib_op_callbacks() > 1) { crm_debug("Re-asking for the CIB: %d other peer updates still pending", (num_cib_op_callbacks() - 1)); sleep(1); controld_set_fsa_action_flags(A_PE_INVOKE); controld_trigger_fsa(); return; } CRM_LOG_ASSERT(output != NULL); /* Refresh the remote node cache and the known node cache when the * scheduler is invoked */ pcmk__refresh_node_caches_from_cib(output); crm_xml_add(output, PCMK_XA_DC_UUID, controld_globals.our_uuid); pcmk__xe_set_bool_attr(output, PCMK_XA_HAVE_QUORUM, pcmk_is_set(controld_globals.flags, controld_has_quorum)); force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog)); if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum) && !pcmk__cluster_has_quorum()) { - crm_xml_add_int(output, PCMK_XA_NO_QUORUM_PANIC, 1); + pcmk__xe_set_int(output, PCMK_XA_NO_QUORUM_PANIC, 1); } rc = pcmk_schedulerd_api_graph(schedulerd_api, output, &ref); if (rc != pcmk_rc_ok) { free(ref); crm_err("Could not contact the scheduler: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); } else { pcmk__assert(ref != NULL); controld_expect_sched_reply(ref); crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, " "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref, controld_globals.peer_seq, pcmk__flag_text(controld_globals.flags, controld_has_quorum)); } } diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c index 8ef18932f9..e042f4cf8e 100644 --- a/daemons/controld/controld_throttle.c +++ b/daemons/controld/controld_throttle.c @@ -1,407 +1,407 @@ /* * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include /* These values don't need to be bits, but these particular values must be kept * for backward compatibility during rolling upgrades. */ enum throttle_state_e { throttle_none = 0x0000, throttle_low = 0x0001, throttle_med = 0x0010, throttle_high = 0x0100, throttle_extreme = 0x1000, }; struct throttle_record_s { int max; enum throttle_state_e mode; char *node; }; static int throttle_job_max = 0; static float throttle_load_target = 0.0; #define THROTTLE_FACTOR_LOW 1.2 #define THROTTLE_FACTOR_MEDIUM 1.6 #define THROTTLE_FACTOR_HIGH 2.0 static GHashTable *throttle_records = NULL; static mainloop_timer_t *throttle_timer = NULL; static const char * load2str(enum throttle_state_e mode) { switch (mode) { case throttle_extreme: return "extreme"; case throttle_high: return "high"; case throttle_med: return "medium"; case throttle_low: return "low"; case throttle_none: return "negligible"; default: return "undetermined"; } } /*! * \internal * \brief Check a load value against throttling thresholds * * \param[in] load Load value to check * \param[in] desc Description of metric (for logging) * \param[in] thresholds Low/medium/high/extreme thresholds * * \return Throttle mode corresponding to load value */ static enum throttle_state_e throttle_check_thresholds(float load, const char *desc, const float thresholds[4]) { if (load > thresholds[3]) { crm_notice("Extreme %s detected: %f", desc, load); return throttle_extreme; } else if (load > thresholds[2]) { crm_notice("High %s detected: %f", desc, load); return throttle_high; } else if (load > thresholds[1]) { crm_info("Moderate %s detected: %f", desc, load); return throttle_med; } else if (load > thresholds[0]) { crm_debug("Noticeable %s detected: %f", desc, load); return throttle_low; } crm_trace("Negligible %s detected: %f", desc, load); return throttle_none; } static enum throttle_state_e throttle_handle_load(float load, const char *desc, int cores) { float normalize; float thresholds[4]; if (cores == 1) { /* On a single core machine, a load of 1.0 is already too high */ normalize = 0.6; } else { /* Normalize the load to be per-core */ normalize = cores; } thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW; thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM; thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH; thresholds[3] = load + 1.0; /* never extreme */ return throttle_check_thresholds(load, desc, thresholds); } static enum throttle_state_e throttle_mode(void) { enum throttle_state_e mode = throttle_none; unsigned int cores = pcmk__procfs_num_cores(); float load; float thresholds[4]; if (pcmk__throttle_cib_load(PCMK__SERVER_BASED, &load)) { float cib_max_cpu = 0.95; /* The CIB is a single-threaded task and thus cannot consume more * than 100% of a CPU (and 1/cores of the overall system load). * * On a many-cored system, the CIB might therefore be maxed out (causing * operations to fail or appear to fail) even though the overall system * load is still reasonable. * * Therefore, the 'normal' thresholds can not apply here, and we need a * special case. */ if (cores == 1) { cib_max_cpu = 0.4; } if ((throttle_load_target > 0.0) && (throttle_load_target < cib_max_cpu)) { cib_max_cpu = throttle_load_target; } thresholds[0] = cib_max_cpu * 0.8; thresholds[1] = cib_max_cpu * 0.9; thresholds[2] = cib_max_cpu; /* Can only happen on machines with a low number of cores */ thresholds[3] = cib_max_cpu * 1.5; mode = throttle_check_thresholds(load, "CIB load", thresholds); } if (throttle_load_target <= 0) { /* If we ever make this a valid value, the cluster will at least behave * as expected */ return mode; } if (pcmk__throttle_load_avg(&load)) { enum throttle_state_e cpu_load; cpu_load = throttle_handle_load(load, "CPU load", cores); if (cpu_load > mode) { mode = cpu_load; } crm_debug("Current load is %f across %u core(s)", load, cores); } return mode; } static void throttle_send_command(enum throttle_state_e mode) { xmlNode *xml = NULL; static enum throttle_state_e last = -1; if(mode != last) { crm_info("New throttle mode: %s load (was %s)", load2str(mode), load2str(last)); last = mode; xml = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL, CRM_SYSTEM_CRMD, CRM_OP_THROTTLE, NULL); - crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MODE, mode); - crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MAX, throttle_job_max); + pcmk__xe_set_int(xml, PCMK__XA_CRM_LIMIT_MODE, mode); + pcmk__xe_set_int(xml, PCMK__XA_CRM_LIMIT_MAX, throttle_job_max); pcmk__cluster_send_message(NULL, pcmk_ipc_controld, xml); pcmk__xml_free(xml); } } static gboolean throttle_timer_cb(gpointer data) { throttle_send_command(throttle_mode()); return TRUE; } static void throttle_record_free(gpointer p) { struct throttle_record_s *r = p; free(r->node); free(r); } static void throttle_set_load_target(float target) { throttle_load_target = target; } /*! * \internal * \brief Update the maximum number of simultaneous jobs * * \param[in] preference Cluster-wide \c PCMK_OPT_NODE_ACTION_LIMIT from the * CIB */ static void throttle_update_job_max(const char *preference) { long long max = 0LL; // Per-node override const char *env_limit = pcmk__env_option(PCMK__ENV_NODE_ACTION_LIMIT); if (env_limit != NULL) { int rc = pcmk__scan_ll(env_limit, &max, 0LL); if (rc != pcmk_rc_ok) { crm_warn("Ignoring local option PCMK_" PCMK__ENV_NODE_ACTION_LIMIT " because '%s' is not a valid value: %s", env_limit, pcmk_rc_str(rc)); env_limit = NULL; } } if (env_limit == NULL) { // Option validator should prevent invalid values CRM_LOG_ASSERT(pcmk__scan_ll(preference, &max, 0LL) == pcmk_rc_ok); } if (max > 0) { throttle_job_max = (max >= INT_MAX)? INT_MAX : (int) max; } else { // Default is based on the number of cores detected throttle_job_max = 2 * pcmk__procfs_num_cores(); } } void throttle_init(void) { if(throttle_records == NULL) { throttle_records = pcmk__strkey_table(NULL, throttle_record_free); throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL); } throttle_update_job_max(NULL); mainloop_timer_start(throttle_timer); } /*! * \internal * \brief Configure throttle options based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_throttle(GHashTable *options) { const char *value = g_hash_table_lookup(options, PCMK_OPT_LOAD_THRESHOLD); if (value != NULL) { throttle_set_load_target(strtof(value, NULL) / 100.0); } value = g_hash_table_lookup(options, PCMK_OPT_NODE_ACTION_LIMIT); throttle_update_job_max(value); } void throttle_fini(void) { if (throttle_timer != NULL) { mainloop_timer_del(throttle_timer); throttle_timer = NULL; } if (throttle_records != NULL) { g_hash_table_destroy(throttle_records); throttle_records = NULL; } } int throttle_get_total_job_limit(int l) { /* Cluster-wide limit */ GHashTableIter iter; int limit = l; int peers = pcmk__cluster_num_active_nodes(); struct throttle_record_s *r = NULL; g_hash_table_iter_init(&iter, throttle_records); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) { switch(r->mode) { case throttle_extreme: if(limit == 0 || limit > peers/4) { limit = QB_MAX(1, peers/4); } break; case throttle_high: if(limit == 0 || limit > peers/2) { limit = QB_MAX(1, peers/2); } break; default: break; } } if(limit == l) { } else if(l == 0) { crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d", limit); } else { crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d instead of %d", limit, l); } return limit; } int throttle_get_job_limit(const char *node) { int jobs = 1; struct throttle_record_s *r = NULL; r = g_hash_table_lookup(throttle_records, node); if(r == NULL) { r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s)); r->node = pcmk__str_copy(node); r->mode = throttle_low; r->max = throttle_job_max; crm_trace("Defaulting to local values for unknown node %s", node); g_hash_table_insert(throttle_records, r->node, r); } switch(r->mode) { case throttle_extreme: case throttle_high: jobs = 1; /* At least one job must always be allowed */ break; case throttle_med: jobs = QB_MAX(1, r->max / 4); break; case throttle_low: jobs = QB_MAX(1, r->max / 2); break; case throttle_none: jobs = QB_MAX(1, r->max); break; default: crm_err("Unknown throttle mode %.4x on %s", r->mode, node); break; } return jobs; } void throttle_update(xmlNode *xml) { int max = 0; int mode = 0; struct throttle_record_s *r = NULL; const char *from = pcmk__xe_get(xml, PCMK__XA_SRC); pcmk__xe_get_int(xml, PCMK__XA_CRM_LIMIT_MODE, &mode); pcmk__xe_get_int(xml, PCMK__XA_CRM_LIMIT_MAX, &max); r = g_hash_table_lookup(throttle_records, from); if(r == NULL) { r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s)); r->node = pcmk__str_copy(from); g_hash_table_insert(throttle_records, r->node, r); } r->max = max; r->mode = (enum throttle_state_e) mode; crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d", from, load2str((enum throttle_state_e) mode), max, throttle_get_job_limit(from)); } diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 3c738289aa..f5cccbb7f1 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -1,2001 +1,2001 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include // xmlNode // Check whether we have a high-resolution monotonic clock #undef PCMK__TIME_USE_CGT #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC) # define PCMK__TIME_USE_CGT # include /* clock_gettime */ #endif #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; guint interval_ms; int start_delay; int timeout_orig; int call_id; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *userdata_str; pcmk__action_result_t result; /* We can track operation queue time and run time, to be saved with the CIB * resource history (and displayed in cluster status). We need * high-resolution monotonic time for this purpose, so we use * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature * is disabled). * * However, we also need epoch timestamps for recording the time the command * last ran and the time its return value last changed, for use in time * displays (as opposed to interval calculations). We keep time_t values for * this purpose. * * The last run time is used for both purposes, so we keep redundant * monotonic and epoch values for this. Technically the two could represent * different times, but since time_t has only second resolution and the * values are used for distinct purposes, that is not significant. */ #ifdef PCMK__TIME_USE_CGT /* Recurring and systemd operations may involve more than one executor * command per operation, so they need info about the original and the most * recent. */ struct timespec t_first_run; // When op first ran struct timespec t_run; // When op most recently ran struct timespec t_first_queue; // When op was first queued struct timespec t_queue; // When op was most recently queued #endif time_t epoch_last_run; // Epoch timestamp of when op last ran time_t epoch_rcchange; // Epoch timestamp of when rc last changed bool first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean execute_resource_action(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); #ifdef PCMK__TIME_USE_CGT /*! * \internal * \brief Check whether a struct timespec has been set * * \param[in] timespec Time to check * * \return true if timespec has been set (i.e. is nonzero), false otherwise */ static inline bool time_is_set(const struct timespec *timespec) { return (timespec != NULL) && ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0)); } /* * \internal * \brief Set a timespec (and its original if unset) to the current time * * \param[out] t_current Where to store current time * \param[out] t_orig Where to copy t_current if unset */ static void get_current_time(struct timespec *t_current, struct timespec *t_orig) { clock_gettime(CLOCK_MONOTONIC, t_current); if ((t_orig != NULL) && !time_is_set(t_orig)) { *t_orig = *t_current; } } /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or unset) * * \note Can overflow on 32bit machines when the differences is around * 24 days or more. */ static int time_diff_ms(const struct timespec *now, const struct timespec *old) { int diff_ms = 0; if (time_is_set(old)) { struct timespec local_now = { 0, }; if (now == NULL) { clock_gettime(CLOCK_MONOTONIC, &local_now); now = &local_now; } diff_ms = (now->tv_sec - old->tv_sec) * 1000 + (now->tv_nsec - old->tv_nsec) / 1000000; } return diff_ms; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * \param[in,out] cmd Executor command object to reset * * \note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static inline bool action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms) { return (cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, action, pcmk__str_casei); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command to log result for * \param[in] exec_time_ms Execution time in milliseconds, if known * \param[in] queue_time_ms Queue time in milliseconds, if known */ static void log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms) { int log_level = LOG_INFO; GString *str = g_string_sized_new(100); // reasonable starting size if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { log_level = LOG_DEBUG; } g_string_append_printf(str, "%s %s (call %d", cmd->rsc_id, cmd->action, cmd->call_id); if (cmd->last_pid != 0) { g_string_append_printf(str, ", PID %d", cmd->last_pid); } switch (cmd->result.execution_status) { case PCMK_EXEC_DONE: g_string_append_printf(str, ") exited with status %d", cmd->result.exit_status); break; case PCMK_EXEC_CANCELLED: g_string_append_printf(str, ") cancelled"); break; default: pcmk__g_strcat(str, ") could not be executed: ", pcmk_exec_status_str(cmd->result.execution_status), NULL); break; } if (cmd->result.exit_reason != NULL) { pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL); } #ifdef PCMK__TIME_USE_CGT pcmk__g_strcat(str, " (execution time ", pcmk__readable_interval(exec_time_ms), NULL); if (queue_time_ms > 0) { pcmk__g_strcat(str, " after being queued ", pcmk__readable_interval(queue_time_ms), NULL); } g_string_append_c(str, ')'); #endif do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei) && pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { return PCMK_ACTION_STATUS; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = pcmk__assert_alloc(1, sizeof(lrmd_rsc_t)); pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLOPT, &rsc->call_opts); rsc->rsc_id = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID); rsc->class = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_CLASS); rsc->provider = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_PROVIDER); rsc->type = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action, rsc); // Initialize fence device probes (to return "not running") pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); return rsc; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client) { int call_options = 0; xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = pcmk__assert_alloc(1, sizeof(lrmd_cmd_t)); pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLOPT, &call_options); cmd->call_opts = call_options; cmd->client_id = pcmk__str_copy(client->id); pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLID, &cmd->call_id); pcmk__xe_get_guint(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &cmd->interval_ms); pcmk__xe_get_int(rsc_xml, PCMK__XA_LRMD_TIMEOUT, &cmd->timeout); pcmk__xe_get_int(rsc_xml, PCMK__XA_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_ORIGIN); cmd->action = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_ACTION); cmd->userdata_str = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_USERDATA_STR); cmd->rsc_id = pcmk__xe_get_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), PCMK_VALUE_BLOCK, pcmk__str_casei)) { crm_debug("Setting flag to leave pid group on timeout and " "only kill action pid for " PCMK__OP_FMT, cmd->rsc_id, cmd->action, cmd->interval_ms); cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", cmd->action, 0, SVC_ACTION_LEAVE_GROUP, "SVC_ACTION_LEAVE_GROUP"); } return cmd; } static void stop_recurring_timer(lrmd_cmd_t *cmd) { if (cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = 0; } } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { stop_recurring_timer(cmd); if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } pcmk__reset_result(&(cmd->result)); free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); pcmk__assert(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); return FALSE; } static inline void start_recurring_timer(lrmd_cmd_t *cmd) { if (!cmd || (cmd->interval_ms <= 0)) { return; } cmd->stonith_recurring_id = pcmk__create_timer(cmd->interval_ms, stonith_recurring_op_helper, cmd); } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } /*! * \internal * \brief Check whether a list already contains the equivalent of a given action * * \param[in] action_list List to search * \param[in] cmd Action to search for */ static lrmd_cmd_t * find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd) { for (const GList *item = action_list; item != NULL; item = item->next) { lrmd_cmd_t *dup = item->data; if (action_matches(cmd, dup->action, dup->interval_ms)) { return dup; } } return NULL; } static bool merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { lrmd_cmd_t * dup = NULL; bool dup_pending = true; if (cmd->interval_ms == 0) { return false; } // Search for a duplicate of this action (in-flight or not) dup = find_duplicate_action(rsc->pending_ops, cmd); if (dup == NULL) { dup_pending = false; dup = find_duplicate_action(rsc->recurring_ops, cmd); if (dup == NULL) { return false; } } /* Do not merge fencing monitors marked for cancellation, so we can reply to * the cancellation separately. */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei) && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) { return false; } /* This should not occur. If it does, we need to investigate how something * like this is possible in the controller. */ crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT "), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); // Merge new action's call ID and user data into existing action dup->first_notify_sent = false; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; free_lrmd_cmd(cmd); cmd = NULL; /* If dup is not pending, that means it has already executed at least once * and is waiting in the interval. In that case, stop waiting and initiate * a new instance now. */ if (!dup_pending) { if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stop_recurring_timer(dup); stonith_recurring_op_helper(dup); } else { services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); } } return true; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); if (merge_recurring_duplicate(rsc, cmd)) { // Equivalent of cmd has already been scheduled return; } /* The controller expects the executor to automatically cancel * recurring operations before a resource stops. */ if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = pcmk__create_timer(cmd->start_delay, start_delay_helper, cmd); } } static xmlNode * create_lrmd_reply(const char *origin, int rc, int call_id) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_LRMD_REPLY); crm_xml_add(reply, PCMK__XA_LRMD_ORIGIN, origin); - crm_xml_add_int(reply, PCMK__XA_LRMD_RC, rc); - crm_xml_add_int(reply, PCMK__XA_LRMD_CALLID, call_id); + pcmk__xe_set_int(reply, PCMK__XA_LRMD_RC, rc); + pcmk__xe_set_int(reply, PCMK__XA_LRMD_CALLID, call_id); return reply; } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; int rc; int log_level = LOG_WARNING; const char *msg = NULL; CRM_CHECK(client != NULL, return); if (client->name == NULL) { crm_trace("Skipping notification to client without name"); return; } if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) { /* We only want to notify clients of the executor IPC API. If we are * running as Pacemaker Remote, we may have clients proxied to other * IPC services in the cluster, so skip those. */ crm_trace("Skipping executor API notification to client %s", pcmk__client_name(client)); return; } rc = lrmd_server_send_notify(client, update_msg); if (rc == pcmk_rc_ok) { return; } switch (rc) { case ENOTCONN: case EPIPE: // Client exited without waiting for notification log_level = LOG_INFO; msg = "Disconnected"; break; default: msg = pcmk_rc_str(rc); break; } do_crm_log(log_level, "Could not notify client %s: %s " QB_XS " rc=%d", pcmk__client_name(client), msg, rc); } static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { xmlNode *notify = NULL; int exec_time = 0; int queue_time = 0; #ifdef PCMK__TIME_USE_CGT exec_time = time_diff_ms(NULL, &(cmd->t_run)); queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue)); #endif log_finished(cmd, exec_time, queue_time); /* If the originator requested to be notified only for changes in recurring * operation results, skip the notification if the result hasn't changed. */ if (cmd->first_notify_sent && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only) && (cmd->last_notify_rc == cmd->result.exit_status) && (cmd->last_notify_op_status == cmd->result.execution_status)) { return; } cmd->first_notify_sent = true; cmd->last_notify_rc = cmd->result.exit_status; cmd->last_notify_op_status = cmd->result.execution_status; notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY); crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__); - crm_xml_add_int(notify, PCMK__XA_LRMD_TIMEOUT, cmd->timeout); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_TIMEOUT, cmd->timeout); pcmk__xe_set_guint(notify, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms); - crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_START_DELAY, cmd->start_delay); - crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_RC, cmd->result.exit_status); - crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_OP_STATUS, - cmd->result.execution_status); - crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, cmd->call_id); - crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_DELETED, cmd->rsc_deleted); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_RSC_START_DELAY, cmd->start_delay); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_EXEC_RC, cmd->result.exit_status); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_EXEC_OP_STATUS, + cmd->result.execution_status); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_CALLID, cmd->call_id); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_RSC_DELETED, cmd->rsc_deleted); pcmk__xe_set_time(notify, PCMK__XA_LRMD_RUN_TIME, cmd->epoch_last_run); pcmk__xe_set_time(notify, PCMK__XA_LRMD_RCCHANGE_TIME, cmd->epoch_rcchange); #ifdef PCMK__TIME_USE_CGT - crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_TIME, exec_time); - crm_xml_add_int(notify, PCMK__XA_LRMD_QUEUE_TIME, queue_time); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_EXEC_TIME, exec_time); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, PCMK__XA_LRMD_OP, LRMD_OP_RSC_EXEC); crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, PCMK__XA_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, PCMK__XA_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason); if (cmd->result.action_stderr != NULL) { crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT, cmd->result.action_stderr); } else if (cmd->result.action_stdout != NULL) { crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT, cmd->result.action_stdout); } if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = pcmk__xe_create(notify, PCMK__XE_ATTRIBUTES); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if ((cmd->client_id != NULL) && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) { pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id); if (client != NULL) { send_client_notify(client->id, client, notify); } } else { pcmk__foreach_ipc_client(send_client_notify, notify); } pcmk__xml_free(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (pcmk__ipc_client_count() != 0) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID); const char *op = pcmk__xe_get(request, PCMK__XA_LRMD_OP); pcmk__xe_get_int(request, PCMK__XA_LRMD_CALLID, &call_id); notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY); crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__); - crm_xml_add_int(notify, PCMK__XA_LRMD_RC, rc); - crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, call_id); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_RC, rc); + pcmk__xe_set_int(notify, PCMK__XA_LRMD_CALLID, call_id); crm_xml_add(notify, PCMK__XA_LRMD_OP, op); crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, rsc_id); pcmk__foreach_ipc_client(send_client_notify, notify); pcmk__xml_free(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->last_pid = 0; #ifdef PCMK__TIME_USE_CGT memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); #endif cmd->epoch_last_run = 0; pcmk__reset_result(&(cmd->result)); cmd->result.execution_status = PCMK_EXEC_DONE; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if ((cmd->interval_ms != 0) && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval_ms == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } struct notify_new_client_data { xmlNode *notify; pcmk__client_t *new_client; }; static void notify_one_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *client = value; struct notify_new_client_data *data = user_data; if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) { send_client_notify(key, (gpointer) client, (gpointer) data->notify); } } void notify_of_new_client(pcmk__client_t *new_client) { struct notify_new_client_data data; data.new_client = new_client; data.notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY); crm_xml_add(data.notify, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data.notify, PCMK__XA_LRMD_OP, LRMD_OP_NEW_CLIENT); pcmk__foreach_ipc_client(notify_one_client, &data); pcmk__xml_free(data.notify); } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (pcmk_all_flags_set(rsc->call_opts, lrmd_opt_drop_recurring)) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; enum ocf_exitcode code; #ifdef PCMK__TIME_USE_CGT const char *rclass = NULL; bool goagain = false; int time_sum = 0; int timeout_left = 0; int delay = 0; #endif if (!cmd) { crm_err("Completed executor action (%s) does not match any known operations", action->id); return; } #ifdef PCMK__TIME_USE_CGT if (cmd->result.exit_status != action->rc) { cmd->epoch_rcchange = time(NULL); } #endif cmd->last_pid = action->pid; // Cast variable instead of function return to keep compilers happy code = services_result2ocf(action->standard, cmd->action, action->rc); pcmk__set_result(&(cmd->result), (int) code, action->status, services__exit_reason(action)); rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; #ifdef PCMK__TIME_USE_CGT if (rsc != NULL) { rclass = rsc->class; #if PCMK__ENABLE_SERVICE if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { rclass = resources_find_service_class(rsc->type); } #endif } if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { goto finalize; } if (pcmk__result_ok(&(cmd->result)) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, PCMK_ACTION_STOP, NULL)) { /* Getting results for when a start or stop action completes is now * handled by watching for JobRemoved() signals from systemd and * reacting to them. So, we can bypass the rest of the code in this * function for those actions, and simply finalize cmd. * * @TODO When monitors are handled in the same way, this function * can either be drastically simplified or done away with entirely. */ services__copy_result(action, &(cmd->result)); goto finalize; } else if (cmd->result.execution_status == PCMK_EXEC_PENDING && pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) && cmd->interval_ms == 0 && cmd->real_action == NULL) { /* If the state is Pending at the time of probe, execute follow-up monitor. */ goagain = true; cmd->real_action = cmd->action; cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR); } else if (cmd->real_action != NULL) { // This is follow-up monitor to check whether start/stop/probe(monitor) completed if (cmd->result.execution_status == PCMK_EXEC_PENDING) { goagain = true; } else if (pcmk__result_ok(&(cmd->result)) && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, pcmk__str_casei)) { goagain = true; } else { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s systemd %s is now complete (elapsed=%dms, " "remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, crm_exit_str(cmd->result.exit_status), cmd->result.exit_status); cmd_original_times(cmd); // Monitors may return "not running", but start/stop shouldn't if ((cmd->result.execution_status == PCMK_EXEC_DONE) && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) { if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START, pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR; } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP, pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; } } } } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) && (cmd->interval_ms > 0)) { /* For monitors, excluding follow-up monitors, */ /* if the pending state persists from the first notification until its timeout, */ /* it will be treated as a timeout. */ if ((cmd->result.execution_status == PCMK_EXEC_PENDING) && (cmd->last_notify_op_status == PCMK_EXEC_PENDING)) { int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000)); if (time_left >= 0) { crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout " "(first pending notification=%s timeout=%ds)", cmd->rsc_id, cmd->action, cmd->result.exit_status, pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig); pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Investigate reason for timeout, and adjust " "configured operation timeout if necessary"); cmd_original_times(cmd); } } } if (!goagain) { goto finalize; } time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); timeout_left = cmd->timeout_orig - time_sum; delay = cmd->timeout_orig / 10; if (delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if (pcmk__result_ok(&(cmd->result))) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed: %s: Re-scheduling (remaining " "timeout %s) " QB_XS " exitstatus=%d elapsed=%dms start_delay=%dms)", cmd->rsc_id, cmd->action, crm_exit_str(cmd->result.exit_status), pcmk__readable_interval(timeout_left), cmd->result.exit_status, time_sum, delay); } cmd_reset(cmd); if (rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, (cmd->real_action? cmd->real_action : cmd->action), cmd->result.exit_status, time_sum, timeout_left); pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Investigate reason for timeout, and adjust " "configured operation timeout if necessary"); cmd_original_times(cmd); } #endif finalize: pcmk__set_result_output(&(cmd->result), services__grab_stdout(action), services__grab_stderr(action)); cmd_finalize(cmd, rsc); } /*! * \internal * \brief Process the result of a fence device action (start, stop, or monitor) * * \param[in,out] cmd Fence device action that completed * \param[in] exit_status Fencer API exit status for action * \param[in] execution_status Fencer API execution status for action * \param[in] exit_reason Human-friendly detail, if action failed */ static void stonith_action_complete(lrmd_cmd_t *cmd, int exit_status, enum pcmk_exec_status execution_status, const char *exit_reason) { // This can be NULL if resource was removed before command completed lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); // Simplify fencer exit status to uniform exit status if (exit_status != CRM_EX_OK) { exit_status = PCMK_OCF_UNKNOWN_ERROR; } if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) { /* An in-flight fence action was cancelled. The execution status is * already correct, so don't overwrite it. */ execution_status = PCMK_EXEC_CANCELLED; } else { /* Some execution status codes have specific meanings for the fencer * that executor clients may not expect, so map them to a simple error * status. */ switch (execution_status) { case PCMK_EXEC_NOT_CONNECTED: case PCMK_EXEC_INVALID: execution_status = PCMK_EXEC_ERROR; break; case PCMK_EXEC_NO_FENCE_DEVICE: /* This should be possible only for probes in practice, but * interpret for all actions to be safe. */ if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_none)) { exit_status = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { exit_status = PCMK_OCF_OK; } else { exit_status = PCMK_OCF_NOT_INSTALLED; } execution_status = PCMK_EXEC_ERROR; break; case PCMK_EXEC_NOT_SUPPORTED: exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE; break; default: break; } } pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason); // Certain successful actions change the known state of the resource if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); // "running" } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running" } } /* The recurring timer should not be running at this point in any case, but * as a failsafe, stop it if it is. */ stop_recurring_timer(cmd); /* Reschedule this command if appropriate. If a recurring command is *not* * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will * not be removed from recurring_ops by cmd_finalize(). */ if (rsc && (cmd->interval_ms > 0) && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) { start_recurring_timer(cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { if ((data == NULL) || (data->userdata == NULL)) { crm_err("Ignoring fence action result: " "Invalid callback arguments (bug?)"); } else { stonith_action_complete((lrmd_cmd_t *) data->userdata, stonith__exit_status(data), stonith__execution_status(data), stonith__exit_reason(data)); } } void stonith_connection_failed(void) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; crm_warn("Connection to fencer lost (any pending operations for " "fence devices will be considered failed)"); g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) { if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) { continue; } /* If we registered this fence device, we don't know whether the * fencer still has the registration or not. Cause future probes to * return an error until the resource is stopped or started * successfully. This is especially important if the controller also * went away (possibly due to a cluster layer restart) and won't * receive our client notification of any monitors finalized below. */ if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, "Lost connection to fencer"); } // Consider any active, pending, or recurring operations as failed for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) { lrmd_cmd_t *cmd = op->data; /* This won't free a recurring op but instead restart its timer. * If cmd is rsc->active, this will set rsc->active to NULL, so we * don't have to worry about finalizing it a second time below. */ stonith_action_complete(cmd, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, "Lost connection to fencer"); } if (rsc->active != NULL) { rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active); } while (rsc->pending_ops != NULL) { // This will free the op and remove it from rsc->pending_ops stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, "Lost connection to fencer"); } } } /*! * \internal * \brief Execute a stonith resource "start" action * * Start a stonith resource by registering it with the fencer. * (Stonith agents don't have a start command.) * * \param[in,out] stonith_api Connection to fencer * \param[in] rsc Stonith resource to start * \param[in] cmd Start command to execute * * \return pcmk_ok on success, -errno otherwise */ static int execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc, const lrmd_cmd_t *cmd) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; int rc = pcmk_ok; // Convert command parameters to stonith API key/values if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* The fencer will automatically register devices via CIB notifications * when the CIB changes, but to avoid a possible race condition between * the fencer receiving the notification and the executor requesting that * resource, the executor registers the device as well. The fencer knows how * to handle duplicate registrations. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); return rc; } /*! * \internal * \brief Execute a stonith resource "stop" action * * Stop a stonith resource by unregistering it with the fencer. * (Stonith agents don't have a stop command.) * * \param[in,out] stonith_api Connection to fencer * \param[in] rsc Stonith resource to stop * * \return pcmk_ok on success, -errno otherwise */ static inline int execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc) { /* @TODO Failure would indicate a problem communicating with fencer; * perhaps we should try reconnecting and retrying a few times? */ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, rsc->rsc_id); } /*! * \internal * \brief Initiate a stonith resource agent recurring "monitor" action * * \param[in,out] stonith_api Connection to fencer * \param[in,out] rsc Stonith resource to monitor * \param[in] cmd Monitor command being executed * * \return pcmk_ok if monitor was successfully initiated, -errno otherwise */ static inline int execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, pcmk__timeout_ms2s(cmd->timeout)); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); if (rc == TRUE) { rsc->active = cmd; rc = pcmk_ok; } else { rc = -pcmk_err_generic; } return rc; } static void execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = 0; bool do_monitor = FALSE; stonith_t *stonith_api = get_stonith_connection(); if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei) && (cmd->interval_ms == 0)) { // Probes don't require a fencer connection stonith_action_complete(cmd, rsc->fence_probe_result.exit_status, rsc->fence_probe_result.execution_status, rsc->fence_probe_result.exit_reason); return; } else if (stonith_api == NULL) { stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED, "No connection to fencer"); return; } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) { rc = execd_stonith_start(stonith_api, rsc, cmd); if (rc == pcmk_ok) { do_monitor = TRUE; } } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { rc = execd_stonith_stop(stonith_api, rsc); } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { do_monitor = TRUE; } else { stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR, "Invalid fence device action (bug?)"); return; } if (do_monitor) { rc = execd_stonith_monitor(stonith_api, rsc, cmd); if (rc == pcmk_ok) { // Don't clean up yet, we will find out result of the monitor later return; } } stonith_action_complete(cmd, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc))); } static void execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; pcmk__assert((rsc != NULL) && (cmd != NULL)); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); params_copy = pcmk__str_table_dup(cmd->params); action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval_ms, cmd->timeout, params_copy, cmd->service_flags); if (action == NULL) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, strerror(ENOMEM)); cmd_finalize(cmd, rsc); return; } if (action->rc != PCMK_OCF_UNKNOWN) { services__copy_result(action, &(cmd->result)); services_action_free(action); cmd_finalize(cmd, rsc); return; } action->cb_data = cmd; if (services_action_async(action, action_complete)) { /* The services library has taken responsibility for the action. It * could be pending, blocked, or merged into a duplicate recurring * action, in which case the action callback (action_complete()) * will be called when the action completes, otherwise the callback has * already been called. * * action_complete() calls cmd_finalize() which can free cmd, so cmd * cannot be used here. */ } else { /* This is a recurring action that is not being cancelled and could not * be initiated. It has been rescheduled, and the action callback * (action_complete()) has been called, which in this case has already * called cmd_finalize(), which in this case should only reset (not * free) cmd. */ services__copy_result(action, &(cmd->result)); services_action_free(action); } } static gboolean execute_resource_action(gpointer user_data) { lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data; lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_run), &(cmd->t_first_run)); #endif cmd->epoch_last_run = time(NULL); } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval_ms) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { execute_stonith_action(rsc, cmd); } else { execute_nonstonith_action(rsc, cmd); } return TRUE; } void free_rsc(gpointer data) { GList *gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei); gIter = rsc->pending_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval_ms); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, xmlNode **reply) { int rc = pcmk_ok; time_t now = time(NULL); const char *protocol_version = pcmk__xe_get(request, PCMK__XA_LRMD_PROTOCOL_VERSION); const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); if (compare_version(protocol_version, LRMD_COMPATIBLE_PROTOCOL) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_COMPATIBLE_PROTOCOL, protocol_version); rc = -EPROTO; } if (pcmk__xe_attr_is_true(request, PCMK__XA_LRMD_IS_IPC_PROVIDER)) { #ifdef PCMK__COMPILE_REMOTE if ((client->remote != NULL) && pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { const char *op = pcmk__xe_get(request, PCMK__XA_LRMD_OP); // This is a remote connection from a cluster node's controller ipc_proxy_add_provider(client); /* @TODO Allowing multiple proxies makes no sense given that clients * have no way to choose between them. Maybe always use the most * recent one and switch any existing IPC connections to use it, * by iterating over ipc_clients here, and if client->id doesn't * match the client's userdata, replace the userdata with the new * ID. After the iteration, call lrmd_remote_client_destroy() on any * of the replaced values in ipc_providers. */ /* If this was a register operation, also ask for new schema files but * only if it's supported by the protocol version. */ if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none) && LRMD_SUPPORTS_SCHEMA_XFER(protocol_version)) { remoted_request_cib_schema_files(); } } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif } *reply = create_lrmd_reply(__func__, rc, call_id); crm_xml_add(*reply, PCMK__XA_LRMD_OP, CRM_OP_REGISTER); crm_xml_add(*reply, PCMK__XA_LRMD_CLIENTID, client->id); crm_xml_add(*reply, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); pcmk__xe_set_time(*reply, PCMK__XA_UPTIME, now - start_time); if (start_state) { crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state); } return rc; } static int process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) && pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) { crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Cached agent information for '%s'", rsc->rsc_id); return rc; } static xmlNode * process_lrmd_get_rsc_info(xmlNode *request, int call_id) { int rc = pcmk_ok; xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; if (rsc_id == NULL) { rc = -ENODEV; } else { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Agent information for '%s' not in cache", rsc_id); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); if (rsc) { crm_xml_add(reply, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, PCMK__XA_LRMD_CLASS, rsc->class); crm_xml_add(reply, PCMK__XA_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, PCMK__XA_LRMD_TYPE, rsc->type); } return reply; } static int process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Ignoring unregistration of resource '%s', which is not registered", rsc_id); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation (%p) still in progress for unregistered resource %s", rsc->active, rsc_id); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, guint interval_ms) { GList *gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval_ms) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval_ms == 0) { continue; } if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request) { xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc, "//" PCMK__XE_LRMD_RSC, LOG_ERR); const char *rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID); const char *action = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ACTION); guint interval_ms = 0; pcmk__xe_get_guint(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &interval_ms); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval_ms); } static void add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc) { xmlNode *rsc_xml = pcmk__xe_create(reply, PCMK__XE_LRMD_RSC); crm_xml_add(rsc_xml, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id); for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) { lrmd_cmd_t *cmd = item->data; xmlNode *op_xml = pcmk__xe_create(rsc_xml, PCMK__XE_LRMD_RSC_OP); crm_xml_add(op_xml, PCMK__XA_LRMD_RSC_ACTION, pcmk__s(cmd->real_action, cmd->action)); pcmk__xe_set_guint(op_xml, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms); - crm_xml_add_int(op_xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout_orig); + pcmk__xe_set_int(op_xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout_orig); } } static xmlNode * process_lrmd_get_recurring(xmlNode *request, int call_id) { int rc = pcmk_ok; const char *rsc_id = NULL; lrmd_rsc_t *rsc = NULL; xmlNode *reply = NULL; xmlNode *rsc_xml = NULL; // Resource ID is optional rsc_xml = pcmk__xe_first_child(request, PCMK__XE_LRMD_CALLDATA, NULL, NULL); if (rsc_xml) { rsc_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC, NULL, NULL); } if (rsc_xml) { rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID); } // If resource ID is specified, resource must exist if (rsc_id != NULL) { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); // If resource ID is not specified, check all resources if (rsc_id == NULL) { GHashTableIter iter; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rsc)) { add_recurring_op_xml(reply, rsc); } } else if (rsc) { add_recurring_op_xml(reply, rsc); } return reply; } void process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; int call_id = 0; const char *op = pcmk__xe_get(request, PCMK__XA_LRMD_OP); int do_reply = 0; int do_notify = 0; xmlNode *reply = NULL; /* Certain IPC commands may be done only by privileged users (i.e. root or * hacluster), because they would otherwise provide a means of bypassing * ACLs. */ bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged); crm_trace("Processing %s operation from %s", op, client->id); pcmk__xe_get_int(request, PCMK__XA_LRMD_CALLID, &call_id); if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) { #ifdef PCMK__COMPILE_REMOTE if (allowed) { ipc_proxy_forward_client(client, request); } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif do_reply = 1; } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { rc = process_lrmd_signon(client, request, call_id, &reply); do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_rsc_info(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_cancel(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) { do_notify = 1; do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) { if (allowed) { xmlNode *wrapper = pcmk__xe_first_child(request, PCMK__XE_LRMD_CALLDATA, NULL, NULL); xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); const char *timeout = NULL; CRM_LOG_ASSERT(data != NULL); timeout = pcmk__xe_get(data, PCMK__XA_LRMD_WATCHDOG); pcmk__valid_stonith_watchdog_timeout(timeout); } else { rc = -EACCES; } } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_alert_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_recurring(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown IPC request '%s' from client %s", op, pcmk__client_name(client)); } if (rc == -EACCES) { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", op, pcmk__client_name(client)); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { int send_rc = pcmk_rc_ok; if (reply == NULL) { reply = create_lrmd_reply(__func__, rc, call_id); } send_rc = lrmd_server_send_reply(client, id, reply); pcmk__xml_free(reply); if (send_rc != pcmk_rc_ok) { crm_warn("Reply to client %s failed: %s " QB_XS " rc=%d", pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc); } } if (do_notify) { send_generic_notify(rc, request); } } diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index 4b05d766c4..26c2d380d2 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -1,575 +1,575 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" #ifdef PCMK__COMPILE_REMOTE # define EXECD_TYPE "remote" # define EXECD_NAME PCMK__SERVER_REMOTED # define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes" #else # define EXECD_TYPE "local" # define EXECD_NAME PCMK__SERVER_EXECD # define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes" #endif static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static stonith_t *stonith_api = NULL; int lrmd_call_id = 0; time_t start_time; static struct { gchar **log_files; #ifdef PCMK__COMPILE_REMOTE gchar *port; #endif // PCMK__COMPILE_REMOTE } options; #ifdef PCMK__COMPILE_REMOTE /* whether shutdown request has been sent */ static gboolean shutting_down = FALSE; #endif static void exit_executor(void); static void stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) { stonith_api->state = stonith_disconnected; stonith_connection_failed(); } stonith_t * get_stonith_connection(void) { if (stonith_api && stonith_api->state == stonith_disconnected) { stonith_api_delete(stonith_api); stonith_api = NULL; } if (stonith_api == NULL) { int rc = pcmk_ok; stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return NULL; } rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 10 attempts: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); stonith_api_delete(stonith_api); stonith_api = NULL; } else { stonith_api_operations_t *cmds = stonith_api->cmds; cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_DISCONNECT, stonith_connection_destroy_cb); } } return stonith_api; } static int32_t lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } static void lrmd_ipc_created(qb_ipcs_connection_t * c) { pcmk__client_t *new_client = pcmk__find_client(c); crm_trace("Connection %p", c); pcmk__assert(new_client != NULL); /* Now that the connection is offically established, alert * the other clients a new connection exists. */ notify_of_new_client(new_client); } static int32_t lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *request = pcmk__client_data2xml(client, data, &id, &flags); CRM_CHECK(client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); return FALSE); CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); return FALSE); if (!request) { return 0; } /* @TODO functionize some of this to reduce duplication with * lrmd_remote_client_msg() */ if (!client->name) { const char *value = pcmk__xe_get(request, PCMK__XA_LRMD_CLIENTNAME); if (value == NULL) { client->name = pcmk__itoa(pcmk__client_pid(c)); } else { client->name = pcmk__str_copy(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, PCMK__XA_LRMD_CLIENTID, client->id); crm_xml_add(request, PCMK__XA_LRMD_CLIENTNAME, client->name); - crm_xml_add_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id); + pcmk__xe_set_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); pcmk__xml_free(request); return 0; } /*! * \internal * \brief Free a client connection, and exit if appropriate * * \param[in,out] client Client connection to free */ void lrmd_client_destroy(pcmk__client_t *client) { pcmk__free_client(client); #ifdef PCMK__COMPILE_REMOTE /* If we were waiting to shut down, we can now safely do so * if there are no more proxied IPC providers */ if (shutting_down && (ipc_proxy_get_provider() == NULL)) { exit_executor(); } #endif } static int32_t lrmd_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); client_disconnect_cleanup(client->id); #ifdef PCMK__COMPILE_REMOTE ipc_proxy_remove_provider(client); #endif lrmd_client_destroy(client); return 0; } static void lrmd_ipc_destroy(qb_ipcs_connection_t * c) { lrmd_ipc_closed(c); crm_trace("Connection %p", c); } static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { .connection_accept = lrmd_ipc_accept, .connection_created = lrmd_ipc_created, .msg_process = lrmd_ipc_dispatch, .connection_closed = lrmd_ipc_closed, .connection_destroyed = lrmd_ipc_destroy }; // \return Standard Pacemaker return code int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply) { crm_trace("Sending reply (%d) to client (%s)", id, client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: return pcmk__ipc_send_xml(client, id, reply, FALSE); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: return lrmd__remote_send_xml(client->remote, reply, id, "reply"); #endif default: crm_err("Could not send reply: unknown type for client %s " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } // \return Standard Pacemaker return code int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg) { crm_trace("Sending notification to client (%s)", client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: if (client->ipcs == NULL) { crm_trace("Could not notify local client: disconnected"); return ENOTCONN; } return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: if (client->remote == NULL) { crm_trace("Could not notify remote client: disconnected"); return ENOTCONN; } else { return lrmd__remote_send_xml(client->remote, msg, 0, "notify"); } #endif default: crm_err("Could not notify client %s with unknown transport " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } /*! * \internal * \brief Clean up and exit immediately */ static void exit_executor(void) { const guint nclients = pcmk__ipc_client_count(); crm_info("Terminating with %d client%s", nclients, pcmk__plural_s(nclients)); stonith_api_delete(stonith_api); if (ipcs) { mainloop_del_ipc_server(ipcs); } #ifdef PCMK__COMPILE_REMOTE execd_stop_tls_server(); ipc_proxy_cleanup(); #endif pcmk__client_cleanup(); if (mainloop) { lrmd_drain_alerts(mainloop); } g_hash_table_destroy(rsc_list); // @TODO End mainloop instead so all cleanup is done crm_exit(CRM_EX_OK); } /*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef PCMK__COMPILE_REMOTE pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { crm_crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host, then wait for all proxy * hosts to disconnect (which ensures that all resources have been * stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ execd_stop_tls_server(); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif exit_executor(); } /*! * \internal * \brief Log a shutdown acknowledgment */ void handle_shutdown_ack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("IPC proxy provider acknowledged shutdown request"); return; } #endif crm_debug("Ignoring unexpected shutdown acknowledgment " "from IPC proxy provider"); } /*! * \internal * \brief Handle rejection of shutdown request */ void handle_shutdown_nack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("Exiting immediately after IPC proxy provider " "indicated no resources will be stopped"); exit_executor(); return; } #endif crm_debug("Ignoring unexpected shutdown rejection from IPC proxy provider"); } static GOptionEntry entries[] = { { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, "Send logs to the additional named logfile", NULL }, #ifdef PCMK__COMPILE_REMOTE { "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port, "Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL }, #endif // PCMK__COMPILE_REMOTE { NULL } }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv, char **envp) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; const char *option = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = NULL; gchar **processed_args = NULL; GOptionContext *context = NULL; #ifdef PCMK__COMPILE_REMOTE // If necessary, create PID 1 now before any file descriptors are opened remoted_spawn_pidone(argc, argv, envp); #endif args = pcmk__new_common_args(SUMMARY); #ifdef PCMK__COMPILE_REMOTE processed_args = pcmk__cmdline_preproc(argv, "lp"); #else processed_args = pcmk__cmdline_preproc(argv, "l"); #endif // PCMK__COMPILE_REMOTE context = build_arg_context(args, &output_group); crm_log_preinit(EXECD_NAME, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } // Open additional log files if (options.log_files != NULL) { for (gchar **fname = options.log_files; *fname != NULL; fname++) { rc = pcmk__add_logfile(*fname); if (rc != pcmk_rc_ok) { out->err(out, "Logging to %s is disabled: %s", *fname, pcmk_rc_str(rc)); } } } pcmk__cli_init_logging(EXECD_NAME, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); // ocf_log() (in resource-agents) uses the capitalized env options below option = pcmk__env_option(PCMK__ENV_LOGFACILITY); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches) && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) { pcmk__set_env_option("LOGFACILITY", option, true); } option = pcmk__env_option(PCMK__ENV_LOGFILE); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__set_env_option("LOGFILE", option, true); if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) { pcmk__set_env_option("DEBUGLOG", option, true); } } #ifdef PCMK__COMPILE_REMOTE if (options.port != NULL) { pcmk__set_env_option(PCMK__ENV_REMOTE_PORT, options.port, false); } #endif // PCMK__COMPILE_REMOTE start_time = time(NULL); crm_notice("Starting Pacemaker " EXECD_TYPE " executor"); /* The presence of this variable allegedly controls whether child * processes like httpd will try and use Systemd's sd_notify * API */ unsetenv("NOTIFY_SOCKET"); { // Temporary directory for resource agent use (leave owned by root) int rc = pcmk__build_path(PCMK__OCF_TMP_DIR, 0755); if (rc != pcmk_rc_ok) { crm_warn("Could not create resource agent temporary directory " PCMK__OCF_TMP_DIR ": %s", pcmk_rc_str(rc)); } } rsc_list = pcmk__strkey_table(NULL, free_rsc); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); exit_code = CRM_EX_FATAL; goto done; } #ifdef PCMK__COMPILE_REMOTE if (lrmd_init_remote_tls_server() < 0) { crm_err("Failed to create TLS listener: shutting down and staying down"); exit_code = CRM_EX_FATAL; goto done; } ipc_proxy_init(); #endif mainloop_add_signal(SIGTERM, lrmd_shutdown); mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections"); crm_notice("OCF resource agent search path is %s", PCMK__OCF_RA_PATH); g_main_loop_run(mainloop); /* should never get here */ exit_executor(); done: g_strfreev(options.log_files); #ifdef PCMK__COMPILE_REMOTE g_free(options.port); #endif // PCMK__COMPILE_REMOTE g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c index 1f4aa28531..0abdf78524 100644 --- a/daemons/execd/remoted_proxy.c +++ b/daemons/execd/remoted_proxy.c @@ -1,481 +1,483 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include "pacemaker-execd.h" #include #include #include #include #include #include #include #include static qb_ipcs_service_t *cib_ro = NULL; static qb_ipcs_service_t *cib_rw = NULL; static qb_ipcs_service_t *cib_shm = NULL; static qb_ipcs_service_t *attrd_ipcs = NULL; static qb_ipcs_service_t *crmd_ipcs = NULL; static qb_ipcs_service_t *stonith_ipcs = NULL; static qb_ipcs_service_t *pacemakerd_ipcs = NULL; // An IPC provider is a cluster node controller connecting as a client static GList *ipc_providers = NULL; /* ipc clients == things like cibadmin, crm_resource, connecting locally * * @TODO This should be unnecessary (pcmk__foreach_ipc_client() should be * sufficient) */ static GHashTable *ipc_clients = NULL; /*! * \internal * \brief Get an IPC proxy provider * * \return Pointer to a provider if one exists, NULL otherwise * * \note Grab the first provider, which is the most recent connection. That way, * if we haven't yet timed out an old, failed connection, we don't try to * use it. */ pcmk__client_t * ipc_proxy_get_provider(void) { return ipc_providers? (pcmk__client_t *) (ipc_providers->data) : NULL; } /*! * \internal * \brief Accept a client connection on a proxy IPC server * * \param[in] c Client's IPC connection * \param[in] uid Client's user ID * \param[in] gid Client's group ID * \param[in] ipc_channel Name of IPC server to proxy * * \return pcmk_ok on success, -errno on error */ static int32_t ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc_channel) { pcmk__client_t *client; pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); xmlNode *msg; if (ipc_proxy == NULL) { crm_warn("Cannot proxy IPC connection from uid %d gid %d to %s " "because not connected to cluster", uid, gid, ipc_channel); return -EREMOTEIO; } /* This new client is a local IPC client on a Pacemaker Remote controlled * node, needing to access cluster node IPC services. */ client = pcmk__new_client(c, uid, gid); if (client == NULL) { return -ENOMEM; } /* This ipc client is bound to a single ipc provider. If the * provider goes away, this client is disconnected */ client->userdata = pcmk__str_copy(ipc_proxy->id); client->name = crm_strdup_printf("proxy-%s-%d-%.8s", ipc_channel, client->pid, client->id); /* Allow remote executor to distinguish between proxied local clients and * actual executor API clients */ pcmk__set_client_flags(client, pcmk__client_to_proxy); g_hash_table_insert(ipc_clients, client->id, client); msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_NEW); crm_xml_add(msg, PCMK__XA_LRMD_IPC_SERVER, ipc_channel); crm_xml_add(msg, PCMK__XA_LRMD_IPC_SESSION, client->id); lrmd_server_send_notify(ipc_proxy, msg); pcmk__xml_free(msg); crm_debug("Accepted IPC proxy connection (session ID %s) " "from uid %d gid %d on channel %s", client->id, uid, gid, ipc_channel); return 0; } static int32_t crmd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, CRM_SYSTEM_CRMD); } static int32_t attrd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, PCMK__VALUE_ATTRD); } static int32_t stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, "stonith-ng"); } static int32_t pacemakerd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return -EREMOTEIO; } static int32_t cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RW); } static int32_t cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RO); } void ipc_proxy_forward_client(pcmk__client_t *ipc_proxy, xmlNode *xml) { const char *session = pcmk__xe_get(xml, PCMK__XA_LRMD_IPC_SESSION); const char *msg_type = pcmk__xe_get(xml, PCMK__XA_LRMD_IPC_OP); xmlNode *wrapper = pcmk__xe_first_child(xml, PCMK__XE_LRMD_IPC_MSG, NULL, NULL); xmlNode *msg = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); pcmk__client_t *ipc_client; int rc = pcmk_rc_ok; if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_ACK, pcmk__str_casei)) { handle_shutdown_ack(); return; } if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK, pcmk__str_casei)) { handle_shutdown_nack(); return; } ipc_client = pcmk__find_client_by_id(session); if (ipc_client == NULL) { xmlNode *msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_SESSION, session); lrmd_server_send_notify(ipc_proxy, msg); pcmk__xml_free(msg); return; } /* This is an event or response from the ipc provider * going to the local ipc client. * * Looking at the chain of events. * * -----remote node----------------|---- cluster node ------ * ipc_client <--1--> this code * <--2--> pacemaker-controld:remote_proxy_cb/remote_proxy_relay_event() * <--3--> ipc server * * This function is receiving a msg from connection 2 * and forwarding it to connection 1. */ if (pcmk__str_eq(msg_type, LRMD_IPC_OP_EVENT, pcmk__str_casei)) { crm_trace("Sending event to %s", ipc_client->id); rc = pcmk__ipc_send_xml(ipc_client, 0, msg, crm_ipc_server_event); } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_RESPONSE, pcmk__str_casei)) { int msg_id = 0; pcmk__xe_get_int(xml, PCMK__XA_LRMD_IPC_MSG_ID, &msg_id); crm_trace("Sending response to %d - %s", ipc_client->request_id, ipc_client->id); rc = pcmk__ipc_send_xml(ipc_client, msg_id, msg, FALSE); CRM_LOG_ASSERT(msg_id == ipc_client->request_id); ipc_client->request_id = 0; } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_DESTROY, pcmk__str_casei)) { qb_ipcs_disconnect(ipc_client->ipcs); } else { crm_err("Unknown ipc proxy msg type %s" , msg_type); } if (rc != pcmk_rc_ok) { crm_warn("Could not proxy IPC to client %s: %s " QB_XS " rc=%d", ipc_client->id, pcmk_rc_str(rc), rc); } } static int32_t ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); pcmk__client_t *ipc_proxy = pcmk__find_client_by_id(client->userdata); xmlNode *wrapper = NULL; xmlNode *request = NULL; xmlNode *msg = NULL; if (!ipc_proxy) { qb_ipcs_disconnect(client->ipcs); return 0; } /* This is a request from the local ipc client going * to the ipc provider. * * Looking at the chain of events. * * -----remote node----------------|---- cluster node ------ * ipc_client <--1--> this code * <--2--> pacemaker-controld:remote_proxy_dispatch_internal() * <--3--> ipc server * * This function is receiving a request from connection * 1 and forwarding it to connection 2. */ request = pcmk__client_data2xml(client, data, &id, &flags); if (!request) { return 0; } CRM_CHECK(client != NULL, crm_err("Invalid client"); pcmk__xml_free(request); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); pcmk__xml_free(request); return FALSE); /* This ensures that synced request/responses happen over the event channel * in the controller, allowing the controller to process the messages async. */ pcmk__set_ipc_flags(flags, pcmk__client_name(client), crm_ipc_proxied); client->request_id = id; msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_REQUEST); crm_xml_add(msg, PCMK__XA_LRMD_IPC_SESSION, client->id); crm_xml_add(msg, PCMK__XA_LRMD_IPC_CLIENT, pcmk__client_name(client)); crm_xml_add(msg, PCMK__XA_LRMD_IPC_USER, client->user); - crm_xml_add_int(msg, PCMK__XA_LRMD_IPC_MSG_ID, id); - crm_xml_add_int(msg, PCMK__XA_LRMD_IPC_MSG_FLAGS, flags); + pcmk__xe_set_int(msg, PCMK__XA_LRMD_IPC_MSG_ID, id); + + // @TODO Use different setter for uint32_t + pcmk__xe_set_int(msg, PCMK__XA_LRMD_IPC_MSG_FLAGS, flags); wrapper = pcmk__xe_create(msg, PCMK__XE_LRMD_IPC_MSG); pcmk__xml_copy(wrapper, request); lrmd_server_send_notify(ipc_proxy, msg); pcmk__xml_free(request); pcmk__xml_free(msg); return 0; } /*! * \internal * \brief Notify a proxy provider that we wish to shut down * * \param[in,out] ipc_proxy IPC client connection to proxy provider * * \return 0 on success, -1 on error */ int ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy) { xmlNode *msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); int rc; crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_REQ); /* We don't really have a session, but the controller needs this attribute * to recognize this as proxy communication. */ crm_xml_add(msg, PCMK__XA_LRMD_IPC_SESSION, "0"); rc = (lrmd_server_send_notify(ipc_proxy, msg) != pcmk_rc_ok)? -1 : 0; pcmk__xml_free(msg); return rc; } static int32_t ipc_proxy_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); pcmk__client_t *ipc_proxy; if (client == NULL) { return 0; } ipc_proxy = pcmk__find_client_by_id(client->userdata); crm_trace("Connection %p", c); if (ipc_proxy) { xmlNode *msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_SESSION, client->id); lrmd_server_send_notify(ipc_proxy, msg); pcmk__xml_free(msg); } g_hash_table_remove(ipc_clients, client->id); free(client->userdata); client->userdata = NULL; pcmk__free_client(client); return 0; } static void ipc_proxy_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); ipc_proxy_closed(c); } static struct qb_ipcs_service_handlers crmd_proxy_callbacks = { .connection_accept = crmd_proxy_accept, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers attrd_proxy_callbacks = { .connection_accept = attrd_proxy_accept, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers stonith_proxy_callbacks = { .connection_accept = stonith_proxy_accept, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers pacemakerd_proxy_callbacks = { .connection_accept = pacemakerd_proxy_accept, .connection_created = NULL, .msg_process = NULL, .connection_closed = NULL, .connection_destroyed = NULL }; static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = { .connection_accept = cib_proxy_accept_ro, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = { .connection_accept = cib_proxy_accept_rw, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; void ipc_proxy_add_provider(pcmk__client_t *ipc_proxy) { // Prepending ensures the most recent connection is always first ipc_providers = g_list_prepend(ipc_providers, ipc_proxy); } void ipc_proxy_remove_provider(pcmk__client_t *ipc_proxy) { GHashTableIter iter; pcmk__client_t *ipc_client = NULL; char *key = NULL; GList *remove_these = NULL; GList *gIter = NULL; ipc_providers = g_list_remove(ipc_providers, ipc_proxy); g_hash_table_iter_init(&iter, ipc_clients); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & ipc_client)) { const char *proxy_id = ipc_client->userdata; if (pcmk__str_eq(proxy_id, ipc_proxy->id, pcmk__str_casei)) { crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.", ipc_client->id, ipc_client->pid); /* we can't remove during the iteration, so copy items * to a list we can destroy later */ remove_these = g_list_append(remove_these, ipc_client); } } for (gIter = remove_these; gIter != NULL; gIter = gIter->next) { ipc_client = gIter->data; // Disconnection callback will free the client here qb_ipcs_disconnect(ipc_client->ipcs); } /* just frees the list, not the elements in the list */ g_list_free(remove_these); } void ipc_proxy_init(void) { ipc_clients = pcmk__strkey_table(NULL, NULL); pcmk__serve_based_ipc(&cib_ro, &cib_rw, &cib_shm, &cib_proxy_callbacks_ro, &cib_proxy_callbacks_rw); pcmk__serve_attrd_ipc(&attrd_ipcs, &attrd_proxy_callbacks); pcmk__serve_fenced_ipc(&stonith_ipcs, &stonith_proxy_callbacks); pcmk__serve_pacemakerd_ipc(&pacemakerd_ipcs, &pacemakerd_proxy_callbacks); crmd_ipcs = pcmk__serve_controld_ipc(&crmd_proxy_callbacks); if (crmd_ipcs == NULL) { crm_err("Failed to create controller: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } void ipc_proxy_cleanup(void) { if (ipc_providers) { g_list_free(ipc_providers); ipc_providers = NULL; } if (ipc_clients) { g_hash_table_destroy(ipc_clients); ipc_clients = NULL; } pcmk__stop_based_ipc(cib_ro, cib_rw, cib_shm); qb_ipcs_destroy(attrd_ipcs); qb_ipcs_destroy(stonith_ipcs); qb_ipcs_destroy(pacemakerd_ipcs); qb_ipcs_destroy(crmd_ipcs); cib_ro = NULL; cib_rw = NULL; cib_shm = NULL; } diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c index c7c04d283b..26529e6981 100644 --- a/daemons/execd/remoted_tls.c +++ b/daemons/execd/remoted_tls.c @@ -1,439 +1,439 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" #include #define LRMD_REMOTE_AUTH_TIMEOUT 10000 static pcmk__tls_t *tls = NULL; static int ssock = -1; extern int lrmd_call_id; /*! * \internal * \brief Read (more) TLS handshake data from client * * \param[in,out] client IPC client doing handshake * * \return 0 on success or more data needed, -1 on error */ static int remoted__read_handshake_data(pcmk__client_t *client) { int rc = pcmk__read_handshake_data(client); if (rc == EAGAIN) { /* No more data is available at the moment. Just return for now; * we'll get invoked again once the client sends more. */ return 0; } else if (rc != pcmk_rc_ok) { return -1; } if (client->remote->auth_timeout) { g_source_remove(client->remote->auth_timeout); } client->remote->auth_timeout = 0; pcmk__set_client_flags(client, pcmk__client_tls_handshake_complete); crm_notice("Remote client connection accepted"); /* Now that the handshake is done, see if any client TLS certificate is * close to its expiration date and log if so. If a TLS certificate is not * in use, this function will just return so we don't need to check for the * session type here. */ pcmk__tls_check_cert_expiration(client->remote->tls_session); /* Only a client with access to the TLS key can connect, so we can treat * it as privileged. */ pcmk__set_client_flags(client, pcmk__client_privileged); // Alert other clients of the new connection notify_of_new_client(client); return 0; } static int lrmd_remote_client_msg(gpointer data) { int id = 0; int rc = pcmk_rc_ok; xmlNode *request = NULL; pcmk__client_t *client = data; if (!pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { return remoted__read_handshake_data(client); } rc = pcmk__remote_ready(client->remote, 0); switch (rc) { case pcmk_rc_ok: break; case ETIME: /* No message available to read */ return 0; default: /* Error */ crm_info("Error polling remote client: %s", pcmk_rc_str(rc)); return -1; } rc = pcmk__read_available_remote_data(client->remote); switch (rc) { case pcmk_rc_ok: break; case EAGAIN: /* We haven't read the whole message yet */ return 0; default: /* Error */ crm_info("Error reading from remote client: %s", pcmk_rc_str(rc)); return -1; } request = pcmk__remote_message_xml(client->remote); if (request == NULL) { return 0; } pcmk__xe_get_int(request, PCMK__XA_LRMD_REMOTE_MSG_ID, &id); crm_trace("Processing remote client request %d", id); if (!client->name) { client->name = pcmk__xe_get_copy(request, PCMK__XA_LRMD_CLIENTNAME); } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, PCMK__XA_LRMD_CLIENTID, client->id); crm_xml_add(request, PCMK__XA_LRMD_CLIENTNAME, client->name); - crm_xml_add_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id); + pcmk__xe_set_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); pcmk__xml_free(request); return 0; } static void lrmd_remote_client_destroy(gpointer user_data) { pcmk__client_t *client = user_data; if (client == NULL) { return; } crm_notice("Cleaning up after remote client %s disconnected", pcmk__client_name(client)); ipc_proxy_remove_provider(client); /* if this is the last remote connection, stop recurring * operations */ if (pcmk__ipc_client_count() == 1) { client_disconnect_cleanup(NULL); } if (client->remote->tls_session) { int csock = pcmk__tls_get_client_sock(client->remote); gnutls_bye(client->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(client->remote->tls_session); client->remote->tls_session = NULL; close(csock); } lrmd_client_destroy(client); return; } static gboolean lrmd_auth_timeout_cb(gpointer data) { pcmk__client_t *client = data; client->remote->auth_timeout = 0; if (pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { return FALSE; } mainloop_del_fd(client->remote->source); client->remote->source = NULL; crm_err("Remote client authentication timed out"); return FALSE; } // Dispatch callback for remote server socket static int lrmd_remote_listen(gpointer data) { int csock = -1; gnutls_session_t session = NULL; pcmk__client_t *new_client = NULL; // For client socket static struct mainloop_fd_callbacks lrmd_remote_fd_cb = { .dispatch = lrmd_remote_client_msg, .destroy = lrmd_remote_client_destroy, }; CRM_CHECK(ssock >= 0, return TRUE); if (pcmk__accept_remote_connection(ssock, &csock) != pcmk_rc_ok) { return TRUE; } session = pcmk__new_tls_session(tls, csock); if (session == NULL) { close(csock); return TRUE; } new_client = pcmk__new_unauth_client(NULL); new_client->remote = pcmk__assert_alloc(1, sizeof(pcmk__remote_t)); pcmk__set_client_flags(new_client, pcmk__client_tls); new_client->remote->tls_session = session; // Require the client to authenticate within this time new_client->remote->auth_timeout = pcmk__create_timer(LRMD_REMOTE_AUTH_TIMEOUT, lrmd_auth_timeout_cb, new_client); crm_info("Remote client pending authentication " QB_XS " %p id: %s", new_client, new_client->id); new_client->remote->source = mainloop_add_fd("pacemaker-remote-client", G_PRIORITY_DEFAULT, csock, new_client, &lrmd_remote_fd_cb); return TRUE; } static void tls_server_dropped(gpointer user_data) { crm_notice("TLS server session ended"); return; } // \return 0 on success, -1 on error (gnutls_psk_server_credentials_function) static int lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key) { return (lrmd__init_remote_key(key) == pcmk_rc_ok)? 0 : -1; } static int bind_and_listen(struct addrinfo *addr) { int optval; int fd; int rc; char buffer[INET6_ADDRSTRLEN] = { 0, }; pcmk__sockaddr2str(addr->ai_addr, buffer); crm_trace("Attempting to bind to address %s", buffer); fd = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol); if (fd < 0) { rc = errno; crm_err("Listener socket creation failed: %", pcmk_rc_str(rc)); return -rc; } /* reuse address */ optval = 1; rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (rc < 0) { rc = errno; crm_err("Local address reuse not allowed on %s: %s", buffer, pcmk_rc_str(rc)); close(fd); return -rc; } if (addr->ai_family == AF_INET6) { optval = 0; rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &optval, sizeof(optval)); if (rc < 0) { rc = errno; crm_err("Couldn't disable IPV6-only on %s: %s", buffer, pcmk_rc_str(rc)); close(fd); return -rc; } } if (bind(fd, addr->ai_addr, addr->ai_addrlen) != 0) { rc = errno; crm_err("Cannot bind to %s: %s", buffer, pcmk_rc_str(rc)); close(fd); return -rc; } if (listen(fd, 10) == -1) { rc = errno; crm_err("Cannot listen on %s: %s", buffer, pcmk_rc_str(rc)); close(fd); return -rc; } return fd; } static int get_address_info(const char *bind_name, int port, struct addrinfo **res) { int rc; char port_str[6]; // at most "65535" struct addrinfo hints; memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_flags = AI_PASSIVE; hints.ai_family = AF_UNSPEC; // IPv6 or IPv4 hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; snprintf(port_str, sizeof(port_str), "%d", port); rc = getaddrinfo(bind_name, port_str, &hints, res); rc = pcmk__gaierror2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Unable to get IP address(es) for %s: %s", (bind_name? bind_name : "local node"), pcmk_rc_str(rc)); return rc; } return pcmk_rc_ok; } int lrmd_init_remote_tls_server(void) { int rc = pcmk_rc_ok; int filter; int port = crm_default_remote_port(); struct addrinfo *res = NULL, *iter; const char *bind_name = pcmk__env_option(PCMK__ENV_REMOTE_ADDRESS); bool use_cert = pcmk__x509_enabled(); static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { .dispatch = lrmd_remote_listen, .destroy = tls_server_dropped, }; CRM_CHECK(ssock == -1, return ssock); crm_debug("Starting TLS listener on %s port %d", (bind_name? bind_name : "all addresses on"), port); rc = pcmk__init_tls(&tls, true, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_PSK); if (rc != pcmk_rc_ok) { return -1; } if (!use_cert) { gnutls_datum_t psk_key = { NULL, 0 }; pcmk__tls_add_psk_callback(tls, lrmd_tls_server_key_cb); /* The key callback won't get called until the first client connection * attempt. Do it once here, so we can warn the user at start-up if we can't * read the key. We don't error out, though, because it's fine if the key is * going to be added later. */ if (lrmd__init_remote_key(&psk_key) != pcmk_rc_ok) { crm_warn("A cluster connection will not be possible until the key is available"); } gnutls_free(psk_key.data); } if (get_address_info(bind_name, port, &res) != pcmk_rc_ok) { return -1; } /* Currently we listen on only one address from the resulting list (the * first IPv6 address we can bind to if possible, otherwise the first IPv4 * address we can bind to). When bind_name is NULL, this should be the * respective wildcard address. * * @TODO If there is demand for specifying more than one address, allow * bind_name to be a space-separated list, call getaddrinfo() for each, * and create a socket for each result (set IPV6_V6ONLY on IPv6 sockets * since IPv4 listeners will have their own sockets). */ iter = res; filter = AF_INET6; while (iter) { if (iter->ai_family == filter) { ssock = bind_and_listen(iter); } if (ssock >= 0) { break; } iter = iter->ai_next; if (iter == NULL && filter == AF_INET6) { iter = res; filter = AF_INET; } } if (ssock >= 0) { mainloop_add_fd("pacemaker-remote-server", G_PRIORITY_DEFAULT, ssock, NULL, &remote_listen_fd_callbacks); crm_debug("Started TLS listener on %s port %d", (bind_name? bind_name : "all addresses on"), port); } freeaddrinfo(res); return ssock; } void execd_stop_tls_server(void) { if (tls != NULL) { pcmk__free_tls(tls); tls = NULL; } if (ssock >= 0) { close(ssock); ssock = -1; } } diff --git a/daemons/fenced/fenced_cib.c b/daemons/fenced/fenced_cib.c index 59e68a722e..9e7f7cc7ae 100644 --- a/daemons/fenced/fenced_cib.c +++ b/daemons/fenced/fenced_cib.c @@ -1,621 +1,621 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include // xmlNode #include // xmlXPathObject, etc. #include #include #include #include #include #include static xmlNode *local_cib = NULL; static cib_t *cib_api = NULL; static bool have_cib_devices = FALSE; /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { GString *xpath = NULL; xmlNode *match; CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) && (value != NULL), return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, "//" PCMK_XE_NODES "/" PCMK_XE_NODE "[@" PCMK_XA_UNAME "='", node, "']" "/" PCMK_XE_INSTANCE_ATTRIBUTES "/" PCMK_XE_NVPAIR "[@" PCMK_XA_NAME "='", name, "' " "and @" PCMK_XA_VALUE "='", value, "']", NULL); match = pcmk__xpath_find_one(local_cib->doc, xpath->str, LOG_NEVER); g_string_free(xpath, TRUE); return (match != NULL); } static void add_topology_level(xmlNode *match) { char *desc = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(match != NULL, return); fenced_register_level(match, &desc, &result); fenced_send_config_notification(STONITH_OP_LEVEL_ADD, &result, desc); pcmk__reset_result(&result); free(desc); } static void topology_remove_helper(const char *node, int level) { char *desc = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; xmlNode *data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); - crm_xml_add_int(data, PCMK_XA_INDEX, level); + pcmk__xe_set_int(data, PCMK_XA_INDEX, level); crm_xml_add(data, PCMK_XA_TARGET, node); fenced_unregister_level(data, &desc, &result); fenced_send_config_notification(STONITH_OP_LEVEL_DEL, &result, desc); pcmk__reset_result(&result); pcmk__xml_free(data); free(desc); } static void remove_topology_level(xmlNode *match) { int index = 0; char *key = NULL; CRM_CHECK(match != NULL, return); key = stonith_level_key(match, fenced_target_by_unknown); pcmk__xe_get_int(match, PCMK_XA_INDEX, &index); topology_remove_helper(key, index); free(key); } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = pcmk__xpath_num_results(xpathObj); for (int lpc = 0; lpc < max; lpc++) { xmlNode *match = pcmk__xpath_result(xpathObj, lpc); if (match == NULL) { continue; } remove_topology_level(match); add_topology_level(match); } } /* Fencing */ void fencing_topology_init(void) { xmlXPathObject *xpathObj = NULL; const char *xpath = "//" PCMK_XE_FENCING_LEVEL; crm_trace("Full topology refresh"); free_topology_list(); init_topology_list(); /* Grab everything */ xpathObj = pcmk__xpath_search(local_cib->doc, xpath); register_fencing_topology(xpathObj); xmlXPathFreeObject(xpathObj); } #define XPATH_WATCHDOG_TIMEOUT "//" PCMK_XE_NVPAIR \ "[@" PCMK_XA_NAME "='" \ PCMK_OPT_STONITH_WATCHDOG_TIMEOUT "']" static void update_stonith_watchdog_timeout_ms(xmlNode *cib) { long long timeout_ms = 0; xmlNode *stonith_watchdog_xml = NULL; const char *value = NULL; // @TODO An XPath search can't handle multiple instances or rules stonith_watchdog_xml = pcmk__xpath_find_one(cib->doc, XPATH_WATCHDOG_TIMEOUT, LOG_NEVER); if (stonith_watchdog_xml) { value = pcmk__xe_get(stonith_watchdog_xml, PCMK_XA_VALUE); } if (value) { timeout_ms = crm_get_msec(value); } if (timeout_ms < 0) { timeout_ms = pcmk__auto_stonith_watchdog_timeout(); } stonith_watchdog_timeout_ms = timeout_ms; } /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GHashTableIter iter; stonith_device_t *device = NULL; crm_info("Updating devices to version %s.%s.%s", pcmk__xe_get(local_cib, PCMK_XA_ADMIN_EPOCH), pcmk__xe_get(local_cib, PCMK_XA_EPOCH), pcmk__xe_get(local_cib, PCMK_XA_NUM_UPDATES)); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->cib_registered) { device->dirty = TRUE; } } /* have list repopulated if cib has a watchdog-fencing-resource TODO: keep a cached list for queries happening while we are refreshing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = NULL; fenced_scheduler_run(local_cib); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->dirty) { g_hash_table_iter_remove(&iter); } } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); xmlNode *patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); char *reason = NULL; CRM_CHECK(patchset != NULL, return); pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format); if (format != 2) { crm_warn("Unknown patch format: %d", format); return; } for (xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL, NULL); change != NULL; change = pcmk__xe_next(change, NULL)) { const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION); const char *xpath = pcmk__xe_get(change, PCMK_XA_PATH); const char *shortpath = NULL; if (pcmk__str_eq(op, PCMK_VALUE_MOVE, pcmk__str_null_matches) || (strstr(xpath, "/" PCMK_XE_STATUS) != NULL)) { continue; } if (pcmk__str_eq(op, PCMK_VALUE_DELETE, pcmk__str_none) && (strstr(xpath, "/" PCMK_XE_PRIMITIVE) != NULL)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if ((strstr(xpath, PCMK_XE_INSTANCE_ATTRIBUTES) != NULL) || (strstr(xpath, PCMK_XE_META_ATTRIBUTES) != NULL)) { reason = pcmk__str_copy("(meta) attribute deleted from " "resource"); break; } mutable = pcmk__str_copy(xpath); rsc_id = strstr(mutable, PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "=\'"); if (rsc_id != NULL) { rsc_id += strlen(PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, true); /* watchdog_device_update called afterwards to fall back to implicit definition if needed */ } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if (strstr(xpath, "/" PCMK_XE_RESOURCES) || strstr(xpath, "/" PCMK_XE_CONSTRAINTS) || strstr(xpath, "/" PCMK_XE_RSC_DEFAULTS)) { shortpath = strrchr(xpath, '/'); pcmk__assert(shortpath != NULL); reason = crm_strdup_printf("%s %s", op, shortpath+1); break; } } if (reason != NULL) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); free(reason); } else { crm_trace("No updates for device list found in CIB"); } } static void watchdog_device_update(void) { if (stonith_watchdog_timeout_ms > 0) { if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && !stonith_watchdog_targets) { /* getting here watchdog-fencing enabled, no device there yet and reason isn't stonith_watchdog_targets preventing that */ int rc; xmlNode *xml; xml = create_device_registration_xml( STONITH_WATCHDOG_ID, st_namespace_internal, STONITH_WATCHDOG_AGENT, NULL, /* stonith_device_register will add our own name as PCMK_STONITH_HOST_LIST param so we can skip that here */ NULL); rc = stonith_device_register(xml, TRUE); pcmk__xml_free(xml); if (rc != pcmk_ok) { rc = pcmk_legacy2rc(rc); exit_code = CRM_EX_FATAL; crm_crit("Cannot register watchdog pseudo fence agent: %s", pcmk_rc_str(rc)); stonith_shutdown(0); } } } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) { /* be silent if no device - todo parameter to stonith_device_remove */ stonith_device_remove(STONITH_WATCHDOG_ID, true); } } /*! * \internal * \brief Query the full CIB * * \return Standard Pacemaker return code */ static int fenced_query_cib(void) { int rc = pcmk_ok; crm_trace("Re-requesting full CIB"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { pcmk__assert(local_cib != NULL); } else { crm_err("Couldn't retrieve the CIB: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); } return rc; } static void update_fencing_topology(const char *event, xmlNode *msg) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); xmlNode *patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); int format = 1; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; CRM_CHECK(patchset != NULL, return); pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format); if (format != 2) { crm_warn("Unknown patch format: %d", format); return; } pcmk__xml_patchset_versions(patchset, del, add); for (xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL, NULL); change != NULL; change = pcmk__xe_next(change, NULL)) { const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION); const char *xpath = pcmk__xe_get(change, PCMK_XA_PATH); if (op == NULL) { continue; } if (strstr(xpath, "/" PCMK_XE_FENCING_LEVEL) != NULL) { // Change to a specific entry crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if (strcmp(op, PCMK_VALUE_DELETE) == 0) { /* We have only path and ID, which is not enough info to remove * a specific entry. Re-initialize the whole topology. */ crm_info("Re-initializing fencing topology after %s operation " "%d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } if (strcmp(op, PCMK_VALUE_CREATE) == 0) { add_topology_level(change->children); } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) { xmlNode *match = pcmk__xe_first_child(change, PCMK_XE_CHANGE_RESULT, NULL, NULL); if (match != NULL) { remove_topology_level(match->children); add_topology_level(match->children); } } continue; } if (strstr(xpath, "/" PCMK_XE_FENCING_TOPOLOGY) != NULL) { // Change to the topology in general crm_info("Re-initializing fencing topology after top-level " "%s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } if ((strstr(xpath, "/" PCMK_XE_CONFIGURATION) != NULL) && (pcmk__xe_first_child(change, PCMK_XE_FENCING_TOPOLOGY, NULL, NULL) != NULL) && pcmk__str_any_of(op, PCMK_VALUE_CREATE, PCMK_VALUE_DELETE, NULL)) { // Topology was created or entire configuration section was deleted crm_info("Re-initializing fencing topology after top-level " "%s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } static void update_cib_cache_cb(const char *event, xmlNode * msg) { long long timeout_ms_saved = stonith_watchdog_timeout_ms; bool need_full_refresh = false; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *wrapper = NULL; xmlNode *patchset = NULL; pcmk__xe_get_int(msg, PCMK__XA_CIB_RC, &rc); if (rc != pcmk_ok) { return; } wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(local_cib); local_cib = NULL; } } if (local_cib == NULL) { if (fenced_query_cib() != pcmk_rc_ok) { return; } need_full_refresh = true; } pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); if (timeout_ms_saved != stonith_watchdog_timeout_ms) { need_full_refresh = true; } if (need_full_refresh) { fencing_topology_init(); cib_devices_update(); } else { // Partial refresh update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } watchdog_device_update(); } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from CIB"); have_cib_devices = TRUE; local_cib = pcmk__xml_copy(NULL, output); pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB manager closed"); return; } else { crm_crit("Lost connection to the CIB manager, shutting down"); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } /*! * \internal * \brief Disconnect from CIB manager */ void fenced_cib_cleanup(void) { if (cib_api != NULL) { cib_api->cmds->del_notify_callback(cib_api, PCMK__VALUE_CIB_DIFF_NOTIFY, update_cib_cache_cb); cib__clean_up_connection(&cib_api); } pcmk__xml_free(local_cib); local_cib = NULL; } void setup_cib(void) { int rc, retries = 0; cib_api = cib_new(); if (cib_api == NULL) { crm_err("No connection to the CIB manager"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, crm_system_name, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); return; } rc = cib_api->cmds->add_notify_callback(cib_api, PCMK__VALUE_CIB_DIFF_NOTIFY, update_cib_cache_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); return; } rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_none); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for fencing topology changes"); } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index f2f519e868..aee42dee9c 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3665 +1,3665 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include // xmlNode #include // xmlXPathObject, etc. #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ // @TODO This name is misleading now, it's the value of stonith-timeout int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_self; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct async_command_s { int id; int pid; int fd_stdout; uint32_t options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; uint32_t target_nodeid; char *action; char *device; GList *device_list; GList *next_device_iter; // device_list entry for next device to execute void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const stonith_device_t *device, const char *action) { const char *value = NULL; guint delay_max = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { pcmk_parse_interval_spec(value, &delay_max); delay_max /= 1000; } return (int) delay_max; } static int get_action_delay_base(const stonith_device_t *device, const char *action, const char *target) { char *hash_value = NULL; guint delay_base = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = pcmk__str_copy(hash_value); char *valptr = value; if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { pcmk_parse_interval_spec(value, &delay_base); delay_base /= 1000; } free(valptr); } return (int) delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(const stonith_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { long long timeout_ms = crm_get_msec(value); return (int) QB_MIN(pcmk__timeout_ms2s(timeout_ms), INT_MAX); } } return default_timeout; } /*! * \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static stonith_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) { return NULL; } return g_hash_table_lookup(device_list, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if (device != NULL) { return pcmk_is_set(device->flags, st_device_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! * \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. */ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; int rc = pcmk_rc_ok; if (msg == NULL) { return NULL; } op = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_ERR); if (op == NULL) { return NULL; } cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); // All messages must include these cmd->action = pcmk__xe_get_copy(op, PCMK__XA_ST_DEVICE_ACTION); cmd->op = pcmk__xe_get_copy(msg, PCMK__XA_ST_OP); cmd->client = pcmk__xe_get_copy(msg, PCMK__XA_ST_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } pcmk__xe_get_int(msg, PCMK__XA_ST_CALLID, &(cmd->id)); pcmk__xe_get_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay)); pcmk__xe_get_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; rc = pcmk__xe_get_flags(msg, PCMK__XA_ST_CALLOPT, &(cmd->options), st_opt_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc)); } cmd->origin = pcmk__xe_get_copy(msg, PCMK__XA_SRC); cmd->remote_op_id = pcmk__xe_get_copy(msg, PCMK__XA_ST_REMOTE_OP); cmd->client_name = pcmk__xe_get_copy(msg, PCMK__XA_ST_CLIENTNAME); cmd->target = pcmk__xe_get_copy(op, PCMK__XA_ST_TARGET); cmd->device = pcmk__xe_get_copy(op, PCMK__XA_ST_DEVICE_ID); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; pcmk__assert(device != NULL); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! * \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? "" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { if (node_does_watchdog_fencing(fenced_get_local_node())) { pcmk__panic("Watchdog self-fencing required"); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if PCMK__ENABLE_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->target_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && (cmd->target != NULL)) { pcmk__node_status_t *node = pcmk__get_node(0, cmd->target, NULL, pcmk__node_search_cluster_member); cmd->target_nodeid = node->cluster_layer_id; } cmd->device = pcmk__str_copy(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " QB_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = pcmk__create_timer(cmd->start_delay * 1000, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); pcmk__xml_free(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, pcmk__str_copy(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer); } *metadata = pcmk__xml_parse(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObject *xpath = NULL; if (!xml) { return FALSE; } xpath = pcmk__xpath_search(xml->doc, "//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']"); if (pcmk__xpath_num_results(xpath) == 0) { xmlXPathFreeObject(xpath); return FALSE; } xmlXPathFreeObject(xpath); return TRUE; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObject *xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = pcmk__xpath_search(device->agent_metadata->doc, "//" PCMK_XE_ACTION); max = pcmk__xpath_num_results(xpath); if (max == 0) { xmlXPathFreeObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = pcmk__xpath_result(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = pcmk__xe_get(match, PCMK_XA_NAME); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it * joins. * * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for * PCMK_XA_AUTOMATIC. */ if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC) || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) { device->automatic_unfencing = TRUE; } stonith__set_device_flags(device->flags, device->id, st_device_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) { pcmk__add_word(&(device->on_target_actions), 64, action); } } xmlXPathFreeObject(xpath); } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = PCMK_VALUE_DYNAMIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = PCMK_VALUE_STATUS; } else { check_type = PCMK_VALUE_NONE; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode *dev) { const char *value; stonith_device_t *device = NULL; char *agent = pcmk__xe_get_copy(dev, PCMK_XA_AGENT); CRM_CHECK(agent != NULL, return device); device = pcmk__assert_alloc(1, sizeof(stonith_device_t)); device->id = pcmk__xe_get_copy(dev, PCMK_XA_ID); device->agent = agent; device->namespace = pcmk__xe_get_copy(dev, PCMK__XA_NAMESPACE); device->params = xml2list(dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei) && (device->targets != NULL)) { // device->targets is necessary only with PCMK_VALUE_STATIC_LIST g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = pcmk__xe_get(dev, PCMK__XA_RSC_PROVIDES); if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = pcmk__str_copy(action); cmd->target = pcmk__str_copy(target); cmd->device = pcmk__str_copy(device->id); cmd->origin = pcmk__str_copy(origin); cmd->client = pcmk__str_copy(crm_system_name); cmd->client_name = pcmk__str_copy(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't * explicitly specify PCMK_VALUE_DYNAMIC_LIST */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK, PCMK_VALUE_STATUS); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(const stonith_device_t *device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } // Use pcmk__digest_operation() here? if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode *dev, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(dev); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown " "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device " "named %s !='"STONITH_WATCHDOG_ID"'.", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { const char *local_node_name = fenced_get_local_node(); if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* this either has an empty list or the targets configured for watchdog-fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (node_does_watchdog_fencing(local_node_name)) { g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(local_node_name); pcmk__insert_dup(device->params, PCMK_STONITH_HOST_LIST, local_node_name); /* proceed as with any other stonith-device */ break; } crm_debug("Skip registration of watchdog fence device on node not in host-list."); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } void stonith_device_remove(const char *id, bool from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return pcmk__xe_get_copy(level, PCMK_XA_TARGET); case fenced_target_by_pattern: return pcmk__xe_get_copy(level, PCMK_XA_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", pcmk__xe_get(level, PCMK_XA_TARGET_ATTRIBUTE), pcmk__xe_get(level, PCMK_XA_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", pcmk__xe_id(level)); } } /*! * \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (pcmk__xe_get(level, PCMK_XA_TARGET) != NULL) { return fenced_target_by_name; } if (pcmk__xe_get(level, PCMK_XA_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } if ((pcmk__xe_get(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL) && (pcmk__xe_get(level, PCMK_XA_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * \param[out] desc If not NULL, where to store log-friendly level description * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target and \p *desc if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id, char **desc) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) { xml = pcmk__xpath_find_one(xml->doc, "//" PCMK_XE_FENCING_LEVEL, LOG_WARNING); } if (xml == NULL) { if (desc != NULL) { *desc = crm_strdup_printf("missing"); } } else { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); pcmk__xe_get_int(xml, PCMK_XA_INDEX, &local_id); if (desc != NULL) { *desc = crm_strdup_printf("%s[%d]", local_target, local_id); } } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of registration */ void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); level = unpack_level_request(msg, &mode, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(pcmk__xe_id(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", pcmk__xe_id(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", pcmk__xe_id(level)); return; } // Ensure level ID is in allowed range if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(pcmk__xe_get(level, PCMK_XA_INDEX), ""), pcmk__xe_id(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = pcmk__xe_get_copy(level, PCMK_XA_TARGET_VALUE); tp->target_pattern = pcmk__xe_get_copy(level, PCMK_XA_TARGET_PATTERN); tp->target_attribute = pcmk__xe_get_copy(level, PCMK_XA_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(pcmk__xe_get(level, PCMK_XA_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of unregistration */ void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; CRM_CHECK(result != NULL, return); level = unpack_level_request(msg, NULL, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure level ID is in allowed range if ((id < 0) || (id >= ST__LEVEL_COUNT)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(pcmk__xe_get(level, PCMK_XA_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(pcmk__xe_id(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; char *pos = NULL; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = pcmk__assert_alloc(alloc_size, sizeof(char)); pos = rv; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR); xmlNode *op = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_ERR); const char *id = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID); const char *action = pcmk__xe_get(op, PCMK__XA_ST_DEVICE_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); fenced_set_protocol_error(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_list, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!device->api_registered && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != st_device_supports_none) { stonith_device_t *dev = g_hash_table_lookup(device_list, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, pcmk__str_copy(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_self) { gboolean localhost_is_target = pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_self) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! * \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const stonith_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } /*! * \internal * \brief Check whether we can use a device's cached target list * * \param[in] dev Fencing device to check * * \return \c true if \p dev cached its targets less than a minute ago, * otherwise \c false */ static inline bool can_use_target_cache(const stonith_device_t *dev) { return (dev->targets != NULL) && (time(NULL) < (dev->targets_age + 60)); } static void can_fence_host_with_device(stonith_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_self)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST, pcmk__str_casei)) { if (!can_use_target_cache(dev)) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool allow_self, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = pcmk__assert_alloc(1, sizeof(struct device_search_s)); search->host = pcmk__str_copy(host); search->action = pcmk__str_copy(action); search->per_device_timeout = timeout; search->allow_self = allow_self; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); // PCMK__XA_ST_REQUIRED is currently used only for unfencing if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); - crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1); + pcmk__xe_set_int(xml, PCMK__XA_ST_REQUIRED, 1); } // pcmk__timeout if configured action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %ds using %s", action, action_specific_timeout, device->id); - crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, - action_specific_timeout); + pcmk__xe_set_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, + action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); - crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max); + pcmk__xe_set_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { - crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base); + pcmk__xe_set_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_self Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_self) { if (!localhost_is_eligible(device, action, target, allow_self)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_self Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_self) { xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION); crm_xml_add(child, PCMK_XA_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_self); } /*! * \internal * \brief Send a reply to a CPG peer or IPC client * * \param[in] reply XML reply to send * \param[in] call_options Send synchronously if st_opt_sync_call is set * \param[in] remote_peer If not NULL, name of peer node to send CPG reply * \param[in,out] client If not NULL, client to send IPC reply */ static void stonith_send_reply(const xmlNode *reply, int call_options, const char *remote_peer, pcmk__client_t *client) { CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), return); if (remote_peer == NULL) { do_local_reply(reply, client, call_options); } else { const pcmk__node_status_t *node = pcmk__get_node(0, remote_peer, NULL, pcmk__node_search_cluster_member); pcmk__cluster_send_message(node, pcmk_ipc_fenced, reply); } } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *wrapper = NULL; xmlNode *list = NULL; GList *lpc = NULL; pcmk__client_t *client = NULL; if (query->client_id != NULL) { client = pcmk__find_client_by_id(query->client_id); if ((client == NULL) && (query->remote_peer == NULL)) { crm_trace("Skipping reply to %s: no longer a client", query->client_id); goto done; } } // Pack the results into XML wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA); list = pcmk__xe_create(wrapper, __func__); crm_xml_add(list, PCMK__XA_ST_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; xmlNode *dev = NULL; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID); crm_xml_add(dev, PCMK_XA_ID, device->id); crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace); crm_xml_add(dev, PCMK_XA_AGENT, device->agent); // Has had successful monitor, list, or status on this node - crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified); + pcmk__xe_set_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified); - crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags); + pcmk__xe_set_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_self_fencing)); add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_self_fencing)); add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES); g_hash_table_foreach(device->params, hash2field, attrs); } } - crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices); + pcmk__xe_set_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } crm_log_xml_trace(list, "query-result"); stonith_send_reply(query->reply, query->call_options, query->remote_peer, client); done: pcmk__xml_free(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); g_list_free_full(devices, free); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->next_device_iter); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if (pcmk__result_ok(result)) { log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_DEBUG; } next = NULL; } else { log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->target != NULL) { pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); } if (cmd->device != NULL) { pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); } // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { pcmk__g_strcat(msg, "could not be executed: ", pcmk_exec_status_str(result->execution_status), NULL); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); } if (next != NULL) { pcmk__g_strcat(msg, ", retrying with ", next, NULL); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " QB_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; pcmk__client_t *client = NULL; CRM_CHECK((cmd != NULL) && (result != NULL), return); log_async_result(cmd, result, pid, NULL, merged); if (cmd->client != NULL) { client = pcmk__find_client_by_id(cmd->client); if ((client == NULL) && (cmd->origin == NULL)) { crm_trace("Skipping reply to %s: no longer a client", cmd->client); return; } } reply = construct_async_reply(cmd, result); if (merged) { pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true); } if (pcmk__is_fencing_action(cmd->action) && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { /* The target was also the originator, so broadcast the result on its * behalf (since it will be unable to). */ crm_trace("Broadcast '%s' result for %s (target was also originator)", cmd->action, cmd->target); crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, reply); } else { // Reply only to the originator stonith_send_reply(reply, cmd->options, cmd->origin, client); } crm_log_xml_trace(reply, "Reply"); pcmk__xml_free(reply); } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device = cmd_device(cmd); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } /*! * \internal * \brief Cancel and reply to any duplicates of a just-completed operation * * Check whether any fencing operations are scheduled to do the same thing as * one that just succeeded. If so, rather than performing the same operation * twice, return the result of this operation for all matching pending commands. * * \param[in,out] cmd Fencing operation that just succeeded * \param[in] result Result of \p cmd * \param[in] pid If nonzero, process ID of agent invocation (for logs) * * \note Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to off, * then cmd->action will be "reboot" and will be merged with any other * reboot requests. If the originating fencer remapped a topology reboot * to off then on, we will get here once with cmd->action "off" and once * with "on", and they will be merged separately with similar requests. */ static void reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, int pid) { GList *next = NULL; for (GList *iter = cmd_list; iter != NULL; iter = next) { async_command_t *cmd_other = iter->data; next = iter->next; // We might delete this entry, so grab next now if (cmd == cmd_other) { continue; } /* A pending operation matches if: * 1. The client connections are different. * 2. The target is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } crm_notice("Merging fencing action '%s'%s%s originating from " "client %s with identical fencing request from client %s", cmd_other->action, (cmd_other->target == NULL)? "" : " targeting ", pcmk__s(cmd_other->target, ""), cmd_other->client_name, cmd->client_name); // Stop tracking the duplicate, send its result, and cancel it cmd_list = g_list_remove_link(cmd_list, iter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(iter); } } /*! * \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static stonith_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { stonith_device_t *next_device = g_hash_table_lookup(device_list, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!device->verified && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { device->verified = TRUE; } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { // Device found, schedule it for fencing cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! * \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } device_id = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID); if (device_id != NULL) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = pcmk__xe_get(dev, PCMK__XA_ST_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { host = node->name; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! * \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. * \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { PCMK__XA_ST_OP, PCMK__XA_ST_CALLID, PCMK__XA_ST_CLIENTID, PCMK__XA_ST_CLIENTNAME, PCMK__XA_ST_REMOTE_OP, PCMK__XA_ST_CALLOPT, }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = pcmk__xe_get(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client); crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name); crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin); - crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id); - crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options); + pcmk__xe_set_int(reply, PCMK__XA_ST_CALLID, cmd->id); + pcmk__xe_set_int(reply, PCMK__XA_ST_CALLOPT, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(pcmk__node_status_t *peer) { return (peer != NULL) && (peer->name != NULL) && pcmk_is_set(peer->processes, crm_get_cluster_proc()); } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! * \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { if (pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { GHashTableIter gIter; pcmk__node_status_t *entry = NULL; g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->name, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->name); return entry->name; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = pcmk__xpath_find_one(request->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET); } relay_op_id = pcmk__xe_get(request, PCMK__XA_ST_REMOTE_OP_RELAY); op_id = pcmk__xe_get(request, PCMK__XA_ST_REMOTE_OP); client_name = pcmk__xe_get(request, PCMK__XA_ST_CLIENTNAME); /* Delete RELAY operation. */ if ((relay_op_id != NULL) && (target != NULL) && pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. * * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) * \param[in] op Requested API operation (for logging only) * * \return true if sender is peer or privileged client, otherwise false */ static inline bool is_privileged(const pcmk__client_t *c, const char *op) { if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { return true; } else { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", pcmk__s(op, ""), pcmk__client_name(c)); return false; } } // CRM_OP_REGISTER static xmlNode * handle_register_request(pcmk__request_t *request) { xmlNode *reply = pcmk__xe_create(NULL, "reply"); pcmk__assert(request->ipc_client != NULL); crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return reply; } // STONITH_OP_EXEC static xmlNode * handle_agent_request(pcmk__request_t *request) { execute_agent_action(request->xml, &request->result); if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_TIMEOUT_UPDATE static xmlNode * handle_update_timeout_request(pcmk__request_t *request) { const char *call_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CALLID); const char *client_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CLIENTID); int op_timeout = 0; pcmk__xe_get_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } // STONITH_OP_QUERY static xmlNode * handle_query_request(pcmk__request_t *request) { int timeout = 0; xmlNode *dev = NULL; const char *action = NULL; const char *target = NULL; const char *client_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CLIENTID); struct st_query_data *query = NULL; if (request->peer != NULL) { // Record it for the future notification create_remote_stonith_op(client_id, request->xml, TRUE); } /* Delete the DC node RELAY operation. */ remove_relay_op(request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); dev = pcmk__xpath_find_one(request->xml->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_NEVER); if (dev != NULL) { const char *device = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID); if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { return NULL; // No query or reply necessary } target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET); action = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ACTION); } crm_log_xml_trace(request->xml, "Query"); query = pcmk__assert_alloc(1, sizeof(struct st_query_data)); query->reply = fenced_construct_reply(request->xml, NULL, &request->result); query->remote_peer = pcmk__str_copy(request->peer); query->client_id = pcmk__str_copy(client_id); query->target = pcmk__str_copy(target); query->action = pcmk__str_copy(action); query->call_options = request->call_options; pcmk__xe_get_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout); get_capable_devices(target, action, timeout, pcmk_is_set(query->call_options, st_opt_allow_self_fencing), query, stonith_query_capable_device_cb, st_device_supports_none); return NULL; } // STONITH_OP_NOTIFY static xmlNode * handle_notify_request(pcmk__request_t *request) { const char *flag_name = NULL; pcmk__assert(request->ipc_client != NULL); flag_name = pcmk__xe_get(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE); if (flag_name != NULL) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } flag_name = pcmk__xe_get(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE); if (flag_name != NULL) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__clear_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_OK); } // STONITH_OP_RELAY static xmlNode * handle_relay_request(pcmk__request_t *request) { xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ACTION), pcmk__xe_get(dev, PCMK__XA_ST_TARGET)); if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); return fenced_construct_reply(request->xml, NULL, &request->result); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); return NULL; } // STONITH_OP_FENCE static xmlNode * handle_fence_request(pcmk__request_t *request) { if (request->peer != NULL) { fence_locally(request->xml, &request->result); } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { switch (fenced_handle_manual_confirmation(request->ipc_client, request->xml)) { case pcmk_rc_ok: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); break; case EINPROGRESS: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); break; default: fenced_set_protocol_error(&request->result); break; } } else { const char *alternate_host = NULL; xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_TRACE); const char *target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET); const char *action = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ACTION); const char *device = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID); if (request->ipc_client != NULL) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__request_origin(request), action, target, (device? device : "any device")); pcmk__xe_get_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; pcmk__node_status_t *node = pcmk__get_node(0, alternate_host, NULL, pcmk__node_search_cluster_member); if (request->ipc_client->id == 0) { client_id = pcmk__xe_get(request->xml, PCMK__XA_ST_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. */ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY); crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id); // @TODO On failure, fail request immediately, or maybe panic pcmk__cluster_send_message(node, pcmk_ipc_fenced, request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } pcmk__xml_free(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP); xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = stonith_device_register(dev, FALSE); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_config_notification(op, &request->result, (dev == NULL)? NULL : pcmk__xe_id(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR); const char *device_id = pcmk__xe_get(dev, PCMK_XA_ID); const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_config_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { char *desc = NULL; const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { char *desc = NULL; const char *op = pcmk__xe_get(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; pcmk__xe_get_int(request->xml, PCMK_XA_ID, &node_id); name = pcmk__xe_get(request->xml, PCMK_XA_UNAME); pcmk__cluster_forget_cluster_node(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { STONITH_OP_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { STONITH_OP_LEVEL_DEL, handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) { if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } pcmk__xml_free(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = pcmk__xe_get_copy(request, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { uint32_t call_options = st_opt_none; int rc = pcmk_rc_ok; bool is_reply = false; CRM_CHECK(message != NULL, return); if (pcmk__xpath_find_one(message->doc, "//" PCMK__XE_ST_REPLY, LOG_NEVER) != NULL) { is_reply = true; } rc = pcmk__xe_get_flags(message, PCMK__XA_ST_CALLOPT, &call_options, st_opt_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from message: %s", pcmk_rc_str(rc)); } crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", pcmk__xe_get(message, PCMK__XA_ST_OP), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { pcmk__assert((client == NULL) || (client->request_id == id)); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = pcmk__xe_get_copy(request.xml, PCMK__XA_ST_OP); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c index 722c9f4407..576e435510 100644 --- a/daemons/fenced/fenced_history.c +++ b/daemons/fenced/fenced_history.c @@ -1,574 +1,574 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // xmlNode #include #include #include #include #include #include #include #include #include #define MAX_STONITH_HISTORY 500 /*! * \internal * \brief Send a broadcast to all nodes to trigger cleanup or * history synchronisation * * \param[in] history Optional history to be attached * \param[in] callopts We control cleanup via a flag in the callopts * \param[in] target Cleanup can be limited to certain fence-targets */ static void stonith_send_broadcast_history(xmlNode *history, int callopts, const char *target) { xmlNode *bcast = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); xmlNode *wrapper = pcmk__xe_create(bcast, PCMK__XE_ST_CALLDATA); xmlNode *call_data = pcmk__xe_create(wrapper, __func__); crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_FENCE_HISTORY); - crm_xml_add_int(bcast, PCMK__XA_ST_CALLOPT, callopts); + pcmk__xe_set_int(bcast, PCMK__XA_ST_CALLOPT, callopts); pcmk__xml_copy(call_data, history); if (target != NULL) { crm_xml_add(call_data, PCMK__XA_ST_TARGET, target); } pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, bcast); pcmk__xml_free(bcast); } static gboolean stonith_remove_history_entry (gpointer key, gpointer value, gpointer user_data) { remote_fencing_op_t *op = value; const char *target = (const char *) user_data; if ((op->state == st_failed) || (op->state == st_done)) { if ((target) && (strcmp(op->target, target) != 0)) { return FALSE; } return TRUE; } return FALSE; /* don't clean pending operations */ } /*! * \internal * \brief Send out a cleanup broadcast or do a local history-cleanup * * \param[in] target Cleanup can be limited to certain fence-targets * \param[in] broadcast Send out a cleanup broadcast */ static void stonith_fence_history_cleanup(const char *target, gboolean broadcast) { if (broadcast) { stonith_send_broadcast_history(NULL, st_opt_cleanup | st_opt_discard_reply, target); /* we'll do the local clean when we receive back our own broadcast */ } else if (stonith_remote_op_list) { g_hash_table_foreach_remove(stonith_remote_op_list, stonith_remove_history_entry, (gpointer) target); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } } /* keeping the length of fence-history within bounds * ================================================= * * If things are really running wild a lot of fencing-attempts * might fill up the hash-map, eventually using up a lot * of memory and creating huge history-sync messages. * Before the history being synced across nodes at least * the reboot of a cluster-node helped keeping the * history within bounds even though not in a reliable * manner. * * stonith_remote_op_list isn't sorted for time-stamps * thus it would be kind of expensive to delete e.g. * the oldest entry if it would grow past MAX_STONITH_HISTORY * entries. * It is more efficient to purge MAX_STONITH_HISTORY/2 * entries whenever the list grows beyond MAX_STONITH_HISTORY. * (sort for age + purge the MAX_STONITH_HISTORY/2 oldest) * That done on a per-node-base might raise the * probability of large syncs to occur. * Things like introducing a broadcast to purge * MAX_STONITH_HISTORY/2 entries or not sync above a certain * threshold coming to mind ... * Simplest thing though is to purge the full history * throughout the cluster once MAX_STONITH_HISTORY is reached. * On the other hand this leads to purging the history in * situations where it would be handy to have it probably. */ /*! * \internal * \brief Compare two remote fencing operations by status and completion time * * A pending operation is ordered before a completed operation. If both * operations have completed, then the more recently completed operation is * ordered first. Two pending operations are considered equal. * * \param[in] a First \c remote_fencing_op_t to compare * \param[in] b Second \c remote_fencing_op_t to compare * * \return Standard comparison result (a negative integer if \p a is lesser, * 0 if the values are equal, and a positive integer if \p a is greater) */ static gint cmp_op_by_completion(gconstpointer a, gconstpointer b) { const remote_fencing_op_t *op1 = a; const remote_fencing_op_t *op2 = b; bool op1_pending = stonith__op_state_pending(op1->state); bool op2_pending = stonith__op_state_pending(op2->state); if (op1_pending && op2_pending) { return 0; } if (op1_pending) { return -1; } if (op2_pending) { return 1; } if (op1->completed > op2->completed) { return -1; } if (op1->completed < op2->completed) { return 1; } if (op1->completed_nsec > op2->completed_nsec) { return -1; } if (op1->completed_nsec < op2->completed_nsec) { return 1; } return 0; } /*! * \internal * \brief Remove a completed operation from \c stonith_remote_op_list * * \param[in] data \c remote_fencing_op_t to remove * \param[in] user_data Ignored */ static void remove_completed_remote_op(gpointer data, gpointer user_data) { const remote_fencing_op_t *op = data; if (!stonith__op_state_pending(op->state)) { g_hash_table_remove(stonith_remote_op_list, op->id); } } /*! * \internal * \brief Do a local history-trim to MAX_STONITH_HISTORY / 2 entries * once over MAX_STONITH_HISTORY */ void stonith_fence_history_trim(void) { if (stonith_remote_op_list == NULL) { return; } if (g_hash_table_size(stonith_remote_op_list) > MAX_STONITH_HISTORY) { GList *ops = g_hash_table_get_values(stonith_remote_op_list); crm_trace("More than %d entries in fencing history, purging oldest " "completed operations", MAX_STONITH_HISTORY); ops = g_list_sort(ops, cmp_op_by_completion); // Always keep pending ops regardless of number of entries g_list_foreach(g_list_nth(ops, MAX_STONITH_HISTORY / 2), remove_completed_remote_op, NULL); // No need for a notification after purging old data g_list_free(ops); } } /*! * \internal * \brief Convert xml fence-history to a hash-table like stonith_remote_op_list * * \param[in] history Fence-history in xml * * \return Fence-history as hash-table */ static GHashTable * stonith_xml_history_to_list(const xmlNode *history) { xmlNode *xml_op = NULL; GHashTable *rv = NULL; init_stonith_remote_op_hash_table(&rv); CRM_LOG_ASSERT(rv != NULL); for (xml_op = pcmk__xe_first_child(history, NULL, NULL, NULL); xml_op != NULL; xml_op = pcmk__xe_next(xml_op, NULL)) { remote_fencing_op_t *op = NULL; char *id = pcmk__xe_get_copy(xml_op, PCMK__XA_ST_REMOTE_OP); int state; int exit_status = CRM_EX_OK; int execution_status = PCMK_EXEC_DONE; if (!id) { crm_warn("Malformed fencing history received from peer"); continue; } crm_trace("Attaching op %s to hashtable", id); op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t)); op->id = id; op->target = pcmk__xe_get_copy(xml_op, PCMK__XA_ST_TARGET); op->action = pcmk__xe_get_copy(xml_op, PCMK__XA_ST_DEVICE_ACTION); op->originator = pcmk__xe_get_copy(xml_op, PCMK__XA_ST_ORIGIN); op->delegate = pcmk__xe_get_copy(xml_op, PCMK__XA_ST_DELEGATE); op->client_name = pcmk__xe_get_copy(xml_op, PCMK__XA_ST_CLIENTNAME); pcmk__xe_get_time(xml_op, PCMK__XA_ST_DATE, &op->completed); pcmk__xe_get_ll(xml_op, PCMK__XA_ST_DATE_NSEC, &op->completed_nsec); pcmk__xe_get_int(xml_op, PCMK__XA_ST_STATE, &state); op->state = (enum op_state) state; /* @COMPAT We can't use stonith__xe_get_result() here because * fencers <2.1.3 didn't include results, leading it to assume an error * status. Instead, set an unknown status in that case. */ if ((pcmk__xe_get_int(xml_op, PCMK__XA_RC_CODE, &exit_status) != pcmk_rc_ok) || (pcmk__xe_get_int(xml_op, PCMK__XA_OP_STATUS, &execution_status) != pcmk_rc_ok)) { exit_status = CRM_EX_INDETERMINATE; execution_status = PCMK_EXEC_UNKNOWN; } pcmk__set_result(&op->result, exit_status, execution_status, pcmk__xe_get(xml_op, PCMK_XA_EXIT_REASON)); pcmk__set_result_output(&op->result, pcmk__xe_get_copy(xml_op, PCMK__XA_ST_OUTPUT), NULL); g_hash_table_replace(rv, id, op); CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL); } return rv; } /*! * \internal * \brief Craft xml difference between local fence-history and a history * coming from remote, and merge the remote history into the local * * \param[in,out] remote_history Fence-history as hash-table (may be NULL) * \param[in] add_id If crafting the answer for an API * history-request there is no need for the id * \param[in] target Optionally limit to certain fence-target * * \return The fence-history as xml */ static xmlNode * stonith_local_history_diff_and_merge(GHashTable *remote_history, gboolean add_id, const char *target) { xmlNode *history = NULL; GHashTableIter iter; remote_fencing_op_t *op = NULL; gboolean updated = FALSE; int cnt = 0; if (stonith_remote_op_list) { char *id = NULL; history = pcmk__xe_create(NULL, PCMK__XE_ST_HISTORY); g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, (void **)&id, (void **)&op)) { xmlNode *entry = NULL; if (remote_history) { remote_fencing_op_t *remote_op = g_hash_table_lookup(remote_history, op->id); if (remote_op) { if (stonith__op_state_pending(op->state) && !stonith__op_state_pending(remote_op->state)) { crm_debug("Updating outdated pending operation %.8s " "(state=%s) according to the one (state=%s) from " "remote peer history", op->id, stonith_op_state_str(op->state), stonith_op_state_str(remote_op->state)); g_hash_table_steal(remote_history, op->id); op->id = remote_op->id; remote_op->id = id; g_hash_table_iter_replace(&iter, remote_op); updated = TRUE; continue; /* skip outdated entries */ } else if (!stonith__op_state_pending(op->state) && stonith__op_state_pending(remote_op->state)) { crm_debug("Broadcasting operation %.8s (state=%s) to " "update the outdated pending one " "(state=%s) in remote peer history", op->id, stonith_op_state_str(op->state), stonith_op_state_str(remote_op->state)); g_hash_table_remove(remote_history, op->id); } else { g_hash_table_remove(remote_history, op->id); continue; /* skip entries broadcasted already */ } } } if (!pcmk__str_eq(target, op->target, pcmk__str_null_matches)) { continue; } cnt++; crm_trace("Attaching op %s", op->id); entry = pcmk__xe_create(history, STONITH_OP_EXEC); if (add_id) { crm_xml_add(entry, PCMK__XA_ST_REMOTE_OP, op->id); } crm_xml_add(entry, PCMK__XA_ST_TARGET, op->target); crm_xml_add(entry, PCMK__XA_ST_DEVICE_ACTION, op->action); crm_xml_add(entry, PCMK__XA_ST_ORIGIN, op->originator); crm_xml_add(entry, PCMK__XA_ST_DELEGATE, op->delegate); crm_xml_add(entry, PCMK__XA_ST_CLIENTNAME, op->client_name); pcmk__xe_set_time(entry, PCMK__XA_ST_DATE, op->completed); pcmk__xe_set_ll(entry, PCMK__XA_ST_DATE_NSEC, op->completed_nsec); - crm_xml_add_int(entry, PCMK__XA_ST_STATE, op->state); + pcmk__xe_set_int(entry, PCMK__XA_ST_STATE, op->state); stonith__xe_set_result(entry, &op->result); } } if (remote_history) { init_stonith_remote_op_hash_table(&stonith_remote_op_list); updated |= g_hash_table_size(remote_history); g_hash_table_iter_init(&iter, remote_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) { if (stonith__op_state_pending(op->state) && pcmk__str_eq(op->originator, fenced_get_local_node(), pcmk__str_casei)) { crm_warn("Failing pending operation %.8s originated by us but " "known only from peer history", op->id); op->state = st_failed; set_fencing_completed(op); /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op() * from setting a delegate */ pcmk__set_result(&op->result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID, "Initiated by earlier fencer " "process and presumed failed"); fenced_broadcast_op_result(op, false); } g_hash_table_iter_steal(&iter); g_hash_table_replace(stonith_remote_op_list, op->id, op); /* we could trim the history here but if we bail * out after trim we might miss more recent entries * of those that might still be in the list * if we don't bail out trimming once is more * efficient and memory overhead is minimal as * we are just moving pointers from one hash to * another */ } g_hash_table_destroy(remote_history); /* remove what is left */ } if (updated) { stonith_fence_history_trim(); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } if (cnt == 0) { pcmk__xml_free(history); return NULL; } else { return history; } } /*! * \internal * \brief Craft xml from the local fence-history * * \param[in] add_id If crafting the answer for an API * history-request there is no need for the id * \param[in] target Optionally limit to certain fence-target * * \return The fence-history as xml */ static xmlNode * stonith_local_history(gboolean add_id, const char *target) { return stonith_local_history_diff_and_merge(NULL, add_id, target); } /*! * \internal * \brief Handle fence-history messages (from API or coming in as broadcasts) * * \param[in,out] msg Request XML * \param[out] output Where to set local history, if requested * \param[in] remote_peer If broadcast, peer that sent it * \param[in] options Call options from the request */ void stonith_fence_history(xmlNode *msg, xmlNode **output, const char *remote_peer, int options) { const char *target = NULL; xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_NEVER); xmlNode *out_history = NULL; if (dev) { target = pcmk__xe_get(dev, PCMK__XA_ST_TARGET); if (target && (options & st_opt_cs_nodeid)) { int nodeid; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(target, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { target = node->name; } } } if (options & st_opt_cleanup) { const char *call_id = pcmk__xe_get(msg, PCMK__XA_ST_CALLID); crm_trace("Cleaning up operations on %s in %p", target, stonith_remote_op_list); stonith_fence_history_cleanup(target, (call_id != NULL)); } else if (options & st_opt_broadcast) { /* there is no clear sign atm for when a history sync is done so send a notification for anything that smells like history-sync */ fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, NULL, NULL); if (pcmk__xe_get(msg, PCMK__XA_ST_CALLID) != NULL) { /* this is coming from the stonith-API * * craft a broadcast with node's history * so that every node can merge and broadcast * what it has on top */ out_history = stonith_local_history(TRUE, NULL); crm_trace("Broadcasting history to peers"); stonith_send_broadcast_history(out_history, st_opt_broadcast | st_opt_discard_reply, NULL); } else if (remote_peer && !pcmk__str_eq(remote_peer, fenced_get_local_node(), pcmk__str_casei)) { xmlNode *history = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_ST_HISTORY, LOG_NEVER); /* either a broadcast created directly upon stonith-API request * or a diff as response to such a thing * * in both cases it may have a history or not * if we have differential data * merge in what we've received and stop * otherwise broadcast what we have on top * marking as differential and merge in afterwards */ if (!history || !pcmk__xe_attr_is_true(history, PCMK__XA_ST_DIFFERENTIAL)) { GHashTable *received_history = NULL; if (history != NULL) { received_history = stonith_xml_history_to_list(history); } out_history = stonith_local_history_diff_and_merge(received_history, TRUE, NULL); if (out_history) { crm_trace("Broadcasting history-diff to peers"); pcmk__xe_set_bool_attr(out_history, PCMK__XA_ST_DIFFERENTIAL, true); stonith_send_broadcast_history(out_history, st_opt_broadcast | st_opt_discard_reply, NULL); } else { crm_trace("History-diff is empty - skip broadcast"); } } } else { crm_trace("Skipping history-query-broadcast (%s%s)" " we sent ourselves", remote_peer?"remote-peer=":"local-ipc", remote_peer?remote_peer:""); } } else { /* plain history request */ crm_trace("Looking for operations on %s in %p", target, stonith_remote_op_list); *output = stonith_local_history(FALSE, target); } pcmk__xml_free(out_history); } diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index 336e5e7968..28ea84dc93 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -1,2621 +1,2621 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include // xmlNode #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 /* When one fencer queries its peers for devices able to handle a fencing * request, each peer will reply with a list of such devices available to it. * Each reply will be parsed into a peer_device_info_t, with each device's * information kept in a device_properties_t. */ typedef struct device_properties_s { /* Whether access to this device has been verified */ gboolean verified; /* The remaining members are indexed by the operation's "phase" */ /* Whether this device has been executed in each phase */ gboolean executed[st_phase_max]; /* Whether this device is disallowed from executing in each phase */ gboolean disallowed[st_phase_max]; /* Action-specific timeout for each phase */ int custom_action_timeout[st_phase_max]; /* Action-specific maximum random delay for each phase */ int delay_max[st_phase_max]; /* Action-specific base delay for each phase */ int delay_base[st_phase_max]; /* Group of enum st_device_flags */ uint32_t device_support_flags; } device_properties_t; typedef struct { /* Name of peer that sent this result */ char *host; /* Only try peers for non-topology based operations once */ gboolean tried; /* Number of entries in the devices table */ int ndevices; /* Devices available to this host that are capable of fencing the target */ GHashTable *devices; } peer_device_info_t; GHashTable *stonith_remote_op_list = NULL; extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static void request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer); static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup); static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); static int get_op_total_timeout(const remote_fencing_op_t *op, const peer_device_info_t *chosen_peer); static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } static void free_remote_query(gpointer data) { if (data != NULL) { peer_device_info_t *peer = data; g_hash_table_destroy(peer->devices); free(peer->host); free(peer); } } void free_stonith_remote_op_list(void) { if (stonith_remote_op_list != NULL) { g_hash_table_destroy(stonith_remote_op_list); stonith_remote_op_list = NULL; } } struct peer_count_data { const remote_fencing_op_t *op; gboolean verified_only; uint32_t support_action_only; int count; }; /*! * \internal * \brief Increment a counter if a device has not been executed yet * * \param[in] key Device ID (ignored) * \param[in] value Device properties * \param[in,out] user_data Peer count data */ static void count_peer_device(gpointer key, gpointer value, gpointer user_data) { device_properties_t *props = (device_properties_t*)value; struct peer_count_data *data = user_data; if (!props->executed[data->op->phase] && (!data->verified_only || props->verified) && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) { ++(data->count); } } /*! * \internal * \brief Check the number of available devices in a peer's query results * * \param[in] op Operation that results are for * \param[in] peer Peer to count * \param[in] verified_only Whether to count only verified devices * \param[in] support_action_only Whether to count only devices that support action * * \return Number of devices available to peer that were not already executed */ static int count_peer_devices(const remote_fencing_op_t *op, const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only) { struct peer_count_data data; data.op = op; data.verified_only = verified_only; data.support_action_only = support_on_action_only; data.count = 0; if (peer) { g_hash_table_foreach(peer->devices, count_peer_device, &data); } return data.count; } /*! * \internal * \brief Search for a device in a query result * * \param[in] op Operation that result is for * \param[in] peer Query result for a peer * \param[in] device Device ID to search for * * \return Device properties if found, NULL otherwise */ static device_properties_t * find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer, const char *device, uint32_t support_action_only) { device_properties_t *props = g_hash_table_lookup(peer->devices, device); if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) { return NULL; } return (props && !props->executed[op->phase] && !props->disallowed[op->phase])? props : NULL; } /*! * \internal * \brief Find a device in a peer's device list and mark it as executed * * \param[in] op Operation that peer result is for * \param[in,out] peer Peer with results to search * \param[in] device ID of device to mark as done * \param[in] verified_devices_only Only consider verified devices * * \return TRUE if device was found and marked, FALSE otherwise */ static gboolean grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer, const char *device, gboolean verified_devices_only) { device_properties_t *props = find_peer_device(op, peer, device, fenced_support_flag(op->action)); if ((props == NULL) || (verified_devices_only && !props->verified)) { return FALSE; } crm_trace("Removing %s from %s (%d remaining)", device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none)); props->executed[op->phase] = TRUE; return TRUE; } static void clear_remote_op_timers(remote_fencing_op_t * op) { if (op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_log_xml_debug(op->request, "Destroying"); clear_remote_op_timers(op); free(op->id); free(op->action); free(op->delegate); free(op->target); free(op->client_id); free(op->client_name); free(op->originator); if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if (op->request) { pcmk__xml_free(op->request); op->request = NULL; } if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } g_list_free_full(op->automatic_list, free); g_list_free(op->duplicates); pcmk__reset_result(&op->result); free(op); } void init_stonith_remote_op_hash_table(GHashTable **table) { if (*table == NULL) { *table = pcmk__strkey_table(NULL, free_remote_op); } } /*! * \internal * \brief Return an operation's originally requested action (before any remap) * * \param[in] op Operation to check * * \return Operation's original action */ static const char * op_requested_action(const remote_fencing_op_t *op) { return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action); } /*! * \internal * \brief Remap a "reboot" operation to the "off" phase * * \param[in,out] op Operation to remap */ static void op_phase_off(remote_fencing_op_t *op) { crm_info("Remapping multiple-device reboot targeting %s to 'off' " QB_XS " id=%.8s", op->target, op->id); op->phase = st_phase_off; /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the * memory allocation at each phase. */ strcpy(op->action, PCMK_ACTION_OFF); } /*! * \internal * \brief Advance a remapped reboot operation to the "on" phase * * \param[in,out] op Operation to remap */ static void op_phase_on(remote_fencing_op_t *op) { GList *iter = NULL; crm_info("Remapped 'off' targeting %s complete, " "remapping to 'on' for %s " QB_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_on; strcpy(op->action, PCMK_ACTION_ON); /* Skip devices with automatic unfencing, because the cluster will handle it * when the node rejoins. */ for (iter = op->automatic_list; iter != NULL; iter = iter->next) { GList *match = g_list_find_custom(op->devices_list, iter->data, sort_strings); if (match) { op->devices_list = g_list_remove(op->devices_list, match->data); } } g_list_free_full(op->automatic_list, free); op->automatic_list = NULL; /* Rewind device list pointer */ op->devices = op->devices_list; } /*! * \internal * \brief Reset a remapped reboot operation * * \param[in,out] op Operation to reset */ static void undo_op_remap(remote_fencing_op_t *op) { if (op->phase > 0) { crm_info("Undoing remap of reboot targeting %s for %s " QB_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_requested; strcpy(op->action, PCMK_ACTION_REBOOT); } } /*! * \internal * \brief Create notification data XML for a fencing operation result * * \param[in,out] parent Parent XML element for newly created element * \param[in] op Fencer operation that completed * * \return Newly created XML to add as notification data * \note The caller is responsible for freeing the result. */ static xmlNode * fencing_result2xml(xmlNode *parent, const remote_fencing_op_t *op) { xmlNode *notify_data = pcmk__xe_create(parent, PCMK__XE_ST_NOTIFY_FENCE); - crm_xml_add_int(notify_data, PCMK_XA_STATE, op->state); + pcmk__xe_set_int(notify_data, PCMK_XA_STATE, op->state); crm_xml_add(notify_data, PCMK__XA_ST_TARGET, op->target); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ACTION, op->action); crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, op->delegate); crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, op->originator); crm_xml_add(notify_data, PCMK__XA_ST_CLIENTID, op->client_id); crm_xml_add(notify_data, PCMK__XA_ST_CLIENTNAME, op->client_name); return notify_data; } /*! * \internal * \brief Broadcast a fence result notification to all CPG peers * * \param[in] op Fencer operation that completed * \param[in] op_merged Whether this operation is a duplicate of another */ void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged) { static int count = 0; xmlNode *bcast = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); xmlNode *wrapper = NULL; xmlNode *notify_data = NULL; count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); - crm_xml_add_int(bcast, PCMK_XA_COUNT, count); + pcmk__xe_set_int(bcast, PCMK_XA_COUNT, count); if (op_merged) { pcmk__xe_set_bool_attr(bcast, PCMK__XA_ST_OP_MERGED, true); } wrapper = pcmk__xe_create(bcast, PCMK__XE_ST_CALLDATA); notify_data = fencing_result2xml(wrapper, op); stonith__xe_set_result(notify_data, &op->result); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, bcast); pcmk__xml_free(bcast); return; } /*! * \internal * \brief Reply to a local request originator and notify all subscribed clients * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; pcmk__client_t *client = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ - crm_xml_add_int(data, PCMK_XA_STATE, op->state); + pcmk__xe_set_int(data, PCMK_XA_STATE, op->state); crm_xml_add(data, PCMK__XA_ST_TARGET, op->target); crm_xml_add(data, PCMK__XA_ST_OP, op->action); reply = fenced_construct_reply(op->request, data, &op->result); crm_xml_add(reply, PCMK__XA_ST_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ client = pcmk__find_client_by_id(op->client_id); if (client == NULL) { crm_trace("Skipping reply to %s: no longer a client", op->client_id); } else { do_local_reply(reply, client, op->call_options); } /* bcast to all local clients that the fencing operation happend */ notify_data = fencing_result2xml(NULL, op); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, &op->result, notify_data); pcmk__xml_free(notify_data); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); /* mark this op as having notify's already sent */ op->notify_sent = TRUE; pcmk__xml_free(reply); } /*! * \internal * \brief Finalize all duplicates of a given fencer operation * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data) { for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if (other->state == st_duplicate) { other->state = op->state; crm_debug("Performing duplicate notification for %s@%s: %s " QB_XS " id=%.8s", other->client_name, other->originator, pcmk_exec_status_str(op->result.execution_status), other->id); pcmk__copy_result(&op->result, &other->result); finalize_op(other, data, true); } else { // Possible if (for example) it timed out already crm_err("Skipping duplicate notification for %s@%s " QB_XS " state=%s id=%.8s", other->client_name, other->originator, stonith_op_state_str(other->state), other->id); } } } static char * delegate_from_xml(xmlNode *xml) { xmlNode *match = pcmk__xpath_find_one(xml->doc, "//*[@" PCMK__XA_ST_DELEGATE "]", LOG_NEVER); if (match == NULL) { return pcmk__xe_get_copy(xml, PCMK__XA_SRC); } else { return pcmk__xe_get_copy(match, PCMK__XA_ST_DELEGATE); } } /*! * \internal * \brief Finalize a peer fencing operation * * Clean up after a fencing operation completes. This function has two code * paths: the executioner uses it to broadcast the result to CPG peers, and then * each peer (including the executioner) uses it to process that broadcast and * notify its IPC clients of the result. * * \param[in,out] op Fencer operation that completed * \param[in,out] data If not NULL, XML reply of last delegated operation * \param[in] dup Whether this operation is a duplicate of another * (in which case, do not broadcast the result) * * \note The operation result should be set before calling this function. */ static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup) { int level = LOG_ERR; const char *subt = NULL; xmlNode *local_data = NULL; gboolean op_merged = FALSE; CRM_CHECK((op != NULL), return); // This is a no-op if timers have already been cleared clear_remote_op_timers(op); if (op->notify_sent) { // Most likely, this is a timed-out action that eventually completed crm_notice("Operation '%s'%s%s by %s for %s@%s%s: " "Result arrived too late " QB_XS " id=%.8s", op->action, (op->target? " targeting " : ""), (op->target? op->target : ""), (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, (op_merged? " (merged)" : ""), op->id); return; } set_fencing_completed(op); undo_op_remap(op); if (data == NULL) { data = pcmk__xe_create(NULL, "remote-op"); local_data = data; } else if (op->delegate == NULL) { switch (op->result.execution_status) { case PCMK_EXEC_NO_FENCE_DEVICE: break; case PCMK_EXEC_INVALID: if (op->result.exit_status != CRM_EX_EXPIRED) { op->delegate = delegate_from_xml(data); } break; default: op->delegate = delegate_from_xml(data); break; } } if (dup || (pcmk__xe_get(data, PCMK__XA_ST_OP_MERGED) != NULL)) { op_merged = true; } /* Tell everyone the operation is done, we will continue * with doing the local notifications once we receive * the broadcast back. */ subt = pcmk__xe_get(data, PCMK__XA_SUBT); if (!dup && !pcmk__str_eq(subt, PCMK__VALUE_BROADCAST, pcmk__str_none)) { /* Defer notification until the bcast message arrives */ fenced_broadcast_op_result(op, op_merged); pcmk__xml_free(local_data); return; } if (pcmk__result_ok(&op->result) || dup || !pcmk__str_eq(op->originator, fenced_get_local_node(), pcmk__str_casei)) { level = LOG_NOTICE; } do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) " QB_XS " id=%.8s", op->action, (op->target? " targeting " : ""), (op->target? op->target : ""), (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, (op_merged? " (merged)" : ""), crm_exit_str(op->result.exit_status), pcmk_exec_status_str(op->result.execution_status), ((op->result.exit_reason == NULL)? "" : ": "), ((op->result.exit_reason == NULL)? "" : op->result.exit_reason), op->id); handle_local_reply_and_notify(op, data); if (!dup) { finalize_op_duplicates(op, data); } /* Free non-essential parts of the record * Keep the record around so we can query the history */ if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if (op->request) { pcmk__xml_free(op->request); op->request = NULL; } pcmk__xml_free(local_data); } /*! * \internal * \brief Finalize a watchdog fencer op after the waiting time expires * * \param[in,out] userdata Fencer operation that completed * * \return G_SOURCE_REMOVE (which tells glib not to restart timer) */ static gboolean remote_op_watchdog_done(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Self-fencing (%s) by %s for %s assumed complete " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); op->state = st_done; pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); finalize_op(op, NULL, false); return G_SOURCE_REMOVE; } static gboolean remote_op_timeout_one(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Peer's '%s' action targeting %s for client %s timed out " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, "Peer did not return fence result within timeout"); // The requested delay has been applied for the first device if (op->client_delay > 0) { op->client_delay = 0; crm_trace("Try another device for '%s' action targeting %s " "for client %s without delay " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); } // Try another device, if appropriate request_peer_fencing(op, NULL); return G_SOURCE_REMOVE; } /*! * \internal * \brief Finalize a remote fencer operation that timed out * * \param[in,out] op Fencer operation that timed out * \param[in] reason Readable description of what step timed out */ static void finalize_timed_out_op(remote_fencing_op_t *op, const char *reason) { crm_debug("Action '%s' targeting %s for client %s timed out " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); if (op->phase == st_phase_on) { /* A remapped reboot operation timed out in the "on" phase, but the * "off" phase completed successfully, so quit trying any further * devices, and return success. */ op->state = st_done; pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { op->state = st_failed; pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason); } finalize_op(op, NULL, false); } /*! * \internal * \brief Finalize a remote fencer operation that timed out * * \param[in,out] userdata Fencer operation that timed out * * \return G_SOURCE_REMOVE (which tells glib not to restart timer) */ static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_total = 0; if (op->state == st_done) { crm_debug("Action '%s' targeting %s for client %s already completed " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); } else { finalize_timed_out_op(userdata, "Fencing did not complete within a " "total timeout based on the " "configured timeout and retries for " "any devices attempted"); } return G_SOURCE_REMOVE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if (op->state == st_done) { crm_debug("Operation %.8s targeting %s already completed", op->id, op->target); } else if (op->state == st_exec) { crm_debug("Operation %.8s targeting %s already in progress", op->id, op->target); } else if (op->query_results) { // Query succeeded, so attempt the actual fencing crm_debug("Query %.8s targeting %s complete (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); request_peer_fencing(op, NULL); } else { crm_debug("Query %.8s targeting %s timed out (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); finalize_timed_out_op(op, "No capable peers replied to device query " "within timeout"); } return G_SOURCE_REMOVE; } static gboolean topology_is_empty(stonith_topology_t *tp) { int i; if (tp == NULL) { return TRUE; } for (i = 0; i < ST__LEVEL_COUNT; i++) { if (tp->levels[i] != NULL) { return FALSE; } } return TRUE; } /*! * \internal * \brief Add a device to an operation's automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to add */ static void add_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (!match) { op->automatic_list = g_list_prepend(op->automatic_list, pcmk__str_copy(device)); } } /*! * \internal * \brief Remove a device from the automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to remove */ static void remove_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (match) { op->automatic_list = g_list_remove(op->automatic_list, match->data); } } /* deep copy the device list */ static void set_op_device_list(remote_fencing_op_t * op, GList *devices) { GList *lpc = NULL; if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } for (lpc = devices; lpc != NULL; lpc = lpc->next) { const char *device = lpc->data; op->devices_list = g_list_append(op->devices_list, pcmk__str_copy(device)); } op->devices = op->devices_list; } /*! * \internal * \brief Check whether a node matches a topology target * * \param[in] tp Topology table entry to check * \param[in] node Name of node to check * * \return TRUE if node matches topology target */ static gboolean topology_matches(const stonith_topology_t *tp, const char *node) { regex_t r_patt; CRM_CHECK(node && tp && tp->target, return FALSE); switch (tp->kind) { case fenced_target_by_attribute: /* This level targets by attribute, so tp->target is a NAME=VALUE pair * of a permanent attribute applied to targeted nodes. The test below * relies on the locally cached copy of the CIB, so if fencing needs to * be done before the initial CIB is received or after a malformed CIB * is received, then the topology will be unable to be used. */ if (node_has_attr(node, tp->target_attribute, tp->target_value)) { crm_notice("Matched %s with %s by attribute", node, tp->target); return TRUE; } break; case fenced_target_by_pattern: /* This level targets node names matching a pattern, so tp->target * (and tp->target_pattern) is a regular expression. */ if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) { crm_info("Bad regex '%s' for fencing level", tp->target); } else { int status = regexec(&r_patt, node, 0, NULL, 0); regfree(&r_patt); if (status == 0) { crm_notice("Matched %s with %s by name", node, tp->target); return TRUE; } } break; case fenced_target_by_name: crm_trace("Testing %s against %s", node, tp->target); return pcmk__str_eq(tp->target, node, pcmk__str_casei); default: break; } crm_trace("No match for %s with %s", node, tp->target); return FALSE; } stonith_topology_t * find_topology_for_host(const char *host) { GHashTableIter tIter; stonith_topology_t *tp = g_hash_table_lookup(topology, host); if(tp != NULL) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } g_hash_table_iter_init(&tIter, topology); while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { if (topology_matches(tp, host)) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } } crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology)); return NULL; } /*! * \internal * \brief Set fencing operation's device list to target's next topology level * * \param[in,out] op Remote fencing operation to modify * \param[in] empty_ok If true, an operation without a target (i.e. * queries) or a target without a topology will get a * pcmk_rc_ok return value instead of ENODEV * * \return Standard Pacemaker return value */ static int advance_topology_level(remote_fencing_op_t *op, bool empty_ok) { stonith_topology_t *tp = NULL; if (op->target) { tp = find_topology_for_host(op->target); } if (topology_is_empty(tp)) { return empty_ok? pcmk_rc_ok : ENODEV; } pcmk__assert(tp->levels != NULL); stonith__set_call_options(op->call_options, op->id, st_opt_topology); /* This is a new level, so undo any remapping left over from previous */ undo_op_remap(op); do { op->level++; } while (op->level < ST__LEVEL_COUNT && tp->levels[op->level] == NULL); if (op->level < ST__LEVEL_COUNT) { crm_trace("Attempting fencing level %d targeting %s (%d devices) " "for client %s@%s (id=%.8s)", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); // The requested delay has been applied for the first fencing level if ((op->level > 1) && (op->client_delay > 0)) { op->client_delay = 0; } if ((g_list_next(op->devices_list) != NULL) && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A reboot has been requested for a topology level with multiple * devices. Instead of rebooting the devices sequentially, we will * turn them all off, then turn them all on again. (Think about * switched power outlets for redundant power supplies.) */ op_phase_off(op); } return pcmk_rc_ok; } crm_info("All %sfencing options targeting %s for client %s@%s failed " QB_XS " id=%.8s", (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"", op->target, op->client_name, op->originator, op->id); return ENODEV; } /*! * \internal * \brief If fencing operation is a duplicate, merge it into the other one * * \param[in,out] op Fencing operation to check */ static void merge_duplicates(remote_fencing_op_t *op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; time_t now = time(NULL); g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { const char *other_action = op_requested_action(other); pcmk__node_status_t *node = NULL; if (!strcmp(op->id, other->id)) { continue; // Don't compare against self } if (other->state > st_exec) { crm_trace("%.8s not duplicate of %.8s: not in progress", op->id, other->id); continue; } if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: node %s vs. %s", op->id, other->id, op->target, other->target); continue; } if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) { crm_trace("%.8s not duplicate of %.8s: action %s vs. %s", op->id, other->id, op->action, other_action); continue; } if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: same client %s", op->id, other->id, op->client_name); continue; } if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: self-fencing for %s", op->id, other->id, other->target); continue; } node = pcmk__get_node(0, other->originator, NULL, pcmk__node_search_cluster_member); if (!fencing_peer_active(node)) { crm_notice("Failing action '%s' targeting %s originating from " "client %s@%s: Originator is dead " QB_XS " id=%.8s", other->action, other->target, other->client_name, other->originator, other->id); crm_trace("%.8s not duplicate of %.8s: originator dead", op->id, other->id); other->state = st_failed; continue; } if ((other->total_timeout > 0) && (now > (other->total_timeout + other->created))) { crm_trace("%.8s not duplicate of %.8s: old (%lld vs. %lld + %ds)", op->id, other->id, (long long)now, (long long)other->created, other->total_timeout); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if (other->total_timeout == 0) { other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); crm_trace("Best guess as to timeout used for %.8s: %ds", other->id, other->total_timeout); } crm_notice("Merging fencing action '%s' targeting %s originating from " "client %s with identical request from %s@%s " QB_XS " original=%.8s duplicate=%.8s total_timeout=%ds", op->action, op->target, op->client_name, other->client_name, other->originator, op->id, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } static uint32_t fencing_active_peers(void) { uint32_t count = 0; pcmk__node_status_t *entry = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if(fencing_peer_active(entry)) { count++; } } return count; } /*! * \internal * \brief Process a manual confirmation of a pending fence action * * \param[in] client IPC client that sent confirmation * \param[in,out] msg Request XML with manual confirmation * * \return Standard Pacemaker return code */ int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg) { remote_fencing_op_t *op = NULL; xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_ERR); CRM_CHECK(dev != NULL, return EPROTO); crm_notice("Received manual confirmation that %s has been fenced", pcmk__s(pcmk__xe_get(dev, PCMK__XA_ST_TARGET), "unknown target")); op = initiate_remote_stonith_op(client, msg, TRUE); if (op == NULL) { return EPROTO; } op->state = st_done; op->delegate = pcmk__str_copy("a human"); // For the fencer's purposes, the fencing operation is done pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); finalize_op(op, msg, false); /* For the requester's purposes, the operation is still pending. The * actual result will be sent asynchronously via the operation's done_cb(). */ return EINPROGRESS; } /*! * \internal * \brief Create a new remote stonith operation * * \param[in] client ID of local stonith client that initiated the operation * \param[in] request The request from the client that started the operation * \param[in] peer TRUE if this operation is owned by another stonith peer * (an operation owned by one peer is stored on all peers, * but only the owner executes it; all nodes get the results * once the owner finishes execution) */ void * create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = pcmk__xpath_find_one(request->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_NEVER); int rc = pcmk_rc_ok; const char *operation = NULL; init_stonith_remote_op_hash_table(&stonith_remote_op_list); /* If this operation is owned by another node, check to make * sure we haven't already created this operation. */ if (peer && dev) { const char *op_id = pcmk__xe_get(dev, PCMK__XA_ST_REMOTE_OP); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(stonith_remote_op_list, op_id); if (op) { crm_debug("Reusing existing remote fencing op %.8s for %s", op_id, ((client == NULL)? "unknown client" : client)); return op; } } op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t)); pcmk__xe_get_int(request, PCMK__XA_ST_TIMEOUT, &(op->base_timeout)); // Value -1 means disable any static/random fencing delays pcmk__xe_get_int(request, PCMK__XA_ST_DELAY, &(op->client_delay)); if (peer && dev) { op->id = pcmk__xe_get_copy(dev, PCMK__XA_ST_REMOTE_OP); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(stonith_remote_op_list, op->id, op); op->state = st_query; op->replies_expected = fencing_active_peers(); op->action = pcmk__xe_get_copy(dev, PCMK__XA_ST_DEVICE_ACTION); /* The node initiating the stonith operation. If an operation is relayed, * this is the last node the operation lands on. When in standalone mode, * origin is the ID of the client that originated the operation. * * Or may be the name of the function that created the operation. */ op->originator = pcmk__xe_get_copy(dev, PCMK__XA_ST_ORIGIN); if (op->originator == NULL) { /* Local or relayed request */ op->originator = pcmk__str_copy(fenced_get_local_node()); } // Delegate may not be set op->delegate = pcmk__xe_get_copy(dev, PCMK__XA_ST_DELEGATE); op->created = time(NULL); CRM_LOG_ASSERT(client != NULL); op->client_id = pcmk__str_copy(client); /* For a RELAY operation, set fenced on the client. */ operation = pcmk__xe_get(request, PCMK__XA_ST_OP); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { op->client_name = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } else { op->client_name = pcmk__xe_get_copy(request, PCMK__XA_ST_CLIENTNAME); } op->target = pcmk__xe_get_copy(dev, PCMK__XA_ST_TARGET); // @TODO Figure out how to avoid copying XML here op->request = pcmk__xml_copy(NULL, request); rc = pcmk__xe_get_flags(request, PCMK__XA_ST_CALLOPT, &(op->call_options), 0U); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from request %s: %s", op->id, pcmk_rc_str(rc)); } pcmk__xe_get_int(request, PCMK__XA_ST_CALLID, &(op->client_callid)); crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, " "base timeout %ds, %u %s expected)", (peer && dev)? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name, op->base_timeout, op->replies_expected, pcmk__plural_alt(op->replies_expected, "reply", "replies")); if (op->call_options & st_opt_cs_nodeid) { int nodeid; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(op->target, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); /* Ensure the conversion only happens once */ stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); if ((node != NULL) && (node->name != NULL)) { pcmk__str_update(&(op->target), node->name); } else { crm_warn("Could not expand nodeid '%s' into a host name", op->target); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); if (op->state != st_duplicate) { /* kick history readers */ fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } /* safe to trim as long as that doesn't touch pending ops */ stonith_fence_history_trim(); return op; } /*! * \internal * \brief Create a peer fencing operation from a request, and initiate it * * \param[in] client IPC client that made request (NULL to get from request) * \param[in] request Request XML * \param[in] manual_ack Whether this is a manual action confirmation * * \return Newly created operation on success, otherwise NULL */ remote_fencing_op_t * initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack) { int query_timeout = 0; xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; const char *relay_op_id = NULL; const char *operation = NULL; if (client) { client_id = client->id; } else { client_id = pcmk__xe_get(request, PCMK__XA_ST_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; if (manual_ack) { return op; } CRM_CHECK(op->action, return NULL); if (advance_topology_level(op, true) != pcmk_rc_ok) { op->state = st_failed; } switch (op->state) { case st_failed: // advance_topology_level() exhausted levels pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "All topology levels failed"); crm_warn("Could not request peer fencing (%s) targeting %s " QB_XS " id=%.8s", op->action, op->target, op->id); finalize_op(op, NULL, false); return op; case st_duplicate: crm_info("Requesting peer fencing (%s) targeting %s (duplicate) " QB_XS " id=%.8s", op->action, op->target, op->id); return op; default: crm_notice("Requesting peer fencing (%s) targeting %s " QB_XS " id=%.8s state=%s base_timeout=%ds", op->action, op->target, op->id, stonith_op_state_str(op->state), op->base_timeout); } query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, op->call_options); crm_xml_add(query, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(query, PCMK__XA_ST_TARGET, op->target); crm_xml_add(query, PCMK__XA_ST_DEVICE_ACTION, op_requested_action(op)); crm_xml_add(query, PCMK__XA_ST_ORIGIN, op->originator); crm_xml_add(query, PCMK__XA_ST_CLIENTID, op->client_id); crm_xml_add(query, PCMK__XA_ST_CLIENTNAME, op->client_name); - crm_xml_add_int(query, PCMK__XA_ST_TIMEOUT, op->base_timeout); + pcmk__xe_set_int(query, PCMK__XA_ST_TIMEOUT, op->base_timeout); /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */ operation = pcmk__xe_get(request, PCMK__XA_ST_OP); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { relay_op_id = pcmk__xe_get(request, PCMK__XA_ST_REMOTE_OP); if (relay_op_id) { crm_xml_add(query, PCMK__XA_ST_REMOTE_OP_RELAY, relay_op_id); } } pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, query); pcmk__xml_free(query); query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = pcmk__create_timer((1000 * query_timeout), remote_op_query_timeout, op); return op; } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static bool is_watchdog_fencing(const remote_fencing_op_t *op, const char *device) { return (stonith_watchdog_timeout_ms > 0 // Only an explicit mismatch is considered not a watchdog fencing. && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches) && pcmk__is_fencing_action(op->action) && node_does_watchdog_fencing(op->target)); } static peer_device_info_t * find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) { GList *iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE; if (!device && pcmk_is_set(op->call_options, st_opt_topology)) { return NULL; } for (iter = op->query_results; iter != NULL; iter = iter->next) { peer_device_info_t *peer = iter->data; crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", peer->host, op->target, peer->ndevices, pcmk__plural_s(peer->ndevices), peer->tried, options); if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if (pcmk_is_set(op->call_options, st_opt_topology)) { if (grab_peer_device(op, peer, device, verified_devices_only)) { return peer; } } else if (!peer->tried && count_peer_devices(op, peer, verified_devices_only, fenced_support_flag(op->action))) { /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static peer_device_info_t * stonith_choose_peer(remote_fencing_op_t * op) { const char *device = NULL; peer_device_info_t *peer = NULL; uint32_t active = fencing_active_peers(); do { if (op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence (%s) %s using %s", op->action, op->target, device); } else { crm_trace("Checking for someone to fence (%s) %s", op->action, op->target); } /* Best choice is a peer other than the target with verified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); if (peer) { crm_trace("Found verified peer %s for %s", peer->host, device?device:""); return peer; } if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { crm_trace("Waiting before looking for unverified devices to fence %s", op->target); return NULL; } /* If no other peer has verified access, next best is unverified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); if (peer) { crm_trace("Found best unverified peer %s", peer->host); return peer; } /* If no other peer can do it, last option is self-fencing * (which is never allowed for the "on" phase of a remapped reboot) */ if (op->phase != st_phase_on) { peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); if (peer) { crm_trace("%s will fence itself", peer->host); return peer; } } /* Try the next fencing level if there is one (unless we're in the "on" * phase of a remapped "reboot", because we ignore errors in that case) */ } while ((op->phase != st_phase_on) && pcmk_is_set(op->call_options, st_opt_topology) && (advance_topology_level(op, false) == pcmk_rc_ok)); /* With a simple watchdog fencing configuration without a topology, * "device" is NULL here. Consider it should be done with watchdog fencing. */ if (is_watchdog_fencing(op, device)) { crm_info("Couldn't contact watchdog-fencing target-node (%s)", op->target); /* check_watchdog_fencing_and_wait will log additional info */ } else { crm_notice("Couldn't find anyone to fence (%s) %s using %s", op->action, op->target, (device? device : "any device")); } return NULL; } static int valid_fencing_timeout(int specified_timeout, bool action_specific, const remote_fencing_op_t *op, const char *device) { int timeout = specified_timeout; if (!is_watchdog_fencing(op, device)) { return timeout; } timeout = (int) QB_MIN(QB_MAX(specified_timeout, pcmk__timeout_ms2s(stonith_watchdog_timeout_ms)), INT_MAX); if (timeout > specified_timeout) { if (action_specific) { crm_warn("pcmk_%s_timeout %ds for %s is too short (must be >= " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds " "instead", op->action, specified_timeout, device? device : "watchdog", timeout, timeout); } else { crm_warn("Fencing timeout %ds is too short (must be >= " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds " "instead", specified_timeout, timeout, timeout); } } return timeout; } static int get_device_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer, const char *device, bool with_delay) { int timeout = op->base_timeout; device_properties_t *props; timeout = valid_fencing_timeout(op->base_timeout, false, op, device); if (!peer || !device) { return timeout; } props = g_hash_table_lookup(peer->devices, device); if (!props) { return timeout; } if (props->custom_action_timeout[op->phase]) { timeout = valid_fencing_timeout(props->custom_action_timeout[op->phase], true, op, device); } // op->client_delay < 0 means disable any static/random fencing delays if (with_delay && (op->client_delay >= 0)) { // delay_base is eventually limited by delay_max timeout += (props->delay_max[op->phase] > 0 ? props->delay_max[op->phase] : props->delay_base[op->phase]); } return timeout; } struct timeout_data { const remote_fencing_op_t *op; const peer_device_info_t *peer; int total_timeout; }; /*! * \internal * \brief Add timeout to a total if device has not been executed yet * * \param[in] key GHashTable key (device ID) * \param[in] value GHashTable value (device properties) * \param[in,out] user_data Timeout data */ static void add_device_timeout(gpointer key, gpointer value, gpointer user_data) { const char *device_id = key; device_properties_t *props = value; struct timeout_data *timeout = user_data; if (!props->executed[timeout->op->phase] && !props->disallowed[timeout->op->phase]) { timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer, device_id, true); } } static int get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer) { struct timeout_data timeout; timeout.op = op; timeout.peer = peer; timeout.total_timeout = 0; g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); return (timeout.total_timeout? timeout.total_timeout : op->base_timeout); } static int get_op_total_timeout(const remote_fencing_op_t *op, const peer_device_info_t *chosen_peer) { long long total_timeout = 0; stonith_topology_t *tp = find_topology_for_host(op->target); if (pcmk_is_set(op->call_options, st_opt_topology) && tp) { int i; GList *device_list = NULL; GList *iter = NULL; GList *auto_list = NULL; if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none) && (op->automatic_list != NULL)) { auto_list = g_list_copy(op->automatic_list); } /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. */ for (i = 0; i < ST__LEVEL_COUNT; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { bool found = false; for (iter = op->query_results; iter != NULL; iter = iter->next) { const peer_device_info_t *peer = iter->data; if (auto_list) { GList *match = g_list_find_custom(auto_list, device_list->data, sort_strings); if (match) { auto_list = g_list_remove(auto_list, match->data); } } if (find_peer_device(op, peer, device_list->data, fenced_support_flag(op->action))) { total_timeout += get_device_timeout(op, peer, device_list->data, true); found = true; break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ /* in case of watchdog-device we add the timeout to the budget if didn't get a reply */ if (!found && is_watchdog_fencing(op, device_list->data)) { total_timeout += pcmk__timeout_ms2s(stonith_watchdog_timeout_ms); } } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ //Add only exists automatic_list device timeout if (auto_list) { for (iter = auto_list; iter != NULL; iter = iter->next) { GList *iter2 = NULL; for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) { peer_device_info_t *peer = iter2->data; if (find_peer_device(op, peer, iter->data, st_device_supports_on)) { total_timeout += get_device_timeout(op, peer, iter->data, true); break; } } } } g_list_free(auto_list); } else if (chosen_peer) { total_timeout = get_peer_timeout(op, chosen_peer); } else { total_timeout = valid_fencing_timeout(op->base_timeout, false, op, NULL); } if (total_timeout <= 0) { total_timeout = op->base_timeout; } /* Take any requested fencing delay into account to prevent it from eating * up the total timeout. */ if (op->client_delay > 0) { total_timeout += op->client_delay; } return (int) QB_MIN(total_timeout, INT_MAX); } static void report_timeout_period(remote_fencing_op_t * op, int op_timeout) { GList *iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a synchronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id); client_node = pcmk__xe_get(op->request, PCMK__XA_ST_CLIENTNODE); call_id = pcmk__xe_get(op->request, PCMK__XA_ST_CALLID); client_id = pcmk__xe_get(op->request, PCMK__XA_ST_CLIENTID); if (!client_node || !call_id || !client_id) { return; } if (pcmk__str_eq(client_node, fenced_get_local_node(), pcmk__str_casei)) { // Client is connected to this node, so send update directly to them do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node, relay this update to them */ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(update, PCMK__XA_ST_CLIENTID, client_id); crm_xml_add(update, PCMK__XA_ST_CALLID, call_id); - crm_xml_add_int(update, PCMK__XA_ST_TIMEOUT, op_timeout); + pcmk__xe_set_int(update, PCMK__XA_ST_TIMEOUT, op_timeout); pcmk__cluster_send_message(pcmk__get_node(0, client_node, NULL, pcmk__node_search_cluster_member), pcmk_ipc_fenced, update); pcmk__xml_free(update); for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %.8s to client %s", dup->id, dup->client_name); report_timeout_period(iter->data, op_timeout); } } /*! * \internal * \brief Advance an operation to the next device in its topology * * \param[in,out] op Fencer operation to advance * \param[in] device ID of device that just completed * \param[in,out] msg If not NULL, XML reply of last delegated operation */ static void advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, xmlNode *msg) { /* Advance to the next device at this topology level, if any */ if (op->devices) { op->devices = op->devices->next; } /* Handle automatic unfencing if an "on" action was requested */ if ((op->phase == st_phase_requested) && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) { /* If the device we just executed was required, it's not anymore */ remove_required_device(op, device); /* If there are no more devices at this topology level, run through any * remaining devices with automatic unfencing */ if (op->devices == NULL) { op->devices = op->automatic_list; } } if ((op->devices == NULL) && (op->phase == st_phase_off)) { /* We're done with this level and with required devices, but we had * remapped "reboot" to "off", so start over with "on". If any devices * need to be turned back on, op->devices will be non-NULL after this. */ op_phase_on(op); } // This function is only called if the previous device succeeded pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (op->devices) { /* Necessary devices remain, so execute the next one */ crm_trace("Next targeting %s on behalf of %s@%s", op->target, op->client_name, op->originator); // The requested delay has been applied for the first device if (op->client_delay > 0) { op->client_delay = 0; } request_peer_fencing(op, NULL); } else { /* We're done with all devices and phases, so finalize operation */ crm_trace("Marking complex fencing op targeting %s as complete", op->target); op->state = st_done; finalize_op(op, msg, false); } } static gboolean check_watchdog_fencing_and_wait(remote_fencing_op_t * op) { if (node_does_watchdog_fencing(op->target)) { guint timeout_ms = QB_MIN(stonith_watchdog_timeout_ms, UINT_MAX); crm_notice("Waiting %s for %s to self-fence (%s) for " "client %s " QB_XS " id=%.8s", pcmk__readable_interval(timeout_ms), op->target, op->action, op->client_name, op->id); if (op->op_timer_one) { g_source_remove(op->op_timer_one); } op->op_timer_one = pcmk__create_timer(timeout_ms, remote_op_watchdog_done, op); return TRUE; } else { crm_debug("Skipping fallback to watchdog-fencing as %s is " "not in host-list", op->target); } return FALSE; } /*! * \internal * \brief Ask a peer to execute a fencing operation * * \param[in,out] op Fencing operation to be executed * \param[in,out] peer If NULL or topology is in use, choose best peer to * execute the fencing, otherwise use this peer */ static void request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) { const char *device = NULL; int timeout; CRM_CHECK(op != NULL, return); crm_trace("Action %.8s targeting %s for %s is %s", op->id, op->target, op->client_name, stonith_op_state_str(op->state)); if ((op->phase == st_phase_on) && (op->devices != NULL)) { /* We are in the "on" phase of a remapped topology reboot. If this * device has pcmk_reboot_action="off", or doesn't support the "on" * action, skip it. * * We can't check device properties at this point because we haven't * chosen a peer for this stage yet. Instead, we check the local node's * knowledge about the device. If different versions of the fence agent * are installed on different nodes, there's a chance this could be * mistaken, but the worst that could happen is we don't try turning the * node back on when we should. */ device = op->devices->data; if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF, pcmk__str_none)) { crm_info("Not turning %s back on using %s because the device is " "configured to stay off (pcmk_reboot_action='off')", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } if (!fenced_device_supports_on(device)) { crm_info("Not turning %s back on using %s because the agent " "doesn't support 'on'", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } } timeout = op->base_timeout; if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if (!op->op_timer_total) { op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer); op->op_timer_total = pcmk__create_timer(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); crm_info("Total timeout set to %ds for peer's fencing targeting %s for %s " QB_XS " id=%.8s", op->total_timeout, op->target, op->client_name, op->id); } if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore the caller's peer preference if topology is in use, because * that peer might not have access to the required device. With * topology, stonith_choose_peer() removes the device from further * consideration, so the timeout must be calculated beforehand. * * @TODO Basing the total timeout on the caller's preferred peer (above) * is less than ideal. */ peer = stonith_choose_peer(op); device = op->devices->data; /* Fencing timeout sent to peer takes no delay into account. * The peer will add a dedicated timer for any delay upon * schedule_stonith_command(). */ timeout = get_device_timeout(op, peer, device, false); } if (peer) { int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); const pcmk__node_status_t *peer_node = pcmk__get_node(0, peer->host, NULL, pcmk__node_search_cluster_member); if (op->client_delay > 0) { /* Take requested fencing delay into account to prevent it from * eating up the timeout. */ timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay; } crm_xml_add(remote_op, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(remote_op, PCMK__XA_ST_TARGET, op->target); crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ACTION, op->action); crm_xml_add(remote_op, PCMK__XA_ST_ORIGIN, op->originator); crm_xml_add(remote_op, PCMK__XA_ST_CLIENTID, op->client_id); crm_xml_add(remote_op, PCMK__XA_ST_CLIENTNAME, op->client_name); - crm_xml_add_int(remote_op, PCMK__XA_ST_TIMEOUT, timeout); - crm_xml_add_int(remote_op, PCMK__XA_ST_CALLOPT, op->call_options); - crm_xml_add_int(remote_op, PCMK__XA_ST_DELAY, op->client_delay); + pcmk__xe_set_int(remote_op, PCMK__XA_ST_TIMEOUT, timeout); + pcmk__xe_set_int(remote_op, PCMK__XA_ST_CALLOPT, op->call_options); + pcmk__xe_set_int(remote_op, PCMK__XA_ST_DELAY, op->client_delay); if (device) { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(op, peer, device, true); crm_notice("Requesting that %s perform '%s' action targeting %s " "using %s " QB_XS " for client %s (%ds)", peer->host, op->action, op->target, device, op->client_name, timeout_one); crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ID, device); } else { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer); crm_notice("Requesting that %s perform '%s' action targeting %s " QB_XS " for client %s (%ds, %s)", peer->host, op->action, op->target, op->client_name, timeout_one, pcmk__readable_interval(stonith_watchdog_timeout_ms)); } op->state = st_exec; if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } if (!is_watchdog_fencing(op, device) || !check_watchdog_fencing_and_wait(op)) { /* Some thoughts about self-fencing cases reaching this point: - Actually check in check_watchdog_fencing_and_wait shouldn't fail if STONITH_WATCHDOG_ID is chosen as fencing-device and it being present implies watchdog-fencing is enabled anyway - If watchdog-fencing is disabled either in general or for a specific target - detected in check_watchdog_fencing_and_wait - for some other kind of self-fencing we can't expect a success answer but timeout is fine if the node doesn't come back in between - Delicate might be the case where we have watchdog-fencing enabled for a node but the watchdog-fencing-device isn't explicitly chosen for self-fencing. Local scheduler execution in sbd might detect the node as unclean and lead to timely self-fencing. Otherwise the selection of PCMK_OPT_STONITH_WATCHDOG_TIMEOUT at least is questionable. */ /* coming here we're not waiting for watchdog timeout - thus engage timer with timout evaluated before */ op->op_timer_one = pcmk__create_timer((1000 * timeout_one), remote_op_timeout_one, op); } pcmk__cluster_send_message(peer_node, pcmk_ipc_fenced, remote_op); peer->tried = TRUE; pcmk__xml_free(remote_op); return; } else if (op->phase == st_phase_on) { /* A remapped "on" cannot be executed, but the node was already * turned off successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s " "after successful 'off'", device, op->target); advance_topology_device_in_level(op, device, NULL); return; } else if (op->owner == FALSE) { crm_err("Fencing (%s) targeting %s for client %s is not ours to control", op->action, op->target, op->client_name); } else if (op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of fencing (%s) %s for client %s " QB_XS " state=%s", op->action, op->target, op->client_name, stonith_op_state_str(op->state)); CRM_CHECK(op->state < st_done, return); finalize_timed_out_op(op, "All nodes failed, or are unable, to " "fence target"); } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { /* if the operation never left the query state, * but we have all the expected replies, then no devices * are available to execute the fencing operation. */ if (is_watchdog_fencing(op, device) && check_watchdog_fencing_and_wait(op)) { /* Consider a watchdog fencing targeting an offline node executing * once it starts waiting for the target to self-fence. So that when * the query timer pops, remote_op_query_timeout() considers the * fencing already in progress. */ op->state = st_exec; return; } if (op->state == st_query) { crm_info("No peers (out of %d) have devices capable of fencing " "(%s) %s for client %s " QB_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } else { if (pcmk_is_set(op->call_options, st_opt_topology)) { pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } /* ... else use existing result from previous failed attempt * (topology is not in use, and no devices remain to be attempted). * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would * prevent finalize_op() from setting the correct delegate if * needed. */ crm_info("No peers (out of %d) are capable of fencing (%s) %s " "for client %s " QB_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); } op->state = st_failed; finalize_op(op, NULL, false); } else { crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s " "for client %s " QB_XS " id=%.8s", op->action, op->target, (device? " using " : ""), (device? device : ""), op->client_name, op->id); } } /*! * \internal * \brief Comparison function for sorting query results * * \param[in] a GList item to compare * \param[in] b GList item to compare * * \return Per the glib documentation, "a negative integer if the first value * comes before the second, 0 if they are equal, or a positive integer * if the first value comes after the second." */ static gint sort_peers(gconstpointer a, gconstpointer b) { const peer_device_info_t *peer_a = a; const peer_device_info_t *peer_b = b; return (peer_b->ndevices - peer_a->ndevices); } /*! * \internal * \brief Determine if all the devices in the topology are found or not * * \param[in] op Fencing operation with topology to check */ static gboolean all_topology_devices_found(const remote_fencing_op_t *op) { GList *device = NULL; GList *iter = NULL; device_properties_t *match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = find_topology_for_host(op->target); if (!tp) { return FALSE; } if (pcmk__is_fencing_action(op->action)) { /* Don't count the devices on the target node if we are killing * the target node. */ skip_target = TRUE; } for (i = 0; i < ST__LEVEL_COUNT; i++) { for (device = tp->levels[i]; device; device = device->next) { match = NULL; for (iter = op->query_results; iter && !match; iter = iter->next) { peer_device_info_t *peer = iter->data; if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } match = find_peer_device(op, peer, device->data, st_device_supports_none); } if (!match) { return FALSE; } } } return TRUE; } /*! * \internal * \brief Parse action-specific device properties from XML * * \param[in] xml XML element containing the properties * \param[in] peer Name of peer that sent XML (for logs) * \param[in] device Device ID (for logs) * \param[in] action Action the properties relate to (for logs) * \param[in,out] op Fencing operation that properties are being parsed for * \param[in] phase Phase the properties relate to * \param[in,out] props Device properties to update */ static void parse_action_specific(const xmlNode *xml, const char *peer, const char *device, const char *action, remote_fencing_op_t *op, enum st_remap_phase phase, device_properties_t *props) { props->custom_action_timeout[phase] = 0; pcmk__xe_get_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, &props->custom_action_timeout[phase]); if (props->custom_action_timeout[phase]) { crm_trace("Peer %s with device %s returned %s action timeout %ds", peer, device, action, props->custom_action_timeout[phase]); } props->delay_max[phase] = 0; pcmk__xe_get_int(xml, PCMK__XA_ST_DELAY_MAX, &props->delay_max[phase]); if (props->delay_max[phase]) { crm_trace("Peer %s with device %s returned maximum of random delay %ds for %s", peer, device, props->delay_max[phase], action); } props->delay_base[phase] = 0; pcmk__xe_get_int(xml, PCMK__XA_ST_DELAY_BASE, &props->delay_base[phase]); if (props->delay_base[phase]) { crm_trace("Peer %s with device %s returned base delay %ds for %s", peer, device, props->delay_base[phase], action); } /* Handle devices with automatic unfencing */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { int required = 0; pcmk__xe_get_int(xml, PCMK__XA_ST_REQUIRED, &required); if (required) { crm_trace("Peer %s requires device %s to execute for action %s", peer, device, action); add_required_device(op, device); } } /* If a reboot is remapped to off+on, it's possible that a node is allowed * to perform one action but not another. */ if (pcmk__xe_attr_is_true(xml, PCMK__XA_ST_ACTION_DISALLOWED)) { props->disallowed[phase] = TRUE; crm_trace("Peer %s is disallowed from executing %s for device %s", peer, action, device); } } /*! * \internal * \brief Parse one device's properties from peer's XML query reply * * \param[in] xml XML node containing device properties * \param[in,out] op Operation that query and reply relate to * \param[in,out] peer Peer's device information * \param[in] device ID of device being parsed */ static void add_device_properties(const xmlNode *xml, remote_fencing_op_t *op, peer_device_info_t *peer, const char *device) { xmlNode *child; int verified = 0; device_properties_t *props = pcmk__assert_alloc(1, sizeof(device_properties_t)); int rc = pcmk_rc_ok; /* Add a new entry to this peer's devices list */ g_hash_table_insert(peer->devices, pcmk__str_copy(device), props); /* Peers with verified (monitored) access will be preferred */ pcmk__xe_get_int(xml, PCMK__XA_ST_MONITOR_VERIFIED, &verified); if (verified) { crm_trace("Peer %s has confirmed a verified device %s", peer->host, device); props->verified = TRUE; } // Nodes <2.1.5 won't set this, so assume unfencing in that case rc = pcmk__xe_get_flags(xml, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, &(props->device_support_flags), st_device_supports_on); if (rc != pcmk_rc_ok) { crm_warn("Couldn't determine device support for %s " "(assuming unfencing): %s", device, pcmk_rc_str(rc)); } /* Parse action-specific device properties */ parse_action_specific(xml, peer->host, device, op_requested_action(op), op, st_phase_requested, props); for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL; child = pcmk__xe_next(child, NULL)) { /* Replies for "reboot" operations will include the action-specific * values for "off" and "on" in child elements, just in case the reboot * winds up getting remapped. */ if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_OFF, pcmk__str_none)) { parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF, op, st_phase_off, props); } else if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_ON, pcmk__str_none)) { parse_action_specific(child, peer->host, device, PCMK_ACTION_ON, op, st_phase_on, props); } } } /*! * \internal * \brief Parse a peer's XML query reply and add it to operation's results * * \param[in,out] op Operation that query and reply relate to * \param[in] host Name of peer that sent this reply * \param[in] ndevices Number of devices expected in reply * \param[in] xml XML node containing device list * * \return Newly allocated result structure with parsed reply */ static peer_device_info_t * add_result(remote_fencing_op_t *op, const char *host, int ndevices, const xmlNode *xml) { peer_device_info_t *peer = pcmk__assert_alloc(1, sizeof(peer_device_info_t)); xmlNode *child; peer->host = pcmk__str_copy(host); peer->devices = pcmk__strkey_table(free, free); /* Each child element describes one capable device available to the peer */ for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL; child = pcmk__xe_next(child, NULL)) { const char *device = pcmk__xe_id(child); if (device) { add_device_properties(child, op, peer, device); } } peer->ndevices = g_hash_table_size(peer->devices); CRM_CHECK(ndevices == peer->ndevices, crm_err("Query claimed to have %d device%s but %d found", ndevices, pcmk__plural_s(ndevices), peer->ndevices)); op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers); return peer; } /*! * \internal * \brief Handle a peer's reply to our fencing query * * Parse a query result from XML and store it in the remote operation * table, and when enough replies have been received, issue a fencing request. * * \param[in] msg XML reply received * * \return pcmk_ok on success, -errno on error * * \note See initiate_remote_stonith_op() for how the XML query was initially * formed, and stonith_query() for how the peer formed its XML reply. */ int process_remote_stonith_query(xmlNode *msg) { int ndevices = 0; gboolean host_is_target = FALSE; gboolean have_all_replies = FALSE; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; peer_device_info_t *peer = NULL; uint32_t replies_expected; xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_REMOTE_OP "]", LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = pcmk__xe_get(dev, PCMK__XA_ST_REMOTE_OP); CRM_CHECK(id != NULL, return -EPROTO); dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_AVAILABLE_DEVICES "]", LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); pcmk__xe_get_int(dev, PCMK__XA_ST_AVAILABLE_DEVICES, &ndevices); op = g_hash_table_lookup(stonith_remote_op_list, id); if (op == NULL) { crm_debug("Received query reply for unknown or expired operation %s", id); return -EOPNOTSUPP; } replies_expected = fencing_active_peers(); if (op->replies_expected < replies_expected) { replies_expected = op->replies_expected; } if ((++op->replies >= replies_expected) && (op->state == st_query)) { have_all_replies = TRUE; } host = pcmk__xe_get(msg, PCMK__XA_SRC); host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei); crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s", op->replies, replies_expected, host, op->target, op->action, ndevices, pcmk__plural_s(ndevices), id); if (ndevices > 0) { peer = add_result(op, host, ndevices, dev); } pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (pcmk_is_set(op->call_options, st_opt_topology)) { /* If we start the fencing before all the topology results are in, * it is possible fencing levels will be skipped because of the missing * query results. */ if (op->state == st_query && all_topology_devices_found(op)) { /* All the query results are in for the topology, start the fencing ops. */ crm_trace("All topology devices found"); request_peer_fencing(op, peer); } else if (have_all_replies) { crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); request_peer_fencing(op, NULL); } } else if (op->state == st_query) { int nverified = count_peer_devices(op, peer, TRUE, fenced_support_flag(op->action)); /* We have a result for a non-topology fencing op that looks promising, * go ahead and start fencing before query timeout */ if ((peer != NULL) && !host_is_target && nverified) { /* we have a verified device living on a peer that is not the target */ crm_trace("Found %d verified device%s", nverified, pcmk__plural_s(nverified)); request_peer_fencing(op, peer); } else if (have_all_replies) { crm_info("All query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); request_peer_fencing(op, NULL); } else { crm_trace("Waiting for more peer results before launching fencing operation"); } } else if ((peer != NULL) && (op->state == st_done)) { crm_info("Discarding query result from %s (%d device%s): " "Operation is %s", peer->host, peer->ndevices, pcmk__plural_s(peer->ndevices), stonith_op_state_str(op->state)); } return pcmk_ok; } /*! * \internal * \brief Handle a peer's reply to a fencing request * * Parse a fencing reply from XML, and either finalize the operation * or attempt another device as appropriate. * * \param[in] msg XML reply received */ void fenced_process_fencing_reply(xmlNode *msg) { const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_REMOTE_OP "]", LOG_ERR); pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(dev != NULL, return); id = pcmk__xe_get(dev, PCMK__XA_ST_REMOTE_OP); CRM_CHECK(id != NULL, return); dev = stonith__find_xe_with_result(msg); CRM_CHECK(dev != NULL, return); stonith__xe_get_result(dev, &result); device = pcmk__xe_get(dev, PCMK__XA_ST_DEVICE_ID); if (stonith_remote_op_list) { op = g_hash_table_lookup(stonith_remote_op_list, id); } if ((op == NULL) && pcmk__result_ok(&result)) { /* Record successful fencing operations */ const char *client_id = pcmk__xe_get(dev, PCMK__XA_ST_CLIENTID); op = create_remote_stonith_op(client_id, dev, TRUE); } if (op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Received peer result of unknown or expired operation %s", id); pcmk__reset_result(&result); return; } pcmk__reset_result(&op->result); op->result = result; // The operation takes ownership of the result if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) { crm_err("Received outdated reply for device %s (instead of %s) to " "fence (%s) %s. Operation already timed out at peer level.", device, (const char *) op->devices->data, op->action, op->target); return; } if (pcmk__str_eq(pcmk__xe_get(msg, PCMK__XA_SUBT), PCMK__VALUE_BROADCAST, pcmk__str_none)) { if (pcmk__result_ok(&op->result)) { op->state = st_done; } else { op->state = st_failed; } finalize_op(op, msg, false); return; } else if (!pcmk__str_eq(op->originator, fenced_get_local_node(), pcmk__str_casei)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. */ crm_err("Received non-broadcast fencing result for operation %.8s " "we do not own (device %s targeting %s)", op->id, device, op->target); return; } if (pcmk_is_set(op->call_options, st_opt_topology)) { const char *device = NULL; const char *reason = op->result.exit_reason; /* We own the op, and it is complete. broadcast the result to all nodes * and notify our local clients. */ if (op->state == st_done) { finalize_op(op, msg, false); return; } device = pcmk__xe_get(msg, PCMK__XA_ST_DEVICE_ID); if ((op->phase == 2) && !pcmk__result_ok(&op->result)) { /* A remapped "on" failed, but the node was already turned off * successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s " "after successful 'off'", device, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : ": ", (reason == NULL)? "" : reason, op->target); pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: " "%s%s%s%s", op->action, op->target, ((device == NULL)? "" : " using "), ((device == NULL)? "" : device), op->client_name, op->originator, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } if (pcmk__result_ok(&op->result)) { /* An operation completed successfully. Try another device if * necessary, otherwise mark the operation as done. */ advance_topology_device_in_level(op, device, msg); return; } else { /* This device failed, time to try another topology level. If no other * levels are available, mark this operation as failed and report results. */ if (advance_topology_level(op, false) != pcmk_rc_ok) { op->state = st_failed; finalize_op(op, msg, false); return; } } } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) { op->state = st_done; finalize_op(op, msg, false); return; } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT) && (op->devices == NULL)) { /* If the operation timed out don't bother retrying other peers. */ op->state = st_failed; finalize_op(op, msg, false); return; } else { /* fall-through and attempt other fencing action using another peer */ } /* Retry on failure */ crm_trace("Next for %s on behalf of %s@%s (result was: %s)", op->target, op->originator, op->client_name, pcmk_exec_status_str(op->result.execution_status)); request_peer_fencing(op, NULL); } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; if (tolerance <= 0 || !stonith_remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) { if (strcmp(rop->target, target) != 0) { continue; } else if (rop->state != st_done) { continue; /* We don't have to worry about remapped reboots here * because if state is done, any remapping has been undone */ } else if (strcmp(rop->action, action) != 0) { continue; } else if ((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; } diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 0aa2439fc8..adb0704f64 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,666 +1,666 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster" // @TODO This should be guint long long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static struct { gboolean stand_alone; gchar **log_files; } options; crm_exit_t exit_code = CRM_EX_OK; static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; uint32_t call_options = st_opt_none; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; int rc = pcmk_rc_ok; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL); return 0; } op = pcmk__xe_get(request, PCMK__XA_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(request, PCMK__XA_ST_OP, op); crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, request); pcmk__xml_free(request); return 0; } if (c->name == NULL) { const char *value = pcmk__xe_get(request, PCMK__XA_ST_CLIENTNAME); c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid); } rc = pcmk__xe_get_flags(request, PCMK__XA_ST_CALLOPT, &call_options, st_opt_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from IPC request: %s", pcmk_rc_str(rc)); } crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { pcmk__assert(pcmk_is_set(flags, crm_ipc_client_response)); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); crm_log_xml_trace(request, "ipc-received"); stonith_command(c, id, flags, request, NULL); pcmk__xml_free(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = pcmk__xe_get(msg, PCMK__XA_SRC); const char *op = pcmk__xe_get(msg, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void handle_cpg_message(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, PCMK__XA_SRC, from); stonith_peer_callback(xml, NULL); pcmk__xml_free(xml); free(data); } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; int rid = 0; uint32_t ipc_flags = crm_ipc_server_event; if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_LOG_ASSERT(client->request_id); rid = client->request_id; client->request_id = 0; ipc_flags = crm_ipc_flags_none; } local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); if (local_rc == pcmk_rc_ok) { crm_trace("Sent response %d to client %s", rid, pcmk__client_name(client)); } else { crm_warn("%synchronous reply to client %s failed: %s", (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"), pcmk__client_name(client), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, pcmk__str_none)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = pcmk__xe_get(update_msg, PCMK__XA_SUBT); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " QB_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id); - crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout); + pcmk__xe_set_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } pcmk__xml_free(notify_data); } /*! * \internal * \brief Notify relevant IPC clients of a fencing operation result * * \param[in] type Notification type * \param[in] result Result of fencing operation (assume success if NULL) * \param[in] data If not NULL, add to notification as call data */ void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); CRM_LOG_ASSERT(type != NULL); crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(update_msg, PCMK__XA_SUBT, type); crm_xml_add(update_msg, PCMK__XA_ST_OP, type); stonith__xe_set_result(update_msg, result); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); pcmk__xml_free(update_msg); crm_trace("Notify complete"); } /*! * \internal * \brief Send notifications for a configuration change to subscribed clients * * \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD, * \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or * \c STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc Description of what changed (either device ID or string * representation of level * ([])) */ void fenced_send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { xmlNode *notify_data = pcmk__xe_create(NULL, op); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, desc); fenced_send_notification(op, result, notify_data); pcmk__xml_free(notify_data); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } } static void stonith_cleanup(void) { fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } pcmk__cluster_destroy_node_caches(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); fenced_unregister_handlers(); } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, const void *data) { if ((type != pcmk__node_update_processes) && !pcmk_is_set(node->flags, pcmk__node_status_remote)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE); crm_debug("Broadcasting our uname because of node %" PRIu32, node->cluster_layer_id); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, query); pcmk__xml_free(query); } } /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or * crm_resource --list-options=fencing instead of querying daemon metadata. * * NOTE: pcs (as of at least 0.11.8) uses this */ static int fencer_metadata(void) { const char *name = PCMK__SERVER_FENCED; const char *desc_short = N_("Instance attributes available for all " "\"stonith\"-class resources"); const char *desc_long = N_("Instance attributes available for all " "\"stonith\"-class resources and used by " "Pacemaker's fence daemon"); return pcmk__daemon_metadata(out, name, desc_short, desc_long, pcmk__opt_fencing); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.stand_alone, N_("Intended for use in regression testing only"), NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; pcmk_cluster_t *cluster = NULL; crm_ipc_t *old_instance = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = fencer_metadata(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } // Open additional log files pcmk__add_logfiles(options.log_files, out); crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE, (args->verbosity > 0), argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_crit("Aborting start-up because another fencer instance is " "already active"); goto done; } else { // Not up or not authentic, we'll proceed either way crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); pcmk__cluster_init_node_caches(); rc = fenced_scheduler_init(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error initializing scheduler data: %s", pcmk_rc_str(rc)); goto done; } cluster = pcmk_cluster_new(); #if SUPPORT_COROSYNC if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy); pcmk_cpg_set_deliver_fn(cluster, handle_cpg_message); pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb); } #endif // SUPPORT_COROSYNC pcmk__cluster_set_status_callback(&st_peer_update_callback); if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; crm_crit("Cannot sign in to the cluster... terminating"); goto done; } fenced_set_local_node(cluster->priv->node_name); if (!options.stand_alone) { setup_cib(); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); // Create the mainloop and run it... mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); g_strfreev(options.log_files); stonith_cleanup(); pcmk_cluster_free(cluster); fenced_scheduler_cleanup(); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/pacemakerd/pcmkd_messages.c b/daemons/pacemakerd/pcmkd_messages.c index b2cbec3f7f..bc15914045 100644 --- a/daemons/pacemakerd/pcmkd_messages.c +++ b/daemons/pacemakerd/pcmkd_messages.c @@ -1,280 +1,281 @@ /* * Copyright 2010-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #include #include #include #include #include #include #include #include static GHashTable *pcmkd_handlers = NULL; static xmlNode * handle_node_cache_request(pcmk__request_t *request) { crm_trace("Ignoring request from client %s to purge node " "because peer cache is not used", pcmk__client_name(request->ipc_client)); pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_OK); return NULL; } static xmlNode * handle_ping_request(pcmk__request_t *request) { xmlNode *msg = request->xml; const char *value = NULL; xmlNode *ping = NULL; xmlNode *reply = NULL; const char *from = pcmk__xe_get(msg, PCMK__XA_CRM_SYS_FROM); /* Pinged for status */ crm_trace("Pinged from " PCMK__XA_CRM_SYS_FROM "='%s' " PCMK_XA_ORIGIN "='%s'", pcmk__s(from, ""), pcmk__s(pcmk__xe_get(msg, PCMK_XA_ORIGIN), "")); pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); ping = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE); value = pcmk__xe_get(msg, PCMK__XA_CRM_SYS_TO); crm_xml_add(ping, PCMK__XA_CRM_SUBSYSTEM, value); crm_xml_add(ping, PCMK__XA_PACEMAKERD_STATE, pacemakerd_state); pcmk__xe_set_time(ping, PCMK_XA_CRM_TIMESTAMP, subdaemon_check_progress); crm_xml_add(ping, PCMK_XA_RESULT, "ok"); reply = pcmk__new_reply(msg, ping); pcmk__xml_free(ping); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Failed building ping reply for client %s", pcmk__client_name(request->ipc_client)); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /* just proceed state on sbd pinging us */ if (from && strstr(from, "sbd")) { if (pcmk__str_eq(pacemakerd_state, PCMK__VALUE_SHUTDOWN_COMPLETE, pcmk__str_none)) { if (pcmk__get_sbd_sync_resource_startup()) { crm_notice("Shutdown-complete-state passed to SBD."); } shutdown_complete_state_reported_to = request->ipc_client->pid; } else if (pcmk__str_eq(pacemakerd_state, PCMK__VALUE_WAIT_FOR_PING, pcmk__str_none)) { crm_notice("Received startup-trigger from SBD."); pacemakerd_state = PCMK__VALUE_STARTING_DAEMONS; mainloop_set_trigger(startup_trigger); } } return reply; } static xmlNode * handle_shutdown_request(pcmk__request_t *request) { xmlNode *msg = request->xml; xmlNode *shutdown = NULL; xmlNode *reply = NULL; /* Only allow privileged users (i.e. root or hacluster) to shut down * Pacemaker from the command line (or direct IPC), so that other users * are forced to go through the CIB and have ACLs applied. */ bool allowed = pcmk_is_set(request->ipc_client->flags, pcmk__client_privileged); pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); shutdown = pcmk__xe_create(NULL, PCMK__XE_SHUTDOWN); if (allowed) { crm_notice("Shutting down in response to IPC request %s from %s", pcmk__xe_get(msg, PCMK_XA_REFERENCE), pcmk__xe_get(msg, PCMK_XA_ORIGIN)); - crm_xml_add_int(shutdown, PCMK__XA_OP_STATUS, CRM_EX_OK); + pcmk__xe_set_int(shutdown, PCMK__XA_OP_STATUS, CRM_EX_OK); } else { crm_warn("Ignoring shutdown request from unprivileged client %s", pcmk__client_name(request->ipc_client)); - crm_xml_add_int(shutdown, PCMK__XA_OP_STATUS, CRM_EX_INSUFFICIENT_PRIV); + pcmk__xe_set_int(shutdown, PCMK__XA_OP_STATUS, + CRM_EX_INSUFFICIENT_PRIV); } reply = pcmk__new_reply(msg, shutdown); pcmk__xml_free(shutdown); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Failed building shutdown reply for client %s", pcmk__client_name(request->ipc_client)); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } if (allowed) { pcmk_shutdown(15); } return reply; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_INVALID_PARAM); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", pcmk__client_name(request->ipc_client)); return NULL; } static void pcmkd_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_RM_NODE_CACHE, handle_node_cache_request }, { CRM_OP_PING, handle_ping_request }, { CRM_OP_QUIT, handle_shutdown_request }, { NULL, handle_unknown_request }, }; pcmkd_handlers = pcmk__register_handlers(handlers); } static int32_t pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } /* Error code means? */ static int32_t pcmk_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); if (shutdown_complete_state_reported_to == client->pid) { shutdown_complete_state_reported_client_closed = TRUE; if (shutdown_trigger) { mainloop_set_trigger(shutdown_trigger); } } pcmk__free_client(client); return 0; } static void pcmk_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pcmk_ipc_closed(c); } /* Exit code means? */ static int32_t pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; xmlNode *msg = NULL; pcmk__client_t *c = pcmk__find_client(qbc); CRM_CHECK(c != NULL, return 0); if (pcmkd_handlers == NULL) { pcmkd_register_handlers(); } msg = pcmk__client_data2xml(c, data, &id, &flags); if (msg == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL, CRM_EX_PROTOCOL); return 0; } else { char *log_msg = NULL; const char *reason = NULL; xmlNode *reply = NULL; pcmk__request_t request = { .ipc_client = c, .ipc_id = id, .ipc_flags = flags, .peer = NULL, .xml = msg, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = pcmk__xe_get_copy(request.xml, PCMK__XA_CRM_TASK); CRM_CHECK(request.op != NULL, return 0); reply = pcmk__process_request(&request, pcmkd_handlers); if (reply != NULL) { pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event); pcmk__xml_free(reply); } reason = request.result.exit_reason; log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", request.op, pcmk__request_origin_type(&request), pcmk__request_origin(&request), pcmk_exec_status_str(request.result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); if (!pcmk__result_ok(&request.result)) { crm_warn("%s", log_msg); } else { crm_debug("%s", log_msg); } free(log_msg); pcmk__reset_request(&request); } pcmk__xml_free(msg); return 0; } struct qb_ipcs_service_handlers pacemakerd_ipc_callbacks = { .connection_accept = pcmk_ipc_accept, .connection_created = NULL, .msg_process = pcmk_ipc_dispatch, .connection_closed = pcmk_ipc_closed, .connection_destroyed = pcmk_ipc_destroy }; diff --git a/daemons/schedulerd/schedulerd_messages.c b/daemons/schedulerd/schedulerd_messages.c index 9d63edbff5..50a2a80c75 100644 --- a/daemons/schedulerd/schedulerd_messages.c +++ b/daemons/schedulerd/schedulerd_messages.c @@ -1,333 +1,333 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "pacemaker-schedulerd.h" static GHashTable *schedulerd_handlers = NULL; static pcmk_scheduler_t * init_scheduler(void) { pcmk_scheduler_t *scheduler = pcmk_new_scheduler(); pcmk__mem_assert(scheduler); scheduler->priv->out = logger_out; return scheduler; } static xmlNode * handle_pecalc_request(pcmk__request_t *request) { static struct series_s { const char *name; const char *param; /* Maximum number of inputs of this kind to save to disk. * If -1, save all; if 0, save none. */ int wrap; } series[] = { { "pe-error", PCMK_OPT_PE_ERROR_SERIES_MAX, -1 }, { "pe-warn", PCMK_OPT_PE_WARN_SERIES_MAX, 5000 }, { "pe-input", PCMK_OPT_PE_INPUT_SERIES_MAX, 4000 }, }; xmlNode *msg = request->xml; xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *xml_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); static char *last_digest = NULL; static char *filename = NULL; unsigned int seq = 0U; int series_id = 0; int series_wrap = 0; char *digest = NULL; const char *value = NULL; time_t execution_date = time(NULL); xmlNode *converted = NULL; xmlNode *reply = NULL; bool is_repoke = false; bool process = true; pcmk_scheduler_t *scheduler = init_scheduler(); pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); digest = pcmk__digest_xml(xml_data, false); converted = pcmk__xml_copy(NULL, xml_data); if (pcmk__update_configured_schema(&converted, true) != pcmk_rc_ok) { scheduler->priv->graph = pcmk__xe_create(NULL, PCMK__XE_TRANSITION_GRAPH); - crm_xml_add_int(scheduler->priv->graph, "transition_id", 0); - crm_xml_add_int(scheduler->priv->graph, PCMK_OPT_CLUSTER_DELAY, 0); + pcmk__xe_set_int(scheduler->priv->graph, "transition_id", 0); + pcmk__xe_set_int(scheduler->priv->graph, PCMK_OPT_CLUSTER_DELAY, 0); process = false; free(digest); } else if (pcmk__str_eq(digest, last_digest, pcmk__str_casei)) { is_repoke = true; free(digest); } else { free(last_digest); last_digest = digest; } if (process) { scheduler->input = converted; pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_counts |pcmk__sched_show_utilization); pcmk__schedule_actions(scheduler); // Don't free converted as part of scheduler scheduler->input = NULL; } // Get appropriate index into series[] array if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_error) || pcmk__config_has_error) { series_id = 0; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_warning) || pcmk__config_has_warning) { series_id = 1; } else { series_id = 2; } value = pcmk__cluster_option(scheduler->priv->options, series[series_id].param); if ((value == NULL) || (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) { series_wrap = series[series_id].wrap; } if (pcmk__read_series_sequence(PCMK_SCHEDULER_INPUT_DIR, series[series_id].name, &seq) != pcmk_rc_ok) { // @TODO maybe handle errors better ... seq = 0U; } crm_trace("Series %s: wrap=%d, seq=%u, pref=%s", series[series_id].name, series_wrap, seq, value); reply = pcmk__new_reply(msg, scheduler->priv->graph); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Failed building ping reply for client %s", pcmk__client_name(request->ipc_client)); goto done; } if (series_wrap == 0) { // Don't save any inputs of this kind free(filename); filename = NULL; } else if (!is_repoke) { // Input changed, save to disk free(filename); filename = pcmk__series_filename(PCMK_SCHEDULER_INPUT_DIR, series[series_id].name, seq, true); } crm_xml_add(reply, PCMK__XA_CRM_TGRAPH_IN, filename); pcmk__log_transition_summary(scheduler, filename); if (series_wrap == 0) { crm_debug("Not saving input to disk (disabled by configuration)"); } else if (is_repoke) { crm_info("Input has not changed since last time, not saving to disk"); } else { unlink(filename); pcmk__xe_set_time(xml_data, PCMK_XA_EXECUTION_DATE, execution_date); pcmk__xml_write_file(xml_data, filename, true); pcmk__write_series_sequence(PCMK_SCHEDULER_INPUT_DIR, series[series_id].name, ++seq, series_wrap); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); done: pcmk__xml_free(converted); pcmk_free_scheduler(scheduler); return reply; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_INVALID_PARAM); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", pcmk__client_name(request->ipc_client)); return NULL; } static xmlNode * handle_hello_request(pcmk__request_t *request) { pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); crm_trace("Received IPC hello from %s", pcmk__client_name(request->ipc_client)); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static void schedulerd_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_HELLO, handle_hello_request }, { CRM_OP_PECALC, handle_pecalc_request }, { NULL, handle_unknown_request }, }; schedulerd_handlers = pcmk__register_handlers(handlers); } static int32_t pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } static int32_t pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; xmlNode *msg = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *sys_to = NULL; CRM_CHECK(c != NULL, return 0); if (schedulerd_handlers == NULL) { schedulerd_register_handlers(); } msg = pcmk__client_data2xml(c, data, &id, &flags); if (msg == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL, CRM_EX_PROTOCOL); return 0; } sys_to = pcmk__xe_get(msg, PCMK__XA_CRM_SYS_TO); if (pcmk__str_eq(pcmk__xe_get(msg, PCMK__XA_SUBT), PCMK__VALUE_RESPONSE, pcmk__str_none)) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); crm_info("Ignoring IPC reply from %s", pcmk__client_name(c)); } else if (!pcmk__str_eq(sys_to, CRM_SYSTEM_PENGINE, pcmk__str_none)) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); crm_info("Ignoring invalid IPC message: to '%s' not " CRM_SYSTEM_PENGINE, pcmk__s(sys_to, "")); } else { char *log_msg = NULL; const char *reason = NULL; xmlNode *reply = NULL; pcmk__request_t request = { .ipc_client = c, .ipc_id = id, .ipc_flags = flags, .peer = NULL, .xml = msg, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = pcmk__xe_get_copy(request.xml, PCMK__XA_CRM_TASK); CRM_CHECK(request.op != NULL, return 0); reply = pcmk__process_request(&request, schedulerd_handlers); if (reply != NULL) { pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event); pcmk__xml_free(reply); } reason = request.result.exit_reason; log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", request.op, pcmk__request_origin_type(&request), pcmk__request_origin(&request), pcmk_exec_status_str(request.result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); if (!pcmk__result_ok(&request.result)) { crm_warn("%s", log_msg); } else { crm_debug("%s", log_msg); } free(log_msg); pcmk__reset_request(&request); } pcmk__xml_free(msg); return 0; } /* Error code means? */ static int32_t pe_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); pcmk__free_client(client); return 0; } static void pe_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pe_ipc_closed(c); } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = pe_ipc_accept, .connection_created = NULL, .msg_process = pe_ipc_dispatch, .connection_closed = pe_ipc_closed, .connection_destroyed = pe_ipc_destroy }; diff --git a/include/crm/common/xml_element_internal.h b/include/crm/common/xml_element_internal.h index b9ba2b1323..be5517864e 100644 --- a/include/crm/common/xml_element_internal.h +++ b/include/crm/common/xml_element_internal.h @@ -1,205 +1,206 @@ /* * Copyright 2017-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_XML_ELEMENT_INTERNAL__H #define PCMK__CRM_COMMON_XML_ELEMENT_INTERNAL__H /* * Internal-only wrappers for and extensions to libxml2 for processing XML * elements */ #include // bool #include // uint32_t #include // NULL #include // strcmp() #include // guint #include // xmlNode, etc. #include // crm_time_t #include // pcmk__str_copy() #include // PCMK_XA_ID #ifdef __cplusplus extern "C" { #endif const char *pcmk__xe_add_last_written(xmlNode *xe); xmlNode *pcmk__xe_first_child(const xmlNode *parent, const char *node_name, const char *attr_n, const char *attr_v); void pcmk__xe_remove_attr(xmlNode *element, const char *name); bool pcmk__xe_remove_attr_cb(xmlNode *xml, void *user_data); void pcmk__xe_remove_matching_attrs(xmlNode *element, bool force, bool (*match)(xmlAttrPtr, void *), void *user_data); int pcmk__xe_delete_match(xmlNode *xml, xmlNode *search); int pcmk__xe_replace_match(xmlNode *xml, xmlNode *replace); int pcmk__xe_update_match(xmlNode *xml, xmlNode *update, uint32_t flags); /*! * \internal * \brief Check whether an XML element is of a particular type * * \param[in] xml XML element to compare * \param[in] name XML element name to compare * * \return \c true if \p xml is of type \p name, otherwise \c false */ static inline bool pcmk__xe_is(const xmlNode *xml, const char *name) { return (xml != NULL) && (xml->name != NULL) && (name != NULL) && (strcmp((const char *) xml->name, name) == 0); } xmlNode *pcmk__xe_create(xmlNode *parent, const char *name); xmlNode *pcmk__xe_next(const xmlNode *node, const char *element_name); void pcmk__xe_set_content(xmlNode *node, const char *format, ...) G_GNUC_PRINTF(2, 3); int pcmk__xe_get_score(const xmlNode *xml, const char *name, int *score, int default_score); int pcmk__xe_copy_attrs(xmlNode *target, const xmlNode *src, uint32_t flags); void pcmk__xe_sort_attrs(xmlNode *xml); void pcmk__xe_set_id(xmlNode *xml, const char *format, ...) G_GNUC_PRINTF(2, 3); /*! * \internal * \brief Like pcmk__xe_set_props, but takes a va_list instead of * arguments directly. * * \param[in,out] node XML to add attributes to * \param[in] pairs NULL-terminated list of name/value pairs to add */ void pcmk__xe_set_propv(xmlNodePtr node, va_list pairs); /*! * \internal * \brief Add a NULL-terminated list of name/value pairs to the given * XML node as properties. * * \param[in,out] node XML node to add properties to * \param[in] ... NULL-terminated list of name/value pairs * * \note A NULL name terminates the arguments; a NULL value will be skipped. */ void pcmk__xe_set_props(xmlNodePtr node, ...) G_GNUC_NULL_TERMINATED; /*! * \internal * \brief Get first attribute of an XML element * * \param[in] xe XML element to check * * \return First attribute of \p xe (or NULL if \p xe is NULL or has none) */ static inline xmlAttr * pcmk__xe_first_attr(const xmlNode *xe) { return (xe == NULL)? NULL : xe->properties; } /*! * \internal * \brief Iterate over child elements of \p xml * * This function iterates over the children of \p xml, performing the * callback function \p handler on each node. If the callback returns * a value other than pcmk_rc_ok, the iteration stops and the value is * returned. It is therefore possible that not all children will be * visited. * * \param[in,out] xml The starting XML node. Can be NULL. * \param[in] child_element_name The name that the node must match in order * for \p handler to be run. If NULL, all * child elements will match. * \param[in] handler The callback function. * \param[in,out] userdata User data to pass to the callback function. * Can be NULL. * * \return Standard Pacemaker return code */ int pcmk__xe_foreach_child(xmlNode *xml, const char *child_element_name, int (*handler)(xmlNode *xml, void *userdata), void *userdata); const char *pcmk__xe_get(const xmlNode *xml, const char *attr_name); int pcmk__xe_get_datetime(const xmlNode *xml, const char *attr, crm_time_t **t); int pcmk__xe_get_flags(const xmlNode *xml, const char *name, uint32_t *dest, uint32_t default_value); int pcmk__xe_get_guint(const xmlNode *xml, const char *attr, guint *dest); void pcmk__xe_set_guint(xmlNode *xml, const char *attr, guint value); int pcmk__xe_get_int(const xmlNode *xml, const char *name, int *dest); +void pcmk__xe_set_int(xmlNode *xml, const char *attr, int value); int pcmk__xe_get_ll(const xmlNode *xml, const char *name, long long *dest); int pcmk__xe_set_ll(xmlNode *xml, const char *attr, long long value); int pcmk__xe_get_time(const xmlNode *xml, const char *attr, time_t *dest); void pcmk__xe_set_time(xmlNode *xml, const char *attr, time_t value); int pcmk__xe_get_timeval(const xmlNode *xml, const char *sec_attr, const char *usec_attr, struct timeval *dest); void pcmk__xe_set_timeval(xmlNode *xml, const char *sec_attr, const char *usec_attr, const struct timeval *value); void pcmk__xe_set_bool_attr(xmlNodePtr node, const char *name, bool value); int pcmk__xe_get_bool_attr(const xmlNode *node, const char *name, bool *value); bool pcmk__xe_attr_is_true(const xmlNode *node, const char *name); /*! * \internal * \brief Retrieve a copy of the value of an XML attribute * * This is like \c pcmk__xe_get() but allocates new memory for the result. * * \param[in] xml XML element whose attribute to get * \param[in] attr Attribute name * * \return Value of specified attribute (or \c NULL if not set) * * \note The caller is responsible for freeing the result using \c free(). */ static inline char * pcmk__xe_get_copy(const xmlNode *xml, const char *attr) { return pcmk__str_copy(pcmk__xe_get(xml, attr)); } /*! * \internal * \brief Retrieve the value of the \c PCMK_XA_ID XML attribute * * \param[in] xml XML element to check * * \return Value of the \c PCMK_XA_ID attribute (may be \c NULL) */ static inline const char * pcmk__xe_id(const xmlNode *xml) { return pcmk__xe_get(xml, PCMK_XA_ID); } #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_XML_ELEMENT_INTERNAL__H diff --git a/lib/cib/cib_native.c b/lib/cib/cib_native.c index 3df5e72649..1b78f11400 100644 --- a/lib/cib/cib_native.c +++ b/lib/cib/cib_native.c @@ -1,485 +1,485 @@ /* * Copyright 2004 International Business Machines * Later changes copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include typedef struct cib_native_opaque_s { char *token; crm_ipc_t *ipc; void (*dnotify_fn) (gpointer user_data); mainloop_io_t *source; } cib_native_opaque_t; static int cib_native_perform_op_delegate(cib_t *cib, const char *op, const char *host, const char *section, xmlNode *data, xmlNode **output_data, int call_options, const char *user_name) { int rc = pcmk_ok; int reply_id = 0; enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; cib_native_opaque_t *native = cib->variant_opaque; if (cib->state == cib_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } if (call_options & cib_sync_call) { pcmk__set_ipc_flags(ipc_flags, "client", crm_ipc_client_response); } rc = cib__create_op(cib, op, host, section, data, call_options, user_name, NULL, &op_msg); if (rc != pcmk_ok) { return rc; } if (pcmk_is_set(call_options, cib_transaction)) { rc = cib__extend_transaction(cib, op_msg); goto done; } crm_trace("Sending %s message to the CIB manager (timeout=%ds)", op, cib->call_timeout); rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, cib->call_timeout * 1000, &op_reply); if (rc < 0) { crm_err("Couldn't perform %s operation (timeout=%ds): %s (%d)", op, cib->call_timeout, pcmk_strerror(rc), rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & cib_sync_call)) { crm_trace("Async call, returning %d", cib->call_id); CRM_CHECK(cib->call_id != 0, rc = -ENOMSG; goto done); rc = cib->call_id; goto done; } rc = pcmk_ok; pcmk__xe_get_int(op_reply, PCMK__XA_CIB_CALLID, &reply_id); if (reply_id == cib->call_id) { xmlNode *wrapper = pcmk__xe_first_child(op_reply, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *tmp = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); crm_trace("Synchronous reply %d received", reply_id); if (pcmk__xe_get_int(op_reply, PCMK__XA_CIB_RC, &rc) != pcmk_rc_ok) { rc = -EPROTO; } if (output_data == NULL || (call_options & cib_discard_reply)) { crm_trace("Discarding reply"); } else { *output_data = pcmk__xml_copy(NULL, tmp); } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); rc = -ENOMSG; goto done; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, cib->call_id); crm_log_xml_err(op_reply, "Old reply"); rc = -ENOMSG; goto done; } if (op_reply == NULL && cib->state == cib_disconnected) { rc = -ENOTCONN; } else if (rc == pcmk_ok && op_reply == NULL) { rc = -ETIME; } switch (rc) { case pcmk_ok: case -EPERM: break; /* This is an internal value that clients do not and should not care about */ case -pcmk_err_diff_resync: rc = pcmk_ok; break; /* These indicate internal problems */ case -EPROTO: case -ENOMSG: crm_err("Call failed: %s", pcmk_strerror(rc)); if (op_reply) { crm_log_xml_err(op_reply, "Invalid reply"); } break; default: if (!pcmk__str_eq(op, PCMK__CIB_REQUEST_QUERY, pcmk__str_none)) { crm_warn("Call failed: %s", pcmk_strerror(rc)); } } done: if (!crm_ipc_connected(native->ipc)) { crm_err("The CIB manager disconnected"); cib->state = cib_disconnected; } pcmk__xml_free(op_msg); pcmk__xml_free(op_reply); return rc; } static int cib_native_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; xmlNode *msg = NULL; cib_t *cib = userdata; crm_trace("dispatching %p", userdata); if (cib == NULL) { crm_err("No CIB!"); return 0; } msg = pcmk__xml_parse(buffer); if (msg == NULL) { crm_warn("Received a NULL message from the CIB manager"); return 0; } /* do callbacks */ type = pcmk__xe_get(msg, PCMK__XA_T); crm_trace("Activating %s callbacks...", type); crm_log_xml_explicit(msg, "cib-reply"); if (pcmk__str_eq(type, PCMK__VALUE_CIB, pcmk__str_none)) { cib_native_callback(cib, msg, 0, 0); } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_NOTIFY, pcmk__str_none)) { g_list_foreach(cib->notify_list, cib_native_notify, msg); } else { crm_err("Unknown message type: %s", type); } pcmk__xml_free(msg); return 0; } static void cib_native_destroy(void *userdata) { cib_t *cib = userdata; cib_native_opaque_t *native = cib->variant_opaque; crm_trace("destroying %p", userdata); cib->state = cib_disconnected; native->source = NULL; native->ipc = NULL; if (native->dnotify_fn) { native->dnotify_fn(userdata); } } static int cib_native_signoff(cib_t *cib) { cib_native_opaque_t *native = cib->variant_opaque; crm_debug("Disconnecting from the CIB manager"); cib_free_notify(cib); remove_cib_op_callback(0, TRUE); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } cib->cmds->end_transaction(cib, false, cib_none); cib->state = cib_disconnected; cib->type = cib_no_connection; return pcmk_ok; } static int cib_native_signon(cib_t *cib, const char *name, enum cib_conn_type type) { int rc = pcmk_ok; const char *channel = NULL; cib_native_opaque_t *native = cib->variant_opaque; xmlNode *hello = NULL; struct ipc_client_callbacks cib_callbacks = { .dispatch = cib_native_dispatch_internal, .destroy = cib_native_destroy }; if (name == NULL) { name = pcmk__s(crm_system_name, "client"); } cib->call_timeout = PCMK__IPC_TIMEOUT; if (type == cib_command) { cib->state = cib_connected_command; channel = PCMK__SERVER_BASED_RW; } else if (type == cib_command_nonblocking) { cib->state = cib_connected_command; channel = PCMK__SERVER_BASED_SHM; } else if (type == cib_query) { cib->state = cib_connected_query; channel = PCMK__SERVER_BASED_RO; } else { return -ENOTCONN; } crm_trace("Connecting %s channel", channel); native->source = mainloop_add_ipc_client(channel, G_PRIORITY_HIGH, 512 * 1024, cib, &cib_callbacks); native->ipc = mainloop_get_ipc_client(native->source); if (rc != pcmk_ok || native->ipc == NULL || !crm_ipc_connected(native->ipc)) { crm_info("Could not connect to CIB manager for %s", name); rc = -ENOTCONN; } if (rc == pcmk_ok) { rc = cib__create_op(cib, CRM_OP_REGISTER, NULL, NULL, NULL, cib_sync_call, NULL, name, &hello); } if (rc == pcmk_ok) { xmlNode *reply = NULL; if (crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply) > 0) { const char *msg_type = pcmk__xe_get(reply, PCMK__XA_CIB_OP); crm_log_xml_trace(reply, "reg-reply"); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_info("Reply to CIB registration message has unknown type " "'%s'", msg_type); rc = -EPROTO; } else { native->token = pcmk__xe_get_copy(reply, PCMK__XA_CIB_CLIENTID); if (native->token == NULL) { rc = -EPROTO; } } pcmk__xml_free(reply); } else { rc = -ECOMM; } pcmk__xml_free(hello); } if (rc == pcmk_ok) { crm_info("Successfully connected to CIB manager for %s", name); return pcmk_ok; } crm_info("Connection to CIB manager for %s failed: %s", name, pcmk_strerror(rc)); cib_native_signoff(cib); return rc; } static int cib_native_free(cib_t *cib) { int rc = pcmk_ok; if (cib->state != cib_disconnected) { rc = cib_native_signoff(cib); } if (cib->state == cib_disconnected) { cib_native_opaque_t *native = cib->variant_opaque; free(native->token); free(cib->variant_opaque); free(cib->cmds); free(cib->user); free(cib); } return rc; } static int cib_native_register_notification(cib_t *cib, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = pcmk__xe_create(NULL, PCMK__XE_CIB_CALLBACK); cib_native_opaque_t *native = cib->variant_opaque; if (cib->state != cib_disconnected) { crm_xml_add(notify_msg, PCMK__XA_CIB_OP, PCMK__VALUE_CIB_NOTIFY); crm_xml_add(notify_msg, PCMK__XA_CIB_NOTIFY_TYPE, callback); - crm_xml_add_int(notify_msg, PCMK__XA_CIB_NOTIFY_ACTIVATE, enabled); + pcmk__xe_set_int(notify_msg, PCMK__XA_CIB_NOTIFY_ACTIVATE, enabled); rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, 1000 * cib->call_timeout, NULL); if (rc <= 0) { crm_trace("Notification not registered: %d", rc); rc = -ECOMM; } } pcmk__xml_free(notify_msg); return rc; } static int cib_native_set_connection_dnotify(cib_t *cib, void (*dnotify) (gpointer user_data)) { cib_native_opaque_t *native = NULL; if (cib == NULL) { crm_err("No CIB!"); return FALSE; } native = cib->variant_opaque; native->dnotify_fn = dnotify; return pcmk_ok; } /*! * \internal * \brief Get the given CIB connection's unique client identifier * * These can be used to check whether this client requested the action that * triggered a CIB notification. * * \param[in] cib CIB connection * \param[out] async_id If not \p NULL, where to store asynchronous client ID * \param[out] sync_id If not \p NULL, where to store synchronous client ID * * \return Legacy Pacemaker return code (specifically, \p pcmk_ok) * * \note This is the \p cib_native variant implementation of * \p cib_api_operations_t:client_id(). * \note For \p cib_native objects, \p async_id and \p sync_id are the same. * \note The client ID is assigned during CIB sign-on. */ static int cib_native_client_id(const cib_t *cib, const char **async_id, const char **sync_id) { cib_native_opaque_t *native = cib->variant_opaque; if (async_id != NULL) { *async_id = native->token; } if (sync_id != NULL) { *sync_id = native->token; } return pcmk_ok; } cib_t * cib_native_new(void) { cib_native_opaque_t *native = NULL; cib_t *cib = cib_new_variant(); if (cib == NULL) { return NULL; } native = calloc(1, sizeof(cib_native_opaque_t)); if (native == NULL) { free(cib); return NULL; } cib->variant = cib_native; cib->variant_opaque = native; native->ipc = NULL; native->source = NULL; native->dnotify_fn = NULL; /* assign variant specific ops */ cib->delegate_fn = cib_native_perform_op_delegate; cib->cmds->signon = cib_native_signon; cib->cmds->signoff = cib_native_signoff; cib->cmds->free = cib_native_free; cib->cmds->register_notification = cib_native_register_notification; cib->cmds->set_connection_dnotify = cib_native_set_connection_dnotify; cib->cmds->client_id = cib_native_client_id; return cib; } diff --git a/lib/cib/cib_remote.c b/lib/cib/cib_remote.c index 5d547e962d..ea838595a0 100644 --- a/lib/cib/cib_remote.c +++ b/lib/cib/cib_remote.c @@ -1,658 +1,658 @@ /* * Copyright 2008-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // GnuTLS handshake timeout in seconds #define TLS_HANDSHAKE_TIMEOUT 5 static pcmk__tls_t *tls = NULL; #include typedef struct cib_remote_opaque_s { int port; char *server; char *user; char *passwd; gboolean encrypted; pcmk__remote_t command; pcmk__remote_t callback; pcmk__output_t *out; time_t start_time; int timeout_sec; } cib_remote_opaque_t; static int cib_remote_perform_op(cib_t *cib, const char *op, const char *host, const char *section, xmlNode *data, xmlNode **output_data, int call_options, const char *user_name) { int rc; int remaining_time = 0; time_t start_time; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; cib_remote_opaque_t *private = cib->variant_opaque; if (cib->state == cib_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } rc = cib__create_op(cib, op, host, section, data, call_options, user_name, NULL, &op_msg); if (rc != pcmk_ok) { return rc; } if (pcmk_is_set(call_options, cib_transaction)) { rc = cib__extend_transaction(cib, op_msg); pcmk__xml_free(op_msg); return rc; } crm_trace("Sending %s message to the CIB manager", op); if (!(call_options & cib_sync_call)) { pcmk__remote_send_xml(&private->callback, op_msg); } else { pcmk__remote_send_xml(&private->command, op_msg); } pcmk__xml_free(op_msg); if ((call_options & cib_discard_reply)) { crm_trace("Discarding reply"); return pcmk_ok; } else if (!(call_options & cib_sync_call)) { return cib->call_id; } crm_trace("Waiting for a synchronous reply"); start_time = time(NULL); remaining_time = cib->call_timeout ? cib->call_timeout : 60; rc = pcmk_rc_ok; while (remaining_time > 0 && (rc != ENOTCONN)) { int reply_id = -1; int msg_id = cib->call_id; rc = pcmk__read_remote_message(&private->command, remaining_time * 1000); op_reply = pcmk__remote_message_xml(&private->command); if (!op_reply) { break; } pcmk__xe_get_int(op_reply, PCMK__XA_CIB_CALLID, &reply_id); if (reply_id == msg_id) { break; } else if (reply_id < msg_id) { crm_debug("Received old reply: %d (wanted %d)", reply_id, msg_id); crm_log_xml_trace(op_reply, "Old reply"); } else if ((reply_id - 10000) > msg_id) { /* wrap-around case */ crm_debug("Received old reply: %d (wanted %d)", reply_id, msg_id); crm_log_xml_trace(op_reply, "Old reply"); } else { crm_err("Received a __future__ reply:" " %d (wanted %d)", reply_id, msg_id); } pcmk__xml_free(op_reply); op_reply = NULL; /* wasn't the right reply, try and read some more */ remaining_time = time(NULL) - start_time; } if (rc == ENOTCONN) { crm_err("Disconnected while waiting for reply."); return -ENOTCONN; } else if (op_reply == NULL) { crm_err("No reply message - empty"); return -ENOMSG; } crm_trace("Synchronous reply received"); /* Start processing the reply... */ if (pcmk__xe_get_int(op_reply, PCMK__XA_CIB_RC, &rc) != pcmk_rc_ok) { rc = -EPROTO; } if (rc == -pcmk_err_diff_resync) { /* This is an internal value that clients do not and should not care about */ rc = pcmk_ok; } if (rc == pcmk_ok || rc == -EPERM) { crm_log_xml_debug(op_reply, "passed"); } else { crm_err("Call failed: %s", pcmk_strerror(rc)); crm_log_xml_warn(op_reply, "failed"); } if (output_data == NULL) { /* do nothing more */ } else if (!(call_options & cib_discard_reply)) { xmlNode *wrapper = pcmk__xe_first_child(op_reply, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *tmp = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); if (tmp == NULL) { crm_trace("No output in reply to \"%s\" command %d", op, cib->call_id - 1); } else { *output_data = pcmk__xml_copy(NULL, tmp); } } pcmk__xml_free(op_reply); return rc; } static int cib_remote_callback_dispatch(gpointer user_data) { int rc; cib_t *cib = user_data; cib_remote_opaque_t *private = cib->variant_opaque; xmlNode *msg = NULL; const char *type = NULL; /* If start time is 0, we've previously handled a complete message and this * connection is being reused for a new message. Reset the start_time, * giving this new message timeout_sec from now to complete. */ if (private->start_time == 0) { private->start_time = time(NULL); } rc = pcmk__read_available_remote_data(&private->callback); switch (rc) { case pcmk_rc_ok: /* We have the whole message so process it */ break; case EAGAIN: /* Have we timed out? */ if (time(NULL) >= private->start_time + private->timeout_sec) { crm_info("Error reading from CIB manager connection: %s", pcmk_rc_str(ETIME)); return -1; } /* We haven't read the whole message yet */ return 0; default: /* Error */ crm_info("Error reading from CIB manager connection: %s", pcmk_rc_str(rc)); return -1; } msg = pcmk__remote_message_xml(&private->callback); if (msg == NULL) { private->start_time = 0; return 0; } type = pcmk__xe_get(msg, PCMK__XA_T); crm_trace("Activating %s callbacks...", type); if (pcmk__str_eq(type, PCMK__VALUE_CIB, pcmk__str_none)) { cib_native_callback(cib, msg, 0, 0); } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_NOTIFY, pcmk__str_none)) { g_list_foreach(cib->notify_list, cib_native_notify, msg); } else { crm_err("Unknown message type: %s", type); } pcmk__xml_free(msg); private->start_time = 0; return 0; } static int cib_remote_command_dispatch(gpointer user_data) { int rc; cib_t *cib = user_data; cib_remote_opaque_t *private = cib->variant_opaque; /* See cib_remote_callback_dispatch */ if (private->start_time == 0) { private->start_time = time(NULL); } rc = pcmk__read_available_remote_data(&private->command); if (rc == EAGAIN) { /* Have we timed out? */ if (time(NULL) >= private->start_time + private->timeout_sec) { crm_info("Error reading from CIB manager connection: %s", pcmk_rc_str(ETIME)); return -1; } /* We haven't read the whole message yet */ return 0; } free(private->command.buffer); private->command.buffer = NULL; crm_err("received late reply for remote cib connection, discarding"); if (rc != pcmk_rc_ok) { crm_info("Error reading from CIB manager connection: %s", pcmk_rc_str(rc)); return -1; } private->start_time = 0; return 0; } static int cib_tls_close(cib_t *cib) { cib_remote_opaque_t *private = cib->variant_opaque; if (private->encrypted) { if (private->command.tls_session) { gnutls_bye(private->command.tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(private->command.tls_session); } if (private->callback.tls_session) { gnutls_bye(private->callback.tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(private->callback.tls_session); } private->command.tls_session = NULL; private->callback.tls_session = NULL; pcmk__free_tls(tls); tls = NULL; } if (private->command.tcp_socket >= 0) { shutdown(private->command.tcp_socket, SHUT_RDWR); /* no more receptions */ close(private->command.tcp_socket); } if (private->callback.tcp_socket >= 0) { shutdown(private->callback.tcp_socket, SHUT_RDWR); /* no more receptions */ close(private->callback.tcp_socket); } private->command.tcp_socket = -1; private->callback.tcp_socket = -1; free(private->command.buffer); free(private->callback.buffer); private->command.buffer = NULL; private->callback.buffer = NULL; return 0; } static void cib_remote_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); cib_tls_close(user_data); } static int cib_tls_signon(cib_t *cib, pcmk__remote_t *connection, gboolean event_channel) { cib_remote_opaque_t *private = cib->variant_opaque; int rc; xmlNode *answer = NULL; xmlNode *login = NULL; static struct mainloop_fd_callbacks cib_fd_callbacks = { 0, }; cib_fd_callbacks.dispatch = event_channel ? cib_remote_callback_dispatch : cib_remote_command_dispatch; cib_fd_callbacks.destroy = cib_remote_connection_destroy; connection->tcp_socket = -1; connection->tls_session = NULL; rc = pcmk__connect_remote(private->server, private->port, 0, NULL, &(connection->tcp_socket), NULL, NULL); if (rc != pcmk_rc_ok) { crm_info("Remote connection to %s:%d failed: %s " QB_XS " rc=%d", private->server, private->port, pcmk_rc_str(rc), rc); return -ENOTCONN; } if (private->encrypted) { bool use_cert = pcmk__x509_enabled(); int tls_rc = GNUTLS_E_SUCCESS; rc = pcmk__init_tls(&tls, false, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_ANON); if (rc != pcmk_rc_ok) { return -1; } /* bind the socket to GnuTls lib */ connection->tls_session = pcmk__new_tls_session(tls, connection->tcp_socket); if (connection->tls_session == NULL) { cib_tls_close(cib); return -1; } rc = pcmk__tls_client_handshake(connection, TLS_HANDSHAKE_TIMEOUT, &tls_rc); if (rc != pcmk_rc_ok) { crm_err("Remote CIB session creation for %s:%d failed: %s", private->server, private->port, (rc == EPROTO)? gnutls_strerror(tls_rc) : pcmk_rc_str(rc)); gnutls_deinit(connection->tls_session); connection->tls_session = NULL; cib_tls_close(cib); return -1; } } /* Now that the handshake is done, see if any client TLS certificate is * close to its expiration date and log if so. If a TLS certificate is not * in use, this function will just return so we don't need to check for the * session type here. */ pcmk__tls_check_cert_expiration(connection->tls_session); /* login to server */ login = pcmk__xe_create(NULL, PCMK__XE_CIB_COMMAND); crm_xml_add(login, PCMK_XA_OP, "authenticate"); crm_xml_add(login, PCMK_XA_USER, private->user); crm_xml_add(login, PCMK__XA_PASSWORD, private->passwd); crm_xml_add(login, PCMK__XA_HIDDEN, PCMK__VALUE_PASSWORD); pcmk__remote_send_xml(connection, login); pcmk__xml_free(login); rc = pcmk_ok; if (pcmk__read_remote_message(connection, -1) == ENOTCONN) { rc = -ENOTCONN; } answer = pcmk__remote_message_xml(connection); crm_log_xml_trace(answer, "Reply"); if (answer == NULL) { rc = -EPROTO; } else { /* grab the token */ const char *msg_type = pcmk__xe_get(answer, PCMK__XA_CIB_OP); const char *tmp_ticket = pcmk__xe_get(answer, PCMK__XA_CIB_CLIENTID); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_err("Invalid registration message: %s", msg_type); rc = -EPROTO; } else if (tmp_ticket == NULL) { rc = -EPROTO; } else { connection->token = strdup(tmp_ticket); } } pcmk__xml_free(answer); answer = NULL; if (rc != 0) { cib_tls_close(cib); return rc; } crm_trace("remote client connection established"); private->timeout_sec = 60; connection->source = mainloop_add_fd("cib-remote", G_PRIORITY_HIGH, connection->tcp_socket, cib, &cib_fd_callbacks); return rc; } static int cib_remote_signon(cib_t *cib, const char *name, enum cib_conn_type type) { int rc = pcmk_ok; cib_remote_opaque_t *private = cib->variant_opaque; if (name == NULL) { name = pcmk__s(crm_system_name, "client"); } if (private->passwd == NULL) { if (private->out == NULL) { /* If no pcmk__output_t is set, just assume that a text prompt * is good enough. */ pcmk__text_prompt("Password", false, &(private->passwd)); } else { private->out->prompt("Password", false, &(private->passwd)); } } if (private->server == NULL || private->user == NULL) { rc = -EINVAL; goto done; } rc = cib_tls_signon(cib, &(private->command), FALSE); if (rc != pcmk_ok) { goto done; } rc = cib_tls_signon(cib, &(private->callback), TRUE); done: if (rc == pcmk_ok) { crm_info("Opened connection to %s:%d for %s", private->server, private->port, name); cib->state = cib_connected_command; cib->type = cib_command; } else { crm_info("Connection to %s:%d for %s failed: %s\n", private->server, private->port, name, pcmk_strerror(rc)); } return rc; } static int cib_remote_signoff(cib_t *cib) { int rc = pcmk_ok; crm_debug("Disconnecting from the CIB manager"); cib_tls_close(cib); cib->cmds->end_transaction(cib, false, cib_none); cib->state = cib_disconnected; cib->type = cib_no_connection; return rc; } static int cib_remote_free(cib_t *cib) { int rc = pcmk_ok; crm_warn("Freeing CIB"); if (cib->state != cib_disconnected) { rc = cib_remote_signoff(cib); if (rc == pcmk_ok) { cib_remote_opaque_t *private = cib->variant_opaque; free(private->server); free(private->user); free(private->passwd); free(cib->cmds); free(cib->user); free(private); free(cib); } } return rc; } static int cib_remote_register_notification(cib_t * cib, const char *callback, int enabled) { xmlNode *notify_msg = pcmk__xe_create(NULL, PCMK__XE_CIB_COMMAND); cib_remote_opaque_t *private = cib->variant_opaque; crm_xml_add(notify_msg, PCMK__XA_CIB_OP, PCMK__VALUE_CIB_NOTIFY); crm_xml_add(notify_msg, PCMK__XA_CIB_NOTIFY_TYPE, callback); - crm_xml_add_int(notify_msg, PCMK__XA_CIB_NOTIFY_ACTIVATE, enabled); + pcmk__xe_set_int(notify_msg, PCMK__XA_CIB_NOTIFY_ACTIVATE, enabled); pcmk__remote_send_xml(&private->callback, notify_msg); pcmk__xml_free(notify_msg); return pcmk_ok; } static int cib_remote_set_connection_dnotify(cib_t * cib, void (*dnotify) (gpointer user_data)) { return -EPROTONOSUPPORT; } /*! * \internal * \brief Get the given CIB connection's unique client identifiers * * These can be used to check whether this client requested the action that * triggered a CIB notification. * * \param[in] cib CIB connection * \param[out] async_id If not \p NULL, where to store asynchronous client ID * \param[out] sync_id If not \p NULL, where to store synchronous client ID * * \return Legacy Pacemaker return code (specifically, \p pcmk_ok) * * \note This is the \p cib_remote variant implementation of * \p cib_api_operations_t:client_id(). * \note The client IDs are assigned during CIB sign-on. */ static int cib_remote_client_id(const cib_t *cib, const char **async_id, const char **sync_id) { cib_remote_opaque_t *private = cib->variant_opaque; if (async_id != NULL) { // private->callback is the channel for async requests *async_id = private->callback.token; } if (sync_id != NULL) { // private->command is the channel for sync requests *sync_id = private->command.token; } return pcmk_ok; } cib_t * cib_remote_new(const char *server, const char *user, const char *passwd, int port, gboolean encrypted) { cib_remote_opaque_t *private = NULL; cib_t *cib = cib_new_variant(); if (cib == NULL) { return NULL; } private = calloc(1, sizeof(cib_remote_opaque_t)); if (private == NULL) { free(cib); return NULL; } cib->variant = cib_remote; cib->variant_opaque = private; private->server = pcmk__str_copy(server); private->user = pcmk__str_copy(user); private->passwd = pcmk__str_copy(passwd); private->port = port; private->encrypted = encrypted; /* assign variant specific ops */ cib->delegate_fn = cib_remote_perform_op; cib->cmds->signon = cib_remote_signon; cib->cmds->signoff = cib_remote_signoff; cib->cmds->free = cib_remote_free; cib->cmds->register_notification = cib_remote_register_notification; cib->cmds->set_connection_dnotify = cib_remote_set_connection_dnotify; cib->cmds->client_id = cib_remote_client_id; return cib; } void cib__set_output(cib_t *cib, pcmk__output_t *out) { cib_remote_opaque_t *private; if (cib->variant != cib_remote) { return; } private = cib->variant_opaque; private->out = out; } diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c index b3c29eaa9e..f2ae4c45ca 100644 --- a/lib/cib/cib_utils.c +++ b/lib/cib/cib_utils.c @@ -1,955 +1,955 @@ /* * Original copyright 2004 International Business Machines * Later changes copyright 2008-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean cib_version_details(xmlNode * cib, int *admin_epoch, int *epoch, int *updates) { *epoch = -1; *updates = -1; *admin_epoch = -1; if (cib == NULL) { return FALSE; } pcmk__xe_get_int(cib, PCMK_XA_EPOCH, epoch); pcmk__xe_get_int(cib, PCMK_XA_NUM_UPDATES, updates); pcmk__xe_get_int(cib, PCMK_XA_ADMIN_EPOCH, admin_epoch); return TRUE; } gboolean cib_diff_version_details(xmlNode * diff, int *admin_epoch, int *epoch, int *updates, int *_admin_epoch, int *_epoch, int *_updates) { int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; pcmk__xml_patchset_versions(diff, del, add); *admin_epoch = add[0]; *epoch = add[1]; *updates = add[2]; *_admin_epoch = del[0]; *_epoch = del[1]; *_updates = del[2]; return TRUE; } /*! * \internal * \brief Get the XML patchset from a CIB diff notification * * \param[in] msg CIB diff notification * \param[out] patchset Where to store XML patchset * * \return Standard Pacemaker return code */ int cib__get_notify_patchset(const xmlNode *msg, const xmlNode **patchset) { int rc = pcmk_err_generic; xmlNode *wrapper = NULL; pcmk__assert(patchset != NULL); *patchset = NULL; if (msg == NULL) { crm_err("CIB diff notification received with no XML"); return ENOMSG; } if ((pcmk__xe_get_int(msg, PCMK__XA_CIB_RC, &rc) != pcmk_rc_ok) || (rc != pcmk_ok)) { crm_warn("Ignore failed CIB update: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "failed"); return pcmk_legacy2rc(rc); } wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); *patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); if (*patchset == NULL) { crm_err("CIB diff notification received with no patchset"); return ENOMSG; } return pcmk_rc_ok; } /*! * \brief Create XML for a new (empty) CIB * * \param[in] cib_epoch What to use as \c PCMK_XA_EPOCH CIB attribute * * \return Newly created XML for empty CIB * * \note It is the caller's responsibility to free the result with * \c pcmk__xml_free(). */ xmlNode * createEmptyCib(int cib_epoch) { xmlNode *cib_root = NULL, *config = NULL; cib_root = pcmk__xe_create(NULL, PCMK_XE_CIB); crm_xml_add(cib_root, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); crm_xml_add(cib_root, PCMK_XA_VALIDATE_WITH, pcmk__highest_schema_name()); - crm_xml_add_int(cib_root, PCMK_XA_EPOCH, cib_epoch); - crm_xml_add_int(cib_root, PCMK_XA_NUM_UPDATES, 0); - crm_xml_add_int(cib_root, PCMK_XA_ADMIN_EPOCH, 0); + pcmk__xe_set_int(cib_root, PCMK_XA_EPOCH, cib_epoch); + pcmk__xe_set_int(cib_root, PCMK_XA_NUM_UPDATES, 0); + pcmk__xe_set_int(cib_root, PCMK_XA_ADMIN_EPOCH, 0); config = pcmk__xe_create(cib_root, PCMK_XE_CONFIGURATION); pcmk__xe_create(cib_root, PCMK_XE_STATUS); pcmk__xe_create(config, PCMK_XE_CRM_CONFIG); pcmk__xe_create(config, PCMK_XE_NODES); pcmk__xe_create(config, PCMK_XE_RESOURCES); pcmk__xe_create(config, PCMK_XE_CONSTRAINTS); #if PCMK__RESOURCE_STICKINESS_DEFAULT != 0 { xmlNode *rsc_defaults = pcmk__xe_create(config, PCMK_XE_RSC_DEFAULTS); xmlNode *meta = pcmk__xe_create(rsc_defaults, PCMK_XE_META_ATTRIBUTES); xmlNode *nvpair = pcmk__xe_create(meta, PCMK_XE_NVPAIR); crm_xml_add(meta, PCMK_XA_ID, "build-resource-defaults"); crm_xml_add(nvpair, PCMK_XA_ID, "build-" PCMK_META_RESOURCE_STICKINESS); crm_xml_add(nvpair, PCMK_XA_NAME, PCMK_META_RESOURCE_STICKINESS); - crm_xml_add_int(nvpair, PCMK_XA_VALUE, - PCMK__RESOURCE_STICKINESS_DEFAULT); + pcmk__xe_set_int(nvpair, PCMK_XA_VALUE, + PCMK__RESOURCE_STICKINESS_DEFAULT); } #endif return cib_root; } static bool cib_acl_enabled(xmlNode *xml, const char *user) { bool rc = FALSE; if(pcmk_acl_required(user)) { const char *value = NULL; GHashTable *options = pcmk__strkey_table(free, free); cib_read_config(options, xml); value = pcmk__cluster_option(options, PCMK_OPT_ENABLE_ACL); rc = crm_is_true(value); g_hash_table_destroy(options); } crm_trace("CIB ACL is %s", rc ? "enabled" : "disabled"); return rc; } /*! * \internal * \brief Determine whether to perform operations on a scratch copy of the CIB * * \param[in] op CIB operation * \param[in] section CIB section * \param[in] call_options CIB call options * * \return \p true if we should make a copy of the CIB, or \p false otherwise */ static bool should_copy_cib(const char *op, const char *section, int call_options) { if (pcmk_is_set(call_options, cib_dryrun)) { // cib_dryrun implies a scratch copy by definition; no side effects return true; } if (pcmk__str_eq(op, PCMK__CIB_REQUEST_COMMIT_TRANSACT, pcmk__str_none)) { /* Commit-transaction must make a copy for atomicity. We must revert to * the original CIB if the entire transaction cannot be applied * successfully. */ return true; } if (pcmk_is_set(call_options, cib_transaction)) { /* If cib_transaction is set, then we're in the process of committing a * transaction. The commit-transaction request already made a scratch * copy, and we're accumulating changes in that copy. */ return false; } if (pcmk__str_eq(section, PCMK_XE_STATUS, pcmk__str_none)) { /* Copying large CIBs accounts for a huge percentage of our CIB usage, * and this avoids some of it. * * @TODO: Is this safe? See discussion at * https://github.com/ClusterLabs/pacemaker/pull/3094#discussion_r1211400690. */ return false; } // Default behavior is to operate on a scratch copy return true; } int cib_perform_op(cib_t *cib, const char *op, uint32_t call_options, cib__op_fn_t fn, bool is_query, const char *section, xmlNode *req, xmlNode *input, bool manage_counters, bool *config_changed, xmlNode **current_cib, xmlNode **result_cib, xmlNode **diff, xmlNode **output) { int rc = pcmk_ok; bool check_schema = true; bool make_copy = true; xmlNode *top = NULL; xmlNode *scratch = NULL; xmlNode *patchset_cib = NULL; xmlNode *local_diff = NULL; const char *user = pcmk__xe_get(req, PCMK__XA_CIB_USER); const bool enable_acl = cib_acl_enabled(*current_cib, user); bool with_digest = false; crm_trace("Begin %s%s%s op", (pcmk_is_set(call_options, cib_dryrun)? "dry run of " : ""), (is_query? "read-only " : ""), op); CRM_CHECK(output != NULL, return -ENOMSG); CRM_CHECK(current_cib != NULL, return -ENOMSG); CRM_CHECK(result_cib != NULL, return -ENOMSG); CRM_CHECK(config_changed != NULL, return -ENOMSG); if(output) { *output = NULL; } *result_cib = NULL; *config_changed = false; if (fn == NULL) { return -EINVAL; } if (is_query) { xmlNode *cib_ro = *current_cib; xmlNode *cib_filtered = NULL; if (enable_acl && xml_acl_filtered_copy(user, *current_cib, *current_cib, &cib_filtered)) { if (cib_filtered == NULL) { crm_debug("Pre-filtered the entire cib"); return -EACCES; } cib_ro = cib_filtered; crm_log_xml_trace(cib_ro, "filtered"); } rc = (*fn) (op, call_options, section, req, input, cib_ro, result_cib, output); if(output == NULL || *output == NULL) { /* nothing */ } else if(cib_filtered == *output) { cib_filtered = NULL; /* Let them have this copy */ } else if (*output == *current_cib) { /* They already know not to free it */ } else if(cib_filtered && (*output)->doc == cib_filtered->doc) { /* We're about to free the document of which *output is a part */ *output = pcmk__xml_copy(NULL, *output); } else if ((*output)->doc == (*current_cib)->doc) { /* Give them a copy they can free */ *output = pcmk__xml_copy(NULL, *output); } pcmk__xml_free(cib_filtered); return rc; } make_copy = should_copy_cib(op, section, call_options); if (!make_copy) { /* Conditional on v2 patch style */ scratch = *current_cib; // Make a copy of the top-level element to store version details top = pcmk__xe_create(NULL, (const char *) scratch->name); pcmk__xe_copy_attrs(top, scratch, pcmk__xaf_none); patchset_cib = top; pcmk__xml_commit_changes(scratch->doc); pcmk__xml_doc_set_flags(scratch->doc, pcmk__xf_tracking); if (enable_acl) { pcmk__enable_acl(*current_cib, scratch, user); } rc = (*fn) (op, call_options, section, req, input, scratch, &scratch, output); /* If scratch points to a new object now (for example, after an erase * operation), then *current_cib should point to the same object. * * @TODO Enable tracking and ACLs and calculate changes? Change tracking * and unpacked ACLs didn't carry over to new object. */ *current_cib = scratch; } else { scratch = pcmk__xml_copy(NULL, *current_cib); patchset_cib = *current_cib; pcmk__xml_doc_set_flags(scratch->doc, pcmk__xf_tracking); if (enable_acl) { pcmk__enable_acl(*current_cib, scratch, user); } rc = (*fn) (op, call_options, section, req, input, *current_cib, &scratch, output); /* @TODO This appears to be a hack to determine whether scratch points * to a new object now, without saving the old pointer (which may be * invalid now) for comparison. Confirm this, and check more clearly. */ if (!pcmk__xml_doc_all_flags_set(scratch->doc, pcmk__xf_tracking)) { crm_trace("Inferring changes after %s op", op); pcmk__xml_commit_changes(scratch->doc); if (enable_acl) { pcmk__enable_acl(*current_cib, scratch, user); } pcmk__xml_mark_changes(*current_cib, scratch); } CRM_CHECK(*current_cib != scratch, return -EINVAL); } xml_acl_disable(scratch); /* Allow the system to make any additional changes */ if (rc == pcmk_ok && scratch == NULL) { rc = -EINVAL; goto done; } else if(rc == pcmk_ok && xml_acl_denied(scratch)) { crm_trace("ACL rejected part or all of the proposed changes"); rc = -EACCES; goto done; } else if (rc != pcmk_ok) { goto done; } /* If the CIB is from a file, we don't need to check that the feature set is * supported. All we care about in that case is the schema version, which * is checked elsewhere. */ if (scratch && (cib == NULL || cib->variant != cib_file)) { const char *new_version = pcmk__xe_get(scratch, PCMK_XA_CRM_FEATURE_SET); rc = pcmk__check_feature_set(new_version); if (rc != pcmk_rc_ok) { crm_err("Discarding update with feature set '%s' greater than " "our own '%s'", new_version, CRM_FEATURE_SET); rc = pcmk_rc2legacy(rc); goto done; } } if (patchset_cib != NULL) { int old = 0; int new = 0; pcmk__xe_get_int(scratch, PCMK_XA_ADMIN_EPOCH, &new); pcmk__xe_get_int(patchset_cib, PCMK_XA_ADMIN_EPOCH, &old); if (old > new) { crm_err("%s went backwards: %d -> %d (Opts: %#x)", PCMK_XA_ADMIN_EPOCH, old, new, call_options); crm_log_xml_warn(req, "Bad Op"); crm_log_xml_warn(input, "Bad Data"); rc = -pcmk_err_old_data; } else if (old == new) { pcmk__xe_get_int(scratch, PCMK_XA_EPOCH, &new); pcmk__xe_get_int(patchset_cib, PCMK_XA_EPOCH, &old); if (old > new) { crm_err("%s went backwards: %d -> %d (Opts: %#x)", PCMK_XA_EPOCH, old, new, call_options); crm_log_xml_warn(req, "Bad Op"); crm_log_xml_warn(input, "Bad Data"); rc = -pcmk_err_old_data; } } } crm_trace("Massaging CIB contents"); pcmk__strip_xml_text(scratch); if (make_copy) { static time_t expires = 0; time_t tm_now = time(NULL); if (expires < tm_now) { expires = tm_now + 60; /* Validate clients are correctly applying v2-style diffs at most once a minute */ with_digest = true; } } local_diff = xml_create_patchset(0, patchset_cib, scratch, config_changed, manage_counters); pcmk__log_xml_changes(LOG_TRACE, scratch); pcmk__xml_commit_changes(scratch->doc); if(local_diff) { if (with_digest) { pcmk__xml_patchset_add_digest(local_diff, scratch); } pcmk__log_xml_patchset(LOG_INFO, local_diff); crm_log_xml_trace(local_diff, "raw patch"); } if (make_copy && (local_diff != NULL)) { // Original to compare against doesn't exist pcmk__if_tracing( { // Validate the calculated patch set int test_rc = pcmk_ok; int format = 1; xmlNode *cib_copy = pcmk__xml_copy(NULL, patchset_cib); pcmk__xe_get_int(local_diff, PCMK_XA_FORMAT, &format); test_rc = xml_apply_patchset(cib_copy, local_diff, manage_counters); if (test_rc != pcmk_ok) { save_xml_to_file(cib_copy, "PatchApply:calculated", NULL); save_xml_to_file(patchset_cib, "PatchApply:input", NULL); save_xml_to_file(scratch, "PatchApply:actual", NULL); save_xml_to_file(local_diff, "PatchApply:diff", NULL); crm_err("v%d patchset error, patch failed to apply: %s " "(%d)", format, pcmk_rc_str(pcmk_legacy2rc(test_rc)), test_rc); } pcmk__xml_free(cib_copy); }, {} ); } if (pcmk__str_eq(section, PCMK_XE_STATUS, pcmk__str_casei)) { /* Throttle the amount of costly validation we perform due to status updates * a) we don't really care whats in the status section * b) we don't validate any of its contents at the moment anyway */ check_schema = false; } /* === scratch must not be modified after this point === * Exceptions, anything in: static filter_t filter[] = { { 0, PCMK_XA_CRM_DEBUG_ORIGIN }, { 0, PCMK_XA_CIB_LAST_WRITTEN }, { 0, PCMK_XA_UPDATE_ORIGIN }, { 0, PCMK_XA_UPDATE_CLIENT }, { 0, PCMK_XA_UPDATE_USER }, }; */ if (*config_changed && !pcmk_is_set(call_options, cib_no_mtime)) { const char *schema = pcmk__xe_get(scratch, PCMK_XA_VALIDATE_WITH); if (schema == NULL) { rc = -pcmk_err_cib_corrupt; } pcmk__xe_add_last_written(scratch); pcmk__warn_if_schema_deprecated(schema); /* Make values of origin, client, and user in scratch match * the ones in req (if the schema allows the attributes) */ if (pcmk__cmp_schemas_by_name(schema, "pacemaker-1.2") >= 0) { const char *origin = pcmk__xe_get(req, PCMK__XA_SRC); const char *client = pcmk__xe_get(req, PCMK__XA_CIB_CLIENTNAME); if (origin != NULL) { crm_xml_add(scratch, PCMK_XA_UPDATE_ORIGIN, origin); } else { pcmk__xe_remove_attr(scratch, PCMK_XA_UPDATE_ORIGIN); } if (client != NULL) { crm_xml_add(scratch, PCMK_XA_UPDATE_CLIENT, user); } else { pcmk__xe_remove_attr(scratch, PCMK_XA_UPDATE_CLIENT); } if (user != NULL) { crm_xml_add(scratch, PCMK_XA_UPDATE_USER, user); } else { pcmk__xe_remove_attr(scratch, PCMK_XA_UPDATE_USER); } } } crm_trace("Perform validation: %s", pcmk__btoa(check_schema)); if ((rc == pcmk_ok) && check_schema && !pcmk__configured_schema_validates(scratch)) { rc = -pcmk_err_schema_validation; } done: *result_cib = scratch; /* @TODO: This may not work correctly with !make_copy, since we don't * keep the original CIB. */ if ((rc != pcmk_ok) && cib_acl_enabled(patchset_cib, user) && xml_acl_filtered_copy(user, patchset_cib, scratch, result_cib)) { if (*result_cib == NULL) { crm_debug("Pre-filtered the entire cib result"); } pcmk__xml_free(scratch); } if(diff) { *diff = local_diff; } else { pcmk__xml_free(local_diff); } pcmk__xml_free(top); crm_trace("Done"); return rc; } int cib__create_op(cib_t *cib, const char *op, const char *host, const char *section, xmlNode *data, int call_options, const char *user_name, const char *client_name, xmlNode **op_msg) { CRM_CHECK((cib != NULL) && (op_msg != NULL), return -EPROTO); *op_msg = pcmk__xe_create(NULL, PCMK__XE_CIB_COMMAND); cib->call_id++; if (cib->call_id < 1) { cib->call_id = 1; } crm_xml_add(*op_msg, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(*op_msg, PCMK__XA_CIB_OP, op); crm_xml_add(*op_msg, PCMK__XA_CIB_HOST, host); crm_xml_add(*op_msg, PCMK__XA_CIB_SECTION, section); crm_xml_add(*op_msg, PCMK__XA_CIB_USER, user_name); crm_xml_add(*op_msg, PCMK__XA_CIB_CLIENTNAME, client_name); - crm_xml_add_int(*op_msg, PCMK__XA_CIB_CALLID, cib->call_id); + pcmk__xe_set_int(*op_msg, PCMK__XA_CIB_CALLID, cib->call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); - crm_xml_add_int(*op_msg, PCMK__XA_CIB_CALLOPT, call_options); + pcmk__xe_set_int(*op_msg, PCMK__XA_CIB_CALLOPT, call_options); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(*op_msg, PCMK__XE_CIB_CALLDATA); pcmk__xml_copy(wrapper, data); } return pcmk_ok; } /*! * \internal * \brief Check whether a CIB request is supported in a transaction * * \param[in] request CIB request * * \return Standard Pacemaker return code */ static int validate_transaction_request(const xmlNode *request) { const char *op = pcmk__xe_get(request, PCMK__XA_CIB_OP); const char *host = pcmk__xe_get(request, PCMK__XA_CIB_HOST); const cib__operation_t *operation = NULL; int rc = cib__get_operation(op, &operation); if (rc != pcmk_rc_ok) { // cib__get_operation() logs error return rc; } if (!pcmk_is_set(operation->flags, cib__op_attr_transaction)) { crm_err("Operation %s is not supported in CIB transactions", op); return EOPNOTSUPP; } if (host != NULL) { crm_err("Operation targeting a specific node (%s) is not supported in " "a CIB transaction", host); return EOPNOTSUPP; } return pcmk_rc_ok; } /*! * \internal * \brief Append a CIB request to a CIB transaction * * \param[in,out] cib CIB client whose transaction to extend * \param[in,out] request Request to add to transaction * * \return Legacy Pacemaker return code */ int cib__extend_transaction(cib_t *cib, xmlNode *request) { int rc = pcmk_rc_ok; pcmk__assert((cib != NULL) && (request != NULL)); rc = validate_transaction_request(request); if ((rc == pcmk_rc_ok) && (cib->transaction == NULL)) { rc = pcmk_rc_no_transaction; } if (rc == pcmk_rc_ok) { pcmk__xml_copy(cib->transaction, request); } else { const char *op = pcmk__xe_get(request, PCMK__XA_CIB_OP); const char *client_id = NULL; cib->cmds->client_id(cib, NULL, &client_id); crm_err("Failed to add '%s' operation to transaction for client %s: %s", op, pcmk__s(client_id, "(unidentified)"), pcmk_rc_str(rc)); crm_log_xml_info(request, "failed"); } return pcmk_rc2legacy(rc); } void cib_native_callback(cib_t * cib, xmlNode * msg, int call_id, int rc) { xmlNode *output = NULL; cib_callback_client_t *blob = NULL; if (msg != NULL) { xmlNode *wrapper = NULL; pcmk__xe_get_int(msg, PCMK__XA_CIB_RC, &rc); pcmk__xe_get_int(msg, PCMK__XA_CIB_CALLID, &call_id); wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_CALLDATA, NULL, NULL); output = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); } blob = cib__lookup_id(call_id); if (blob == NULL) { crm_trace("No callback found for call %d", call_id); } if (cib == NULL) { crm_debug("No cib object supplied"); } if (rc == -pcmk_err_diff_resync) { /* This is an internal value that clients do not and should not care about */ rc = pcmk_ok; } if (blob && blob->callback && (rc == pcmk_ok || blob->only_success == FALSE)) { crm_trace("Invoking callback %s for call %d", pcmk__s(blob->id, "without ID"), call_id); blob->callback(msg, call_id, rc, output, blob->user_data); } else if ((cib != NULL) && (rc != pcmk_ok)) { crm_warn("CIB command failed: %s", pcmk_strerror(rc)); crm_log_xml_debug(msg, "Failed CIB Update"); } /* This may free user_data, so do it after the callback */ if (blob) { remove_cib_op_callback(call_id, FALSE); } crm_trace("OP callback activated for %d", call_id); } void cib_native_notify(gpointer data, gpointer user_data) { xmlNode *msg = user_data; cib_notify_client_t *entry = data; const char *event = NULL; if (msg == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = pcmk__xe_get(msg, PCMK__XA_SUBT); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->callback == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (!pcmk__str_eq(entry->event, event, pcmk__str_casei)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } crm_trace("Invoking callback for %p/%s event...", entry, event); entry->callback(event, msg); crm_trace("Callback invoked..."); } gboolean cib_read_config(GHashTable * options, xmlNode * current_cib) { xmlNode *config = NULL; crm_time_t *now = NULL; if (options == NULL || current_cib == NULL) { return FALSE; } now = crm_time_new(NULL); g_hash_table_remove_all(options); config = pcmk_find_cib_element(current_cib, PCMK_XE_CRM_CONFIG); if (config) { pcmk_rule_input_t rule_input = { .now = now, }; pcmk_unpack_nvpair_blocks(config, PCMK_XE_CLUSTER_PROPERTY_SET, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, &rule_input, options, NULL); } pcmk__validate_cluster_options(options); crm_time_free(now); return TRUE; } int cib_internal_op(cib_t * cib, const char *op, const char *host, const char *section, xmlNode * data, xmlNode ** output_data, int call_options, const char *user_name) { int (*delegate)(cib_t *cib, const char *op, const char *host, const char *section, xmlNode *data, xmlNode **output_data, int call_options, const char *user_name) = NULL; if (cib == NULL) { return -EINVAL; } delegate = cib->delegate_fn; if (delegate == NULL) { return -EPROTONOSUPPORT; } if (user_name == NULL) { user_name = getenv("CIB_user"); } return delegate(cib, op, host, section, data, output_data, call_options, user_name); } /*! * \brief Apply a CIB update patch to a given CIB * * \param[in] event CIB update patch * \param[in] input CIB to patch * \param[out] output Resulting CIB after patch * \param[in] level Log the patch at this log level (unless LOG_CRIT) * * \return Legacy Pacemaker return code * \note sbd calls this function */ int cib_apply_patch_event(xmlNode *event, xmlNode *input, xmlNode **output, int level) { int rc = pcmk_err_generic; xmlNode *wrapper = NULL; xmlNode *diff = NULL; pcmk__assert((event != NULL) && (input != NULL) && (output != NULL)); pcmk__xe_get_int(event, PCMK__XA_CIB_RC, &rc); wrapper = pcmk__xe_first_child(event, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); diff = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); if (rc < pcmk_ok || diff == NULL) { return rc; } if (level > LOG_CRIT) { pcmk__log_xml_patchset(level, diff); } if (input != NULL) { rc = cib_process_diff(NULL, cib_none, NULL, event, diff, input, output, NULL); if (rc != pcmk_ok) { crm_debug("Update didn't apply: %s (%d) %p", pcmk_strerror(rc), rc, *output); if (rc == -pcmk_err_old_data) { crm_trace("Masking error, we already have the supplied update"); return pcmk_ok; } pcmk__xml_free(*output); *output = NULL; return rc; } } return rc; } #define log_signon_query_err(out, fmt, args...) do { \ if (out != NULL) { \ out->err(out, fmt, ##args); \ } else { \ crm_err(fmt, ##args); \ } \ } while (0) int cib__signon_query(pcmk__output_t *out, cib_t **cib, xmlNode **cib_object) { int rc = pcmk_rc_ok; cib_t *cib_conn = NULL; pcmk__assert(cib_object != NULL); if (cib == NULL) { cib_conn = cib_new(); } else { if (*cib == NULL) { *cib = cib_new(); } cib_conn = *cib; } if (cib_conn == NULL) { return ENOMEM; } if (cib_conn->state == cib_disconnected) { rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command); rc = pcmk_legacy2rc(rc); } if (rc != pcmk_rc_ok) { log_signon_query_err(out, "Could not connect to the CIB: %s", pcmk_rc_str(rc)); goto done; } if (out != NULL) { out->transient(out, "Querying CIB..."); } rc = cib_conn->cmds->query(cib_conn, NULL, cib_object, cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { log_signon_query_err(out, "CIB query failed: %s", pcmk_rc_str(rc)); } done: if (cib == NULL) { cib__clean_up_connection(&cib_conn); } if ((rc == pcmk_rc_ok) && (*cib_object == NULL)) { return pcmk_rc_no_input; } return rc; } int cib__signon_attempts(cib_t *cib, enum cib_conn_type type, int attempts) { int rc = pcmk_rc_ok; crm_trace("Attempting connection to CIB manager (up to %d time%s)", attempts, pcmk__plural_s(attempts)); for (int remaining = attempts - 1; remaining >= 0; --remaining) { rc = cib->cmds->signon(cib, crm_system_name, type); if ((rc == pcmk_rc_ok) || (remaining == 0) || ((errno != EAGAIN) && (errno != EALREADY))) { break; } // Retry after soft error (interrupted by signal, etc.) pcmk__sleep_ms((attempts - remaining) * 500); crm_debug("Re-attempting connection to CIB manager (%d attempt%s remaining)", remaining, pcmk__plural_s(remaining)); } return rc; } int cib__clean_up_connection(cib_t **cib) { int rc; if (*cib == NULL) { return pcmk_rc_ok; } rc = (*cib)->cmds->signoff(*cib); cib_delete(*cib); *cib = NULL; return pcmk_legacy2rc(rc); } diff --git a/lib/cluster/election.c b/lib/cluster/election.c index 70b379335e..4ec64cb95b 100644 --- a/lib/cluster/election.c +++ b/lib/cluster/election.c @@ -1,742 +1,743 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "crmcluster_private.h" #define STORM_INTERVAL 2 /* in seconds */ struct pcmk__election { enum election_result state; // Current state of election guint count; // How many times local node has voted void (*cb)(pcmk_cluster_t *); // Function to call if election is won GHashTable *voted; // Key = node name, value = how node voted mainloop_timer_t *timeout; // When to abort if all votes not received int election_wins; // Track wins, for storm detection bool wrote_blackbox; // Write a storm blackbox at most once time_t expires; // When storm detection period ends time_t last_election_loss; // When dampening period ends }; static void election_complete(pcmk_cluster_t *cluster) { pcmk__assert((cluster != NULL) && (cluster->priv->election != NULL)); cluster->priv->election->state = election_won; if (cluster->priv->election->cb != NULL) { cluster->priv->election->cb(cluster); } election_reset(cluster); } static gboolean election_timer_cb(gpointer user_data) { pcmk_cluster_t *cluster = user_data; crm_info("Declaring local node as winner after election timed out"); election_complete(cluster); return FALSE; } /*! * \internal * \brief Get current state of an election * * \param[in] cluster Cluster with election * * \return Current state of \e */ enum election_result election_state(const pcmk_cluster_t *cluster) { if ((cluster == NULL) || (cluster->priv->election == NULL)) { return election_error; } return cluster->priv->election->state; } /* The local node will be declared the winner if missing votes are not received * within this time. The value is chosen to be the same as the default for the * election-timeout cluster option. */ #define ELECTION_TIMEOUT_MS 120000 /*! * \internal * \brief Track election state in a cluster * * Every node that wishes to participate in an election must initialize the * election once, typically at start-up. * * \param[in] cluster Cluster that election is for * \param[in] cb Function to call if local node wins election */ void election_init(pcmk_cluster_t *cluster, void (*cb)(pcmk_cluster_t *)) { const char *name = pcmk__s(crm_system_name, "election"); CRM_CHECK(cluster->priv->election == NULL, return); cluster->priv->election = pcmk__assert_alloc(1, sizeof(pcmk__election_t)); cluster->priv->election->cb = cb; cluster->priv->election->timeout = mainloop_timer_add(name, ELECTION_TIMEOUT_MS, FALSE, election_timer_cb, cluster); } /*! * \internal * \brief Disregard any previous vote by specified peer * * This discards any recorded vote from a specified peer. Election users should * call this whenever a voting peer becomes inactive. * * \param[in,out] cluster Cluster with election * \param[in] uname Name of peer to disregard */ void election_remove(pcmk_cluster_t *cluster, const char *uname) { if ((cluster != NULL) && (cluster->priv->election != NULL) && (uname != NULL) && (cluster->priv->election->voted != NULL)) { crm_trace("Discarding (no-)vote from lost peer %s", uname); g_hash_table_remove(cluster->priv->election->voted, uname); } } /*! * \internal * \brief Stop election timer and disregard all votes * * \param[in,out] cluster Cluster with election */ void election_reset(pcmk_cluster_t *cluster) { if ((cluster != NULL) && (cluster->priv->election != NULL)) { crm_trace("Resetting election"); mainloop_timer_stop(cluster->priv->election->timeout); if (cluster->priv->election->voted != NULL) { g_hash_table_destroy(cluster->priv->election->voted); cluster->priv->election->voted = NULL; } } } /*! * \internal * \brief Free an election object * * Free all memory associated with an election object, stopping its * election timer (if running). * * \param[in,out] cluster Cluster with election */ void election_fini(pcmk_cluster_t *cluster) { if ((cluster != NULL) && (cluster->priv->election != NULL)) { election_reset(cluster); crm_trace("Destroying election"); mainloop_timer_del(cluster->priv->election->timeout); free(cluster->priv->election); cluster->priv->election = NULL; } } static void election_timeout_start(pcmk_cluster_t *cluster) { mainloop_timer_start(cluster->priv->election->timeout); } /*! * \internal * \brief Stop an election's timer, if running * * \param[in,out] cluster Cluster with election */ void election_timeout_stop(pcmk_cluster_t *cluster) { if ((cluster != NULL) && (cluster->priv->election != NULL)) { mainloop_timer_stop(cluster->priv->election->timeout); } } /*! * \internal * \brief Change an election's timeout (restarting timer if running) * * \param[in,out] cluster Cluster with election * \param[in] period New timeout */ void election_timeout_set_period(pcmk_cluster_t *cluster, guint period) { CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return); mainloop_timer_set_period(cluster->priv->election->timeout, period); } static int get_uptime(struct timeval *output) { static time_t expires = 0; static struct rusage info; time_t tm_now = time(NULL); if (expires < tm_now) { int rc = 0; info.ru_utime.tv_sec = 0; info.ru_utime.tv_usec = 0; rc = getrusage(RUSAGE_SELF, &info); output->tv_sec = 0; output->tv_usec = 0; if (rc < 0) { crm_perror(LOG_ERR, "Could not calculate the current uptime"); expires = 0; return -1; } crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec, (long)info.ru_utime.tv_usec); } expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */ output->tv_sec = info.ru_utime.tv_sec; output->tv_usec = info.ru_utime.tv_usec; return 1; } static int compare_age(struct timeval your_age) { struct timeval our_age; get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */ if (our_age.tv_sec > your_age.tv_sec) { crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return 1; } else if (our_age.tv_sec < your_age.tv_sec) { crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return -1; } else if (our_age.tv_usec > your_age.tv_usec) { crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec); return 1; } else if (our_age.tv_usec < your_age.tv_usec) { crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec); return -1; } return 0; } /*! * \internal * \brief Start a new election by offering local node's candidacy * * Broadcast a "vote" election message containing the local node's ID, * (incremented) election counter, and uptime, and start the election timer. * * \param[in,out] cluster Cluster with election * * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if * all active peers do so, or if the election times out, the local node * wins the election. (If we lose to any peer vote, we will stop the * timer, so a timeout means we did not lose -- either some peer did not * vote, or we did not call election_check() in time.) */ void election_vote(pcmk_cluster_t *cluster) { struct timeval age; xmlNode *vote = NULL; pcmk__node_status_t *our_node = NULL; const char *message_type = NULL; CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return); if (cluster->priv->node_name == NULL) { crm_err("Cannot start an election: Local node name unknown"); return; } our_node = pcmk__get_node(0, cluster->priv->node_name, NULL, pcmk__node_search_cluster_member); if (!pcmk__cluster_is_node_active(our_node)) { crm_trace("Cannot vote yet: local node not connected to cluster"); return; } election_reset(cluster); cluster->priv->election->state = election_in_progress; message_type = pcmk__server_message_type(cluster->priv->server); /* @COMPAT We use message_type as the sender and recipient system for * backward compatibility (see T566). */ vote = pcmk__new_request(cluster->priv->server, message_type, NULL, message_type, CRM_OP_VOTE, NULL); cluster->priv->election->count++; crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, pcmk__cluster_get_xml_id(our_node)); - crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, cluster->priv->election->count); + pcmk__xe_set_int(vote, PCMK__XA_ELECTION_ID, + cluster->priv->election->count); // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds get_uptime(&age); pcmk__xe_set_timeval(vote, PCMK__XA_ELECTION_AGE_SEC, PCMK__XA_ELECTION_AGE_NANO_SEC, &age); pcmk__cluster_send_message(NULL, cluster->priv->server, vote); pcmk__xml_free(vote); crm_debug("Started election round %u", cluster->priv->election->count); election_timeout_start(cluster); return; } /*! * \internal * \brief Check whether local node has won an election * * If all known peers have sent no-vote messages, stop the election timer, set * the election state to won, and call any registered win callback. * * \param[in,out] cluster Cluster with election * * \return TRUE if local node has won, FALSE otherwise * \note If all known peers have sent no-vote messages, but the election owner * does not call this function, the election will not be won (and the * callback will not be called) until the election times out. * \note This should be called when election_count_vote() returns * \c election_in_progress. */ bool election_check(pcmk_cluster_t *cluster) { int voted_size = 0; int num_members = 0; CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return false); if (cluster->priv->election->voted == NULL) { crm_trace("Election check requested, but no votes received yet"); return FALSE; } voted_size = g_hash_table_size(cluster->priv->election->voted); num_members = pcmk__cluster_num_active_nodes(); /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to * stabilize */ if (voted_size >= num_members) { /* we won and everyone has voted */ election_timeout_stop(cluster); if (voted_size > num_members) { GHashTableIter gIter; const pcmk__node_status_t *node = NULL; char *key = NULL; crm_warn("Received too many votes in election"); g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) { if (pcmk__cluster_is_node_active(node)) { crm_warn("* expected vote: %s", node->name); } } g_hash_table_iter_init(&gIter, cluster->priv->election->voted); while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) { crm_warn("* actual vote: %s", key); } } crm_info("Election won by local node"); election_complete(cluster); return TRUE; } else { crm_debug("Election still waiting on %d of %d vote%s", num_members - voted_size, num_members, pcmk__plural_s(num_members)); } return FALSE; } #define LOSS_DAMPEN 2 /* in seconds */ struct vote { const char *op; const char *from; const char *version; const char *election_owner; int election_id; struct timeval age; }; /*! * \internal * \brief Unpack an election message * * \param[in] message Election message XML * \param[out] vote Parsed fields from message * * \return TRUE if election message and election are valid, FALSE otherwise * \note The parsed struct's pointer members are valid only for the lifetime of * the message argument. */ static bool parse_election_message(const xmlNode *message, struct vote *vote) { CRM_CHECK(message && vote, return FALSE); vote->election_id = -1; vote->age.tv_sec = -1; vote->age.tv_usec = -1; vote->op = pcmk__xe_get(message, PCMK__XA_CRM_TASK); vote->from = pcmk__xe_get(message, PCMK__XA_SRC); vote->version = pcmk__xe_get(message, PCMK_XA_VERSION); vote->election_owner = pcmk__xe_get(message, PCMK__XA_ELECTION_OWNER); pcmk__xe_get_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id)); if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL) || (vote->election_owner == NULL) || (vote->election_id < 0)) { crm_warn("Invalid %s message from %s", pcmk__s(vote->op, "election"), pcmk__s(vote->from, "unspecified node")); crm_log_xml_trace(message, "bad-vote"); return FALSE; } // Op-specific validation if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) { /* Only vote ops have uptime. Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds. */ if ((pcmk__xe_get_timeval(message, PCMK__XA_ELECTION_AGE_SEC, PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age)) != pcmk_rc_ok) || (vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) { crm_warn("Cannot count election %s from %s because uptime is " "missing or invalid", vote->op, vote->from); return FALSE; } } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) { crm_info("Cannot process election message from %s " "because %s is not a known election op", vote->from, vote->op); return FALSE; } /* If the membership cache is NULL, we REALLY shouldn't be voting -- * the question is how we managed to get here. */ if (pcmk__peer_cache == NULL) { crm_info("Cannot count election %s from %s " "because no peer information available", vote->op, vote->from); return FALSE; } return TRUE; } static void record_vote(pcmk_cluster_t *cluster, struct vote *vote) { pcmk__assert((vote->from != NULL) && (vote->op != NULL)); if (cluster->priv->election->voted == NULL) { cluster->priv->election->voted = pcmk__strkey_table(free, free); } pcmk__insert_dup(cluster->priv->election->voted, vote->from, vote->op); } static void send_no_vote(pcmk_cluster_t *cluster, pcmk__node_status_t *peer, struct vote *vote) { const char *message_type = NULL; xmlNode *novote = NULL; message_type = pcmk__server_message_type(cluster->priv->server); novote = pcmk__new_request(cluster->priv->server, message_type, vote->from, message_type, CRM_OP_NOVOTE, NULL); crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner); - crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id); + pcmk__xe_set_int(novote, PCMK__XA_ELECTION_ID, vote->election_id); pcmk__cluster_send_message(peer, cluster->priv->server, novote); pcmk__xml_free(novote); } /*! * \internal * \brief Process an election message (vote or no-vote) from a peer * * \param[in,out] cluster Cluster with election * \param[in] message Election message XML from peer * \param[in] can_win Whether local node is eligible to win * * \return Election state after new vote is considered * \note If the peer message is a vote, and we prefer the peer to win, this will * send a no-vote reply to the peer. * \note The situations "we lost to this vote" from "this is a late no-vote * after we've already lost" both return election_lost. If a caller needs * to distinguish them, it should save the current state before calling * this function, and then compare the result. */ enum election_result election_count_vote(pcmk_cluster_t *cluster, const xmlNode *message, bool can_win) { int log_level = LOG_INFO; gboolean done = FALSE; gboolean we_lose = FALSE; const char *reason = "unknown"; bool we_are_owner = FALSE; pcmk__node_status_t *our_node = NULL; pcmk__node_status_t *your_node = NULL; time_t tm_now = time(NULL); struct vote vote; CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL) && (message != NULL) && (cluster->priv->node_name != NULL), return election_error); if (!parse_election_message(message, &vote)) { return election_error; } your_node = pcmk__get_node(0, vote.from, NULL, pcmk__node_search_cluster_member); our_node = pcmk__get_node(0, cluster->priv->node_name, NULL, pcmk__node_search_cluster_member); we_are_owner = (our_node != NULL) && pcmk__str_eq(pcmk__cluster_get_xml_id(our_node), vote.election_owner, pcmk__str_none); if (!can_win) { reason = "Not eligible"; we_lose = TRUE; } else if (!pcmk__cluster_is_node_active(our_node)) { reason = "We are not part of the cluster"; log_level = LOG_ERR; we_lose = TRUE; } else if (we_are_owner && (vote.election_id != cluster->priv->election->count)) { log_level = LOG_TRACE; reason = "Superseded"; done = TRUE; } else if (!pcmk__cluster_is_node_active(your_node)) { /* Possibly we cached the message in the FSA queue at a point that it wasn't */ reason = "Peer is not part of our cluster"; log_level = LOG_WARNING; done = TRUE; } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none) || pcmk__str_eq(vote.from, cluster->priv->node_name, pcmk__str_casei)) { /* Receiving our own broadcast vote, or a no-vote from peer, is a vote * for us to win */ if (!we_are_owner) { crm_warn("Cannot count election round %d %s from %s " "because we did not start election (node ID %s did)", vote.election_id, vote.op, vote.from, vote.election_owner); return election_error; } if (cluster->priv->election->state != election_in_progress) { // Should only happen if we already lost crm_debug("Not counting election round %d %s from %s " "because no election in progress", vote.election_id, vote.op, vote.from); return cluster->priv->election->state; } record_vote(cluster, &vote); reason = "Recorded"; done = TRUE; } else { // A peer vote requires a comparison to determine which node is better int age_result = compare_age(vote.age); int version_result = compare_version(vote.version, CRM_FEATURE_SET); if (version_result < 0) { reason = "Version"; we_lose = TRUE; } else if (version_result > 0) { reason = "Version"; } else if (age_result < 0) { reason = "Uptime"; we_lose = TRUE; } else if (age_result > 0) { reason = "Uptime"; } else if (strcasecmp(cluster->priv->node_name, vote.from) > 0) { reason = "Host name"; we_lose = TRUE; } else { reason = "Host name"; } } if (cluster->priv->election->expires < tm_now) { cluster->priv->election->election_wins = 0; cluster->priv->election->expires = tm_now + STORM_INTERVAL; } else if (done == FALSE && we_lose == FALSE) { int peers = 1 + g_hash_table_size(pcmk__peer_cache); /* If every node has to vote down every other node, thats N*(N-1) total elections * Allow some leeway before _really_ complaining */ cluster->priv->election->election_wins++; if (cluster->priv->election->election_wins > (peers * peers)) { crm_warn("Election storm detected: %d wins in %d seconds", cluster->priv->election->election_wins, STORM_INTERVAL); cluster->priv->election->election_wins = 0; cluster->priv->election->expires = tm_now + STORM_INTERVAL; if (!(cluster->priv->election->wrote_blackbox)) { /* It's questionable whether a black box (from every node in the * cluster) would be truly helpful in diagnosing an election * storm. It's also highly doubtful a production environment * would get multiple election storms from distinct causes, so * saving one blackbox per process lifetime should be * sufficient. Alternatives would be to save a timestamp of the * last blackbox write instead of a boolean, and write a new one * if some amount of time has passed; or to save a storm count, * write a blackbox on every Nth occurrence. */ crm_write_blackbox(0, NULL); cluster->priv->election->wrote_blackbox = true; } } } if (done) { do_crm_log(log_level + 1, "Processed election round %u %s (current round %d) " "from %s (%s)", vote.election_id, vote.op, cluster->priv->election->count, vote.from, reason); return cluster->priv->election->state; } else if (we_lose == FALSE) { /* We track the time of the last election loss to implement an election * dampening period, reducing the likelihood of an election storm. If * this node has lost within the dampening period, don't start a new * election, even if we win against a peer's vote -- the peer we lost to * should win again. * * @TODO This has a problem case: if an election winner immediately * leaves the cluster, and a new election is immediately called, all * nodes could lose, with no new winner elected. The ideal solution * would be to tie the election structure with the peer caches, which * would allow us to clear the dampening when the previous winner * leaves (and would allow other improvements as well). */ if ((cluster->priv->election->last_election_loss == 0) || ((tm_now - cluster->priv->election->last_election_loss) > (time_t) LOSS_DAMPEN)) { do_crm_log(log_level, "Election round %d (started by node ID %s) pass: " "%s from %s (%s)", vote.election_id, vote.election_owner, vote.op, vote.from, reason); cluster->priv->election->last_election_loss = 0; election_timeout_stop(cluster); /* Start a new election by voting down this, and other, peers */ cluster->priv->election->state = election_start; return cluster->priv->election->state; } else { char *loss_time = NULL; loss_time = ctime(&(cluster->priv->election->last_election_loss)); if (loss_time) { // Show only HH:MM:SS loss_time += 11; loss_time[8] = '\0'; } crm_info("Ignoring election round %d (started by node ID %s) pass " "vs %s because we lost less than %ds ago at %s", vote.election_id, vote.election_owner, vote.from, LOSS_DAMPEN, (loss_time? loss_time : "unknown")); } } cluster->priv->election->last_election_loss = tm_now; do_crm_log(log_level, "Election round %d (started by node ID %s) lost: " "%s from %s (%s)", vote.election_id, vote.election_owner, vote.op, vote.from, reason); election_reset(cluster); send_no_vote(cluster, your_node, &vote); cluster->priv->election->state = election_lost; return cluster->priv->election->state; } /*! * \internal * \brief Reset any election dampening currently in effect * * \param[in,out] cluster Cluster with election */ void election_clear_dampening(pcmk_cluster_t *cluster) { if ((cluster != NULL) && (cluster->priv->election != NULL)) { cluster->priv->election->last_election_loss = 0; } } diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c index c3d3da0fe6..4fdbae01d5 100644 --- a/lib/common/ipc_attrd.c +++ b/lib/common/ipc_attrd.c @@ -1,488 +1,488 @@ /* * Copyright 2011-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include // xmlChar #include #include #include #include #include #include "crmcommon_private.h" static void set_pairs_data(pcmk__attrd_api_reply_t *data, xmlNode *msg_data) { const char *name = NULL; pcmk__attrd_query_pair_t *pair; name = pcmk__xe_get(msg_data, PCMK__XA_ATTR_NAME); for (xmlNode *node = pcmk__xe_first_child(msg_data, PCMK_XE_NODE, NULL, NULL); node != NULL; node = pcmk__xe_next(node, PCMK_XE_NODE)) { pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t)); pair->node = pcmk__xe_get(node, PCMK__XA_ATTR_HOST); pair->name = name; pair->value = pcmk__xe_get(node, PCMK__XA_ATTR_VALUE); data->data.pairs = g_list_prepend(data->data.pairs, pair); } } static bool reply_expected(pcmk_ipc_api_t *api, const xmlNode *request) { const char *command = pcmk__xe_get(request, PCMK_XA_TASK); return pcmk__str_any_of(command, PCMK__ATTRD_CMD_CLEAR_FAILURE, PCMK__ATTRD_CMD_QUERY, PCMK__ATTRD_CMD_REFRESH, PCMK__ATTRD_CMD_UPDATE, PCMK__ATTRD_CMD_UPDATE_BOTH, PCMK__ATTRD_CMD_UPDATE_DELAY, NULL); } static bool dispatch(pcmk_ipc_api_t *api, xmlNode *reply) { const char *value = NULL; crm_exit_t status = CRM_EX_OK; pcmk__attrd_api_reply_t reply_data = { pcmk__attrd_reply_unknown }; if (pcmk__xe_is(reply, PCMK__XE_ACK)) { return false; } /* Do some basic validation of the reply */ value = pcmk__xe_get(reply, PCMK__XA_T); if (pcmk__str_empty(value) || !pcmk__str_eq(value, PCMK__VALUE_ATTRD, pcmk__str_none)) { crm_info("Unrecognizable message from attribute manager: " "message type '%s' not '" PCMK__VALUE_ATTRD "'", pcmk__s(value, "")); status = CRM_EX_PROTOCOL; goto done; } value = pcmk__xe_get(reply, PCMK__XA_SUBT); /* Only the query command gets a reply for now. NULL counts as query for * backward compatibility with attribute managers <2.1.3 that didn't set it. */ if (pcmk__str_eq(value, PCMK__ATTRD_CMD_QUERY, pcmk__str_null_matches)) { if (!xmlHasProp(reply, (const xmlChar *) PCMK__XA_ATTR_NAME)) { status = ENXIO; // Most likely, the attribute doesn't exist goto done; } reply_data.reply_type = pcmk__attrd_reply_query; set_pairs_data(&reply_data, reply); } else { crm_info("Unrecognizable message from attribute manager: " "message subtype '%s' unknown", pcmk__s(value, "")); status = CRM_EX_PROTOCOL; goto done; } done: pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); /* Free any reply data that was allocated */ if (reply_data.data.pairs) { g_list_free_full(reply_data.data.pairs, free); } return false; } pcmk__ipc_methods_t * pcmk__attrd_api_methods(void) { pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); if (cmds != NULL) { cmds->new_data = NULL; cmds->free_data = NULL; cmds->post_connect = NULL; cmds->reply_expected = reply_expected; cmds->dispatch = dispatch; } return cmds; } /*! * \internal * \brief Create a generic attribute manager operation * * \param[in] user_name If not NULL, ACL user to set for operation * * \return XML of attribute manager operation */ static xmlNode * create_attrd_op(const char *user_name) { xmlNode *attrd_op = pcmk__xe_create(NULL, __func__); crm_xml_add(attrd_op, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(attrd_op, PCMK__XA_SRC, pcmk__s(crm_system_name, "unknown")); crm_xml_add(attrd_op, PCMK__XA_ATTR_USER, user_name); return attrd_op; } static int connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) { int rc = pcmk_rc_ok; bool created_api = false; enum pcmk_ipc_dispatch dispatch = pcmk_ipc_dispatch_sync; if (api == NULL) { rc = pcmk_new_ipc_api(&api, pcmk_ipc_attrd); if (rc != pcmk_rc_ok) { return rc; } created_api = true; } else { dispatch = api->dispatch_type; } rc = pcmk__connect_ipc(api, dispatch, 5); if (rc == pcmk_rc_ok) { rc = pcmk__send_ipc_request(api, request); } if (created_api) { pcmk_free_ipc_api(api); } return rc; } int pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, const char *resource, const char *operation, const char *interval_spec, const char *user_name, uint32_t options) { int rc = pcmk_rc_ok; xmlNode *request = create_attrd_op(user_name); const char *interval_desc = NULL; const char *op_desc = NULL; const char *target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } if (operation) { interval_desc = pcmk__s(interval_spec, "nonrecurring"); op_desc = operation; } else { interval_desc = "all"; op_desc = "operations"; } crm_debug("Asking %s to clear failure of %s %s for %s on %s", pcmk_ipc_name(api, true), interval_desc, op_desc, pcmk__s(resource, "all resources"), pcmk__s(node, "all nodes")); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_CLEAR_FAILURE); crm_xml_add(request, PCMK__XA_ATTR_HOST, node); crm_xml_add(request, PCMK__XA_ATTR_RESOURCE, resource); crm_xml_add(request, PCMK__XA_ATTR_CLEAR_OPERATION, operation); crm_xml_add(request, PCMK__XA_ATTR_CLEAR_INTERVAL, interval_spec); - crm_xml_add_int(request, PCMK__XA_ATTR_IS_REMOTE, - pcmk_is_set(options, pcmk__node_attr_remote)); + pcmk__xe_set_int(request, PCMK__XA_ATTR_IS_REMOTE, + pcmk_is_set(options, pcmk__node_attr_remote)); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } int pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *name, uint32_t options) { const char *target = NULL; if (name == NULL) { return EINVAL; } target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } /* Make sure the right update option is set. */ options &= ~pcmk__node_attr_delay; options |= pcmk__node_attr_value; return pcmk__attrd_api_update(api, node, name, NULL, NULL, NULL, NULL, options); } int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) { int rc = pcmk_rc_ok; xmlNode *request = NULL; const char *target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } crm_debug("Asking %s to purge transient attributes%s for %s", pcmk_ipc_name(api, true), (reap? " and node cache entries" : ""), pcmk__s(node, "local node")); request = create_attrd_op(NULL); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, reap); crm_xml_add(request, PCMK__XA_ATTR_HOST, node); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, uint32_t options) { int rc = pcmk_rc_ok; xmlNode *request = NULL; const char *target = NULL; if (name == NULL) { return EINVAL; } if (pcmk_is_set(options, pcmk__node_attr_query_all)) { node = NULL; } else { target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } else if (node == NULL) { node = "localhost"; } } crm_debug("Querying %s for value of '%s'%s%s", pcmk_ipc_name(api, true), name, ((node == NULL)? "" : " on "), pcmk__s(node, "")); request = create_attrd_op(NULL); crm_xml_add(request, PCMK__XA_ATTR_NAME, name); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_QUERY); crm_xml_add(request, PCMK__XA_ATTR_HOST, node); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } int pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) { int rc = pcmk_rc_ok; xmlNode *request = NULL; const char *target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } crm_debug("Asking %s to write all transient attributes for %s to CIB", pcmk_ipc_name(api, true), pcmk__s(node, "local node")); request = create_attrd_op(NULL); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_REFRESH); crm_xml_add(request, PCMK__XA_ATTR_HOST, node); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } static void add_op_attr(xmlNode *op, uint32_t options) { if (pcmk_all_flags_set(options, pcmk__node_attr_value | pcmk__node_attr_delay)) { crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE_BOTH); } else if (pcmk_is_set(options, pcmk__node_attr_value)) { crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); } else if (pcmk_is_set(options, pcmk__node_attr_delay)) { crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE_DELAY); } } static void populate_update_op(xmlNode *op, const char *node, const char *name, const char *value, const char *dampen, const char *set, uint32_t options) { if (pcmk_is_set(options, pcmk__node_attr_pattern)) { crm_xml_add(op, PCMK__XA_ATTR_REGEX, name); } else { crm_xml_add(op, PCMK__XA_ATTR_NAME, name); } if (pcmk_is_set(options, pcmk__node_attr_utilization)) { crm_xml_add(op, PCMK__XA_ATTR_SET_TYPE, PCMK_XE_UTILIZATION); } else { crm_xml_add(op, PCMK__XA_ATTR_SET_TYPE, PCMK_XE_INSTANCE_ATTRIBUTES); } add_op_attr(op, options); crm_xml_add(op, PCMK__XA_ATTR_VALUE, value); crm_xml_add(op, PCMK__XA_ATTR_DAMPENING, dampen); crm_xml_add(op, PCMK__XA_ATTR_HOST, node); crm_xml_add(op, PCMK__XA_ATTR_SET, set); - crm_xml_add_int(op, PCMK__XA_ATTR_IS_REMOTE, - pcmk_is_set(options, pcmk__node_attr_remote)); - crm_xml_add_int(op, PCMK__XA_ATTR_IS_PRIVATE, - pcmk_is_set(options, pcmk__node_attr_private)); + pcmk__xe_set_int(op, PCMK__XA_ATTR_IS_REMOTE, + pcmk_is_set(options, pcmk__node_attr_remote)); + pcmk__xe_set_int(op, PCMK__XA_ATTR_IS_PRIVATE, + pcmk_is_set(options, pcmk__node_attr_private)); if (pcmk_is_set(options, pcmk__node_attr_sync_local)) { crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_LOCAL); } else if (pcmk_is_set(options, pcmk__node_attr_sync_cluster)) { crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_CLUSTER); } } int pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, const char *value, const char *dampen, const char *set, const char *user_name, uint32_t options) { int rc = pcmk_rc_ok; xmlNode *request = NULL; const char *target = NULL; if (name == NULL) { return EINVAL; } target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } crm_debug("Asking %s to update '%s' to '%s' for %s", pcmk_ipc_name(api, true), name, pcmk__s(value, "(null)"), pcmk__s(node, "local node")); request = create_attrd_op(user_name); populate_update_op(request, node, name, value, dampen, set, options); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } int pcmk__attrd_api_update_list(pcmk_ipc_api_t *api, GList *attrs, const char *dampen, const char *set, const char *user_name, uint32_t options) { int rc = pcmk_rc_ok; xmlNode *request = NULL; if (attrs == NULL) { return EINVAL; } /* There are two different ways of handling a list of attributes: * * (1) For messages originating from some command line tool, we have to send * them one at a time. In this loop, we just call pcmk__attrd_api_update * for each, letting it deal with creating the API object if it doesn't * already exist. * * The reason we can't use a single message in this case is that we can't * trust that the server supports it. Remote nodes could be involved * here, and there's no guarantee that a newer client running on a remote * node is talking to (or proxied through) a cluster node with a newer * attrd. We also can't just try sending a single message and then falling * back on multiple. There's no handshake with the attrd server to * determine its version. And then we would need to do that fallback in the * dispatch function for this to work for all connection types (mainloop in * particular), and at that point we won't know what the original message * was in order to break it apart and resend as individual messages. * * (2) For messages between daemons, we can be assured that the local attrd * will support the new message and that it can send to the other attrds * as one request or split up according to the minimum supported version. */ for (GList *iter = attrs; iter != NULL; iter = iter->next) { pcmk__attrd_query_pair_t *pair = (pcmk__attrd_query_pair_t *) iter->data; if (pcmk__is_daemon) { const char *target = NULL; xmlNode *child = NULL; /* First time through this loop - create the basic request. */ if (request == NULL) { request = create_attrd_op(user_name); add_op_attr(request, options); } /* Add a child node for this operation. We add the task to the top * level XML node so attrd_ipc_dispatch doesn't need changes. And * then we also add the task to each child node in populate_update_op * so attrd_client_update knows what form of update is taking place. */ child = pcmk__xe_create(request, PCMK_XE_OP); target = pcmk__node_attr_target(pair->node); if (target != NULL) { pair->node = target; } populate_update_op(child, pair->node, pair->name, pair->value, dampen, set, options); } else { rc = pcmk__attrd_api_update(api, pair->node, pair->name, pair->value, dampen, set, user_name, options); } } /* If we were doing multiple attributes at once, we still need to send the * request. Do that now, creating and destroying the API object if needed. */ if (pcmk__is_daemon) { rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); } return rc; } diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index 6cc04543e1..76dedd44e5 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -1,1694 +1,1694 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #if defined(HAVE_UCRED) || defined(HAVE_SOCKPEERCRED) #include #elif defined(HAVE_GETPEERUCRED) #include #endif #include #include #include #include #include /* indirectly: pcmk_err_generic */ #include #include #include #include "crmcommon_private.h" static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); /*! * \brief Create a new object for using Pacemaker daemon IPC * * \param[out] api Where to store new IPC object * \param[in] server Which Pacemaker daemon the object is for * * \return Standard Pacemaker result code * * \note The caller is responsible for freeing *api using pcmk_free_ipc_api(). * \note This is intended to supersede crm_ipc_new() but currently only * supports the controller, pacemakerd, and schedulerd IPC API. */ int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) { if (api == NULL) { return EINVAL; } *api = calloc(1, sizeof(pcmk_ipc_api_t)); if (*api == NULL) { return errno; } (*api)->server = server; if (pcmk_ipc_name(*api, false) == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return EOPNOTSUPP; } (*api)->ipc_size_max = 0; // Set server methods and max_size (if not default) switch (server) { case pcmk_ipc_attrd: (*api)->cmds = pcmk__attrd_api_methods(); break; case pcmk_ipc_based: (*api)->ipc_size_max = 512 * 1024; // 512KB break; case pcmk_ipc_controld: (*api)->cmds = pcmk__controld_api_methods(); break; case pcmk_ipc_execd: break; case pcmk_ipc_fenced: break; case pcmk_ipc_pacemakerd: (*api)->cmds = pcmk__pacemakerd_api_methods(); break; case pcmk_ipc_schedulerd: (*api)->cmds = pcmk__schedulerd_api_methods(); // @TODO max_size could vary by client, maybe take as argument? (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB break; default: // pcmk_ipc_unknown pcmk_free_ipc_api(*api); *api = NULL; return EINVAL; } if ((*api)->cmds == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), (*api)->ipc_size_max); if ((*api)->ipc == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } // If daemon API has its own data to track, allocate it if ((*api)->cmds->new_data != NULL) { if ((*api)->cmds->new_data(*api) != pcmk_rc_ok) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } } crm_trace("Created %s API IPC object", pcmk_ipc_name(*api, true)); return pcmk_rc_ok; } static void free_daemon_specific_data(pcmk_ipc_api_t *api) { if ((api != NULL) && (api->cmds != NULL)) { if ((api->cmds->free_data != NULL) && (api->api_data != NULL)) { api->cmds->free_data(api->api_data); api->api_data = NULL; } free(api->cmds); api->cmds = NULL; } } /*! * \internal * \brief Call an IPC API event callback, if one is registed * * \param[in,out] api IPC API connection * \param[in] event_type The type of event that occurred * \param[in] status Event status * \param[in,out] event_data Event-specific data */ void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data) { if ((api != NULL) && (api->cb != NULL)) { api->cb(api, event_type, status, event_data, api->user_data); } } /*! * \internal * \brief Clean up after an IPC disconnect * * \param[in,out] user_data IPC API connection that disconnected * * \note This function can be used as a main loop IPC destroy callback. */ static void ipc_post_disconnect(gpointer user_data) { pcmk_ipc_api_t *api = user_data; crm_info("Disconnected from %s", pcmk_ipc_name(api, true)); // Perform any daemon-specific handling needed if ((api->cmds != NULL) && (api->cmds->post_disconnect != NULL)) { api->cmds->post_disconnect(api); } // Call client's registered event callback pcmk__call_ipc_callback(api, pcmk_ipc_event_disconnect, CRM_EX_DISCONNECT, NULL); /* If this is being called from a running main loop, mainloop_gio_destroy() * will free ipc and mainloop_io immediately after calling this function. * If this is called from a stopped main loop, these will leak, so the best * practice is to close the connection before stopping the main loop. */ api->ipc = NULL; api->mainloop_io = NULL; if (api->free_on_disconnect) { /* pcmk_free_ipc_api() has already been called, but did not free api * or api->cmds because this function needed them. Do that now. */ free_daemon_specific_data(api); crm_trace("Freeing IPC API object after disconnect"); free(api); } } /*! * \brief Free the contents of an IPC API object * * \param[in,out] api IPC API object to free */ void pcmk_free_ipc_api(pcmk_ipc_api_t *api) { bool free_on_disconnect = false; if (api == NULL) { return; } crm_debug("Releasing %s IPC API", pcmk_ipc_name(api, true)); if (api->ipc != NULL) { if (api->mainloop_io != NULL) { /* We need to keep the api pointer itself around, because it is the * user data for the IPC client destroy callback. That will be * triggered by the pcmk_disconnect_ipc() call below, but it might * happen later in the main loop (if still running). * * This flag tells the destroy callback to free the object. It can't * do that unconditionally, because the application might call this * function after a disconnect that happened by other means. */ free_on_disconnect = api->free_on_disconnect = true; } pcmk_disconnect_ipc(api); // Frees api if free_on_disconnect is true } if (!free_on_disconnect) { free_daemon_specific_data(api); crm_trace("Freeing IPC API object"); free(api); } } /*! * \brief Get the IPC name used with an IPC API connection * * \param[in] api IPC API connection * \param[in] for_log If true, return human-friendly name instead of IPC name * * \return IPC API's human-friendly or connection name, or if none is available, * "Pacemaker" if for_log is true and NULL if for_log is false */ const char * pcmk_ipc_name(const pcmk_ipc_api_t *api, bool for_log) { if (api == NULL) { return for_log? "Pacemaker" : NULL; } if (for_log) { const char *name = pcmk__server_log_name(api->server); return pcmk__s(name, "Pacemaker"); } switch (api->server) { // These servers do not have pcmk_ipc_api_t implementations yet case pcmk_ipc_based: case pcmk_ipc_execd: case pcmk_ipc_fenced: return NULL; default: return pcmk__server_ipc_name(api->server); } } /*! * \brief Check whether an IPC API connection is active * * \param[in,out] api IPC API connection * * \return true if IPC is connected, false otherwise */ bool pcmk_ipc_is_connected(pcmk_ipc_api_t *api) { return (api != NULL) && crm_ipc_connected(api->ipc); } /*! * \internal * \brief Call the daemon-specific API's dispatch function * * Perform daemon-specific handling of IPC reply dispatch. It is the daemon * method's responsibility to call the client's registered event callback, as * well as allocate and free any event data. * * \param[in,out] api IPC API connection * \param[in,out] message IPC reply XML to dispatch */ static bool call_api_dispatch(pcmk_ipc_api_t *api, xmlNode *message) { crm_log_xml_trace(message, "ipc-received"); if ((api->cmds != NULL) && (api->cmds->dispatch != NULL)) { return api->cmds->dispatch(api, message); } return false; } /*! * \internal * \brief Dispatch previously read IPC data * * \param[in] buffer Data read from IPC * \param[in,out] api IPC object * * \return Standard Pacemaker return code. In particular: * * pcmk_rc_ok: There are no more messages expected from the server. Quit * reading. * EINPROGRESS: There are more messages expected from the server. Keep reading. * * All other values indicate an error. */ static int dispatch_ipc_data(const char *buffer, pcmk_ipc_api_t *api) { bool more = false; xmlNode *msg; if (buffer == NULL) { crm_warn("Empty message received from %s IPC", pcmk_ipc_name(api, true)); return ENOMSG; } msg = pcmk__xml_parse(buffer); if (msg == NULL) { crm_warn("Malformed message received from %s IPC", pcmk_ipc_name(api, true)); return EPROTO; } more = call_api_dispatch(api, msg); pcmk__xml_free(msg); if (more) { return EINPROGRESS; } else { return pcmk_rc_ok; } } /*! * \internal * \brief Dispatch data read from IPC source * * \param[in] buffer Data read from IPC * \param[in] length Number of bytes of data in buffer (ignored) * \param[in,out] user_data IPC object * * \return Always 0 (meaning connection is still required) * * \note This function can be used as a main loop IPC dispatch callback. */ static int dispatch_ipc_source_data(const char *buffer, ssize_t length, gpointer user_data) { pcmk_ipc_api_t *api = user_data; CRM_CHECK(api != NULL, return 0); dispatch_ipc_data(buffer, api); return 0; } /*! * \brief Check whether an IPC connection has data available (without main loop) * * \param[in] api IPC API connection * \param[in] timeout_ms If less than 0, poll indefinitely; if 0, poll once * and return immediately; otherwise, poll for up to * this many milliseconds * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function to check whether IPC data is available. Return values of * interest include pcmk_rc_ok meaning data is available, and EAGAIN * meaning no data is available; all other values indicate errors. * \todo This does not allow the caller to poll multiple file descriptors at * once. If there is demand for that, we could add a wrapper for * pcmk__ipc_fd(api->ipc), so the caller can call poll() themselves. */ int pcmk_poll_ipc(const pcmk_ipc_api_t *api, int timeout_ms) { int rc; struct pollfd pollfd = { 0, }; if ((api == NULL) || (api->dispatch_type != pcmk_ipc_dispatch_poll)) { return EINVAL; } rc = pcmk__ipc_fd(api->ipc, &(pollfd.fd)); if (rc != pcmk_rc_ok) { crm_debug("Could not obtain file descriptor for %s IPC: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); return rc; } pollfd.events = POLLIN; rc = poll(&pollfd, 1, timeout_ms); if (rc < 0) { /* Some UNIX systems return negative and set EAGAIN for failure to * allocate memory; standardize the return code in that case */ return (errno == EAGAIN)? ENOMEM : errno; } else if (rc == 0) { return EAGAIN; } return pcmk_rc_ok; } /*! * \brief Dispatch available messages on an IPC connection (without main loop) * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function when IPC data is available. */ void pcmk_dispatch_ipc(pcmk_ipc_api_t *api) { if (api == NULL) { return; } while (crm_ipc_ready(api->ipc) > 0) { if (crm_ipc_read(api->ipc) > 0) { dispatch_ipc_data(crm_ipc_buffer(api->ipc), api); } } } // \return Standard Pacemaker return code static int connect_with_main_loop(pcmk_ipc_api_t *api) { int rc; struct ipc_client_callbacks callbacks = { .dispatch = dispatch_ipc_source_data, .destroy = ipc_post_disconnect, }; rc = pcmk__add_mainloop_ipc(api->ipc, G_PRIORITY_DEFAULT, api, &callbacks, &(api->mainloop_io)); if (rc != pcmk_rc_ok) { return rc; } crm_debug("Connected to %s IPC (attached to main loop)", pcmk_ipc_name(api, true)); /* After this point, api->mainloop_io owns api->ipc, so api->ipc * should not be explicitly freed. */ return pcmk_rc_ok; } // \return Standard Pacemaker return code static int connect_without_main_loop(pcmk_ipc_api_t *api) { int rc = pcmk__connect_generic_ipc(api->ipc); if (rc != pcmk_rc_ok) { crm_ipc_close(api->ipc); } else { crm_debug("Connected to %s IPC (without main loop)", pcmk_ipc_name(api, true)); } return rc; } /*! * \internal * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors) * * \param[in,out] api IPC API instance * \param[in] dispatch_type How IPC replies should be dispatched * \param[in] attempts How many times to try (in case of soft error) * * \return Standard Pacemaker return code */ int pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type, int attempts) { int rc = pcmk_rc_ok; if ((api == NULL) || (attempts < 1)) { return EINVAL; } if (api->ipc == NULL) { api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), api->ipc_size_max); if (api->ipc == NULL) { return ENOMEM; } } if (crm_ipc_connected(api->ipc)) { crm_trace("Already connected to %s", pcmk_ipc_name(api, true)); return pcmk_rc_ok; } api->dispatch_type = dispatch_type; crm_debug("Attempting connection to %s (up to %d time%s)", pcmk_ipc_name(api, true), attempts, pcmk__plural_s(attempts)); for (int remaining = attempts - 1; remaining >= 0; --remaining) { switch (dispatch_type) { case pcmk_ipc_dispatch_main: rc = connect_with_main_loop(api); break; case pcmk_ipc_dispatch_sync: case pcmk_ipc_dispatch_poll: rc = connect_without_main_loop(api); break; } if ((remaining == 0) || ((rc != EAGAIN) && (rc != EALREADY))) { break; // Result is final } // Retry after soft error (interrupted by signal, etc.) pcmk__sleep_ms((attempts - remaining) * 500); crm_debug("Re-attempting connection to %s (%d attempt%s remaining)", pcmk_ipc_name(api, true), remaining, pcmk__plural_s(remaining)); } if (rc != pcmk_rc_ok) { return rc; } if ((api->cmds != NULL) && (api->cmds->post_connect != NULL)) { rc = api->cmds->post_connect(api); if (rc != pcmk_rc_ok) { crm_ipc_close(api->ipc); } } return rc; } /*! * \brief Connect to a Pacemaker daemon via IPC * * \param[in,out] api IPC API instance * \param[in] dispatch_type How IPC replies should be dispatched * * \return Standard Pacemaker return code */ int pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type) { int rc = pcmk__connect_ipc(api, dispatch_type, 2); if (rc != pcmk_rc_ok) { crm_err("Connection to %s failed: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); } return rc; } /*! * \brief Disconnect an IPC API instance * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code * * \note If the connection is attached to a main loop, this function should be * called before quitting the main loop, to ensure that all memory is * freed. */ void pcmk_disconnect_ipc(pcmk_ipc_api_t *api) { if ((api == NULL) || (api->ipc == NULL)) { return; } switch (api->dispatch_type) { case pcmk_ipc_dispatch_main: { mainloop_io_t *mainloop_io = api->mainloop_io; // Make sure no code with access to api can use these again api->mainloop_io = NULL; api->ipc = NULL; mainloop_del_ipc_client(mainloop_io); // After this point api might have already been freed } break; case pcmk_ipc_dispatch_poll: case pcmk_ipc_dispatch_sync: { crm_ipc_t *ipc = api->ipc; // Make sure no code with access to api can use ipc again api->ipc = NULL; // This should always be the case already, but to be safe api->free_on_disconnect = false; crm_ipc_close(ipc); crm_ipc_destroy(ipc); ipc_post_disconnect(api); } break; } } /*! * \brief Register a callback for IPC API events * * \param[in,out] api IPC API connection * \param[in] callback Callback to register * \param[in] userdata Caller data to pass to callback * * \note This function may be called multiple times to update the callback * and/or user data. The caller remains responsible for freeing * userdata in any case (after the IPC is disconnected, if the * user data is still registered with the IPC). */ void pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, void *user_data) { if (api == NULL) { return; } api->cb = cb; api->user_data = user_data; } /*! * \internal * \brief Send an XML request across an IPC API connection * * \param[in,out] api IPC API connection * \param[in] request XML request to send * * \return Standard Pacemaker return code * * \note Daemon-specific IPC API functions should call this function to send * requests, because it handles different dispatch types appropriately. */ int pcmk__send_ipc_request(pcmk_ipc_api_t *api, const xmlNode *request) { int rc; xmlNode *reply = NULL; enum crm_ipc_flags flags = crm_ipc_flags_none; if ((api == NULL) || (api->ipc == NULL) || (request == NULL)) { return EINVAL; } crm_log_xml_trace(request, "ipc-sent"); // Synchronous dispatch requires waiting for a reply if ((api->dispatch_type == pcmk_ipc_dispatch_sync) && (api->cmds != NULL) && (api->cmds->reply_expected != NULL) && (api->cmds->reply_expected(api, request))) { flags = crm_ipc_client_response; } /* The 0 here means a default timeout of 5 seconds * * @TODO Maybe add a timeout_ms member to pcmk_ipc_api_t and a * pcmk_set_ipc_timeout() setter for it, then use it here. */ rc = crm_ipc_send(api->ipc, request, flags, 0, &reply); if (rc < 0) { return pcmk_legacy2rc(rc); } else if (rc == 0) { return ENODATA; } // With synchronous dispatch, we dispatch any reply now if (reply != NULL) { bool more = call_api_dispatch(api, reply); pcmk__xml_free(reply); while (more) { rc = crm_ipc_read(api->ipc); if (rc == -EAGAIN) { continue; } else if (rc == -ENOMSG || rc == pcmk_ok) { return pcmk_rc_ok; } else if (rc < 0) { return -rc; } rc = dispatch_ipc_data(crm_ipc_buffer(api->ipc), api); if (rc == pcmk_rc_ok) { more = false; } else if (rc == EINPROGRESS) { more = true; } else { continue; } } } return pcmk_rc_ok; } /*! * \internal * \brief Create the XML for an IPC request to purge a node from the peer cache * * \param[in] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Newly allocated IPC request XML * * \note The controller, fencer, and pacemakerd use the same request syntax, but * the attribute manager uses a different one. The CIB manager doesn't * have any syntax for it. The executor and scheduler don't connect to the * cluster layer and thus don't have or need any syntax for it. * * \todo Modify the attribute manager to accept the common syntax (as well * as its current one, for compatibility with older clients). Modify * the CIB manager to accept and honor the common syntax. Modify the * executor and scheduler to accept the syntax (immediately returning * success), just for consistency. Modify this function to use the * common syntax with all daemons if their version supports it. */ static xmlNode * create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { xmlNode *request = NULL; const char *client = crm_system_name? crm_system_name : "client"; switch (api->server) { case pcmk_ipc_attrd: request = pcmk__xe_create(NULL, __func__); crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(request, PCMK__XA_SRC, crm_system_name); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, true); crm_xml_add(request, PCMK__XA_ATTR_HOST, node_name); if (nodeid > 0) { - crm_xml_add_int(request, PCMK__XA_ATTR_HOST_ID, nodeid); + pcmk__xe_set_int(request, PCMK__XA_ATTR_HOST_ID, nodeid); } break; case pcmk_ipc_controld: case pcmk_ipc_fenced: case pcmk_ipc_pacemakerd: request = pcmk__new_request(api->server, client, NULL, pcmk_ipc_name(api, false), CRM_OP_RM_NODE_CACHE, NULL); if (nodeid > 0) { pcmk__xe_set_ll(request, PCMK_XA_ID, (long long) nodeid); } crm_xml_add(request, PCMK_XA_UNAME, node_name); break; case pcmk_ipc_based: case pcmk_ipc_execd: case pcmk_ipc_schedulerd: break; default: // pcmk_ipc_unknown (shouldn't be possible) return NULL; } return request; } /*! * \brief Ask a Pacemaker daemon to purge a node from its peer cache * * \param[in,out] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Standard Pacemaker return code * * \note At least one of node_name or nodeid must be specified. */ int pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { int rc = 0; xmlNode *request = NULL; if (api == NULL) { return EINVAL; } if ((node_name == NULL) && (nodeid == 0)) { return EINVAL; } request = create_purge_node_request(api, node_name, nodeid); if (request == NULL) { return EOPNOTSUPP; } rc = pcmk__send_ipc_request(api, request); pcmk__xml_free(request); crm_debug("%s peer cache purge of node %s[%lu]: rc=%d", pcmk_ipc_name(api, true), node_name, (unsigned long) nodeid, rc); return rc; } /* * Generic IPC API (to eventually be deprecated as public API and made internal) */ struct crm_ipc_s { struct pollfd pfd; unsigned int max_buf_size; // maximum bytes we can send or receive over IPC unsigned int buf_size; // size of allocated buffer int msg_size; int need_reply; char *buffer; char *server_name; // server IPC name being connected to qb_ipcc_connection_t *ipc; }; /*! * \brief Create a new (legacy) object for using Pacemaker daemon IPC * * \param[in] name IPC system name to connect to * \param[in] max_size Use a maximum IPC buffer size of at least this size * * \return Newly allocated IPC object on success, NULL otherwise * * \note The caller is responsible for freeing the result using * crm_ipc_destroy(). * \note This should be considered deprecated for use with daemons supported by * pcmk_new_ipc_api(). */ crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); if (client == NULL) { crm_err("Could not create IPC connection: %s", strerror(errno)); return NULL; } client->server_name = strdup(name); if (client->server_name == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client); return NULL; } client->buf_size = pcmk__ipc_buffer_size(max_size); client->buffer = malloc(client->buf_size); if (client->buffer == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client->server_name); free(client); return NULL; } /* Clients initiating connection pick the max buf size */ client->max_buf_size = client->buf_size; client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } /*! * \internal * \brief Connect a generic (not daemon-specific) IPC object * * \param[in,out] ipc Generic IPC object to connect * * \return Standard Pacemaker return code */ int pcmk__connect_generic_ipc(crm_ipc_t *ipc) { uid_t cl_uid = 0; gid_t cl_gid = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; int rc = pcmk_rc_ok; if (ipc == NULL) { return EINVAL; } ipc->need_reply = FALSE; ipc->ipc = qb_ipcc_connect(ipc->server_name, ipc->buf_size); if (ipc->ipc == NULL) { return errno; } rc = qb_ipcc_fd_get(ipc->ipc, &ipc->pfd.fd); if (rc < 0) { // -errno crm_ipc_close(ipc); return -rc; } rc = pcmk_daemon_user(&cl_uid, &cl_gid); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { crm_ipc_close(ipc); return rc; } rc = is_ipc_provider_expected(ipc->ipc, ipc->pfd.fd, cl_uid, cl_gid, &found_pid, &found_uid, &found_gid); if (rc != pcmk_rc_ok) { if (rc == pcmk_rc_ipc_unauthorized) { crm_info("%s IPC provider authentication failed: process %lld has " "uid %lld (expected %lld) and gid %lld (expected %lld)", ipc->server_name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) cl_uid, (long long) found_gid, (long long) cl_gid); } crm_ipc_close(ipc); return rc; } ipc->max_buf_size = qb_ipcc_get_buffer_size(ipc->ipc); if (ipc->max_buf_size > ipc->buf_size) { free(ipc->buffer); ipc->buffer = calloc(ipc->max_buf_size, sizeof(char)); if (ipc->buffer == NULL) { rc = errno; crm_ipc_close(ipc); return rc; } ipc->buf_size = ipc->max_buf_size; } return pcmk_rc_ok; } void crm_ipc_close(crm_ipc_t * client) { if (client) { if (client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } } void crm_ipc_destroy(crm_ipc_t * client) { if (client) { if (client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying active %s IPC connection", client->server_name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } else { crm_trace("Destroying inactive %s IPC connection", client->server_name); } free(client->buffer); free(client->server_name); free(client); } } /*! * \internal * \brief Get the file descriptor for a generic IPC object * * \param[in,out] ipc Generic IPC object to get file descriptor for * \param[out] fd Where to store file descriptor * * \return Standard Pacemaker return code */ int pcmk__ipc_fd(crm_ipc_t *ipc, int *fd) { if ((ipc == NULL) || (fd == NULL)) { return EINVAL; } if ((ipc->ipc == NULL) || (ipc->pfd.fd < 0)) { return ENOTCONN; } *fd = ipc->pfd.fd; return pcmk_rc_ok; } int crm_ipc_get_fd(crm_ipc_t * client) { int fd = -1; if (pcmk__ipc_fd(client, &fd) != pcmk_rc_ok) { crm_err("Could not obtain file descriptor for %s IPC", ((client == NULL)? "unspecified" : client->server_name)); errno = EINVAL; return -EINVAL; } return fd; } bool crm_ipc_connected(crm_ipc_t * client) { bool rc = FALSE; if (client == NULL) { crm_trace("No client"); return FALSE; } else if (client->ipc == NULL) { crm_trace("No connection"); return FALSE; } else if (client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if (rc == FALSE) { client->pfd.fd = -EINVAL; } return rc; } /*! * \brief Check whether an IPC connection is ready to be read * * \param[in,out] client Connection to check * * \return Positive value if ready to be read, 0 if not ready, -errno on error */ int crm_ipc_ready(crm_ipc_t *client) { int rc; pcmk__assert(client != NULL); if (!crm_ipc_connected(client)) { return -ENOTCONN; } client->pfd.revents = 0; rc = poll(&(client->pfd), 1, 0); return (rc < 0)? -errno : rc; } // \return Standard Pacemaker return code static int crm_ipc_decompress(crm_ipc_t * client) { pcmk__ipc_header_t *header = (pcmk__ipc_header_t *)(void*)client->buffer; if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; /* never let buf size fall below our max size required for ipc reads. */ unsigned int new_buf_size = QB_MAX((sizeof(pcmk__ipc_header_t) + size_u), client->max_buf_size); char *uncompressed = pcmk__assert_alloc(1, new_buf_size); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed + sizeof(pcmk__ipc_header_t), &size_u, client->buffer + sizeof(pcmk__ipc_header_t), header->size_compressed, 1, 0); rc = pcmk__bzlib2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Decompression failed: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); free(uncompressed); return rc; } pcmk__assert(size_u == header->size_uncompressed); memcpy(uncompressed, client->buffer, sizeof(pcmk__ipc_header_t)); /* Preserve the header */ header = (pcmk__ipc_header_t *)(void*)uncompressed; free(client->buffer); client->buf_size = new_buf_size; client->buffer = uncompressed; } pcmk__assert(client->buffer[sizeof(pcmk__ipc_header_t) + header->size_uncompressed - 1] == 0); return pcmk_rc_ok; } long crm_ipc_read(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; pcmk__assert((client != NULL) && (client->ipc != NULL) && (client->buffer != NULL)); client->buffer[0] = 0; client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size, 0); if (client->msg_size >= 0) { int rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } header = (pcmk__ipc_header_t *)(void*)client->buffer; if (!pcmk__valid_ipc_header(header)) { return -EBADMSG; } crm_trace("Received %s IPC event %d size=%u rc=%d text='%.100s'", client->server_name, header->qb.id, header->qb.size, client->msg_size, client->buffer + sizeof(pcmk__ipc_header_t)); } else { crm_trace("No message received from %s IPC: %s", client->server_name, pcmk_strerror(client->msg_size)); if (client->msg_size == -EAGAIN) { return -EAGAIN; } } if (!crm_ipc_connected(client) || client->msg_size == -ENOTCONN) { crm_err("Connection to %s IPC failed", client->server_name); } if (header) { /* Data excluding the header */ return header->size_uncompressed; } return -ENOMSG; } const char * crm_ipc_buffer(crm_ipc_t * client) { pcmk__assert(client != NULL); return client->buffer + sizeof(pcmk__ipc_header_t); } uint32_t crm_ipc_buffer_flags(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; pcmk__assert(client != NULL); if (client->buffer == NULL) { return 0; } header = (pcmk__ipc_header_t *)(void*)client->buffer; return header->flags; } const char * crm_ipc_name(crm_ipc_t * client) { pcmk__assert(client != NULL); return client->server_name; } // \return Standard Pacemaker return code static int internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, ssize_t *bytes, xmlNode **reply) { pcmk__ipc_header_t *hdr = NULL; time_t timeout = 0; int32_t qb_timeout = -1; int rc = pcmk_rc_ok; if (ms_timeout > 0) { timeout = time(NULL) + 1 + pcmk__timeout_ms2s(ms_timeout); qb_timeout = 1000; } /* get the reply */ crm_trace("Expecting reply to %s IPC message %d", client->server_name, request_id); do { xmlNode *xml = NULL; *bytes = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, qb_timeout); if (*bytes <= 0) { if (!crm_ipc_connected(client)) { crm_err("%s IPC provider disconnected while waiting for message %d", client->server_name, request_id); break; } continue; } rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return rc; } hdr = (pcmk__ipc_header_t *)(void*) client->buffer; if (hdr->qb.id == request_id) { /* Got the reply we were expecting. */ break; } xml = pcmk__xml_parse(crm_ipc_buffer(client)); if (hdr->qb.id < request_id) { crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(xml, "OldIpcReply"); } else if (hdr->qb.id > request_id) { crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(xml, "ImpossibleReply"); pcmk__assert(hdr->qb.id <= request_id); } } while (time(NULL) < timeout || (timeout == 0 && *bytes == -EAGAIN)); if (*bytes > 0) { crm_trace("Received %zd-byte reply %" PRId32 " to %s IPC %d: %.100s", *bytes, hdr->qb.id, client->server_name, request_id, crm_ipc_buffer(client)); if (reply != NULL) { *reply = pcmk__xml_parse(crm_ipc_buffer(client)); } } else if (*bytes < 0) { rc = (int) -*bytes; // System errno crm_trace("No reply to %s IPC %d: %s " QB_XS " rc=%d", client->server_name, request_id, pcmk_rc_str(rc), rc); } /* If bytes == 0, we'll return that to crm_ipc_send which will interpret * that as pcmk_rc_ok, log that the IPC request failed (since we did not * give it a valid reply), and return that 0 to its callers. It's up to * the callers to take appropriate action after that. */ return rc; } /*! * \brief Send an IPC XML message * * \param[in,out] client Connection to IPC server * \param[in] message XML message to send * \param[in] flags Bitmask of crm_ipc_flags * \param[in] ms_timeout Give up if not sent within this much time * (5 seconds if 0, or no timeout if negative) * \param[out] reply Reply from server (or NULL if none) * * \return Negative errno on error, otherwise size of reply received in bytes * if reply was needed, otherwise number of bytes sent */ int crm_ipc_send(crm_ipc_t *client, const xmlNode *message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode **reply) { int rc = 0; time_t timeout = 0; ssize_t qb_rc = 0; ssize_t bytes = 0; struct iovec *iov; static uint32_t id = 0; static int factor = 8; pcmk__ipc_header_t *header; if (client == NULL) { crm_notice("Can't send IPC request without connection (bug?): %.100s", message); return -ENOTCONN; } else if (!crm_ipc_connected(client)) { /* Don't even bother */ crm_notice("Can't send %s IPC requests: Connection closed", client->server_name); return -ENOTCONN; } if (ms_timeout == 0) { ms_timeout = 5000; } if (client->need_reply) { qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); if (qb_rc < 0) { crm_warn("Sending %s IPC disabled until pending reply received", client->server_name); return -EALREADY; } else { crm_notice("Sending %s IPC re-enabled after pending reply received", client->server_name); client->need_reply = FALSE; } } id++; CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes); if (rc != pcmk_rc_ok) { crm_warn("Couldn't prepare %s IPC request: %s " QB_XS " rc=%d", client->server_name, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } header = iov[0].iov_base; pcmk__set_ipc_flags(header->flags, client->server_name, flags); if (pcmk_is_set(flags, crm_ipc_proxied)) { /* Don't look for a synchronous response */ pcmk__clear_ipc_flags(flags, "client", crm_ipc_client_response); } if(header->size_compressed) { if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) { crm_notice("Compressed message exceeds %d0%% of configured IPC " "limit (%u bytes); consider setting PCMK_ipc_buffer to " "%u or higher", factor, client->max_buf_size, 2 * client->max_buf_size); factor++; } } crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", client->server_name, header->qb.id, header->qb.size, ms_timeout); /* Send the IPC request, respecting any timeout we were passed */ if (ms_timeout > 0) { timeout = time(NULL) + 1 + pcmk__timeout_ms2s(ms_timeout); } do { qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); } while ((qb_rc == -EAGAIN) && ((timeout == 0) || (time(NULL) < timeout))); rc = (int) qb_rc; // Negative of system errno, or bytes sent if (qb_rc <= 0) { goto send_cleanup; } /* If we should not wait for a response, bail now */ if (!pcmk_is_set(flags, crm_ipc_client_response)) { crm_trace("Not waiting for reply to %s IPC request %d", client->server_name, header->qb.id); goto send_cleanup; } rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes, reply); if (rc == pcmk_rc_ok) { rc = (int) bytes; // Size of reply received } else { /* rc is either a positive system errno or a negative standard Pacemaker * return code. If it's an errno, we need to convert it back to a * negative number for comparison and return at the end of this function. */ rc = pcmk_rc2legacy(rc); if (ms_timeout > 0) { /* We didn't get the reply in time, so disable future sends for now. * The only alternative would be to close the connection since we * don't know how to detect and discard out-of-sequence replies. * * @TODO Implement out-of-sequence detection */ client->need_reply = TRUE; } } send_cleanup: if (!crm_ipc_connected(client)) { crm_notice("Couldn't send %s IPC request %d: Connection closed " QB_XS " rc=%d", client->server_name, header->qb.id, rc); } else if (rc == -ETIMEDOUT) { crm_warn("%s IPC request %d failed: %s after %dms " QB_XS " rc=%d", client->server_name, header->qb.id, pcmk_strerror(rc), ms_timeout, rc); crm_write_blackbox(0, NULL); } else if (rc <= 0) { crm_warn("%s IPC request %d failed: %s " QB_XS " rc=%d", client->server_name, header->qb.id, ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc); } pcmk_free_ipc_event(iov); return rc; } /*! * \brief Ensure an IPC provider has expected user or group * * \param[in] qb_ipc libqb client connection if available * \param[in] sock Connected Unix socket for IPC * \param[in] refuid Expected user ID * \param[in] refgid Expected group ID * \param[out] gotpid If not NULL, where to store provider's actual process ID * (or 1 on platforms where ID is not available) * \param[out] gotuid If not NULL, where to store provider's actual user ID * \param[out] gotgid If not NULL, where to store provider's actual group ID * * \return Standard Pacemaker return code * \note An actual user ID of 0 (root) will always be considered authorized, * regardless of the expected values provided. The caller can use the * output arguments to be stricter than this function. */ static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int rc = EOPNOTSUPP; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; #ifdef HAVE_QB_IPCC_AUTH_GET if (qb_ipc != NULL) { rc = qb_ipcc_auth_get(qb_ipc, &found_pid, &found_uid, &found_gid); rc = -rc; // libqb returns 0 or -errno if (rc == pcmk_rc_ok) { goto found; } } #endif #ifdef HAVE_UCRED { struct ucred ucred; socklen_t ucred_len = sizeof(ucred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) < 0) { rc = errno; } else if (ucred_len != sizeof(ucred)) { rc = EOPNOTSUPP; } else { found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; goto found; } } #endif #ifdef HAVE_SOCKPEERCRED { struct sockpeercred sockpeercred; socklen_t sockpeercred_len = sizeof(sockpeercred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &sockpeercred, &sockpeercred_len) < 0) { rc = errno; } else if (sockpeercred_len != sizeof(sockpeercred)) { rc = EOPNOTSUPP; } else { found_pid = sockpeercred.pid; found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; goto found; } } #endif #ifdef HAVE_GETPEEREID // For example, FreeBSD if (getpeereid(sock, &found_uid, &found_gid) < 0) { rc = errno; } else { found_pid = PCMK__SPECIAL_PID; goto found; } #endif #ifdef HAVE_GETPEERUCRED { ucred_t *ucred = NULL; if (getpeerucred(sock, &ucred) < 0) { rc = errno; } else { found_pid = ucred_getpid(ucred); found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ucred_free(ucred); goto found; } } #endif return rc; // If we get here, nothing succeeded found: if (gotpid != NULL) { *gotpid = found_pid; } if (gotuid != NULL) { *gotuid = found_uid; } if (gotgid != NULL) { *gotgid = found_gid; } if ((found_uid != 0) && (found_uid != refuid) && (found_gid != refgid)) { return pcmk_rc_ipc_unauthorized; } return pcmk_rc_ok; } int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int ret = is_ipc_provider_expected(NULL, sock, refuid, refgid, gotpid, gotuid, gotgid); /* The old function had some very odd return codes*/ if (ret == 0) { return 1; } else if (ret == pcmk_rc_ipc_unauthorized) { return 0; } else { return pcmk_rc2legacy(ret); } } int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid) { static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ int fd; int rc = pcmk_rc_ipc_unresponsive; int auth_rc = 0; int32_t qb_rc; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; qb_ipcc_connection_t *c; #ifdef HAVE_QB_IPCC_CONNECT_ASYNC struct pollfd pollfd = { 0, }; int poll_rc; c = qb_ipcc_connect_async(name, 0, &(pollfd.fd)); #else c = qb_ipcc_connect(name, 0); #endif if (c == NULL) { crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); rc = pcmk_rc_ipc_unresponsive; goto bail; } #ifdef HAVE_QB_IPCC_CONNECT_ASYNC pollfd.events = POLLIN; do { poll_rc = poll(&pollfd, 1, 2000); } while ((poll_rc == -1) && (errno == EINTR)); /* If poll() failed, given that disconnect function is not registered yet, * qb_ipcc_disconnect() won't clean up the socket. In any case, call * qb_ipcc_connect_continue() here so that it may fail and do the cleanup * for us. */ if (qb_ipcc_connect_continue(c) != 0) { crm_info("Could not connect to %s IPC: %s", name, (poll_rc == 0)?"timeout":strerror(errno)); rc = pcmk_rc_ipc_unresponsive; c = NULL; // qb_ipcc_connect_continue cleaned up for us goto bail; } #endif qb_rc = qb_ipcc_fd_get(c, &fd); if (qb_rc != 0) { rc = (int) -qb_rc; // System errno crm_err("Could not get fd from %s IPC: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } auth_rc = is_ipc_provider_expected(c, fd, refuid, refgid, &found_pid, &found_uid, &found_gid); if (auth_rc == pcmk_rc_ipc_unauthorized) { crm_err("Daemon (IPC %s) effectively blocked with unauthorized" " process %lld (uid: %lld, gid: %lld)", name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = pcmk_rc_ipc_unauthorized; goto bail; } if (auth_rc != pcmk_rc_ok) { rc = auth_rc; crm_err("Could not get peer credentials from %s IPC: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } if (gotpid != NULL) { *gotpid = found_pid; } rc = pcmk_rc_ok; if ((found_uid != refuid || found_gid != refgid) && strncmp(last_asked_name, name, sizeof(last_asked_name))) { if ((found_uid == 0) && (refuid != 0)) { crm_warn("Daemon (IPC %s) runs as root, whereas the expected" " credentials are %lld:%lld, hazard of violating" " the least privilege principle", name, (long long) refuid, (long long) refgid); } else { crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" " expected credentials are %lld:%lld, which may" " mean a different set of privileges than expected", name, (long long) found_uid, (long long) found_gid, (long long) refuid, (long long) refgid); } memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); } bail: if (c != NULL) { qb_ipcc_disconnect(c); } return rc; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include bool crm_ipc_connect(crm_ipc_t *client) { int rc = pcmk__connect_generic_ipc(client); if (rc == pcmk_rc_ok) { return true; } if ((client != NULL) && (client->ipc == NULL)) { errno = (rc > 0)? rc : ENOTCONN; crm_debug("Could not establish %s IPC connection: %s (%d)", client->server_name, pcmk_rc_str(errno), errno); } else if (rc == pcmk_rc_ipc_unauthorized) { crm_err("%s IPC provider authentication failed", (client == NULL)? "Pacemaker" : client->server_name); errno = ECONNABORTED; } else { crm_err("Could not verify authenticity of %s IPC provider", (client == NULL)? "Pacemaker" : client->server_name); errno = ENOTCONN; } return false; } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c index 2a2d791053..69b8b9cb69 100644 --- a/lib/common/ipc_server.c +++ b/lib/common/ipc_server.c @@ -1,1009 +1,1009 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "crmcommon_private.h" /* Evict clients whose event queue grows this large (by default) */ #define PCMK_IPC_DEFAULT_QUEUE_MAX 500 static GHashTable *client_connections = NULL; /*! * \internal * \brief Count IPC clients * * \return Number of active IPC client connections */ guint pcmk__ipc_client_count(void) { return client_connections? g_hash_table_size(client_connections) : 0; } /*! * \internal * \brief Execute a function for each active IPC client connection * * \param[in] func Function to call * \param[in,out] user_data Pointer to pass to function * * \note The parameters are the same as for g_hash_table_foreach(). */ void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data) { if ((func != NULL) && (client_connections != NULL)) { g_hash_table_foreach(client_connections, func, user_data); } } pcmk__client_t * pcmk__find_client(const qb_ipcs_connection_t *c) { if (client_connections) { return g_hash_table_lookup(client_connections, c); } crm_trace("No client found for %p", c); return NULL; } pcmk__client_t * pcmk__find_client_by_id(const char *id) { if ((client_connections != NULL) && (id != NULL)) { gpointer key; pcmk__client_t *client = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { if (strcmp(client->id, id) == 0) { return client; } } } crm_trace("No client found with id='%s'", pcmk__s(id, "")); return NULL; } /*! * \internal * \brief Get a client identifier for use in log messages * * \param[in] c Client * * \return Client's name, client's ID, or a string literal, as available * \note This is intended to be used in format strings like "client %s". */ const char * pcmk__client_name(const pcmk__client_t *c) { if (c == NULL) { return "(unspecified)"; } else if (c->name != NULL) { return c->name; } else if (c->id != NULL) { return c->id; } else { return "(unidentified)"; } } void pcmk__client_cleanup(void) { if (client_connections != NULL) { int active = g_hash_table_size(client_connections); if (active > 0) { crm_warn("Exiting with %d active IPC client%s", active, pcmk__plural_s(active)); } g_hash_table_destroy(client_connections); client_connections = NULL; } } void pcmk__drop_all_clients(qb_ipcs_service_t *service) { qb_ipcs_connection_t *c = NULL; if (service == NULL) { return; } c = qb_ipcs_connection_first_get(service); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(service, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, pcmk__client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } } /*! * \internal * \brief Allocate a new pcmk__client_t object based on an IPC connection * * \param[in] c IPC connection (NULL to allocate generic client) * \param[in] key Connection table key (NULL to use sane default) * \param[in] uid_client UID corresponding to c (ignored if c is NULL) * * \return Pointer to new pcmk__client_t (guaranteed not to be \c NULL) */ static pcmk__client_t * client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client) { pcmk__client_t *client = pcmk__assert_alloc(1, sizeof(pcmk__client_t)); if (c) { client->user = pcmk__uid2username(uid_client); if (client->user == NULL) { client->user = pcmk__str_copy("#unprivileged"); crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged", uid_client); } client->ipcs = c; pcmk__set_client_flags(client, pcmk__client_ipc); client->pid = pcmk__client_pid(c); if (key == NULL) { key = c; } } client->id = crm_generate_uuid(); if (key == NULL) { key = client->id; } if (client_connections == NULL) { crm_trace("Creating IPC client table"); client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); } g_hash_table_insert(client_connections, key, client); return client; } /*! * \brief Allocate a new pcmk__client_t object and generate its ID * * \param[in] key What to use as connections hash table key (NULL to use ID) * * \return Pointer to new pcmk__client_t (asserts on failure) */ pcmk__client_t * pcmk__new_unauth_client(void *key) { return client_from_connection(NULL, key, 0); } pcmk__client_t * pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) { gid_t uid_cluster = 0; gid_t gid_cluster = 0; pcmk__client_t *client = NULL; CRM_CHECK(c != NULL, return NULL); if (pcmk_daemon_user(&uid_cluster, &gid_cluster) < 0) { static bool need_log = TRUE; if (need_log) { crm_warn("Could not find user and group IDs for user %s", CRM_DAEMON_USER); need_log = FALSE; } } if (uid_client != 0) { crm_trace("Giving group %u access to new IPC connection", gid_cluster); /* Passing -1 to chown(2) means don't change */ qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } /* TODO: Do our own auth checking, return NULL if unauthorized */ client = client_from_connection(c, NULL, uid_client); if ((uid_client == 0) || (uid_client == uid_cluster)) { /* Remember when a connection came from root or hacluster */ pcmk__set_client_flags(client, pcmk__client_privileged); } crm_debug("New IPC client %s for PID %u with uid %d and gid %d", client->id, client->pid, uid_client, gid_client); return client; } static struct iovec * pcmk__new_ipc_event(void) { return (struct iovec *) pcmk__assert_alloc(2, sizeof(struct iovec)); } /*! * \brief Free an I/O vector created by pcmk__ipc_prepare_iov() * * \param[in,out] event I/O vector to free */ void pcmk_free_ipc_event(struct iovec *event) { if (event != NULL) { free(event[0].iov_base); free(event[1].iov_base); free(event); } } static void free_event(gpointer data) { pcmk_free_ipc_event((struct iovec *) data); } static void add_event(pcmk__client_t *c, struct iovec *iov) { if (c->event_queue == NULL) { c->event_queue = g_queue_new(); } g_queue_push_tail(c->event_queue, iov); } void pcmk__free_client(pcmk__client_t *c) { if (c == NULL) { return; } if (client_connections) { if (c->ipcs) { crm_trace("Destroying %p/%p (%d remaining)", c, c->ipcs, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->ipcs); } else { crm_trace("Destroying remote connection %p (%d remaining)", c, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->id); } } if (c->event_timer) { g_source_remove(c->event_timer); } if (c->event_queue) { crm_debug("Destroying %d events", g_queue_get_length(c->event_queue)); g_queue_free_full(c->event_queue, free_event); } free(c->id); free(c->name); free(c->user); if (c->remote) { if (c->remote->auth_timeout) { g_source_remove(c->remote->auth_timeout); } if (c->remote->tls_session != NULL) { /* @TODO Reduce duplication at callers. Put here everything * necessary to tear down and free tls_session. */ gnutls_deinit(c->remote->tls_session); } free(c->remote->buffer); free(c->remote); } free(c); } /*! * \internal * \brief Raise IPC eviction threshold for a client, if allowed * * \param[in,out] client Client to modify * \param[in] qmax New threshold */ void pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax) { int rc = pcmk_rc_ok; long long qmax_ll = 0LL; unsigned int orig_value = 0U; CRM_CHECK(client != NULL, return); orig_value = client->queue_max; if (pcmk_is_set(client->flags, pcmk__client_privileged)) { rc = pcmk__scan_ll(qmax, &qmax_ll, 0LL); if (rc == pcmk_rc_ok) { if ((qmax_ll <= 0LL) || (qmax_ll > UINT_MAX)) { rc = ERANGE; } else { client->queue_max = (unsigned int) qmax_ll; } } } else { rc = EACCES; } if (rc != pcmk_rc_ok) { crm_info("Could not set IPC threshold for client %s[%u] to %s: %s", pcmk__client_name(client), client->pid, pcmk__s(qmax, "default"), pcmk_rc_str(rc)); } else if (client->queue_max != orig_value) { crm_debug("IPC threshold for client %s[%u] is now %u (was %u)", pcmk__client_name(client), client->pid, client->queue_max, orig_value); } } int pcmk__client_pid(qb_ipcs_connection_t *c) { struct qb_ipcs_connection_stats stats; stats.client_pid = 0; qb_ipcs_connection_stats_get(c, &stats, 0); return stats.client_pid; } /*! * \internal * \brief Retrieve message XML from data read from client IPC * * \param[in,out] c IPC client connection * \param[in] data Data read from client connection * \param[out] id Where to store message ID from libqb header * \param[out] flags Where to store flags from libqb header * * \return Message XML on success, NULL otherwise */ xmlNode * pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags) { xmlNode *xml = NULL; char *uncompressed = NULL; char *text = ((char *)data) + sizeof(pcmk__ipc_header_t); pcmk__ipc_header_t *header = data; if (!pcmk__valid_ipc_header(header)) { return NULL; } if (id) { *id = ((struct qb_ipc_response_header *)data)->id; } if (flags) { *flags = header->flags; } if (pcmk_is_set(header->flags, crm_ipc_proxied)) { /* Mark this client as being the endpoint of a proxy connection. * Proxy connections responses are sent on the event channel, to avoid * blocking the controller serving as proxy. */ pcmk__set_client_flags(c, pcmk__client_proxied); } if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; uncompressed = pcmk__assert_alloc(1, size_u); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); text = uncompressed; rc = pcmk__bzlib2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Decompression failed: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); free(uncompressed); return NULL; } } pcmk__assert(text[header->size_uncompressed - 1] == 0); xml = pcmk__xml_parse(text); crm_log_xml_trace(xml, "[IPC received]"); free(uncompressed); return xml; } static int crm_ipcs_flush_events(pcmk__client_t *c); static gboolean crm_ipcs_flush_events_cb(gpointer data) { pcmk__client_t *c = data; c->event_timer = 0; crm_ipcs_flush_events(c); return FALSE; } /*! * \internal * \brief Add progressive delay before next event queue flush * * \param[in,out] c Client connection to add delay to * \param[in] queue_len Current event queue length */ static inline void delay_next_flush(pcmk__client_t *c, unsigned int queue_len) { /* Delay a maximum of 1.5 seconds */ guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500; c->event_timer = pcmk__create_timer(delay, crm_ipcs_flush_events_cb, c); } /*! * \internal * \brief Send client any messages in its queue * * \param[in,out] c Client to flush * * \return Standard Pacemaker return value */ static int crm_ipcs_flush_events(pcmk__client_t *c) { int rc = pcmk_rc_ok; ssize_t qb_rc = 0; unsigned int sent = 0; unsigned int queue_len = 0; if (c == NULL) { return rc; } else if (c->event_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); return rc; } if (c->event_queue) { queue_len = g_queue_get_length(c->event_queue); } while (sent < 100) { pcmk__ipc_header_t *header = NULL; struct iovec *event = NULL; if (c->event_queue) { // We don't pop unless send is successful event = g_queue_peek_head(c->event_queue); } if (event == NULL) { // Queue is empty break; } qb_rc = qb_ipcs_event_sendv(c->ipcs, event, 2); if (qb_rc < 0) { rc = (int) -qb_rc; break; } event = g_queue_pop_head(c->event_queue); sent++; header = event[0].iov_base; if (header->size_compressed) { crm_trace("Event %" PRId32 " to %p[%u] (%zd compressed bytes) sent", header->qb.id, c->ipcs, c->pid, qb_rc); } else { crm_trace("Event %" PRId32 " to %p[%u] (%zd bytes) sent: %.120s", header->qb.id, c->ipcs, c->pid, qb_rc, (char *) (event[1].iov_base)); } pcmk_free_ipc_event(event); } queue_len -= sent; if (sent > 0 || queue_len) { crm_trace("Sent %u events (%u remaining) for %p[%d]: %s (%zd)", sent, queue_len, c->ipcs, c->pid, pcmk_rc_str(rc), qb_rc); } if (queue_len) { /* Allow clients to briefly fall behind on processing incoming messages, * but drop completely unresponsive clients so the connection doesn't * consume resources indefinitely. */ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { /* Don't evict for a new or shrinking backlog */ crm_warn("Client with process ID %u has a backlog of %u messages " QB_XS " %p", c->pid, queue_len, c->ipcs); } else { crm_err("Evicting client with process ID %u due to backlog of %u messages " QB_XS " %p", c->pid, queue_len, c->ipcs); c->queue_backlog = 0; qb_ipcs_disconnect(c->ipcs); return rc; } } c->queue_backlog = queue_len; delay_next_flush(c, queue_len); } else { /* Event queue is empty, there is no backlog */ c->queue_backlog = 0; } return rc; } /*! * \internal * \brief Create an I/O vector for sending an IPC XML message * * \param[in] request Identifier for libqb response header * \param[in] message XML message to send * \param[in] max_send_size If 0, default IPC buffer size is used * \param[out] result Where to store prepared I/O vector * \param[out] bytes Size of prepared data in bytes * * \return Standard Pacemaker return code */ int pcmk__ipc_prepare_iov(uint32_t request, const xmlNode *message, uint32_t max_send_size, struct iovec **result, ssize_t *bytes) { struct iovec *iov; unsigned int total = 0; GString *buffer = NULL; pcmk__ipc_header_t *header = NULL; int rc = pcmk_rc_ok; if ((message == NULL) || (result == NULL)) { rc = EINVAL; goto done; } header = calloc(1, sizeof(pcmk__ipc_header_t)); if (header == NULL) { rc = ENOMEM; goto done; } buffer = g_string_sized_new(1024); pcmk__xml_string(message, 0, buffer, 0); if (max_send_size == 0) { max_send_size = crm_ipc_default_buffer_size(); } CRM_LOG_ASSERT(max_send_size != 0); *result = NULL; iov = pcmk__new_ipc_event(); iov[0].iov_len = sizeof(pcmk__ipc_header_t); iov[0].iov_base = header; header->version = PCMK__IPC_VERSION; header->size_uncompressed = 1 + buffer->len; total = iov[0].iov_len + header->size_uncompressed; if (total < max_send_size) { iov[1].iov_base = pcmk__str_copy(buffer->str); iov[1].iov_len = header->size_uncompressed; } else { static unsigned int biggest = 0; char *compressed = NULL; unsigned int new_size = 0; if (pcmk__compress(buffer->str, (unsigned int) header->size_uncompressed, (unsigned int) max_send_size, &compressed, &new_size) == pcmk_rc_ok) { pcmk__set_ipc_flags(header->flags, "send data", crm_ipc_compressed); header->size_compressed = new_size; iov[1].iov_len = header->size_compressed; iov[1].iov_base = compressed; biggest = QB_MAX(header->size_compressed, biggest); } else { crm_log_xml_trace(message, "EMSGSIZE"); biggest = QB_MAX(header->size_uncompressed, biggest); crm_err("Could not compress %u-byte message into less than IPC " "limit of %u bytes; set PCMK_ipc_buffer to higher value " "(%u bytes suggested)", header->size_uncompressed, max_send_size, 4 * biggest); free(compressed); pcmk_free_ipc_event(iov); rc = EMSGSIZE; goto done; } } header->qb.size = iov[0].iov_len + iov[1].iov_len; header->qb.id = (int32_t)request; /* Replying to a specific request */ *result = iov; pcmk__assert(header->qb.size > 0); if (bytes != NULL) { *bytes = header->qb.size; } done: if (buffer != NULL) { g_string_free(buffer, TRUE); } return rc; } int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags) { int rc = pcmk_rc_ok; static uint32_t id = 1; pcmk__ipc_header_t *header = iov[0].iov_base; if (c->flags & pcmk__client_proxied) { /* _ALL_ replies to proxied connections need to be sent as events */ if (!pcmk_is_set(flags, crm_ipc_server_event)) { /* The proxied flag lets us know this was originally meant to be a * response, even though we're sending it over the event channel. */ pcmk__set_ipc_flags(flags, "server event", crm_ipc_server_event |crm_ipc_proxied_relay_response); } } pcmk__set_ipc_flags(header->flags, "server event", flags); if (flags & crm_ipc_server_event) { header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ if (flags & crm_ipc_server_free) { crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); add_event(c, iov); } else { struct iovec *iov_copy = pcmk__new_ipc_event(); crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); iov_copy[0].iov_len = iov[0].iov_len; iov_copy[0].iov_base = malloc(iov[0].iov_len); memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); iov_copy[1].iov_len = iov[1].iov_len; iov_copy[1].iov_base = malloc(iov[1].iov_len); memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); add_event(c, iov_copy); } } else { ssize_t qb_rc; CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ qb_rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); if (qb_rc < header->qb.size) { if (qb_rc < 0) { rc = (int) -qb_rc; } crm_notice("Response %" PRId32 " to pid %u failed: %s " QB_XS " bytes=%" PRId32 " rc=%zd ipcs=%p", header->qb.id, c->pid, pcmk_rc_str(rc), header->qb.size, qb_rc, c->ipcs); } else { crm_trace("Response %" PRId32 " sent, %zd bytes to %p[%u]", header->qb.id, qb_rc, c->ipcs, c->pid); } if (flags & crm_ipc_server_free) { pcmk_free_ipc_event(iov); } } if (flags & crm_ipc_server_event) { rc = crm_ipcs_flush_events(c); } else { crm_ipcs_flush_events(c); } if ((rc == EPIPE) || (rc == ENOTCONN)) { crm_trace("Client %p disconnected", c->ipcs); } return rc; } int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, const xmlNode *message, uint32_t flags) { struct iovec *iov = NULL; int rc = pcmk_rc_ok; if (c == NULL) { return EINVAL; } rc = pcmk__ipc_prepare_iov(request, message, crm_ipc_default_buffer_size(), &iov, NULL); if (rc == pcmk_rc_ok) { pcmk__set_ipc_flags(flags, "send data", crm_ipc_server_free); rc = pcmk__ipc_send_iov(c, iov, flags); } else { pcmk_free_ipc_event(iov); crm_notice("IPC message to pid %d failed: %s " QB_XS " rc=%d", c->pid, pcmk_rc_str(rc), rc); } return rc; } /*! * \internal * \brief Create an acknowledgement with a status code to send to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Exit status code to add to ack * * \return Newly created XML for ack * * \note The caller is responsible for freeing the return value with * \c pcmk__xml_free(). */ xmlNode * pcmk__ipc_create_ack_as(const char *function, int line, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { xmlNode *ack = NULL; if (pcmk_is_set(flags, crm_ipc_client_response)) { ack = pcmk__xe_create(NULL, tag); crm_xml_add(ack, PCMK_XA_FUNCTION, function); - crm_xml_add_int(ack, PCMK__XA_LINE, line); - crm_xml_add_int(ack, PCMK_XA_STATUS, (int) status); + pcmk__xe_set_int(ack, PCMK__XA_LINE, line); + pcmk__xe_set_int(ack, PCMK_XA_STATUS, (int) status); crm_xml_add(ack, PCMK__XA_IPC_PROTO_VERSION, ver); } return ack; } /*! * \internal * \brief Send an acknowledgement with a status code to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] c Client to send ack to * \param[in] request Request ID being replied to * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Status code to send with acknowledgement * * \return Standard Pacemaker return code */ int pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, uint32_t request, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { int rc = pcmk_rc_ok; xmlNode *ack = pcmk__ipc_create_ack_as(function, line, flags, tag, ver, status); if (ack != NULL) { crm_trace("Ack'ing IPC message from client %s as <%s status=%d>", pcmk__client_name(c), tag, status); crm_log_xml_trace(ack, "sent-ack"); c->request_id = 0; rc = pcmk__ipc_send_xml(c, request, ack, flags); pcmk__xml_free(ack); } return rc; } /*! * \internal * \brief Add an IPC server to the main loop for the CIB manager API * * \param[out] ipcs_ro New IPC server for read-only CIB manager API * \param[out] ipcs_rw New IPC server for read/write CIB manager API * \param[out] ipcs_shm New IPC server for shared-memory CIB manager API * \param[in] ro_cb IPC callbacks for read-only API * \param[in] rw_cb IPC callbacks for read/write and shared-memory APIs * * \note This function exits fatally if unable to create the servers. * \note There is no actual difference between the three IPC endpoints other * than their names. */ void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(PCMK__SERVER_BASED_RO, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(PCMK__SERVER_BASED_RW, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(PCMK__SERVER_BASED_SHM, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create the CIB manager: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Destroy IPC servers for the CIB manager API * * \param[out] ipcs_ro IPC server for read-only the CIB manager API * \param[out] ipcs_rw IPC server for read/write the CIB manager API * \param[out] ipcs_shm IPC server for shared-memory the CIB manager API * * \note This is a convenience function for calling qb_ipcs_destroy() for each * argument. */ void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } /*! * \internal * \brief Add an IPC server to the main loop for the controller API * * \param[in] cb IPC callbacks * * \return Newly created IPC server */ qb_ipcs_service_t * pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } /*! * \internal * \brief Add an IPC server to the main loop for the attribute manager API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(PCMK__VALUE_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_crit("Exiting fatally because unable to serve " PCMK__SERVER_ATTRD " IPC (verify pacemaker and pacemaker_remote are not both " "enabled)"); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the fencer API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb, QB_LOOP_HIGH); if (*ipcs == NULL) { crm_err("Failed to create fencer: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemakerd API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits with CRM_EX_OSERR if unable to create the servers. */ void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Couldn't start pacemakerd IPC server"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); /* sub-daemons are observed by pacemakerd. Thus we exit CRM_EX_FATAL * if we want to prevent pacemakerd from restarting them. * With pacemakerd we leave the exit-code shown to e.g. systemd * to what it was prior to moving the code here from pacemakerd.c */ crm_exit(CRM_EX_OSERR); } } /*! * \internal * \brief Add an IPC server to the main loop for the scheduler API * * \param[in] cb IPC callbacks * * \return Newly created IPC server * \note This function exits fatally if unable to create the servers. */ qb_ipcs_service_t * pcmk__serve_schedulerd_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_NATIVE, cb); } diff --git a/lib/common/patchset.c b/lib/common/patchset.c index 191dbbd257..e170f4f166 100644 --- a/lib/common/patchset.c +++ b/lib/common/patchset.c @@ -1,1003 +1,1003 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include // xmlNode #include #include #include #include // CRM_XML_LOG_BASE, etc. #include "crmcommon_private.h" static const char *const vfields[] = { PCMK_XA_ADMIN_EPOCH, PCMK_XA_EPOCH, PCMK_XA_NUM_UPDATES, }; /* Add changes for specified XML to patchset. * For patchset format, refer to diff schema. */ static void add_xml_changes_to_patchset(xmlNode *xml, xmlNode *patchset) { xmlNode *cIter = NULL; xmlAttr *pIter = NULL; xmlNode *change = NULL; xml_node_private_t *nodepriv = xml->_private; const char *value = NULL; if (nodepriv == NULL) { /* Elements that shouldn't occur in a CIB don't have _private set. They * should be stripped out, ignored, or have an error thrown by any code * that processes their parent, so we ignore any changes to them. */ return; } // If this XML node is new, just report that if (patchset && pcmk_is_set(nodepriv->flags, pcmk__xf_created)) { GString *xpath = pcmk__element_xpath(xml->parent); if (xpath != NULL) { int position = pcmk__xml_position(xml, pcmk__xf_deleted); change = pcmk__xe_create(patchset, PCMK_XE_CHANGE); crm_xml_add(change, PCMK_XA_OPERATION, PCMK_VALUE_CREATE); crm_xml_add(change, PCMK_XA_PATH, (const char *) xpath->str); - crm_xml_add_int(change, PCMK_XE_POSITION, position); + pcmk__xe_set_int(change, PCMK_XE_POSITION, position); pcmk__xml_copy(change, xml); g_string_free(xpath, TRUE); } return; } // Check each of the XML node's attributes for changes for (pIter = pcmk__xe_first_attr(xml); pIter != NULL; pIter = pIter->next) { xmlNode *attr = NULL; nodepriv = pIter->_private; if (!pcmk_any_flags_set(nodepriv->flags, pcmk__xf_deleted|pcmk__xf_dirty)) { continue; } if (change == NULL) { GString *xpath = pcmk__element_xpath(xml); if (xpath != NULL) { change = pcmk__xe_create(patchset, PCMK_XE_CHANGE); crm_xml_add(change, PCMK_XA_OPERATION, PCMK_VALUE_MODIFY); crm_xml_add(change, PCMK_XA_PATH, (const char *) xpath->str); change = pcmk__xe_create(change, PCMK_XE_CHANGE_LIST); g_string_free(xpath, TRUE); } } attr = pcmk__xe_create(change, PCMK_XE_CHANGE_ATTR); crm_xml_add(attr, PCMK_XA_NAME, (const char *) pIter->name); if (nodepriv->flags & pcmk__xf_deleted) { crm_xml_add(attr, PCMK_XA_OPERATION, "unset"); } else { crm_xml_add(attr, PCMK_XA_OPERATION, "set"); value = pcmk__xml_attr_value(pIter); crm_xml_add(attr, PCMK_XA_VALUE, value); } } if (change) { xmlNode *result = NULL; change = pcmk__xe_create(change->parent, PCMK_XE_CHANGE_RESULT); result = pcmk__xe_create(change, (const char *)xml->name); for (pIter = pcmk__xe_first_attr(xml); pIter != NULL; pIter = pIter->next) { nodepriv = pIter->_private; if (!pcmk_is_set(nodepriv->flags, pcmk__xf_deleted)) { value = pcmk__xe_get(xml, (const char *) pIter->name); crm_xml_add(result, (const char *)pIter->name, value); } } } // Now recursively do the same for each child node of this node for (cIter = pcmk__xml_first_child(xml); cIter != NULL; cIter = pcmk__xml_next(cIter)) { add_xml_changes_to_patchset(cIter, patchset); } nodepriv = xml->_private; if (patchset && pcmk_is_set(nodepriv->flags, pcmk__xf_moved)) { GString *xpath = pcmk__element_xpath(xml); crm_trace("%s.%s moved to position %d", xml->name, pcmk__xe_id(xml), pcmk__xml_position(xml, pcmk__xf_skip)); if (xpath != NULL) { change = pcmk__xe_create(patchset, PCMK_XE_CHANGE); crm_xml_add(change, PCMK_XA_OPERATION, PCMK_VALUE_MOVE); crm_xml_add(change, PCMK_XA_PATH, (const char *) xpath->str); - crm_xml_add_int(change, PCMK_XE_POSITION, - pcmk__xml_position(xml, pcmk__xf_deleted)); + pcmk__xe_set_int(change, PCMK_XE_POSITION, + pcmk__xml_position(xml, pcmk__xf_deleted)); g_string_free(xpath, TRUE); } } } static bool is_config_change(xmlNode *xml) { GList *gIter = NULL; xml_node_private_t *nodepriv = NULL; xml_doc_private_t *docpriv; xmlNode *config = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL, NULL); if (config) { nodepriv = config->_private; } if ((nodepriv != NULL) && pcmk_is_set(nodepriv->flags, pcmk__xf_dirty)) { return TRUE; } if ((xml->doc != NULL) && (xml->doc->_private != NULL)) { docpriv = xml->doc->_private; for (gIter = docpriv->deleted_objs; gIter; gIter = gIter->next) { pcmk__deleted_xml_t *deleted_obj = gIter->data; if (strstr(deleted_obj->path, "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION) != NULL) { return TRUE; } } } return FALSE; } static xmlNode * xml_create_patchset_v2(xmlNode *source, xmlNode *target) { int lpc = 0; GList *gIter = NULL; xml_doc_private_t *docpriv; xmlNode *v = NULL; xmlNode *version = NULL; xmlNode *patchset = NULL; pcmk__assert(target != NULL); if (!pcmk__xml_doc_all_flags_set(target->doc, pcmk__xf_dirty)) { return NULL; } pcmk__assert(target->doc != NULL); docpriv = target->doc->_private; patchset = pcmk__xe_create(NULL, PCMK_XE_DIFF); - crm_xml_add_int(patchset, PCMK_XA_FORMAT, 2); + pcmk__xe_set_int(patchset, PCMK_XA_FORMAT, 2); version = pcmk__xe_create(patchset, PCMK_XE_VERSION); v = pcmk__xe_create(version, PCMK_XE_SOURCE); for (lpc = 0; lpc < PCMK__NELEM(vfields); lpc++) { const char *value = pcmk__xe_get(source, vfields[lpc]); if (value == NULL) { value = "1"; } crm_xml_add(v, vfields[lpc], value); } v = pcmk__xe_create(version, PCMK_XE_TARGET); for (lpc = 0; lpc < PCMK__NELEM(vfields); lpc++) { const char *value = pcmk__xe_get(target, vfields[lpc]); if (value == NULL) { value = "1"; } crm_xml_add(v, vfields[lpc], value); } for (gIter = docpriv->deleted_objs; gIter; gIter = gIter->next) { pcmk__deleted_xml_t *deleted_obj = gIter->data; xmlNode *change = pcmk__xe_create(patchset, PCMK_XE_CHANGE); crm_xml_add(change, PCMK_XA_OPERATION, PCMK_VALUE_DELETE); crm_xml_add(change, PCMK_XA_PATH, deleted_obj->path); if (deleted_obj->position >= 0) { - crm_xml_add_int(change, PCMK_XE_POSITION, deleted_obj->position); + pcmk__xe_set_int(change, PCMK_XE_POSITION, deleted_obj->position); } } add_xml_changes_to_patchset(target, patchset); return patchset; } xmlNode * xml_create_patchset(int format, xmlNode *source, xmlNode *target, bool *config_changed, bool manage_version) { bool local_config_changed = false; if (format == 0) { format = 2; } if (format != 2) { crm_err("Unknown patch format: %d", format); return NULL; } xml_acl_disable(target); if ((target == NULL) || !pcmk__xml_doc_all_flags_set(target->doc, pcmk__xf_dirty)) { crm_trace("No change %d", format); return NULL; } if (config_changed == NULL) { config_changed = &local_config_changed; } *config_changed = is_config_change(target); if (manage_version) { int counter = 0; if (*config_changed) { crm_xml_add(target, PCMK_XA_NUM_UPDATES, "0"); pcmk__xe_get_int(target, PCMK_XA_EPOCH, &counter); - crm_xml_add_int(target, PCMK_XA_EPOCH, counter + 1); + pcmk__xe_set_int(target, PCMK_XA_EPOCH, counter + 1); } else { pcmk__xe_get_int(target, PCMK_XA_NUM_UPDATES, &counter); - crm_xml_add_int(target, PCMK_XA_NUM_UPDATES, counter + 1); + pcmk__xe_set_int(target, PCMK_XA_NUM_UPDATES, counter + 1); } } return xml_create_patchset_v2(source, target); } /*! * \internal * \brief Add a digest of a patchset's target XML to the patchset * * \param[in,out] patchset XML patchset * \param[in] target Target XML */ void pcmk__xml_patchset_add_digest(xmlNode *patchset, const xmlNode *target) { char *digest = NULL; CRM_CHECK((patchset != NULL) && (target != NULL), return); /* If tracking is enabled and the document is dirty, we could get an * incorrect digest. Call pcmk__xml_commit_changes() before calling this. */ CRM_CHECK(!pcmk__xml_doc_all_flags_set(target->doc, pcmk__xf_dirty), return); digest = pcmk__digest_xml(target, true); crm_xml_add(patchset, PCMK__XA_DIGEST, digest); free(digest); } /*! * \internal * \brief Get the source and target CIB versions from an XML patchset * * Each output object will contain, in order, the following version fields from * the source and target: * * \c PCMK_XA_ADMIN_EPOCH * * \c PCMK_XA_EPOCH * * \c PCMK_XA_NUM_UPDATES * * \param[in] patchset XML patchset * \param[out] source Where to store versions from source CIB * \param[out] target Where to store versions from target CIB * * \return Standard Pacemaker return code */ int pcmk__xml_patchset_versions(const xmlNode *patchset, int source[3], int target[3]) { int format = 1; const xmlNode *version = NULL; const xmlNode *source_xml = NULL; const xmlNode *target_xml = NULL; CRM_CHECK((patchset != NULL) && (source != NULL) && (target != NULL), return EINVAL); pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format); if (format != 2) { crm_err("Unknown patch format: %d", format); return EINVAL; } version = pcmk__xe_first_child(patchset, PCMK_XE_VERSION, NULL, NULL); source_xml = pcmk__xe_first_child(version, PCMK_XE_SOURCE, NULL, NULL); target_xml = pcmk__xe_first_child(version, PCMK_XE_TARGET, NULL, NULL); if ((source_xml == NULL) || (target_xml == NULL)) { return EINVAL; } for (int i = 0; i < PCMK__NELEM(vfields); i++) { if (pcmk__xe_get_int(source_xml, vfields[i], &(source[i])) != pcmk_rc_ok) { return EINVAL; } crm_trace("Got %d for source[%s]", source[i], vfields[i]); if (pcmk__xe_get_int(target_xml, vfields[i], &(target[i])) != pcmk_rc_ok) { return EINVAL; } crm_trace("Got %d for target[%s]", target[i], vfields[i]); } return pcmk_rc_ok; } /*! * \internal * \brief Check whether patchset can be applied to current CIB * * \param[in] cib_root Root of current CIB * \param[in] patchset Patchset to check * * \return Standard Pacemaker return code */ static int check_patchset_versions(const xmlNode *cib_root, const xmlNode *patchset) { int current[] = { 0, 0, 0 }; int source[] = { 0, 0, 0 }; int target[] = { 0, 0, 0 }; int rc = pcmk_rc_ok; for (int i = 0; i < PCMK__NELEM(vfields); i++) { /* @COMPAT We should probably fail with EINVAL for negative or invalid * valid reason for such values to be present. * * Preserve behavior for xml_apply_patchset(). Use new behavior in * libpacemaker replacement. */ if (pcmk__xe_get_int(cib_root, vfields[i], &(current[i])) == pcmk_rc_ok) { crm_trace("Got %d for current[%s]%s", current[i], vfields[i], ((current[i] < 0)? ", using 0" : "")); } else { crm_debug("Failed to get value for current[%s], using 0", vfields[i]); } if (current[i] < 0) { current[i] = 0; } } /* Set some defaults in case nothing is present. * * @COMPAT We should probably skip this step, and fail immediately below if * target[i] < source[i]. * * Preserve behavior for xml_apply_patchset(). Use new behavior in * libpacemaker replacement. */ target[0] = current[0]; target[1] = current[1]; target[2] = current[2] + 1; for (int i = 0; i < PCMK__NELEM(vfields); i++) { source[i] = current[i]; } rc = pcmk__xml_patchset_versions(patchset, source, target); if (rc != pcmk_rc_ok) { return rc; } // Ensure current version matches patchset source version for (int i = 0; i < PCMK__NELEM(vfields); i++) { if (current[i] < source[i]) { crm_debug("Current %s is too low " "(%d.%d.%d < %d.%d.%d --> %d.%d.%d)", vfields[i], current[0], current[1], current[2], source[0], source[1], source[2], target[0], target[1], target[2]); return pcmk_rc_diff_resync; } if (current[i] > source[i]) { crm_info("Current %s is too high " "(%d.%d.%d > %d.%d.%d --> %d.%d.%d)", vfields[i], current[0], current[1], current[2], source[0], source[1], source[2], target[0], target[1], target[2]); crm_log_xml_info(patchset, "OldPatch"); return pcmk_rc_old_data; } } // Ensure target version is newer than source version for (int i = 0; i < PCMK__NELEM(vfields); i++) { if (target[i] > source[i]) { crm_debug("Can apply patch %d.%d.%d to %d.%d.%d", target[0], target[1], target[2], current[0], current[1], current[2]); return pcmk_rc_ok; } } crm_notice("Versions did not change in patch %d.%d.%d", target[0], target[1], target[2]); return pcmk_rc_old_data; } // Return first child matching element name and optionally id or position static xmlNode * first_matching_xml_child(const xmlNode *parent, const char *name, const char *id, int position) { xmlNode *cIter = NULL; for (cIter = pcmk__xml_first_child(parent); cIter != NULL; cIter = pcmk__xml_next(cIter)) { if (strcmp((const char *) cIter->name, name) != 0) { continue; } else if (id) { const char *cid = pcmk__xe_id(cIter); if ((cid == NULL) || (strcmp(cid, id) != 0)) { continue; } } // "position" makes sense only for XML comments for now if ((cIter->type == XML_COMMENT_NODE) && (position >= 0) && (pcmk__xml_position(cIter, pcmk__xf_skip) != position)) { continue; } return cIter; } return NULL; } /*! * \internal * \brief Simplified, more efficient alternative to pcmk__xpath_find_one() * * \param[in] top Root of XML to search * \param[in] key Search xpath * \param[in] target_position If deleting, where to delete * * \return XML child matching xpath if found, NULL otherwise * * \note This only works on simplified xpaths found in v2 patchset diffs, * i.e. the only allowed search predicate is [@id='XXX']. */ static xmlNode * search_v2_xpath(const xmlNode *top, const char *key, int target_position) { xmlNode *target = (xmlNode *) top->doc; const char *current = key; char *section; char *remainder; char *id; char *tag; char *path = NULL; int rc; size_t key_len; CRM_CHECK(key != NULL, return NULL); key_len = strlen(key); /* These are scanned from key after a slash, so they can't be bigger * than key_len - 1 characters plus a null terminator. */ remainder = pcmk__assert_alloc(key_len, sizeof(char)); section = pcmk__assert_alloc(key_len, sizeof(char)); id = pcmk__assert_alloc(key_len, sizeof(char)); tag = pcmk__assert_alloc(key_len, sizeof(char)); do { // Look for /NEXT_COMPONENT/REMAINING_COMPONENTS rc = sscanf(current, "/%[^/]%s", section, remainder); if (rc > 0) { // Separate FIRST_COMPONENT into TAG[@id='ID'] int f = sscanf(section, "%[^[][@" PCMK_XA_ID "='%[^']", tag, id); int current_position = -1; /* The target position is for the final component tag, so only use * it if there is nothing left to search after this component. */ if ((rc == 1) && (target_position >= 0)) { current_position = target_position; } switch (f) { case 1: target = first_matching_xml_child(target, tag, NULL, current_position); break; case 2: target = first_matching_xml_child(target, tag, id, current_position); break; default: // This should not be possible target = NULL; break; } current = remainder; } // Continue if something remains to search, and we've matched so far } while ((rc == 2) && target); if (target) { crm_trace("Found %s for %s", (path = (char *) xmlGetNodePath(target)), key); free(path); } else { crm_debug("No match for %s", key); } free(remainder); free(section); free(tag); free(id); return target; } typedef struct xml_change_obj_s { const xmlNode *change; xmlNode *match; } xml_change_obj_t; static gint sort_change_obj_by_position(gconstpointer a, gconstpointer b) { const xml_change_obj_t *change_obj_a = a; const xml_change_obj_t *change_obj_b = b; int position_a = -1; int position_b = -1; pcmk__xe_get_int(change_obj_a->change, PCMK_XE_POSITION, &position_a); pcmk__xe_get_int(change_obj_b->change, PCMK_XE_POSITION, &position_b); if (position_a < position_b) { return -1; } else if (position_a > position_b) { return 1; } return 0; } /*! * \internal * \brief Apply a version 2 patchset to an XML node * * \param[in,out] xml XML to apply patchset to * \param[in] patchset Patchset to apply * * \return Standard Pacemaker return code */ static int apply_v2_patchset(xmlNode *xml, const xmlNode *patchset) { int rc = pcmk_rc_ok; const xmlNode *change = NULL; GList *change_objs = NULL; GList *gIter = NULL; for (change = pcmk__xml_first_child(patchset); change != NULL; change = pcmk__xml_next(change)) { xmlNode *match = NULL; const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION); const char *xpath = pcmk__xe_get(change, PCMK_XA_PATH); int position = -1; if (op == NULL) { continue; } crm_trace("Processing %s %s", change->name, op); /* PCMK_VALUE_DELETE changes for XML comments are generated with * PCMK_XE_POSITION */ if (strcmp(op, PCMK_VALUE_DELETE) == 0) { pcmk__xe_get_int(change, PCMK_XE_POSITION, &position); } match = search_v2_xpath(xml, xpath, position); crm_trace("Performing %s on %s with %p", op, xpath, match); if ((match == NULL) && (strcmp(op, PCMK_VALUE_DELETE) == 0)) { crm_debug("No %s match for %s in %p", op, xpath, xml->doc); continue; } else if (match == NULL) { crm_err("No %s match for %s in %p", op, xpath, xml->doc); rc = pcmk_rc_diff_failed; continue; } else if (pcmk__str_any_of(op, PCMK_VALUE_CREATE, PCMK_VALUE_MOVE, NULL)) { // Delay the adding of a PCMK_VALUE_CREATE object xml_change_obj_t *change_obj = pcmk__assert_alloc(1, sizeof(xml_change_obj_t)); change_obj->change = change; change_obj->match = match; change_objs = g_list_append(change_objs, change_obj); if (strcmp(op, PCMK_VALUE_MOVE) == 0) { // Temporarily put the PCMK_VALUE_MOVE object after the last sibling if ((match->parent != NULL) && (match->parent->last != NULL)) { xmlAddNextSibling(match->parent->last, match); } } } else if (strcmp(op, PCMK_VALUE_DELETE) == 0) { pcmk__xml_free(match); } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) { const xmlNode *child = pcmk__xe_first_child(change, PCMK_XE_CHANGE_RESULT, NULL, NULL); const xmlNode *attrs = pcmk__xml_first_child(child); if (attrs == NULL) { rc = ENOMSG; continue; } // Remove all attributes pcmk__xe_remove_matching_attrs(match, false, NULL, NULL); for (xmlAttrPtr pIter = pcmk__xe_first_attr(attrs); pIter != NULL; pIter = pIter->next) { const char *name = (const char *) pIter->name; const char *value = pcmk__xml_attr_value(pIter); crm_xml_add(match, name, value); } } else { crm_err("Unknown operation: %s", op); rc = pcmk_rc_diff_failed; } } // Changes should be generated in the right order. Double checking. change_objs = g_list_sort(change_objs, sort_change_obj_by_position); for (gIter = change_objs; gIter; gIter = gIter->next) { xml_change_obj_t *change_obj = gIter->data; xmlNode *match = change_obj->match; const char *op = NULL; const char *xpath = NULL; change = change_obj->change; op = pcmk__xe_get(change, PCMK_XA_OPERATION); xpath = pcmk__xe_get(change, PCMK_XA_PATH); crm_trace("Continue performing %s on %s with %p", op, xpath, match); if (strcmp(op, PCMK_VALUE_CREATE) == 0) { int position = 0; xmlNode *child = NULL; xmlNode *match_child = NULL; match_child = match->children; pcmk__xe_get_int(change, PCMK_XE_POSITION, &position); while ((match_child != NULL) && (position != pcmk__xml_position(match_child, pcmk__xf_skip))) { match_child = match_child->next; } child = pcmk__xml_copy(match, change->children); if (match_child != NULL) { crm_trace("Adding %s at position %d", child->name, position); xmlAddPrevSibling(match_child, child); } else { crm_trace("Adding %s at position %d (end)", child->name, position); } } else if (strcmp(op, PCMK_VALUE_MOVE) == 0) { int position = 0; pcmk__xe_get_int(change, PCMK_XE_POSITION, &position); if (position != pcmk__xml_position(match, pcmk__xf_skip)) { xmlNode *match_child = NULL; int p = position; if (p > pcmk__xml_position(match, pcmk__xf_skip)) { p++; // Skip ourselves } pcmk__assert(match->parent != NULL); match_child = match->parent->children; while ((match_child != NULL) && (p != pcmk__xml_position(match_child, pcmk__xf_skip))) { match_child = match_child->next; } crm_trace("Moving %s to position %d (was %d, prev %p, %s %p)", match->name, position, pcmk__xml_position(match, pcmk__xf_skip), match->prev, (match_child? "next":"last"), (match_child? match_child : match->parent->last)); if (match_child) { xmlAddPrevSibling(match_child, match); } else { pcmk__assert(match->parent->last != NULL); xmlAddNextSibling(match->parent->last, match); } } else { crm_trace("%s is already in position %d", match->name, position); } if (position != pcmk__xml_position(match, pcmk__xf_skip)) { crm_err("Moved %s.%s to position %d instead of %d (%p)", match->name, pcmk__xe_id(match), pcmk__xml_position(match, pcmk__xf_skip), position, match->prev); rc = pcmk_rc_diff_failed; } } } g_list_free_full(change_objs, free); return rc; } int xml_apply_patchset(xmlNode *xml, const xmlNode *patchset, bool check_version) { int format = 1; int rc = pcmk_ok; xmlNode *old = NULL; const char *digest = NULL; if (patchset == NULL) { return rc; } pcmk__log_xml_patchset(LOG_TRACE, patchset); if (check_version) { rc = pcmk_rc2legacy(check_patchset_versions(xml, patchset)); if (rc != pcmk_ok) { return rc; } } digest = pcmk__xe_get(patchset, PCMK__XA_DIGEST); if (digest != NULL) { /* Make original XML available for logging in case result doesn't have * expected digest */ pcmk__if_tracing(old = pcmk__xml_copy(NULL, xml), {}); } if (rc == pcmk_ok) { pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format); if (format != 2) { crm_err("Unknown patch format: %d", format); rc = -EINVAL; } else { rc = pcmk_rc2legacy(apply_v2_patchset(xml, patchset)); } } if ((rc == pcmk_ok) && (digest != NULL)) { char *new_digest = NULL; new_digest = pcmk__digest_xml(xml, true); if (!pcmk__str_eq(new_digest, digest, pcmk__str_casei)) { crm_info("v%d digest mis-match: expected %s, calculated %s", format, digest, new_digest); rc = -pcmk_err_diff_failed; pcmk__if_tracing( { save_xml_to_file(old, "PatchDigest:input", NULL); save_xml_to_file(xml, "PatchDigest:result", NULL); save_xml_to_file(patchset, "PatchDigest:diff", NULL); }, {} ); } else { crm_trace("v%d digest matched: expected %s, calculated %s", format, digest, new_digest); } free(new_digest); } pcmk__xml_free(old); return rc; } /*! * \internal * \brief Check whether a given CIB element was modified in a CIB patchset * * \param[in] patchset CIB XML patchset * \param[in] element XML tag of CIB element to check (\c NULL is equivalent * to \c PCMK_XE_CIB). Supported values include any CIB * element supported by \c pcmk__cib_abs_xpath_for(). * * \retval \c true if \p element was modified * \retval \c false otherwise */ bool pcmk__cib_element_in_patchset(const xmlNode *patchset, const char *element) { const char *element_xpath = pcmk__cib_abs_xpath_for(element); const char *parent_xpath = pcmk_cib_parent_name_for(element); char *element_regex = NULL; bool rc = false; int format = 1; pcmk__assert(patchset != NULL); pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format); if (format != 2) { crm_warn("Unknown patch format: %d", format); return false; } CRM_CHECK(element_xpath != NULL, return false); // Unsupported element /* Matches if and only if element_xpath is part of a changed path * (supported values for element never contain XML IDs with schema * validation enabled) * * @TODO Use POSIX word boundary instead of (/|$), if it works: * https://www.regular-expressions.info/wordboundaries.html. */ element_regex = crm_strdup_printf("^%s(/|$)", element_xpath); for (const xmlNode *change = pcmk__xe_first_child(patchset, PCMK_XE_CHANGE, NULL, NULL); change != NULL; change = pcmk__xe_next(change, PCMK_XE_CHANGE)) { const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION); const char *diff_xpath = pcmk__xe_get(change, PCMK_XA_PATH); if (pcmk__str_eq(diff_xpath, element_regex, pcmk__str_regex)) { // Change to an existing element rc = true; break; } if (pcmk__str_eq(op, PCMK_VALUE_CREATE, pcmk__str_none) && pcmk__str_eq(diff_xpath, parent_xpath, pcmk__str_none) && pcmk__xe_is(pcmk__xe_first_child(change, NULL, NULL, NULL), element)) { // Newly added element rc = true; break; } } free(element_regex); return rc; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include // Return value of true means failure; false means success bool xml_patch_versions(const xmlNode *patchset, int add[3], int del[3]) { const xmlNode *version = pcmk__xe_first_child(patchset, PCMK_XE_VERSION, NULL, NULL); const xmlNode *source = pcmk__xe_first_child(version, PCMK_XE_SOURCE, NULL, NULL); const xmlNode *target = pcmk__xe_first_child(version, PCMK_XE_TARGET, NULL, NULL); int format = 1; pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format); if (format != 2) { crm_err("Unknown patch format: %d", format); return true; } if (source != NULL) { for (int i = 0; i < PCMK__NELEM(vfields); i++) { pcmk__xe_get_int(source, vfields[i], &(del[i])); crm_trace("Got %d for del[%s]", del[i], vfields[i]); } } if (target != NULL) { for (int i = 0; i < PCMK__NELEM(vfields); i++) { pcmk__xe_get_int(target, vfields[i], &(add[i])); crm_trace("Got %d for add[%s]", add[i], vfields[i]); } } return false; } void patchset_process_digest(xmlNode *patch, const xmlNode *source, const xmlNode *target, bool with_digest) { char *digest = NULL; if ((patch == NULL) || (source == NULL) || (target == NULL) || !with_digest) { return; } /* We should always call pcmk__xml_commit_changes() before calculating a * digest. Otherwise, with an on-tracking dirty target, we could get a wrong * digest. */ CRM_LOG_ASSERT(!pcmk__xml_doc_all_flags_set(target->doc, pcmk__xf_dirty)); digest = pcmk__digest_xml(target, true); crm_xml_add(patch, PCMK__XA_DIGEST, digest); free(digest); return; } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/common/xml_element.c b/lib/common/xml_element.c index b4da750266..e6e4ae532b 100644 --- a/lib/common/xml_element.c +++ b/lib/common/xml_element.c @@ -1,1813 +1,1835 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // va_start(), etc. #include // uint32_t #include // NULL, etc. #include // free(), etc. #include // strchr(), etc. #include // time_t, etc. #include // xmlNode, etc. #include // xmlValidateNameValue() #include // xmlChar #include #include // crm_xml_add(), etc. #include // pcmk_rc_ok, etc. #include #include "crmcommon_private.h" /*! * \internal * \brief Find first XML child element matching given criteria * * \param[in] parent XML element to search (can be \c NULL) * \param[in] node_name If not \c NULL, only match children of this type * \param[in] attr_n If not \c NULL, only match children with an attribute * of this name. * \param[in] attr_v If \p attr_n and this are not NULL, only match children * with an attribute named \p attr_n and this value * * \return Matching XML child element, or \c NULL if none found */ xmlNode * pcmk__xe_first_child(const xmlNode *parent, const char *node_name, const char *attr_n, const char *attr_v) { xmlNode *child = NULL; CRM_CHECK((attr_v == NULL) || (attr_n != NULL), return NULL); if (parent == NULL) { return NULL; } child = parent->children; while ((child != NULL) && (child->type != XML_ELEMENT_NODE)) { child = child->next; } for (; child != NULL; child = pcmk__xe_next(child, NULL)) { const char *value = NULL; if ((node_name != NULL) && !pcmk__xe_is(child, node_name)) { // Node name mismatch continue; } if (attr_n == NULL) { // No attribute match needed return child; } value = pcmk__xe_get(child, attr_n); if ((attr_v == NULL) && (value != NULL)) { // attr_v == NULL: Attribute attr_n must be set (to any value) return child; } if ((attr_v != NULL) && (pcmk__str_eq(value, attr_v, pcmk__str_none))) { // attr_v != NULL: Attribute attr_n must be set to value attr_v return child; } } if (attr_n == NULL) { crm_trace("%s XML has no child element of %s type", (const char *) parent->name, pcmk__s(node_name, "any")); } else { crm_trace("%s XML has no child element of %s type with %s='%s'", (const char *) parent->name, pcmk__s(node_name, "any"), attr_n, attr_v); } return NULL; } /*! * \internal * \brief Return next sibling element of an XML element * * \param[in] xml XML element to check * \param[in] element_name If not NULL, get next sibling with this element name * * \return Next desired sibling of \p xml (or NULL if none) */ xmlNode * pcmk__xe_next(const xmlNode *xml, const char *element_name) { for (xmlNode *next = (xml == NULL)? NULL : xml->next; next != NULL; next = next->next) { if ((next->type == XML_ELEMENT_NODE) && ((element_name == NULL) || pcmk__xe_is(next, element_name))) { return next; } } return NULL; } /*! * \internal * \brief Parse an integer score from an XML attribute * * \param[in] xml XML element with attribute to parse * \param[in] name Name of attribute to parse * \param[out] score Where to store parsed score (can be NULL to * just validate) * \param[in] default_score What to return if the attribute value is not * present or invalid * * \return Standard Pacemaker return code */ int pcmk__xe_get_score(const xmlNode *xml, const char *name, int *score, int default_score) { const char *value = NULL; CRM_CHECK((xml != NULL) && (name != NULL), return EINVAL); value = pcmk__xe_get(xml, name); return pcmk_parse_score(value, score, default_score); } /*! * \internal * \brief Set an XML attribute, expanding \c ++ and \c += where appropriate * * If \p target already has an attribute named \p name set to an integer value * and \p value is an addition assignment expression on \p name, then expand * \p value to an integer and set attribute \p name to the expanded value in * \p target. * * Otherwise, set attribute \p name on \p target using the literal \p value. * * The original attribute value in \p target and the number in an assignment * expression in \p value are parsed and added as scores (that is, their values * are capped at \c INFINITY and \c -INFINITY). For more details, refer to * \c pcmk_parse_score(). * * For example, suppose \p target has an attribute named \c "X" with value * \c "5", and that \p name is \c "X". * * If \p value is \c "X++", the new value of \c "X" in \p target is \c "6". * * If \p value is \c "X+=3", the new value of \c "X" in \p target is \c "8". * * If \p value is \c "val", the new value of \c "X" in \p target is \c "val". * * If \p value is \c "Y++", the new value of \c "X" in \p target is \c "Y++". * * \param[in,out] target XML node whose attribute to set * \param[in] name Name of the attribute to set * \param[in] value New value of attribute to set (if NULL, initial value * will be left unchanged) * * \return Standard Pacemaker return code (specifically, \c EINVAL on invalid * argument, or \c pcmk_rc_ok otherwise) */ int pcmk__xe_set_score(xmlNode *target, const char *name, const char *value) { const char *old_value = NULL; CRM_CHECK((target != NULL) && (name != NULL), return EINVAL); if (value == NULL) { // @TODO Maybe instead delete the attribute or set it to 0 return pcmk_rc_ok; } old_value = pcmk__xe_get(target, name); // If no previous value, skip to default case and set the value unexpanded. if (old_value != NULL) { const char *n = name; const char *v = value; // Stop at first character that differs between name and value for (; (*n == *v) && (*n != '\0'); n++, v++); // If value begins with name followed by a "++" or "+=" if ((*n == '\0') && (*v++ == '+') && ((*v == '+') || (*v == '='))) { int add = 1; int old_value_i = 0; int rc = pcmk_rc_ok; // If we're expanding ourselves, no previous value was set; use 0 if (old_value != value) { rc = pcmk_parse_score(old_value, &old_value_i, 0); if (rc != pcmk_rc_ok) { // @TODO This is inconsistent with old_value==NULL crm_trace("Using 0 before incrementing %s because '%s' " "is not a score", name, old_value); } } /* value="X++": new value of X is old_value + 1 * value="X+=Y": new value of X is old_value + Y (for some number Y) */ if (*v != '+') { rc = pcmk_parse_score(++v, &add, 0); if (rc != pcmk_rc_ok) { // @TODO We should probably skip expansion instead crm_trace("Not incrementing %s because '%s' does not have " "a valid increment", name, value); } } - crm_xml_add_int(target, name, pcmk__add_scores(old_value_i, add)); + pcmk__xe_set_int(target, name, pcmk__add_scores(old_value_i, add)); return pcmk_rc_ok; } } // Default case: set the attribute unexpanded (with value treated literally) if (old_value != value) { crm_xml_add(target, name, value); } return pcmk_rc_ok; } /*! * \internal * \brief Copy XML attributes from a source element to a target element * * This is similar to \c xmlCopyPropList() except that attributes are marked * as dirty for change tracking purposes. * * \param[in,out] target XML element to receive copied attributes from \p src * \param[in] src XML element whose attributes to copy to \p target * \param[in] flags Group of enum pcmk__xa_flags * * \return Standard Pacemaker return code */ int pcmk__xe_copy_attrs(xmlNode *target, const xmlNode *src, uint32_t flags) { CRM_CHECK((src != NULL) && (target != NULL), return EINVAL); for (xmlAttr *attr = pcmk__xe_first_attr(src); attr != NULL; attr = attr->next) { const char *name = (const char *) attr->name; const char *value = pcmk__xml_attr_value(attr); if (pcmk_is_set(flags, pcmk__xaf_no_overwrite) && (pcmk__xe_get(target, name) != NULL)) { continue; } if (pcmk_is_set(flags, pcmk__xaf_score_update)) { pcmk__xe_set_score(target, name, value); } else { crm_xml_add(target, name, value); } } return pcmk_rc_ok; } /*! * \internal * \brief Compare two XML attributes by name * * \param[in] a First XML attribute to compare * \param[in] b Second XML attribute to compare * * \retval negative \c a->name is \c NULL or comes before \c b->name * lexicographically * \retval 0 \c a->name and \c b->name are equal * \retval positive \c b->name is \c NULL or comes before \c a->name * lexicographically */ static gint compare_xml_attr(gconstpointer a, gconstpointer b) { const xmlAttr *attr_a = a; const xmlAttr *attr_b = b; return pcmk__strcmp((const char *) attr_a->name, (const char *) attr_b->name, pcmk__str_none); } /*! * \internal * \brief Sort an XML element's attributes by name * * This does not consider ACLs and does not mark the attributes as deleted or * dirty. Upon return, all attributes still exist and are set to the same values * as before the call. The only thing that may change is the order of the * attribute list. * * \param[in,out] xml XML element whose attributes to sort */ void pcmk__xe_sort_attrs(xmlNode *xml) { GSList *attr_list = NULL; for (xmlAttr *iter = pcmk__xe_first_attr(xml); iter != NULL; iter = iter->next) { attr_list = g_slist_prepend(attr_list, iter); } attr_list = g_slist_sort(attr_list, compare_xml_attr); for (GSList *iter = attr_list; iter != NULL; iter = iter->next) { xmlNode *attr = iter->data; xmlUnlinkNode(attr); xmlAddChild(xml, attr); } g_slist_free(attr_list); } /*! * \internal * \brief Remove a named attribute from an XML element * * \param[in,out] element XML element to remove an attribute from * \param[in] name Name of attribute to remove */ void pcmk__xe_remove_attr(xmlNode *element, const char *name) { if (name != NULL) { pcmk__xa_remove(xmlHasProp(element, (const xmlChar *) name), false); } } /*! * \internal * \brief Remove a named attribute from an XML element * * This is a wrapper for \c pcmk__xe_remove_attr() for use with * \c pcmk__xml_tree_foreach(). * * \param[in,out] xml XML element to remove an attribute from * \param[in] user_data Name of attribute to remove * * \return \c true (to continue traversing the tree) * * \note This is compatible with \c pcmk__xml_tree_foreach(). */ bool pcmk__xe_remove_attr_cb(xmlNode *xml, void *user_data) { const char *name = user_data; pcmk__xe_remove_attr(xml, name); return true; } /*! * \internal * \brief Remove an XML element's attributes that match some criteria * * \param[in,out] element XML element to modify * \param[in] force If \c true, remove matching attributes immediately, * ignoring ACLs and change tracking * \param[in] match If not NULL, only remove attributes for which * this function returns true * \param[in,out] user_data Data to pass to \p match */ void pcmk__xe_remove_matching_attrs(xmlNode *element, bool force, bool (*match)(xmlAttrPtr, void *), void *user_data) { xmlAttrPtr next = NULL; for (xmlAttrPtr a = pcmk__xe_first_attr(element); a != NULL; a = next) { next = a->next; // Grab now because attribute might get removed if ((match == NULL) || match(a, user_data)) { if (pcmk__xa_remove(a, force) != pcmk_rc_ok) { return; } } } } /*! * \internal * \brief Create a new XML element under a given parent * * \param[in,out] parent XML element that will be the new element's parent * (\c NULL to create a new XML document with the new * node as root) * \param[in] name Name of new element * * \return Newly created XML element (guaranteed not to be \c NULL) */ xmlNode * pcmk__xe_create(xmlNode *parent, const char *name) { xmlNode *node = NULL; pcmk__assert(!pcmk__str_empty(name)); if (parent == NULL) { xmlDoc *doc = pcmk__xml_new_doc(); node = xmlNewDocRawNode(doc, NULL, (const xmlChar *) name, NULL); pcmk__mem_assert(node); xmlDocSetRootElement(doc, node); } else { node = xmlNewChild(parent, NULL, (const xmlChar *) name, NULL); pcmk__mem_assert(node); } pcmk__xml_new_private_data(node); return node; } /*! * \internal * \brief Set a formatted string as an XML node's content * * \param[in,out] node Node whose content to set * \param[in] format printf(3)-style format string * \param[in] ... Arguments for \p format * * \note This function escapes special characters. \c xmlNodeSetContent() does * not. */ G_GNUC_PRINTF(2, 3) void pcmk__xe_set_content(xmlNode *node, const char *format, ...) { if (node != NULL) { const char *content = NULL; char *buf = NULL; /* xmlNodeSetContent() frees node->children and replaces it with new * text. If this function is called for a node that already has a non- * text child, it's a bug. */ CRM_CHECK((node->children == NULL) || (node->children->type == XML_TEXT_NODE), return); if (strchr(format, '%') == NULL) { // Nothing to format content = format; } else { va_list ap; va_start(ap, format); if (pcmk__str_eq(format, "%s", pcmk__str_none)) { // No need to make a copy content = va_arg(ap, const char *); } else { pcmk__assert(vasprintf(&buf, format, ap) >= 0); content = buf; } va_end(ap); } xmlNodeSetContent(node, (const xmlChar *) content); free(buf); } } /*! * \internal * \brief Set a formatted string as an XML element's ID * * If the formatted string would not be a valid ID, it's first sanitized by * \c pcmk__xml_sanitize_id(). * * \param[in,out] node Node whose ID to set * \param[in] format printf(3)-style format string * \param[in] ... Arguments for \p format */ G_GNUC_PRINTF(2, 3) void pcmk__xe_set_id(xmlNode *node, const char *format, ...) { char *id = NULL; va_list ap; pcmk__assert(!pcmk__str_empty(format)); if (node == NULL) { return; } va_start(ap, format); pcmk__assert(vasprintf(&id, format, ap) >= 0); va_end(ap); if (!xmlValidateNameValue((const xmlChar *) id)) { pcmk__xml_sanitize_id(id); } crm_xml_add(node, PCMK_XA_ID, id); free(id); } /*! * \internal * \brief Add a "last written" attribute to an XML element, set to current time * * \param[in,out] xe XML element to add attribute to * * \return Value that was set, or NULL on error */ const char * pcmk__xe_add_last_written(xmlNode *xe) { char *now_s = pcmk__epoch2str(NULL, 0); const char *result = NULL; result = crm_xml_add(xe, PCMK_XA_CIB_LAST_WRITTEN, pcmk__s(now_s, "Could not determine current time")); free(now_s); return result; } /*! * \internal * \brief Merge one XML tree into another * * Here, "merge" means: * 1. Copy attribute values from \p update to the target, overwriting in case of * conflict. * 2. Descend through \p update and the target in parallel. At each level, for * each child of \p update, look for a matching child of the target. * a. For each child, if a match is found, go to step 1, recursively merging * the child of \p update into the child of the target. * b. Otherwise, copy the child of \p update as a child of the target. * * A match is defined as the first child of the same type within the target, * with: * * the \c PCMK_XA_ID attribute matching, if set in \p update; otherwise, * * the \c PCMK_XA_ID_REF attribute matching, if set in \p update * * This function does not delete any elements or attributes from the target. It * may add elements or overwrite attributes, as described above. * * \param[in,out] parent If \p target is NULL and this is not, add or update * child of this XML node that matches \p update * \param[in,out] target If not NULL, update this XML * \param[in] update Make the desired XML match this (must not be \c NULL) * \param[in] flags Group of enum pcmk__xa_flags * * \note At least one of \p parent and \p target must be non-NULL. * \note This function is recursive. For the top-level call, \p parent is * \c NULL and \p target is not \c NULL. For recursive calls, \p target is * \c NULL and \p parent is not \c NULL. */ static void update_xe(xmlNode *parent, xmlNode *target, xmlNode *update, uint32_t flags) { // @TODO Try to refactor further, possibly using pcmk__xml_tree_foreach() const char *update_name = NULL; const char *update_id_attr = NULL; const char *update_id_val = NULL; char *trace_s = NULL; crm_log_xml_trace(update, "update"); crm_log_xml_trace(target, "target"); CRM_CHECK(update != NULL, goto done); if (update->type == XML_COMMENT_NODE) { pcmk__xc_update(parent, target, update); goto done; } update_name = (const char *) update->name; CRM_CHECK(update_name != NULL, goto done); CRM_CHECK((target != NULL) || (parent != NULL), goto done); update_id_val = pcmk__xe_id(update); if (update_id_val != NULL) { update_id_attr = PCMK_XA_ID; } else { update_id_val = pcmk__xe_get(update, PCMK_XA_ID_REF); if (update_id_val != NULL) { update_id_attr = PCMK_XA_ID_REF; } } pcmk__if_tracing( { if (update_id_attr != NULL) { trace_s = crm_strdup_printf("<%s %s=%s/>", update_name, update_id_attr, update_id_val); } else { trace_s = crm_strdup_printf("<%s/>", update_name); } }, {} ); if (target == NULL) { // Recursive call target = pcmk__xe_first_child(parent, update_name, update_id_attr, update_id_val); } if (target == NULL) { // Recursive call with no existing matching child target = pcmk__xe_create(parent, update_name); crm_trace("Added %s", pcmk__s(trace_s, update_name)); } else { // Either recursive call with match, or top-level call crm_trace("Found node %s to update", pcmk__s(trace_s, update_name)); } CRM_CHECK(pcmk__xe_is(target, (const char *) update->name), return); pcmk__xe_copy_attrs(target, update, flags); for (xmlNode *child = pcmk__xml_first_child(update); child != NULL; child = pcmk__xml_next(child)) { crm_trace("Updating child of %s", pcmk__s(trace_s, update_name)); update_xe(target, NULL, child, flags); } crm_trace("Finished with %s", pcmk__s(trace_s, update_name)); done: free(trace_s); } /*! * \internal * \brief Delete an XML subtree if it matches a search element * * A match is defined as follows: * * \p xml and \p user_data are both element nodes of the same type. * * If \p user_data has attributes set, \p xml has those attributes set to the * same values. (\p xml may have additional attributes set to arbitrary * values.) * * \param[in,out] xml XML subtree to delete upon match * \param[in] user_data Search element * * \return \c true to continue traversing the tree, or \c false to stop (because * \p xml was deleted) * * \note This is compatible with \c pcmk__xml_tree_foreach(). */ static bool delete_xe_if_matching(xmlNode *xml, void *user_data) { xmlNode *search = user_data; if (!pcmk__xe_is(search, (const char *) xml->name)) { // No match: either not both elements, or different element types return true; } for (const xmlAttr *attr = pcmk__xe_first_attr(search); attr != NULL; attr = attr->next) { const char *search_val = pcmk__xml_attr_value(attr); const char *xml_val = pcmk__xe_get(xml, (const char *) attr->name); if (!pcmk__str_eq(search_val, xml_val, pcmk__str_casei)) { // No match: an attr in xml doesn't match the attr in search return true; } } crm_log_xml_trace(xml, "delete-match"); crm_log_xml_trace(search, "delete-search"); pcmk__xml_free(xml); // Found a match and deleted it; stop traversing tree return false; } /*! * \internal * \brief Search an XML tree depth-first and delete the first matching element * * This function does not attempt to match the tree root (\p xml). * * A match with a node \c node is defined as follows: * * \c node and \p search are both element nodes of the same type. * * If \p search has attributes set, \c node has those attributes set to the * same values. (\c node may have additional attributes set to arbitrary * values.) * * \param[in,out] xml XML subtree to search * \param[in] search Element to match against * * \return Standard Pacemaker return code (specifically, \c pcmk_rc_ok on * successful deletion and an error code otherwise) */ int pcmk__xe_delete_match(xmlNode *xml, xmlNode *search) { // See @COMPAT comment in pcmk__xe_replace_match() CRM_CHECK((xml != NULL) && (search != NULL), return EINVAL); for (xml = pcmk__xe_first_child(xml, NULL, NULL, NULL); xml != NULL; xml = pcmk__xe_next(xml, NULL)) { if (!pcmk__xml_tree_foreach(xml, delete_xe_if_matching, search)) { // Found and deleted an element return pcmk_rc_ok; } } // No match found in this subtree return ENXIO; } /*! * \internal * \brief Replace one XML node with a copy of another XML node * * This function handles change tracking and applies ACLs. * * \param[in,out] old XML node to replace * \param[in] new XML node to copy as replacement for \p old * * \note This frees \p old. */ static void replace_node(xmlNode *old, xmlNode *new) { // Pass old for its doc; it won't remain the parent of new new = pcmk__xml_copy(old, new); old = xmlReplaceNode(old, new); // old == NULL means memory allocation error pcmk__assert(old != NULL); // May be unnecessary but avoids slight changes to some test outputs pcmk__xml_tree_foreach(new, pcmk__xml_reset_node_flags, NULL); if (pcmk__xml_doc_all_flags_set(new->doc, pcmk__xf_tracking)) { // Replaced sections may have included relevant ACLs pcmk__apply_acl(new); } pcmk__xml_mark_changes(old, new); pcmk__xml_free_node(old); } /*! * \internal * \brief Replace one XML subtree with a copy of another if the two match * * A match is defined as follows: * * \p xml and \p user_data are both element nodes of the same type. * * If \p user_data has the \c PCMK_XA_ID attribute set, then \p xml has * \c PCMK_XA_ID set to the same value. * * \param[in,out] xml XML subtree to replace with \p user_data upon match * \param[in] user_data XML to replace \p xml with a copy of upon match * * \return \c true to continue traversing the tree, or \c false to stop (because * \p xml was replaced by \p user_data) * * \note This is compatible with \c pcmk__xml_tree_foreach(). */ static bool replace_xe_if_matching(xmlNode *xml, void *user_data) { xmlNode *replace = user_data; const char *xml_id = NULL; const char *replace_id = NULL; xml_id = pcmk__xe_id(xml); replace_id = pcmk__xe_id(replace); if (!pcmk__xe_is(replace, (const char *) xml->name)) { // No match: either not both elements, or different element types return true; } if ((replace_id != NULL) && !pcmk__str_eq(replace_id, xml_id, pcmk__str_none)) { // No match: ID was provided in replace and doesn't match xml's ID return true; } crm_log_xml_trace(xml, "replace-match"); crm_log_xml_trace(replace, "replace-with"); replace_node(xml, replace); // Found a match and replaced it; stop traversing tree return false; } /*! * \internal * \brief Search an XML tree depth-first and replace the first matching element * * This function does not attempt to match the tree root (\p xml). * * A match with a node \c node is defined as follows: * * \c node and \p replace are both element nodes of the same type. * * If \p replace has the \c PCMK_XA_ID attribute set, then \c node has * \c PCMK_XA_ID set to the same value. * * \param[in,out] xml XML tree to search * \param[in] replace XML to replace a matching element with a copy of * * \return Standard Pacemaker return code (specifically, \c pcmk_rc_ok on * successful replacement and an error code otherwise) */ int pcmk__xe_replace_match(xmlNode *xml, xmlNode *replace) { /* @COMPAT Some of this behavior (like not matching the tree root, which is * allowed by pcmk__xe_update_match()) is questionable for general use but * required for backward compatibility by cib_process_replace() and * cib_process_delete(). Behavior can change at a major version release if * desired. */ CRM_CHECK((xml != NULL) && (replace != NULL), return EINVAL); for (xml = pcmk__xe_first_child(xml, NULL, NULL, NULL); xml != NULL; xml = pcmk__xe_next(xml, NULL)) { if (!pcmk__xml_tree_foreach(xml, replace_xe_if_matching, replace)) { // Found and replaced an element return pcmk_rc_ok; } } // No match found in this subtree return ENXIO; } //! User data for \c update_xe_if_matching() struct update_data { xmlNode *update; //!< Update source uint32_t flags; //!< Group of enum pcmk__xa_flags }; /*! * \internal * \brief Update one XML subtree with another if the two match * * "Update" means to merge a source subtree into a target subtree (see * \c update_xe()). * * A match is defined as follows: * * \p xml and \p user_data->update are both element nodes of the same type. * * \p xml and \p user_data->update have the same \c PCMK_XA_ID attribute * value, or \c PCMK_XA_ID is unset in both * * \param[in,out] xml XML subtree to update with \p user_data->update * upon match * \param[in] user_data struct update_data object * * \return \c true to continue traversing the tree, or \c false to stop (because * \p xml was updated by \p user_data->update) * * \note This is compatible with \c pcmk__xml_tree_foreach(). */ static bool update_xe_if_matching(xmlNode *xml, void *user_data) { struct update_data *data = user_data; xmlNode *update = data->update; if (!pcmk__xe_is(update, (const char *) xml->name)) { // No match: either not both elements, or different element types return true; } if (!pcmk__str_eq(pcmk__xe_id(xml), pcmk__xe_id(update), pcmk__str_none)) { // No match: ID mismatch return true; } crm_log_xml_trace(xml, "update-match"); crm_log_xml_trace(update, "update-with"); update_xe(NULL, xml, update, data->flags); // Found a match and replaced it; stop traversing tree return false; } /*! * \internal * \brief Search an XML tree depth-first and update the first matching element * * "Update" means to merge a source subtree into a target subtree (see * \c update_xe()). * * A match with a node \c node is defined as follows: * * \c node and \p update are both element nodes of the same type. * * \c node and \p update have the same \c PCMK_XA_ID attribute value, or * \c PCMK_XA_ID is unset in both * * \param[in,out] xml XML tree to search * \param[in] update XML to update a matching element with * \param[in] flags Group of enum pcmk__xa_flags * * \return Standard Pacemaker return code (specifically, \c pcmk_rc_ok on * successful update and an error code otherwise) */ int pcmk__xe_update_match(xmlNode *xml, xmlNode *update, uint32_t flags) { /* @COMPAT In pcmk__xe_delete_match() and pcmk__xe_replace_match(), we * compare IDs only if the equivalent of the update argument has an ID. * Here, we're stricter: we consider it a mismatch if only one element has * an ID attribute, or if both elements have IDs but they don't match. * * Perhaps we should align the behavior at a major version release. */ struct update_data data = { .update = update, .flags = flags, }; CRM_CHECK((xml != NULL) && (update != NULL), return EINVAL); if (!pcmk__xml_tree_foreach(xml, update_xe_if_matching, &data)) { // Found and updated an element return pcmk_rc_ok; } // No match found in this subtree return ENXIO; } void pcmk__xe_set_propv(xmlNodePtr node, va_list pairs) { while (true) { const char *name, *value; name = va_arg(pairs, const char *); if (name == NULL) { return; } value = va_arg(pairs, const char *); if (value != NULL) { crm_xml_add(node, name, value); } } } void pcmk__xe_set_props(xmlNodePtr node, ...) { va_list pairs; va_start(pairs, node); pcmk__xe_set_propv(node, pairs); va_end(pairs); } int pcmk__xe_foreach_child(xmlNode *xml, const char *child_element_name, int (*handler)(xmlNode *xml, void *userdata), void *userdata) { xmlNode *children = (xml? xml->children : NULL); pcmk__assert(handler != NULL); for (xmlNode *node = children; node != NULL; node = node->next) { if ((node->type == XML_ELEMENT_NODE) && ((child_element_name == NULL) || pcmk__xe_is(node, child_element_name))) { int rc = handler(node, userdata); if (rc != pcmk_rc_ok) { return rc; } } } return pcmk_rc_ok; } // XML attribute handling /*! * \brief Create an XML attribute with specified name and value * * \param[in,out] node XML node to modify * \param[in] name Attribute name to set * \param[in] value Attribute value to set * * \return New value on success, \c NULL otherwise * \note This does nothing if node, name, or value are \c NULL or empty. */ const char * crm_xml_add(xmlNode *node, const char *name, const char *value) { // @TODO Replace with internal function that returns the new attribute bool dirty = FALSE; xmlAttr *attr = NULL; CRM_CHECK(node != NULL, return NULL); CRM_CHECK(name != NULL, return NULL); if (value == NULL) { return NULL; } if (pcmk__xml_doc_all_flags_set(node->doc, pcmk__xf_tracking)) { const char *old = pcmk__xe_get(node, name); if (old == NULL || value == NULL || strcmp(old, value) != 0) { dirty = TRUE; } } if (dirty && (pcmk__check_acl(node, name, pcmk__xf_acl_create) == FALSE)) { crm_trace("Cannot add %s=%s to %s", name, value, node->name); return NULL; } attr = xmlSetProp(node, (const xmlChar *) name, (const xmlChar *) value); /* If the attribute already exists, this does nothing. Attribute values * don't get private data. */ pcmk__xml_new_private_data((xmlNode *) attr); if (dirty) { pcmk__mark_xml_attr_dirty(attr); } CRM_CHECK(attr && attr->children && attr->children->content, return NULL); return (char *)attr->children->content; } /*! * \brief Create an XML attribute with specified name and integer value * * This is like \c crm_xml_add() but taking an integer value. * * \param[in,out] node XML node to modify * \param[in] name Attribute name to set * \param[in] value Attribute value to set * * \return New value as string on success, \c NULL otherwise * \note This does nothing if node or name are \c NULL or empty. */ const char * crm_xml_add_int(xmlNode *node, const char *name, int value) { char *number = pcmk__itoa(value); const char *added = crm_xml_add(node, name, number); free(number); return added; } /*! * \internal * \brief Retrieve the value of an XML attribute * * \param[in] xml XML element whose attribute to get * \param[in] attr_name Attribute name * * \return Value of specified attribute (may be \c NULL) */ const char * pcmk__xe_get(const xmlNode *xml, const char *attr_name) { xmlAttr *attr = NULL; CRM_CHECK((xml != NULL) && (attr_name != NULL), return NULL); attr = xmlHasProp(xml, (const xmlChar *) attr_name); if ((attr == NULL) || (attr->children == NULL)) { return NULL; } return (const char *) attr->children->content; } /*! * \internal * \brief Retrieve a flag group from an XML attribute value * * This is like \c pcmk__xe_get() but returns the value as a \c uint32_t. * * \param[in] xml XML node to check * \param[in] name Attribute name to check (must not be NULL) * \param[out] dest Where to store flags (may be NULL to just * validate type) * \param[in] default_value What to use for missing or invalid value * * \return Standard Pacemaker return code */ int pcmk__xe_get_flags(const xmlNode *xml, const char *name, uint32_t *dest, uint32_t default_value) { const char *value = NULL; long long value_ll = 0LL; int rc = pcmk_rc_ok; if (dest != NULL) { *dest = default_value; } if (name == NULL) { return EINVAL; } if (xml == NULL) { return pcmk_rc_ok; } value = pcmk__xe_get(xml, name); if (value == NULL) { return pcmk_rc_ok; } rc = pcmk__scan_ll(value, &value_ll, default_value); if ((value_ll < 0) || (value_ll > UINT32_MAX)) { value_ll = default_value; if (rc == pcmk_rc_ok) { rc = pcmk_rc_bad_input; } } if (dest != NULL) { *dest = (uint32_t) value_ll; } return rc; } /*! * \internal * \brief Retrieve a \c guint value from an XML attribute * * This is like \c pcmk__xe_get() but returns the value as a \c guint. * * \param[in] xml XML element whose attribute to get * \param[in] attr Attribute name * \param[out] dest Where to store attribute value (unchanged on error) * * \return Standard Pacemaker return code */ int pcmk__xe_get_guint(const xmlNode *xml, const char *attr, guint *dest) { long long value_ll = 0; int rc = pcmk_rc_ok; CRM_CHECK((xml != NULL) && (attr != NULL) && (dest != NULL), return EINVAL); rc = pcmk__xe_get_ll(xml, attr, &value_ll); if (rc != pcmk_rc_ok) { return rc; } if ((value_ll < 0) || (value_ll > G_MAXUINT)) { return ERANGE; } *dest = (guint) value_ll; return pcmk_rc_ok; } /*! * \internal * \brief Set an XML attribute using a \c guint value * * This is like \c crm_xml_add() but takes a \c guint. * * \param[in,out] xml XML node to modify * \param[in] attr Attribute name * \param[in] value Attribute value to set */ void pcmk__xe_set_guint(xmlNode *xml, const char *attr, guint value) { char *value_s = NULL; CRM_CHECK((xml != NULL) && (attr != NULL), return); value_s = crm_strdup_printf("%u", value); crm_xml_add(xml, attr, value_s); free(value_s); } /*! * \internal * \brief Retrieve an \c int value from an XML attribute * * This is like \c pcmk__xe_get() but returns the value as an \c int. * * \param[in] xml XML element whose attribute to get * \param[in] attr Attribute name * \param[out] dest Where to store element value (unchanged on error) * * \return Standard Pacemaker return code */ int pcmk__xe_get_int(const xmlNode *xml, const char *attr, int *dest) { long long value_ll = 0; int rc = pcmk_rc_ok; CRM_CHECK((xml != NULL) && (attr != NULL) && (dest != NULL), return EINVAL); rc = pcmk__xe_get_ll(xml, attr, &value_ll); if (rc != pcmk_rc_ok) { return rc; } if ((value_ll < INT_MIN) || (value_ll > INT_MAX)) { return ERANGE; } *dest = (int) value_ll; return pcmk_rc_ok; } +/*! + * \internal + * \brief Set an XML attribute using an \c int value. + * + * This is like \c crm_xml_add() but takes an \c int. + * + * \param[in,out] xml XML node to modify + * \param[in] attr Attribute name + * \param[in] value Attribute value to set + */ +void +pcmk__xe_set_int(xmlNode *xml, const char *attr, int value) +{ + char *value_s = NULL; + + CRM_CHECK((xml != NULL) && (attr != NULL), return); + + value_s = pcmk__itoa(value); + crm_xml_add(xml, attr, value_s); + free(value_s); +} + /*! * \internal * \brief Retrieve a long long value from an XML attribute * * This is like \c pcmk__xe_get() but returns the value as a long long. * * \param[in] xml XML element whose attribute to get * \param[in] attr Attribute name * \param[out] dest Where to store element value (unchanged on error) * * \return Standard Pacemaker return code */ int pcmk__xe_get_ll(const xmlNode *xml, const char *attr, long long *dest) { const char *value = NULL; long long value_ll = 0; int rc = pcmk_rc_ok; CRM_CHECK((xml != NULL) && (attr != NULL) && (dest != NULL), return EINVAL); value = pcmk__xe_get(xml, attr); if (value == NULL) { return ENXIO; } rc = pcmk__scan_ll(value, &value_ll, PCMK__PARSE_INT_DEFAULT); if (rc != pcmk_rc_ok) { return rc; } *dest = value_ll; return pcmk_rc_ok; } /*! * \internal * \brief Set an XML attribute using a long long value * * This is like \c crm_xml_add() but takes a long long. * * \param[in,out] xml XML node to modify * \param[in] attr Attribute name * \param[in] value Attribute value to set * * \return Standard Pacemaker return code */ int pcmk__xe_set_ll(xmlNode *xml, const char *attr, long long value) { char *value_s = NULL; int rc = pcmk_rc_ok; CRM_CHECK((xml != NULL) && (attr != NULL), return EINVAL); value_s = crm_strdup_printf("%lld", value); errno = 0; if (crm_xml_add(xml, attr, value_s) == NULL) { // ACL denial rc = (errno != 0)? errno : EACCES; } free(value_s); return rc; } /*! * \internal * \brief Retrieve a \c time_t value from an XML attribute * * This is like \c pcmk__xe_get() but returns the value as a \c time_t. * * \param[in] xml XML element whose attribute to get * \param[in] attr Attribute name * \param[out] dest Where to store attribute value (unchanged on error) * * \return Standard Pacemaker return code */ int pcmk__xe_get_time(const xmlNode *xml, const char *attr, time_t *dest) { long long value_ll = 0; int rc = pcmk_rc_ok; CRM_CHECK((xml != NULL) && (attr != NULL) && (dest != NULL), return EINVAL); rc = pcmk__xe_get_ll(xml, attr, &value_ll); if (rc != pcmk_rc_ok) { return rc; } /* We don't do any bounds checking, since there are no constants provided * for the bounds of time_t, and calculating them isn't worth the effort. If * there are XML values beyond the native sizes, there will likely be worse * problems anyway. */ *dest = (time_t) value_ll; return pcmk_rc_ok; } /*! * \internal * \brief Set an XML attribute using a \c time_t value * * This is like \c crm_xml_add() but takes a \c time_t. * * \param[in,out] xml XML element whose attribute to set * \param[in] attr Attribute name * \param[in] value Attribute value to set (in seconds) */ void pcmk__xe_set_time(xmlNode *xml, const char *attr, time_t value) { // Could be inline, but keep it underneath pcmk__xe_get_time() CRM_CHECK((xml != NULL) && (attr != NULL), return); pcmk__xe_set_ll(xml, attr, (long long) value); } /*! * \internal * \brief Retrieve the values of XML second/microsecond attributes as time * * This is like \c pcmk__xe_get() but returns the value as a * struct timeval. * * \param[in] xml XML element whose attributes to get * \param[in] sec_attr Name of XML attribute for seconds * \param[in] usec_attr Name of XML attribute for microseconds * \param[out] dest Where to store result (unchanged on error) * * \return Standard Pacemaker return code */ int pcmk__xe_get_timeval(const xmlNode *xml, const char *sec_attr, const char *usec_attr, struct timeval *dest) { long long value_ll = 0; struct timeval result = { 0, 0 }; int rc = pcmk_rc_ok; // Could allow one of sec_attr and usec_attr to be NULL in the future CRM_CHECK((xml != NULL) && (sec_attr != NULL) && (usec_attr != NULL) && (dest != NULL), return EINVAL); // No bounds checking; see comment in pcmk__xe_get_time() // Parse seconds rc = pcmk__xe_get_time(xml, sec_attr, &(result.tv_sec)); if (rc != pcmk_rc_ok) { return rc; } // Parse microseconds rc = pcmk__xe_get_ll(xml, usec_attr, &value_ll); if (rc != pcmk_rc_ok) { return rc; } result.tv_usec = (suseconds_t) value_ll; *dest = result; return pcmk_rc_ok; } /*! * \internal * \brief Set XML attribute values for seconds and microseconds * * This is like \c crm_xml_add() but takes a struct timeval *. * * \param[in,out] xml XML element whose attributes to set * \param[in] sec_attr Name of XML attribute for seconds * \param[in] usec_attr Name of XML attribute for microseconds * \param[in] value Attribute values to set * * \note This does nothing if \p value is \c NULL. */ void pcmk__xe_set_timeval(xmlNode *xml, const char *sec_attr, const char *usec_attr, const struct timeval *value) { CRM_CHECK((xml != NULL) && (sec_attr != NULL) && (usec_attr != NULL), return); if (value == NULL) { return; } if (pcmk__xe_set_ll(xml, sec_attr, (long long) value->tv_sec) != pcmk_rc_ok) { return; } /* Seconds were added successfully. Ignore any errors adding microseconds. * * It would be nice to make this atomic: revert the seconds attribute if * adding the microseconds attribute fails. That's somewhat complicated due * to change tracking: the chain of parents is already marked dirty, etc. In * practice, microseconds should succeed if seconds succeeded, unless it's * due to memory allocation failure. Nothing checks the return values of * these setter functions at time of writing, anyway. */ pcmk__xe_set_ll(xml, usec_attr, (long long) value->tv_usec); } /*! * \internal * \brief Get a date/time object from an XML attribute value * * \param[in] xml XML with attribute to parse (from CIB) * \param[in] attr Name of attribute to parse * \param[out] t Where to create date/time object * (\p *t must be NULL initially) * * \return Standard Pacemaker return code * \note The caller is responsible for freeing \p *t using crm_time_free(). */ int pcmk__xe_get_datetime(const xmlNode *xml, const char *attr, crm_time_t **t) { const char *value = NULL; if ((t == NULL) || (*t != NULL) || (xml == NULL) || (attr == NULL)) { return EINVAL; } value = pcmk__xe_get(xml, attr); if (value != NULL) { *t = crm_time_new(value); if (*t == NULL) { return pcmk_rc_unpack_error; } } return pcmk_rc_ok; } /*! * \internal * \brief Add a boolean attribute to an XML node. * * \param[in,out] node XML node to add attributes to * \param[in] name XML attribute to create * \param[in] value Value to give to the attribute */ void pcmk__xe_set_bool_attr(xmlNodePtr node, const char *name, bool value) { crm_xml_add(node, name, pcmk__btoa(value)); } /*! * \internal * \brief Extract a boolean attribute's value from an XML element, with * error checking * * \param[in] node XML node to get attribute from * \param[in] name XML attribute to get * \param[out] value Destination for the value of the attribute * * \return EINVAL if \p name or \p value are NULL, ENODATA if \p node is * NULL or the attribute does not exist, pcmk_rc_unknown_format * if the attribute is not a boolean, and pcmk_rc_ok otherwise. * * \note \p value only has any meaning if the return value is pcmk_rc_ok. */ int pcmk__xe_get_bool_attr(const xmlNode *node, const char *name, bool *value) { const char *xml_value = NULL; int ret, rc; if (node == NULL) { return ENODATA; } else if (name == NULL || value == NULL) { return EINVAL; } xml_value = pcmk__xe_get(node, name); if (xml_value == NULL) { return ENODATA; } rc = crm_str_to_boolean(xml_value, &ret); if (rc == 1) { *value = ret; return pcmk_rc_ok; } else { return pcmk_rc_bad_input; } } /*! * \internal * \brief Extract a boolean attribute's value from an XML element * * \param[in] node XML node to get attribute from * \param[in] name XML attribute to get * * \return True if the given \p name is an attribute on \p node and has * the value \c PCMK_VALUE_TRUE, False in all other cases */ bool pcmk__xe_attr_is_true(const xmlNode *node, const char *name) { bool value = false; int rc; rc = pcmk__xe_get_bool_attr(node, name, &value); return rc == pcmk_rc_ok && value == true; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include // gboolean, GSList #include // pcmk_xml_attrs2nvpairs(), etc. #include // crm_xml_sanitize_id() #include xmlNode * expand_idref(xmlNode *input, xmlNode *top) { return pcmk__xe_resolve_idref(input, top); } void crm_xml_set_id(xmlNode *xml, const char *format, ...) { va_list ap; int len = 0; char *id = NULL; /* equivalent to crm_strdup_printf() */ va_start(ap, format); len = vasprintf(&id, format, ap); va_end(ap); pcmk__assert(len > 0); crm_xml_sanitize_id(id); crm_xml_add(xml, PCMK_XA_ID, id); free(id); } xmlNode * sorted_xml(xmlNode *input, xmlNode *parent, gboolean recursive) { xmlNode *child = NULL; GSList *nvpairs = NULL; xmlNode *result = NULL; CRM_CHECK(input != NULL, return NULL); result = pcmk__xe_create(parent, (const char *) input->name); nvpairs = pcmk_xml_attrs2nvpairs(input); nvpairs = pcmk_sort_nvpairs(nvpairs); pcmk_nvpairs2xml_attrs(nvpairs, result); pcmk_free_nvpairs(nvpairs); for (child = pcmk__xe_first_child(input, NULL, NULL, NULL); child != NULL; child = pcmk__xe_next(child, NULL)) { if (recursive) { sorted_xml(child, result, recursive); } else { pcmk__xml_copy(result, child); } } return result; } const char * crm_element_value(const xmlNode *data, const char *name) { xmlAttr *attr = NULL; if (data == NULL) { crm_err("Couldn't find %s in NULL", name ? name : ""); CRM_LOG_ASSERT(data != NULL); return NULL; } else if (name == NULL) { crm_err("Couldn't find NULL in %s", data->name); return NULL; } attr = xmlHasProp(data, (const xmlChar *) name); if (!attr || !attr->children) { return NULL; } return (const char *) attr->children->content; } const char * crm_copy_xml_element(const xmlNode *obj1, xmlNode *obj2, const char *element) { const char *value = crm_element_value(obj1, element); crm_xml_add(obj2, element, value); return value; } int crm_element_value_ll(const xmlNode *data, const char *name, long long *dest) { const char *value = NULL; CRM_CHECK(dest != NULL, return -1); value = crm_element_value(data, name); if (value != NULL) { int rc = pcmk__scan_ll(value, dest, PCMK__PARSE_INT_DEFAULT); if (rc == pcmk_rc_ok) { return 0; } crm_warn("Using default for %s " "because '%s' is not a valid integer: %s", name, value, pcmk_rc_str(rc)); } return -1; } int crm_element_value_timeval(const xmlNode *xml, const char *name_sec, const char *name_usec, struct timeval *dest) { long long value_i = 0; CRM_CHECK(dest != NULL, return -EINVAL); dest->tv_sec = 0; dest->tv_usec = 0; if (xml == NULL) { return pcmk_ok; } // No bounds checking; see comment in pcmk__xe_get_time() // Parse seconds errno = 0; if (crm_element_value_ll(xml, name_sec, &value_i) < 0) { return -errno; } dest->tv_sec = (time_t) value_i; // Parse microseconds if (crm_element_value_ll(xml, name_usec, &value_i) < 0) { return -errno; } dest->tv_usec = (suseconds_t) value_i; return pcmk_ok; } int crm_element_value_epoch(const xmlNode *xml, const char *name, time_t *dest) { long long value_ll = 0; if (crm_element_value_ll(xml, name, &value_ll) < 0) { return -1; } // No bounds checking; see comment in pcmk__xe_get_time() *dest = (time_t) value_ll; return pcmk_ok; } int crm_element_value_ms(const xmlNode *data, const char *name, guint *dest) { const char *value = NULL; long long value_ll; int rc = pcmk_rc_ok; CRM_CHECK(dest != NULL, return -1); *dest = 0; value = crm_element_value(data, name); rc = pcmk__scan_ll(value, &value_ll, 0LL); if (rc != pcmk_rc_ok) { crm_warn("Using default for %s " "because '%s' is not valid milliseconds: %s", name, value, pcmk_rc_str(rc)); return -1; } if ((value_ll < 0) || (value_ll > G_MAXUINT)) { crm_warn("Using default for %s because '%s' is out of range", name, value); return -1; } *dest = (guint) value_ll; return pcmk_ok; } int crm_element_value_int(const xmlNode *data, const char *name, int *dest) { const char *value = NULL; CRM_CHECK(dest != NULL, return -1); value = crm_element_value(data, name); if (value) { long long value_ll; int rc = pcmk__scan_ll(value, &value_ll, 0LL); *dest = PCMK__PARSE_INT_DEFAULT; if (rc != pcmk_rc_ok) { crm_warn("Using default for %s " "because '%s' is not a valid integer: %s", name, value, pcmk_rc_str(rc)); } else if ((value_ll < INT_MIN) || (value_ll > INT_MAX)) { crm_warn("Using default for %s because '%s' is out of range", name, value); } else { *dest = (int) value_ll; return 0; } } return -1; } char * crm_element_value_copy(const xmlNode *data, const char *name) { CRM_CHECK((data != NULL) && (name != NULL), return NULL); return pcmk__str_copy(pcmk__xe_get(data, name)); } // Maximum size of null-terminated string representation of 64-bit integer // -9223372036854775808 #define LLSTRSIZE 21 const char * crm_xml_add_ll(xmlNode *xml, const char *name, long long value) { char s[LLSTRSIZE] = { '\0', }; if (snprintf(s, LLSTRSIZE, "%lld", (long long) value) == LLSTRSIZE) { return NULL; } return crm_xml_add(xml, name, s); } const char * crm_xml_add_timeval(xmlNode *xml, const char *name_sec, const char *name_usec, const struct timeval *value) { const char *added = NULL; if (xml && name_sec && value) { added = crm_xml_add_ll(xml, name_sec, (long long) value->tv_sec); if (added && name_usec) { // Any error is ignored (we successfully added seconds) crm_xml_add_ll(xml, name_usec, (long long) value->tv_usec); } } return added; } const char * crm_xml_add_ms(xmlNode *node, const char *name, guint ms) { char *number = crm_strdup_printf("%u", ms); const char *added = crm_xml_add(node, name, number); free(number); return added; } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c index b60926fe03..a3f98b7d97 100644 --- a/lib/fencing/st_actions.c +++ b/lib/fencing/st_actions.c @@ -1,730 +1,730 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include // xmlNode #include #include #include #include #include #include "fencing_private.h" struct stonith_action_s { /*! user defined data */ char *agent; char *action; GHashTable *args; int timeout; bool async; void *userdata; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); void (*fork_cb) (int pid, void *user_data); svc_action_t *svc_action; /*! internal timing information */ time_t initial_start_time; int tries; int remaining_timeout; int max_retries; int pid; pcmk__action_result_t result; }; static int internal_stonith_action_execute(stonith_action_t *action); static void log_action(stonith_action_t *action, pid_t pid); /*! * \internal * \brief Set an action's result based on services library result * * \param[in,out] action Fence action to set result for * \param[in,out] svc_action Service action to get result from */ static void set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action) { services__copy_result(svc_action, &(action->result)); pcmk__set_result_output(&(action->result), services__grab_stdout(svc_action), services__grab_stderr(svc_action)); } static void log_action(stonith_action_t *action, pid_t pid) { /* The services library has already logged the output at info or debug * level, so just raise to warning for stderr. */ if (action->result.action_stderr != NULL) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid); crm_log_output(LOG_WARNING, prefix, action->result.action_stderr); free(prefix); } } static void append_config_arg(gpointer key, gpointer value, gpointer user_data) { /* Filter out parameters handled directly by Pacemaker. * * STONITH_ATTR_ACTION_OP is added elsewhere and should never be part of the * fencing resource's parameter list. We should ignore its value if it is * configured there. */ if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei) && !pcmk_stonith_param(key) && (strstr(key, CRM_META) == NULL) && !pcmk__str_eq(key, PCMK_XA_CRM_FEATURE_SET, pcmk__str_none)) { crm_trace("Passing %s=%s with fence action", (const char *) key, (const char *) (value? value : "")); pcmk__insert_dup((GHashTable *) user_data, key, pcmk__s(value, "")); } } /*! * \internal * \brief Create a table of arguments for a fencing action * * \param[in] agent Fencing agent name * \param[in] action Name of fencing action * \param[in] target Name of target node for fencing action * \param[in] target_nodeid Node ID of target node for fencing action * \param[in] device_args Fence device parameters * \param[in] port_map Target node-to-port mapping for fence device * \param[in] host_arg Argument name for passing target * * \return Newly created hash table of arguments for fencing action */ static GHashTable * make_args(const char *agent, const char *action, const char *target, uint32_t target_nodeid, GHashTable *device_args, GHashTable *port_map, const char *host_arg) { GHashTable *arg_list = NULL; const char *value = NULL; CRM_CHECK(action != NULL, return NULL); arg_list = pcmk__strkey_table(free, free); // Add action to arguments (using an alias if requested) if (device_args) { char buffer[512]; snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action); value = g_hash_table_lookup(device_args, buffer); if (value) { crm_debug("Substituting '%s' for fence action %s targeting %s", value, action, pcmk__s(target, "no node")); action = value; } } // Tell the fence agent what action to perform pcmk__insert_dup(arg_list, STONITH_ATTR_ACTION_OP, action); /* If this is a fencing operation against another node, add more standard * arguments. */ if ((target != NULL) && (device_args != NULL)) { const char *param = NULL; /* Always pass the target's name, per * https://github.com/ClusterLabs/fence-agents/blob/main/doc/FenceAgentAPI.md */ pcmk__insert_dup(arg_list, "nodename", target); // If the target's node ID was specified, pass it, too if (target_nodeid != 0) { char *nodeid = crm_strdup_printf("%" PRIu32, target_nodeid); // cts-fencing looks for this log message crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s", nodeid, action, pcmk__s(target, "no node")); g_hash_table_insert(arg_list, strdup("nodeid"), nodeid); } // Check whether target should be specified as some other argument param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT); if (param == NULL) { // Use caller's default (likely from agent metadata) param = host_arg; } if ((param != NULL) && !pcmk__str_eq(agent, "fence_legacy", pcmk__str_none) && !pcmk__str_eq(param, PCMK_VALUE_NONE, pcmk__str_casei)) { value = g_hash_table_lookup(device_args, param); if (pcmk__str_eq(value, "dynamic", pcmk__str_casei|pcmk__str_null_matches)) { /* If the host argument is "dynamic" or not configured, * reset it to the target */ const char *alias = NULL; if (port_map) { alias = g_hash_table_lookup(port_map, target); } if (alias == NULL) { alias = target; } crm_debug("Passing %s='%s' with fence action %s targeting %s", param, alias, action, pcmk__s(target, "no node")); pcmk__insert_dup(arg_list, param, alias); } } } if (device_args) { g_hash_table_foreach(device_args, append_config_arg, arg_list); } return arg_list; } /*! * \internal * \brief Free all memory used by a stonith action * * \param[in,out] action Action to free */ void stonith__destroy_action(stonith_action_t *action) { if (action) { free(action->agent); if (action->args) { g_hash_table_destroy(action->args); } free(action->action); if (action->svc_action) { services_action_free(action->svc_action); } pcmk__reset_result(&(action->result)); free(action); } } /*! * \internal * \brief Get the result of an executed stonith action * * \param[in] action Executed action * * \return Pointer to action's result (or NULL if \p action is NULL) */ pcmk__action_result_t * stonith__action_result(stonith_action_t *action) { return (action == NULL)? NULL : &(action->result); } #define FAILURE_MAX_RETRIES 2 /*! * \internal * \brief Create a new fencing action to be executed * * \param[in] agent Fence agent to use * \param[in] action_name Fencing action to be executed * \param[in] target Name of target of fencing action (if known) * \param[in] target_nodeid Node ID of target of fencing action (if known) * \param[in] timeout_sec Timeout to be used when executing action * \param[in] device_args Parameters to pass to fence agent * \param[in] port_map Mapping of target names to device ports * \param[in] host_arg Agent parameter used to pass target name * * \return Newly created fencing action (asserts on error, never NULL) */ stonith_action_t * stonith__action_create(const char *agent, const char *action_name, const char *target, uint32_t target_nodeid, int timeout_sec, GHashTable *device_args, GHashTable *port_map, const char *host_arg) { stonith_action_t *action = pcmk__assert_alloc(1, sizeof(stonith_action_t)); action->args = make_args(agent, action_name, target, target_nodeid, device_args, port_map, host_arg); crm_debug("Preparing '%s' action targeting %s using agent %s", action_name, pcmk__s(target, "no node"), agent); action->agent = strdup(agent); action->action = strdup(action_name); action->timeout = action->remaining_timeout = timeout_sec; action->max_retries = FAILURE_MAX_RETRIES; pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, "Initialization bug in fencing library"); if (device_args) { char buffer[512]; const char *value = NULL; snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", action_name); value = g_hash_table_lookup(device_args, buffer); if (value) { action->max_retries = atoi(value); } } return action; } static gboolean update_remaining_timeout(stonith_action_t * action) { int diff = time(NULL) - action->initial_start_time; if (action->tries >= action->max_retries) { crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", action->agent, action->action, action->max_retries); action->remaining_timeout = 0; } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT) && (diff < (action->timeout * 0.7))) { /* only set remaining timeout period if there is 30% * or greater of the original timeout period left */ action->remaining_timeout = action->timeout - diff; } else { action->remaining_timeout = 0; } return action->remaining_timeout ? TRUE : FALSE; } /*! * \internal * \brief Map a fencing action result to a standard return code * * \param[in] result Fencing action result to map * * \return Standard Pacemaker return code that best corresponds to \p result */ int stonith__result2rc(const pcmk__action_result_t *result) { if (pcmk__result_ok(result)) { return pcmk_rc_ok; } switch (result->execution_status) { case PCMK_EXEC_PENDING: return EINPROGRESS; case PCMK_EXEC_CANCELLED: return ECANCELED; case PCMK_EXEC_TIMEOUT: return ETIME; case PCMK_EXEC_NOT_INSTALLED: return ENOENT; case PCMK_EXEC_NOT_SUPPORTED: return EOPNOTSUPP; case PCMK_EXEC_NOT_CONNECTED: return ENOTCONN; case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV; case PCMK_EXEC_NO_SECRETS: return EACCES; /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API * operations that don't involve executing an agent (for example, * registering devices). This allows us to use the CRM_EX_* codes in the * exit status for finer-grained responses. */ case PCMK_EXEC_INVALID: switch (result->exit_status) { case CRM_EX_INVALID_PARAM: return EINVAL; case CRM_EX_INSUFFICIENT_PRIV: return EACCES; case CRM_EX_PROTOCOL: return EPROTO; /* CRM_EX_EXPIRED is used for orphaned fencing operations left * over from a previous instance of the fencer. For API backward * compatibility, this is mapped to the previously used code for * this case, EHOSTUNREACH. */ case CRM_EX_EXPIRED: return EHOSTUNREACH; default: break; } break; default: break; } // Try to provide useful error code based on result's error output if (result->action_stderr == NULL) { return ENODATA; } else if (strcasestr(result->action_stderr, "timed out") || strcasestr(result->action_stderr, "timeout")) { return ETIME; } else if (strcasestr(result->action_stderr, "unrecognised action") || strcasestr(result->action_stderr, "unrecognized action") || strcasestr(result->action_stderr, "unsupported action")) { return EOPNOTSUPP; } // Oh well, we tried return pcmk_rc_error; } /*! * \internal * \brief Determine execution status equivalent of legacy fencer return code * * Fence action notifications, and fence action callbacks from older fencers * (<=2.1.2) in a rolling upgrade, will have only a legacy return code. Map this * to an execution status as best as possible (essentially, the inverse of * stonith__result2rc()). * * \param[in] rc Legacy return code from fencer * * \return Execution status best corresponding to \p rc */ int stonith__legacy2status(int rc) { if (rc >= 0) { return PCMK_EXEC_DONE; } switch (-rc) { case EACCES: return PCMK_EXEC_NO_SECRETS; case ECANCELED: return PCMK_EXEC_CANCELLED; case EHOSTUNREACH: return PCMK_EXEC_INVALID; case EINPROGRESS: return PCMK_EXEC_PENDING; case ENODEV: return PCMK_EXEC_NO_FENCE_DEVICE; case ENOENT: return PCMK_EXEC_NOT_INSTALLED; case ENOTCONN: return PCMK_EXEC_NOT_CONNECTED; case EOPNOTSUPP: return PCMK_EXEC_NOT_SUPPORTED; case EPROTO: return PCMK_EXEC_INVALID; case EPROTONOSUPPORT: return PCMK_EXEC_NOT_SUPPORTED; case ETIME: return PCMK_EXEC_TIMEOUT; case ETIMEDOUT: return PCMK_EXEC_TIMEOUT; default: return PCMK_EXEC_ERROR; } } /*! * \internal * \brief Add a fencing result to an XML element as attributes * * \param[in,out] xml XML element to add result to * \param[in] result Fencing result to add (assume success if NULL) */ void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result) { int exit_status = CRM_EX_OK; enum pcmk_exec_status execution_status = PCMK_EXEC_DONE; const char *exit_reason = NULL; const char *action_stdout = NULL; int rc = pcmk_ok; CRM_CHECK(xml != NULL, return); if (result != NULL) { exit_status = result->exit_status; execution_status = result->execution_status; exit_reason = result->exit_reason; action_stdout = result->action_stdout; rc = pcmk_rc2legacy(stonith__result2rc(result)); } - crm_xml_add_int(xml, PCMK__XA_OP_STATUS, (int) execution_status); - crm_xml_add_int(xml, PCMK__XA_RC_CODE, exit_status); + pcmk__xe_set_int(xml, PCMK__XA_OP_STATUS, (int) execution_status); + pcmk__xe_set_int(xml, PCMK__XA_RC_CODE, exit_status); crm_xml_add(xml, PCMK_XA_EXIT_REASON, exit_reason); crm_xml_add(xml, PCMK__XA_ST_OUTPUT, action_stdout); /* @COMPAT Peers in rolling upgrades, Pacemaker Remote nodes, and external * code that use libstonithd <=2.1.2 don't check for the full result, and * need a legacy return code instead. */ - crm_xml_add_int(xml, PCMK__XA_ST_RC, rc); + pcmk__xe_set_int(xml, PCMK__XA_ST_RC, rc); } /*! * \internal * \brief Find a fencing result beneath an XML element * * \param[in] xml XML element to search * * \return \p xml or descendant of it that contains a fencing result, else NULL */ xmlNode * stonith__find_xe_with_result(xmlNode *xml) { xmlNode *match = pcmk__xpath_find_one(xml->doc, "//*[@" PCMK__XA_RC_CODE "]", LOG_NEVER); if (match == NULL) { /* @COMPAT Peers <=2.1.2 in a rolling upgrade provide only a legacy * return code, not a full result, so check for that. */ match = pcmk__xpath_find_one(xml->doc, "//*[@" PCMK__XA_ST_RC "]", LOG_ERR); } return match; } /*! * \internal * \brief Get a fencing result from an XML element's attributes * * \param[in] xml XML element with fencing result * \param[out] result Where to store fencing result */ void stonith__xe_get_result(const xmlNode *xml, pcmk__action_result_t *result) { int exit_status = CRM_EX_OK; int execution_status = PCMK_EXEC_DONE; const char *exit_reason = NULL; char *action_stdout = NULL; CRM_CHECK((xml != NULL) && (result != NULL), return); exit_reason = pcmk__xe_get(xml, PCMK_XA_EXIT_REASON); action_stdout = pcmk__xe_get_copy(xml, PCMK__XA_ST_OUTPUT); // A result must include an exit status and execution status if ((pcmk__xe_get_int(xml, PCMK__XA_RC_CODE, &exit_status) != pcmk_rc_ok) || (pcmk__xe_get_int(xml, PCMK__XA_OP_STATUS, &execution_status) != pcmk_rc_ok)) { int rc = pcmk_ok; exit_status = CRM_EX_ERROR; /* @COMPAT Peers <=2.1.2 in rolling upgrades provide only a legacy * return code, not a full result, so check for that. */ if (pcmk__xe_get_int(xml, PCMK__XA_ST_RC, &rc) == pcmk_rc_ok) { if ((rc == pcmk_ok) || (rc == -EINPROGRESS)) { exit_status = CRM_EX_OK; } execution_status = stonith__legacy2status(rc); exit_reason = pcmk_strerror(rc); } else { execution_status = PCMK_EXEC_ERROR; exit_reason = "Fencer reply contained neither a full result " "nor a legacy return code (bug?)"; } } pcmk__set_result(result, exit_status, execution_status, exit_reason); pcmk__set_result_output(result, action_stdout, NULL); } static void stonith_action_async_done(svc_action_t *svc_action) { stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; set_result_from_svc_action(action, svc_action); svc_action->params = NULL; log_action(action, action->pid); if (!pcmk__result_ok(&(action->result)) && update_remaining_timeout(action)) { int rc = internal_stonith_action_execute(action); if (rc == pcmk_ok) { return; } } if (action->done_cb) { action->done_cb(action->pid, &(action->result), action->userdata); } action->svc_action = NULL; // don't remove our caller stonith__destroy_action(action); } static void stonith_action_async_forked(svc_action_t *svc_action) { stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; action->pid = svc_action->pid; action->svc_action = svc_action; if (action->fork_cb) { (action->fork_cb) (svc_action->pid, action->userdata); } pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); crm_trace("Child process %d performing action '%s' successfully forked", action->pid, action->action); } /*! * \internal * \brief Convert a fencing library action to a services library action * * \param[in,out] action Fencing library action to convert * * \return Services library action equivalent to \p action on success; on error, * NULL will be returned and \p action's result will be set */ static svc_action_t * stonith_action_to_svc(stonith_action_t *action) { static int stonith_sequence = 0; char *path = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s", action->agent); svc_action_t *svc_action = services_action_create_generic(path, NULL); free(path); if (svc_action->rc != PCMK_OCF_UNKNOWN) { set_result_from_svc_action(action, svc_action); services_action_free(svc_action); return NULL; } svc_action->timeout = action->remaining_timeout * 1000; svc_action->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_STONITH); svc_action->id = crm_strdup_printf("%s_%s_%dof%d", action->agent, action->action, action->tries, action->max_retries); svc_action->agent = pcmk__str_copy(action->agent); svc_action->sequence = stonith_sequence++; svc_action->params = action->args; svc_action->cb_data = (void *) action; svc_action->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", svc_action->id, svc_action->flags, SVC_ACTION_NON_BLOCKED, "SVC_ACTION_NON_BLOCKED"); return svc_action; } static int internal_stonith_action_execute(stonith_action_t * action) { int rc = pcmk_ok; int is_retry = 0; svc_action_t *svc_action = NULL; CRM_CHECK(action != NULL, return -EINVAL); if ((action->action == NULL) || (action->args == NULL) || (action->agent == NULL)) { pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR_FATAL, "Bug in fencing library"); return -EINVAL; } if (action->tries++ == 0) { // First attempt of the desired action action->initial_start_time = time(NULL); } else { // Later attempt after earlier failure crm_info("Attempt %d to execute '%s' action of agent %s " "(%ds timeout remaining)", action->tries, action->action, action->agent, action->remaining_timeout); is_retry = 1; } svc_action = stonith_action_to_svc(action); if (svc_action == NULL) { // The only possible errors are out-of-memory and too many arguments return -E2BIG; } /* keep retries from executing out of control and free previous results */ if (is_retry) { pcmk__reset_result(&(action->result)); // @TODO This should be nonblocking via timer if mainloop is used sleep(1); } if (action->async) { // We never create a recurring action, so this should always return TRUE CRM_LOG_ASSERT(services_action_async_fork_notify(svc_action, &stonith_action_async_done, &stonith_action_async_forked)); return pcmk_ok; } else if (!services_action_sync(svc_action)) { rc = -ECONNABORTED; // @TODO Update API to return more useful error } set_result_from_svc_action(action, svc_action); svc_action->params = NULL; services_action_free(svc_action); return rc; } /*! * \internal * \brief Kick off execution of an async stonith action * * \param[in,out] action Action to be executed * \param[in,out] userdata Datapointer to be passed to callbacks * \param[in] done Callback to notify action has failed/succeeded * \param[in] fork_callback Callback to notify successful fork of child * * \return pcmk_ok if ownership of action has been taken, -errno otherwise */ int stonith__execute_async(stonith_action_t * action, void *userdata, void (*done) (int pid, const pcmk__action_result_t *result, void *user_data), void (*fork_cb) (int pid, void *user_data)) { if (!action) { return -EINVAL; } action->userdata = userdata; action->done_cb = done; action->fork_cb = fork_cb; action->async = true; return internal_stonith_action_execute(action); } /*! * \internal * \brief Execute a stonith action * * \param[in,out] action Action to execute * * \return pcmk_ok on success, -errno otherwise */ int stonith__execute(stonith_action_t *action) { int rc = pcmk_ok; CRM_CHECK(action != NULL, return -EINVAL); // Keep trying until success, max retries, or timeout do { rc = internal_stonith_action_execute(action); } while ((rc != pcmk_ok) && update_remaining_timeout(action)); return rc; } diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index eec165b8ad..af581f650e 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2733 +1,2733 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include // xmlNode #include // xmlXPathObject, etc. #include #include #include #include #include #include "fencing_private.h" CRM_TRACE_INIT_DATA(stonith); // Used as stonith_t:st_private typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; int notify_refcnt; bool notify_deletes; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; // Used as stonith_event_t:opaque struct event_private { pcmk__action_result_t result; }; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); bool delete; } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); bool stonith_dispatch(stonith_t * st); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static int stonith_send_command(stonith_t *stonith, const char *op, xmlNode *data, xmlNode **output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); static int stonith_api_del_notification(stonith_t *stonith, const char *event); /*! * \brief Get agent namespace by name * * \param[in] namespace_s Name of namespace as string * * \return Namespace as enum value */ enum stonith_namespace stonith_text2namespace(const char *namespace_s) { if (pcmk__str_eq(namespace_s, "any", pcmk__str_null_matches)) { return st_namespace_any; } else if (!strcmp(namespace_s, "redhat") || !strcmp(namespace_s, "stonith-ng")) { return st_namespace_rhcs; } else if (!strcmp(namespace_s, "internal")) { return st_namespace_internal; } else if (!strcmp(namespace_s, "heartbeat")) { return st_namespace_lha; } return st_namespace_invalid; } /*! * \brief Get agent namespace name * * \param[in] namespace Namespace as enum value * * \return Namespace name as string */ const char * stonith_namespace2text(enum stonith_namespace st_namespace) { switch (st_namespace) { case st_namespace_any: return "any"; case st_namespace_rhcs: return "stonith-ng"; case st_namespace_internal: return "internal"; case st_namespace_lha: return "heartbeat"; default: break; } return "unsupported"; } /*! * \brief Determine namespace of a fence agent * * \param[in] agent Fence agent type * \param[in] namespace_s Name of agent namespace as string, if known * * \return Namespace of specified agent, as enum value */ enum stonith_namespace stonith_get_namespace(const char *agent, const char *namespace_s) { if (pcmk__str_eq(namespace_s, "internal", pcmk__str_none)) { return st_namespace_internal; } if (stonith__agent_is_rhcs(agent)) { return st_namespace_rhcs; } #if HAVE_STONITH_STONITH_H if (stonith__agent_is_lha(agent)) { return st_namespace_lha; } #endif return st_namespace_invalid; } gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) { gboolean rv = FALSE; stonith_t *stonith_api = st?st:stonith_api_new(); char *list = NULL; if(stonith_api) { if (stonith_api->state == stonith_disconnected) { int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL); if (rc != pcmk_ok) { crm_err("Failed connecting to Stonith-API for watchdog-fencing-query."); } } if (stonith_api->state != stonith_disconnected) { /* caveat!!! * this might fail when when stonithd is just updating the device-list * probably something we should fix as well for other api-calls */ int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0); if ((rc != pcmk_ok) || (list == NULL)) { /* due to the race described above it can happen that * we drop in here - so as not to make remote nodes * panic on that answer */ if (rc == -ENODEV) { crm_notice("Cluster does not have watchdog fencing device"); } else { crm_warn("Could not check for watchdog fencing device: %s", pcmk_strerror(rc)); } } else if (list[0] == '\0') { rv = TRUE; } else { GList *targets = stonith__parse_targets(list); rv = pcmk__str_in_list(node, targets, pcmk__str_casei); g_list_free_full(targets, free); } free(list); if (!st) { /* if we're provided the api we still might have done the * connection - but let's assume the caller won't bother */ stonith_api->cmds->disconnect(stonith_api); } } if (!st) { stonith_api_delete(stonith_api); } } else { crm_err("Stonith-API for watchdog-fencing-query couldn't be created."); } crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.", node, rv?"":"not "); return rv; } gboolean stonith__watchdog_fencing_enabled_for_node(const char *node) { return stonith__watchdog_fencing_enabled_for_node_api(NULL, node); } /* when cycling through the list we don't want to delete items so just mark them and when we know nobody is using the list loop over it to remove the marked items */ static void foreach_notify_entry (stonith_private_t *private, GFunc func, gpointer user_data) { private->notify_refcnt++; g_list_foreach(private->notify_list, func, user_data); private->notify_refcnt--; if ((private->notify_refcnt == 0) && private->notify_deletes) { GList *list_item = private->notify_list; private->notify_deletes = FALSE; while (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; GList *next = g_list_next(list_item); if (list_client->delete) { free(list_client); private->notify_list = g_list_delete_link(private->notify_list, list_item); } list_item = next; } } } static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); native = stonith->st_private; native->ipc = NULL; native->source = NULL; free(native->token); native->token = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(blob.xml, PCMK__XA_SUBT, PCMK__VALUE_ST_NOTIFY_DISCONNECT); foreach_notify_entry(native, stonith_send_notification, &blob); pcmk__xml_free(blob.xml); } xmlNode * create_device_registration_xml(const char *id, enum stonith_namespace standard, const char *agent, const stonith_key_value_t *params, const char *rsc_provides) { xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); xmlNode *args = pcmk__xe_create(data, PCMK__XE_ATTRIBUTES); #if HAVE_STONITH_STONITH_H if (standard == st_namespace_any) { standard = stonith_get_namespace(agent, NULL); } if (standard == st_namespace_lha) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, PCMK_XA_ID, id); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK_XA_AGENT, agent); if ((standard != st_namespace_any) && (standard != st_namespace_invalid)) { crm_xml_add(data, PCMK__XA_NAMESPACE, stonith_namespace2text(standard)); } if (rsc_provides) { crm_xml_add(data, PCMK__XA_RSC_PROVIDES, rsc_provides); } for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int stonith_api_register_device(stonith_t *st, int call_options, const char *id, const char *namespace_s, const char *agent, const stonith_key_value_t *params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, stonith_text2namespace(namespace_s), agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK_XA_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_remove_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level) { int rc = 0; xmlNode *data = NULL; CRM_CHECK(node || pattern || (attr && value), return -EINVAL); data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); if (node) { crm_xml_add(data, PCMK_XA_TARGET, node); } else if (pattern) { crm_xml_add(data, PCMK_XA_TARGET_PATTERN, pattern); } else { crm_xml_add(data, PCMK_XA_TARGET_ATTRIBUTE, attr); crm_xml_add(data, PCMK_XA_TARGET_VALUE, value); } - crm_xml_add_int(data, PCMK_XA_INDEX, level); + pcmk__xe_set_int(data, PCMK_XA_INDEX, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { return stonith_api_remove_level_full(st, options, node, NULL, NULL, NULL, level); } /*! * \internal * \brief Create XML for fence topology level registration request * * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to register * \param[in] device_list List of devices in level * * \return Newly allocated XML tree on success, NULL otherwise * * \note The caller should set only one of node, pattern or attr/value. */ xmlNode * create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, const stonith_key_value_t *device_list) { GString *list = NULL; xmlNode *data; CRM_CHECK(node || pattern || (attr && value), return NULL); data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); - crm_xml_add_int(data, PCMK_XA_ID, level); - crm_xml_add_int(data, PCMK_XA_INDEX, level); + pcmk__xe_set_int(data, PCMK_XA_ID, level); + pcmk__xe_set_int(data, PCMK_XA_INDEX, level); if (node) { crm_xml_add(data, PCMK_XA_TARGET, node); } else if (pattern) { crm_xml_add(data, PCMK_XA_TARGET_PATTERN, pattern); } else { crm_xml_add(data, PCMK_XA_TARGET_ATTRIBUTE, attr); crm_xml_add(data, PCMK_XA_TARGET_VALUE, value); } for (; device_list; device_list = device_list->next) { pcmk__add_separated_word(&list, 1024, device_list->value, ","); } if (list != NULL) { crm_xml_add(data, PCMK_XA_DEVICES, (const char *) list->str); g_string_free(list, TRUE); } return data; } static int stonith_api_register_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level, const stonith_key_value_t *device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, pattern, attr, value, level, device_list); CRM_CHECK(data != NULL, return -EINVAL); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, const stonith_key_value_t * device_list) { return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL, level, device_list); } static int stonith_api_device_list(stonith_t *stonith, int call_options, const char *namespace_s, stonith_key_value_t **devices, int timeout) { int count = 0; enum stonith_namespace ns = stonith_text2namespace(namespace_s); if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } #if HAVE_STONITH_STONITH_H // Include Linux-HA agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { count += stonith__list_lha_agents(devices); } #endif // Include Red Hat agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { count += stonith__list_rhcs_agents(devices); } return count; } // See stonith_api_operations_t:metadata() documentation static int stonith_api_device_metadata(stonith_t *stonith, int call_options, const char *agent, const char *namespace_s, char **output, int timeout_sec) { /* By executing meta-data directly, we can get it from stonith_admin when * the cluster is not running, which is important for higher-level tools. */ enum stonith_namespace ns = stonith_get_namespace(agent, namespace_s); if (timeout_sec <= 0) { timeout_sec = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } crm_trace("Looking up metadata for %s agent %s", stonith_namespace2text(ns), agent); switch (ns) { case st_namespace_rhcs: return stonith__rhcs_metadata(agent, timeout_sec, output); #if HAVE_STONITH_STONITH_H case st_namespace_lha: return stonith__lha_metadata(agent, timeout_sec, output); #endif default: crm_err("Can't get fence agent '%s' meta-data: No such agent", agent); break; } return -ENODEV; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObject *xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_ST_TARGET, target); crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, PCMK_ACTION_OFF); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = pcmk__xpath_search(output->doc, "//*[@" PCMK_XA_AGENT "]"); if (xpathObj) { max = pcmk__xpath_num_results(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = pcmk__xpath_result(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { xmlChar *match_path = xmlGetNodePath(match); crm_info("//*[@" PCMK_XA_AGENT "][%d] = %s", lpc, match_path); free(match_path); *devices = stonith_key_value_add(*devices, NULL, pcmk__xe_get(match, PCMK_XA_ID)); } } xmlXPathFreeObject(xpathObj); } pcmk__xml_free(output); pcmk__xml_free(data); return max; } /*! * \internal * \brief Make a STONITH_OP_EXEC request * * \param[in,out] stonith Fencer connection * \param[in] call_options Bitmask of \c stonith_call_options * \param[in] id Fence device ID that request is for * \param[in] action Agent action to request (list, status, monitor) * \param[in] target Name of target node for requested action * \param[in] timeout_sec Error if not completed within this many seconds * \param[out] output Where to set agent output */ static int stonith_api_call(stonith_t *stonith, int call_options, const char *id, const char *action, const char *target, int timeout_sec, xmlNode **output) { int rc = 0; xmlNode *data = NULL; data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_ST_DEVICE_ID, id); crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, action); crm_xml_add(data, PCMK__XA_ST_TARGET, target); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout_sec); pcmk__xml_free(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, PCMK_ACTION_LIST, NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = pcmk__xe_get(output, PCMK__XA_ST_OUTPUT); if (list_str) { *list_info = strdup(list_str); } } if (output) { pcmk__xml_free(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, PCMK_ACTION_MONITOR, NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, PCMK_ACTION_STATUS, port, timeout, NULL); } static int stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance, int delay) { int rc = 0; xmlNode *data = NULL; data = pcmk__xe_create(NULL, __func__); crm_xml_add(data, PCMK__XA_ST_TARGET, node); crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, action); - crm_xml_add_int(data, PCMK__XA_ST_TIMEOUT, timeout); - crm_xml_add_int(data, PCMK__XA_ST_TOLERANCE, tolerance); - crm_xml_add_int(data, PCMK__XA_ST_DELAY, delay); + pcmk__xe_set_int(data, PCMK__XA_ST_TIMEOUT, timeout); + pcmk__xe_set_int(data, PCMK__XA_ST_TOLERANCE, tolerance); + pcmk__xe_set_int(data, PCMK__XA_ST_DELAY, delay); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); pcmk__xml_free(data); return rc; } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { return stonith_api_fence_with_delay(stonith, call_options, node, action, timeout, tolerance, 0); } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { stonith__set_call_options(call_options, target, st_opt_manual_ack); return stonith_api_fence(stonith, call_options, target, PCMK_ACTION_OFF, 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = pcmk__xe_create(NULL, __func__); crm_xml_add(data, PCMK__XA_ST_TARGET, node); } stonith__set_call_options(call_options, node, st_opt_sync_call); rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options, timeout); pcmk__xml_free(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = pcmk__xpath_find_one(output->doc, "//" PCMK__XE_ST_HISTORY, LOG_NEVER); for (op = pcmk__xe_first_child(reply, NULL, NULL, NULL); op != NULL; op = pcmk__xe_next(op, NULL)) { stonith_history_t *kvp = pcmk__assert_alloc(1, sizeof(stonith_history_t)); long long completed_nsec = 0LL; kvp->target = pcmk__xe_get_copy(op, PCMK__XA_ST_TARGET); kvp->action = pcmk__xe_get_copy(op, PCMK__XA_ST_DEVICE_ACTION); kvp->origin = pcmk__xe_get_copy(op, PCMK__XA_ST_ORIGIN); kvp->delegate = pcmk__xe_get_copy(op, PCMK__XA_ST_DELEGATE); kvp->client = pcmk__xe_get_copy(op, PCMK__XA_ST_CLIENTNAME); pcmk__xe_get_time(op, PCMK__XA_ST_DATE, &kvp->completed); pcmk__xe_get_ll(op, PCMK__XA_ST_DATE_NSEC, &completed_nsec); if ((completed_nsec >= LONG_MIN) && (completed_nsec <= LONG_MAX)) { kvp->completed_nsec = (long) completed_nsec; } pcmk__xe_get_int(op, PCMK__XA_ST_STATE, &kvp->state); kvp->exit_reason = pcmk__xe_get_copy(op, PCMK_XA_EXIT_REASON); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } pcmk__xml_free(output); return rc; } void stonith_history_free(stonith_history_t *history) { stonith_history_t *hp, *hp_old; for (hp = history; hp; hp_old = hp, hp = hp->next, free(hp_old)) { free(hp->target); free(hp->action); free(hp->origin); free(hp->delegate); free(hp->client); free(hp->exit_reason); } } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; if (a_client->delete || b_client->delete) { /* make entries marked for deletion not findable */ return -1; } CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = NULL; CRM_CHECK(token != NULL, return NULL); op_msg = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(op_msg, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(op_msg, PCMK__XA_ST_OP, op); - crm_xml_add_int(op_msg, PCMK__XA_ST_CALLID, call_id); + pcmk__xe_set_int(op_msg, PCMK__XA_ST_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); - crm_xml_add_int(op_msg, PCMK__XA_ST_CALLOPT, call_options); + pcmk__xe_set_int(op_msg, PCMK__XA_ST_CALLOPT, call_options); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(op_msg, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->st_private; crm_debug("Disconnecting from the fencer"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->st_private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { pcmk__intkey_table_remove(private->stonith_op_callback_table, call_id); } return pcmk_ok; } /*! * \internal * \brief Invoke a (single) specified fence action callback * * \param[in,out] st Fencer API connection * \param[in] call_id If positive, call ID of completed fence action, * otherwise legacy return code for early failure * \param[in,out] result Full result for action * \param[in,out] userdata User data to pass to callback * \param[in] callback Fence action callback to invoke */ static void invoke_fence_action_callback(stonith_t *st, int call_id, pcmk__action_result_t *result, void *userdata, void (*callback) (stonith_t *st, stonith_callback_data_t *data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = pcmk_rc2legacy(stonith__result2rc(result)); data.userdata = userdata; data.opaque = (void *) result; callback(st, &data); } /*! * \internal * \brief Invoke any callbacks registered for a specified fence action result * * Given a fence action result from the fencer, invoke any callback registered * for that action, as well as any global callback registered. * * \param[in,out] stonith Fencer API connection * \param[in] msg If non-NULL, fencer reply * \param[in] call_id If \p msg is NULL, call ID of action that timed out */ static void invoke_registered_callbacks(stonith_t *stonith, const xmlNode *msg, int call_id) { stonith_private_t *private = NULL; stonith_callback_client_t *cb_info = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->st_private != NULL, return); private = stonith->st_private; if (msg == NULL) { // Fencer didn't reply in time pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, "Fencer accepted request but did not reply in time"); CRM_LOG_ASSERT(call_id > 0); } else { // We have the fencer reply if ((pcmk__xe_get_int(msg, PCMK__XA_ST_CALLID, &call_id) != pcmk_rc_ok) || (call_id <= 0)) { crm_log_xml_warn(msg, "Bad fencer reply"); } stonith__xe_get_result(msg, &result); } if (call_id > 0) { cb_info = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); } if ((cb_info != NULL) && (cb_info->callback != NULL) && (pcmk__result_ok(&result) || !(cb_info->only_success))) { crm_trace("Invoking callback %s for call %d", pcmk__s(cb_info->id, "without ID"), call_id); invoke_fence_action_callback(stonith, call_id, &result, cb_info->user_data, cb_info->callback); } else if ((private->op_callback == NULL) && !pcmk__result_ok(&result)) { crm_warn("Fencing action without registered callback failed: %d (%s%s%s)", result.exit_status, pcmk_exec_status_str(result.execution_status), ((result.exit_reason == NULL)? "" : ": "), ((result.exit_reason == NULL)? "" : result.exit_reason)); crm_log_xml_debug(msg, "Failed fence update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_fence_action_callback(stonith, call_id, &result, NULL, private->op_callback); } if (cb_info != NULL) { stonith_api_del_callback(stonith, call_id, FALSE); } pcmk__reset_result(&result); } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); invoke_registered_callbacks(timer->stonith, NULL, timer->call_id); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = pcmk__assert_alloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = pcmk__create_timer(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->st_private; callback = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; pcmk__assert(st != NULL); private = st->st_private; blob.stonith = st; blob.xml = pcmk__xml_parse(buffer); if (blob.xml == NULL) { crm_warn("Received malformed message from fencer: %s", buffer); return 0; } /* do callbacks */ type = pcmk__xe_get(blob.xml, PCMK__XA_T); crm_trace("Activating %s callbacks...", type); if (pcmk__str_eq(type, PCMK__VALUE_STONITH_NG, pcmk__str_none)) { invoke_registered_callbacks(st, blob.xml, 0); } else if (pcmk__str_eq(type, PCMK__VALUE_ST_NOTIFY, pcmk__str_none)) { foreach_notify_entry(private, stonith_send_notification, &blob); } else if (pcmk__str_eq(type, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE, pcmk__str_none)) { int call_id = 0; int timeout = 0; pcmk__xe_get_int(blob.xml, PCMK__XA_ST_TIMEOUT, &timeout); pcmk__xe_get_int(blob.xml, PCMK__XA_ST_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } pcmk__xml_free(blob.xml); return 1; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = NULL; const char *display_name = name? name : "client"; struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; CRM_CHECK(stonith != NULL, return -EINVAL); native = stonith->st_private; pcmk__assert(native != NULL); crm_debug("Attempting fencer connection by %s with%s mainloop", display_name, (stonith_fd? "out" : "")); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc != NULL) { rc = pcmk__connect_generic_ipc(native->ipc); if (rc == pcmk_rc_ok) { rc = pcmk__ipc_fd(native->ipc, stonith_fd); if (rc != pcmk_rc_ok) { crm_debug("Couldn't get file descriptor for IPC: %s", pcmk_rc_str(rc)); } } if (rc != pcmk_rc_ok) { crm_ipc_close(native->ipc); crm_ipc_destroy(native->ipc); native->ipc = NULL; } } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { rc = -ENOTCONN; } else { xmlNode *reply = NULL; xmlNode *hello = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(hello, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(hello, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(hello, PCMK__XA_ST_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_debug("Couldn't register with the fencer: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); rc = -ECOMM; } else if (reply == NULL) { crm_debug("Couldn't register with the fencer: no reply"); rc = -EPROTO; } else { const char *msg_type = pcmk__xe_get(reply, PCMK__XA_ST_OP); native->token = pcmk__xe_get_copy(reply, PCMK__XA_ST_CLIENTID); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_none)) { crm_debug("Couldn't register with the fencer: invalid reply type '%s'", (msg_type? msg_type : "(missing)")); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else if (native->token == NULL) { crm_debug("Couldn't register with the fencer: no token in reply"); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else { crm_debug("Connection to fencer by %s succeeded (registration token: %s)", display_name, native->token); rc = pcmk_ok; } } pcmk__xml_free(reply); pcmk__xml_free(hello); } if (rc != pcmk_ok) { crm_debug("Connection attempt to fencer by %s failed: %s " QB_XS " rc=%d", display_name, pcmk_strerror(rc), rc); stonith->cmds->disconnect(stonith); } return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = pcmk__xe_create(NULL, __func__); stonith_private_t *native = stonith->st_private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); if (enabled) { crm_xml_add(notify_msg, PCMK__XA_ST_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, PCMK__XA_ST_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } pcmk__xml_free(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->st_private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = pcmk__assert_alloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static void del_notify_entry(gpointer data, gpointer user_data) { stonith_notify_client_t *entry = data; stonith_t * stonith = user_data; if (!entry->delete) { crm_debug("Removing callback for %s events", entry->event); stonith_api_del_notification(stonith, entry->event); } } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = stonith->st_private; if (event == NULL) { foreach_notify_entry(private, del_notify_entry, stonith); crm_trace("Removed callback"); return pcmk_ok; } crm_debug("Removing callback for %s events", event); new_client = pcmk__assert_alloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; if (private->notify_refcnt) { list_client->delete = TRUE; private->notify_deletes = TRUE; } else { private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); } crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->st_private != NULL, return -EINVAL); private = stonith->st_private; if (call_id == 0) { // Add global callback private->op_callback = callback; } else if (call_id < 0) { // Call failed immediately, so call callback now if (!(options & st_opt_report_only_success)) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); pcmk__set_result(&result, CRM_EX_ERROR, stonith__legacy2status(call_id), NULL); invoke_fence_action_callback(stonith, call_id, &result, user_data, callback); } else { crm_warn("Fencer call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = pcmk__assert_alloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } pcmk__intkey_table_insert(private->stonith_op_callback_table, call_id, blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, pcmk__s(blob->id, "no ID")); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->st_private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } /*! * \internal * \brief Get the data section of a fencer notification * * \param[in] msg Notification XML * \param[in] ntype Notification type */ static xmlNode * get_event_data_xml(xmlNode *msg, const char *ntype) { char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = pcmk__xpath_find_one(msg->doc, data_addr, LOG_DEBUG); free(data_addr); return data; } /* */ static stonith_event_t * xml_to_event(xmlNode *msg) { stonith_event_t *event = pcmk__assert_alloc(1, sizeof(stonith_event_t)); struct event_private *event_private = NULL; event->opaque = pcmk__assert_alloc(1, sizeof(struct event_private)); event_private = (struct event_private *) event->opaque; crm_log_xml_trace(msg, "stonith_notify"); // All notification types have the operation result and notification subtype stonith__xe_get_result(msg, &event_private->result); event->operation = pcmk__xe_get_copy(msg, PCMK__XA_ST_OP); // @COMPAT The API originally provided the result as a legacy return code event->result = pcmk_rc2legacy(stonith__result2rc(&event_private->result)); // Some notification subtypes have additional information if (pcmk__str_eq(event->operation, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { xmlNode *data = get_event_data_xml(msg, event->operation); if (data == NULL) { crm_err("No data for %s event", event->operation); crm_log_xml_notice(msg, "BadEvent"); } else { event->origin = pcmk__xe_get_copy(data, PCMK__XA_ST_ORIGIN); event->action = pcmk__xe_get_copy(data, PCMK__XA_ST_DEVICE_ACTION); event->target = pcmk__xe_get_copy(data, PCMK__XA_ST_TARGET); event->executioner = pcmk__xe_get_copy(data, PCMK__XA_ST_DELEGATE); event->id = pcmk__xe_get_copy(data, PCMK__XA_ST_REMOTE_OP); event->client_origin = pcmk__xe_get_copy(data, PCMK__XA_ST_CLIENTNAME); event->device = pcmk__xe_get_copy(data, PCMK__XA_ST_DEVICE_ID); } } else if (pcmk__str_any_of(event->operation, STONITH_OP_DEVICE_ADD, STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, STONITH_OP_LEVEL_DEL, NULL)) { xmlNode *data = get_event_data_xml(msg, event->operation); if (data == NULL) { crm_err("No data for %s event", event->operation); crm_log_xml_notice(msg, "BadEvent"); } else { event->device = pcmk__xe_get_copy(data, PCMK__XA_ST_DEVICE_ID); } } return event; } static void event_free(stonith_event_t * event) { struct event_private *event_private = event->opaque; free(event->id); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); pcmk__reset_result(&event_private->result); free(event->opaque); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = pcmk__xe_get(blob->xml, PCMK__XA_SUBT); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->delete) { crm_trace("Skipping callback - marked for deletion"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (!pcmk__str_eq(entry->event, event, pcmk__str_none)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } /*! * \internal * \brief Create and send an API request * * \param[in,out] stonith Stonith connection * \param[in] op API operation to request * \param[in] data Data to attach to request * \param[out] output_data If not NULL, will be set to reply if synchronous * \param[in] call_options Bitmask of stonith_call_options to use * \param[in] timeout Error if not completed within this many seconds * * \return pcmk_ok (for synchronous requests) or positive call ID * (for asynchronous requests) on success, -errno otherwise */ static int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = NULL; pcmk__assert((stonith != NULL) && (stonith->st_private != NULL) && (op != NULL)); native = stonith->st_private; if (output_data != NULL) { *output_data = NULL; } if ((stonith->state == stonith_disconnected) || (native->token == NULL)) { return -ENOTCONN; } /* Increment the call ID, which must be positive to avoid conflicting with * error codes. This shouldn't be a problem unless the client mucked with * it or the counter wrapped around. */ stonith->call_id++; if (stonith->call_id < 1) { stonith->call_id = 1; } op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } - crm_xml_add_int(op_msg, PCMK__XA_ST_TIMEOUT, timeout); + pcmk__xe_set_int(op_msg, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("Sending %s message to fencer with timeout %ds", op, timeout); if (data) { const char *delay_s = pcmk__xe_get(data, PCMK__XA_ST_DELAY); if (delay_s) { crm_xml_add(op_msg, PCMK__XA_ST_DELAY, delay_s); } } { enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; if (call_options & st_opt_sync_call) { pcmk__set_ipc_flags(ipc_flags, "stonith command", crm_ipc_client_response); } rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); } pcmk__xml_free(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); pcmk__xml_free(op_reply); return stonith->call_id; } pcmk__xe_get_int(op_reply, PCMK__XA_ST_CALLID, &reply_id); if (reply_id == stonith->call_id) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; crm_trace("Synchronous reply %d received", reply_id); stonith__xe_get_result(op_reply, &result); rc = pcmk_rc2legacy(stonith__result2rc(&result)); pcmk__reset_result(&result); if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); pcmk__xml_free(op_reply); op_reply = NULL; rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); pcmk__xml_free(op_reply); op_reply = NULL; rc = -ENOMSG; } done: if (!crm_ipc_connected(native->ipc)) { crm_err("Fencer disconnected"); free(native->token); native->token = NULL; stonith->state = stonith_disconnected; } pcmk__xml_free(op_reply); return rc; } /* Not used with mainloop */ bool stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; pcmk__assert(st != NULL); private = st->st_private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (!crm_ipc_connected(private->ipc)) { crm_err("Connection closed"); stay_connected = FALSE; } } return stay_connected; } static int stonith_api_free(stonith_t * stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Unregistering notifications and disconnecting %p first", stonith); stonith->cmds->remove_notification(stonith, NULL); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->st_private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->st_private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } void stonith_api_delete(stonith_t * stonith) { crm_trace("Destroying %p", stonith); if(stonith) { stonith->cmds->free(stonith); } } static gboolean is_stonith_param(gpointer key, gpointer value, gpointer user_data) { return pcmk_stonith_param(key); } int stonith__validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, GHashTable *params, int timeout_sec, char **output, char **error_output) { int rc = pcmk_rc_ok; /* Use a dummy node name in case the agent requires a target. We assume the * actual target doesn't matter for validation purposes (if in practice, * that is incorrect, we will need to allow the caller to pass the target). */ const char *target = "node1"; char *host_arg = NULL; if (params != NULL) { host_arg = pcmk__str_copy(g_hash_table_lookup(params, PCMK_STONITH_HOST_ARGUMENT)); /* Remove special stonith params from the table before doing anything else */ g_hash_table_foreach_remove(params, is_stonith_param, NULL); } #if PCMK__ENABLE_CIBSECRETS rc = pcmk__substitute_secrets(rsc_id, params); if (rc != pcmk_rc_ok) { crm_warn("Could not replace secret parameters for validation of %s: %s", agent, pcmk_rc_str(rc)); // rc is standard return value, don't return it in this function } #endif if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } if (timeout_sec <= 0) { timeout_sec = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } switch (stonith_get_namespace(agent, namespace_s)) { case st_namespace_rhcs: rc = stonith__rhcs_validate(st, call_options, target, agent, params, host_arg, timeout_sec, output, error_output); rc = pcmk_legacy2rc(rc); break; #if HAVE_STONITH_STONITH_H case st_namespace_lha: rc = stonith__lha_validate(st, call_options, target, agent, params, timeout_sec, output, error_output); rc = pcmk_legacy2rc(rc); break; #endif case st_namespace_invalid: errno = ENOENT; rc = errno; if (error_output) { *error_output = crm_strdup_printf("Agent %s not found", agent); } else { crm_err("Agent %s not found", agent); } break; default: errno = EOPNOTSUPP; rc = errno; if (error_output) { *error_output = crm_strdup_printf("Agent %s does not support validation", agent); } else { crm_err("Agent %s does not support validation", agent); } break; } free(host_arg); return rc; } static int stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, const stonith_key_value_t *params, int timeout_sec, char **output, char **error_output) { /* Validation should be done directly via the agent, so we can get it from * stonith_admin when the cluster is not running, which is important for * higher-level tools. */ int rc = pcmk_ok; GHashTable *params_table = pcmk__strkey_table(free, free); // Convert parameter list to a hash table for (; params; params = params->next) { if (!pcmk_stonith_param(params->key)) { pcmk__insert_dup(params_table, params->key, params->value); } } rc = stonith__validate(st, call_options, rsc_id, namespace_s, agent, params_table, timeout_sec, output, error_output); g_hash_table_destroy(params_table); return rc; } stonith_t * stonith_api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); if (new_stonith == NULL) { return NULL; } private = calloc(1, sizeof(stonith_private_t)); if (private == NULL) { free(new_stonith); return NULL; } new_stonith->st_private = private; private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); private->notify_list = NULL; private->notify_refcnt = 0; private->notify_deletes = FALSE; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); if (new_stonith->cmds == NULL) { free(new_stonith->st_private); free(new_stonith); return NULL; } /* *INDENT-OFF* */ new_stonith->cmds->free = stonith_api_free; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->remove_level_full = stonith_api_remove_level_full; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->register_level_full = stonith_api_register_level_full; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; new_stonith->cmds->validate = stonith_api_validate; /* *INDENT-ON* */ return new_stonith; } /*! * \brief Make a blocking connection attempt to the fencer * * \param[in,out] st Fencer API object * \param[in] name Client name to use with fencer * \param[in] max_attempts Return error if this many attempts fail * * \return pcmk_ok on success, result of last attempt otherwise */ int stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts) { int rc = -EINVAL; // if max_attempts is not positive for (int attempt = 1; attempt <= max_attempts; attempt++) { rc = st->cmds->connect(st, name, NULL); if (rc == pcmk_ok) { return pcmk_ok; } else if (attempt < max_attempts) { crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s " QB_XS " rc=%d", attempt, max_attempts, pcmk_strerror(rc), rc); sleep(2); } } crm_notice("Could not connect to fencer: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); return rc; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { stonith_key_value_t *p, *end; p = pcmk__assert_alloc(1, sizeof(stonith_key_value_t)); p->key = pcmk__str_copy(key); p->value = pcmk__str_copy(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __func__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { int rc = pcmk_ok; stonith_t *st = stonith_api_new(); const char *action = off? PCMK_ACTION_OFF : PCMK_ACTION_REBOOT; api_log_open(); if (st == NULL) { api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s", action, nodeid, uname); return -EPROTO; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); int opts = 0; stonith__set_call_options(opts, name, st_opt_sync_call|st_opt_allow_self_fencing); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->fence(st, opts, name, action, timeout, 0); free(name); if (rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action); } } stonith_api_delete(st); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = pcmk_ok; time_t when = 0; stonith_t *st = stonith_api_new(); stonith_history_t *history = NULL, *hp = NULL; if (st == NULL) { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: " "API initialization failed", nodeid, uname); return when; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } else { int entries = 0; int progress = 0; int completed = 0; int opts = 0; char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); stonith__set_call_options(opts, name, st_opt_sync_call); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->history(st, opts, name, &history, 120); free(name); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } stonith_history_free(history); if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } stonith_api_delete(st); if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } return when; } bool stonith_agent_exists(const char *agent, int timeout) { stonith_t *st = NULL; stonith_key_value_t *devices = NULL; stonith_key_value_t *dIter = NULL; bool rc = FALSE; if (agent == NULL) { return rc; } st = stonith_api_new(); if (st == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return FALSE; } st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout == 0 ? 120 : timeout); for (dIter = devices; dIter != NULL; dIter = dIter->next) { if (pcmk__str_eq(dIter->value, agent, pcmk__str_none)) { rc = TRUE; break; } } stonith_key_value_freeall(devices, 1, 1); stonith_api_delete(st); return rc; } const char * stonith_action_str(const char *action) { if (action == NULL) { return "fencing"; } else if (strcmp(action, PCMK_ACTION_ON) == 0) { return "unfencing"; } else if (strcmp(action, PCMK_ACTION_OFF) == 0) { return "turning off"; } else { return action; } } /*! * \internal * \brief Parse a target name from one line of a target list string * * \param[in] line One line of a target list string * \param[in] len String length of line * \param[in,out] output List to add newly allocated target name to */ static void parse_list_line(const char *line, int len, GList **output) { size_t i = 0; size_t entry_start = 0; /* Skip complaints about additional parameters device doesn't understand * * @TODO Document or eliminate the implied restriction of target names */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping list output line: %s", line); return; } // Process line content, character by character for (i = 0; i <= len; i++) { if (isspace(line[i]) || (line[i] == ',') || (line[i] == ';') || (line[i] == '\0')) { // We've found a separator (i.e. the end of an entry) int rc = 0; char *entry = NULL; if (i == entry_start) { // Skip leading and sequential separators entry_start = i + 1; continue; } entry = pcmk__assert_alloc(i - entry_start + 1, sizeof(char)); /* Read entry, stopping at first separator * * @TODO Document or eliminate these character restrictions */ rc = sscanf(line + entry_start, "%[a-zA-Z0-9_-.]", entry); if (rc != 1) { crm_warn("Could not parse list output entry: %s " QB_XS " entry_start=%d position=%d", line + entry_start, entry_start, i); free(entry); } else if (pcmk__strcase_any_of(entry, PCMK_ACTION_ON, PCMK_ACTION_OFF, NULL)) { /* Some agents print the target status in the list output, * though none are known now (the separate list-status command * is used for this, but it can also print "UNKNOWN"). To handle * this possibility, skip such entries. * * @TODO Document or eliminate the implied restriction of target * names. */ free(entry); } else { // We have a valid entry *output = g_list_append(*output, entry); } entry_start = i + 1; } } } /*! * \internal * \brief Parse a list of targets from a string * * \param[in] list_output Target list as a string * * \return List of target names * \note The target list string format is flexible, to allow for user-specified * lists such pcmk_host_list and the output of an agent's list action * (whether direct or via the API, which escapes newlines). There may be * multiple lines, separated by either a newline or an escaped newline * (backslash n). Each line may have one or more target names, separated * by any combination of whitespace, commas, and semi-colons. Lines * containing "invalid" or "variable" will be ignored entirely. Target * names "on" or "off" (case-insensitive) will be ignored. Target names * may contain only alphanumeric characters, underbars (_), dashes (-), * and dots (.) (if any other character occurs in the name, it and all * subsequent characters in the name will be ignored). * \note The caller is responsible for freeing the result with * g_list_free_full(result, free). */ GList * stonith__parse_targets(const char *target_spec) { GList *targets = NULL; if (target_spec != NULL) { size_t out_len = strlen(target_spec); size_t line_start = 0; // Starting index of line being processed for (size_t i = 0; i <= out_len; ++i) { if ((target_spec[i] == '\n') || (target_spec[i] == '\0') || ((target_spec[i] == '\\') && (target_spec[i + 1] == 'n'))) { // We've reached the end of one line of output int len = i - line_start; if (len > 0) { char *line = strndup(target_spec + line_start, len); line[len] = '\0'; // Because it might be a newline parse_list_line(line, len, &targets); free(line); } if (target_spec[i] == '\\') { ++i; // backslash-n takes up two positions } line_start = i + 1; } } } return targets; } /*! * \internal * \brief Check whether a fencing failure was followed by an equivalent success * * \param[in] event Fencing failure * \param[in] top_history Complete fencing history (must be sorted by * stonith__sort_history() beforehand) * * \return The name of the node that executed the fencing if a later successful * event exists, or NULL if no such event exists */ const char * stonith__later_succeeded(const stonith_history_t *event, const stonith_history_t *top_history) { const char *other = NULL; for (const stonith_history_t *prev_hp = top_history; prev_hp != NULL; prev_hp = prev_hp->next) { if (prev_hp == event) { break; } if ((prev_hp->state == st_done) && pcmk__str_eq(event->target, prev_hp->target, pcmk__str_casei) && pcmk__str_eq(event->action, prev_hp->action, pcmk__str_none) && ((event->completed < prev_hp->completed) || ((event->completed == prev_hp->completed) && (event->completed_nsec < prev_hp->completed_nsec)))) { if ((event->delegate == NULL) || pcmk__str_eq(event->delegate, prev_hp->delegate, pcmk__str_casei)) { // Prefer equivalent fencing by same executioner return prev_hp->delegate; } else if (other == NULL) { // Otherwise remember first successful executioner other = (prev_hp->delegate == NULL)? "some node" : prev_hp->delegate; } } } return other; } /*! * \internal * \brief Sort fencing history, pending first then by most recently completed * * \param[in,out] history List of stonith actions * * \return New head of sorted \p history */ stonith_history_t * stonith__sort_history(stonith_history_t *history) { stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp; for (hp = history; hp; ) { tmp = hp->next; if ((hp->state == st_done) || (hp->state == st_failed)) { /* sort into new */ if ((!new) || (hp->completed > new->completed) || ((hp->completed == new->completed) && (hp->completed_nsec > new->completed_nsec))) { hp->next = new; new = hp; } else { np = new; do { if ((!np->next) || (hp->completed > np->next->completed) || ((hp->completed == np->next->completed) && (hp->completed_nsec > np->next->completed_nsec))) { hp->next = np->next; np->next = hp; break; } np = np->next; } while (1); } } else { /* put into pending */ hp->next = pending; pending = hp; } hp = tmp; } /* pending actions don't have a completed-stamp so make them go front */ if (pending) { stonith_history_t *last_pending = pending; while (last_pending->next) { last_pending = last_pending->next; } last_pending->next = new; new = pending; } return new; } /*! * \brief Return string equivalent of an operation state value * * \param[in] state Fencing operation state value * * \return Human-friendly string equivalent of state */ const char * stonith_op_state_str(enum op_state state) { switch (state) { case st_query: return "querying"; case st_exec: return "executing"; case st_done: return "completed"; case st_duplicate: return "duplicate"; case st_failed: return "failed"; } return "unknown"; } stonith_history_t * stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data) { for (stonith_history_t *hp = history; hp; hp = hp->next) { if (matching_fn(hp, user_data)) { return hp; } } return NULL; } bool stonith__event_state_pending(stonith_history_t *history, void *user_data) { return history->state != st_failed && history->state != st_done; } bool stonith__event_state_eq(stonith_history_t *history, void *user_data) { return history->state == GPOINTER_TO_INT(user_data); } bool stonith__event_state_neq(stonith_history_t *history, void *user_data) { return history->state != GPOINTER_TO_INT(user_data); } void stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name, xmlNode *metadata) { xmlXPathObject *xpath = NULL; int max = 0; int lpc = 0; CRM_CHECK((device_flags != NULL) && (metadata != NULL), return); xpath = pcmk__xpath_search(metadata->doc, "//" PCMK_XE_PARAMETER); max = pcmk__xpath_num_results(xpath); if (max == 0) { xmlXPathFreeObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *parameter = NULL; xmlNode *match = pcmk__xpath_result(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if (match == NULL) { continue; } parameter = pcmk__xe_get(match, PCMK_XA_NAME); if (pcmk__str_eq(parameter, "plug", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_plug); } else if (pcmk__str_eq(parameter, "port", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_port); } } xmlXPathFreeObject(xpath); } /*! * \internal * \brief Retrieve fence agent meta-data asynchronously * * \param[in] agent Agent to execute * \param[in] timeout_sec Error if not complete within this time * \param[in] callback Function to call with result (this will always be * called, whether by this function directly or * later via the main loop, and on success the * metadata will be in its result argument's * action_stdout) * \param[in,out] user_data User data to pass to callback * * \return Standard Pacemaker return code * \note The caller must use a main loop. This function is not a * stonith_api_operations_t method because it does not need a stonith_t * object and does not go through the fencer, but executes the agent * directly. */ int stonith__metadata_async(const char *agent, int timeout_sec, void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data), void *user_data) { switch (stonith_get_namespace(agent, NULL)) { case st_namespace_rhcs: { stonith_action_t *action = NULL; int rc = pcmk_ok; action = stonith__action_create(agent, PCMK_ACTION_METADATA, NULL, 0, timeout_sec, NULL, NULL, NULL); rc = stonith__execute_async(action, user_data, callback, NULL); if (rc != pcmk_ok) { callback(0, stonith__action_result(action), user_data); stonith__destroy_action(action); } return pcmk_legacy2rc(rc); } #if HAVE_STONITH_STONITH_H case st_namespace_lha: // LHA metadata is simply synthesized, so simulate async { pcmk__action_result_t result = { .exit_status = CRM_EX_OK, .execution_status = PCMK_EXEC_DONE, .exit_reason = NULL, .action_stdout = NULL, .action_stderr = NULL, }; stonith__lha_metadata(agent, timeout_sec, &result.action_stdout); callback(0, &result, user_data); pcmk__reset_result(&result); return pcmk_rc_ok; } #endif default: { pcmk__action_result_t result = { .exit_status = CRM_EX_NOSUCH, .execution_status = PCMK_EXEC_ERROR_HARD, .exit_reason = crm_strdup_printf("No such agent '%s'", agent), .action_stdout = NULL, .action_stderr = NULL, }; callback(0, &result, user_data); pcmk__reset_result(&result); return ENOENT; } } } /*! * \internal * \brief Return the exit status from an async action callback * * \param[in] data Callback data * * \return Exit status from callback data */ int stonith__exit_status(const stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return CRM_EX_ERROR; } return ((pcmk__action_result_t *) data->opaque)->exit_status; } /*! * \internal * \brief Return the execution status from an async action callback * * \param[in] data Callback data * * \return Execution status from callback data */ int stonith__execution_status(const stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return PCMK_EXEC_UNKNOWN; } return ((pcmk__action_result_t *) data->opaque)->execution_status; } /*! * \internal * \brief Return the exit reason from an async action callback * * \param[in] data Callback data * * \return Exit reason from callback data */ const char * stonith__exit_reason(const stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return NULL; } return ((pcmk__action_result_t *) data->opaque)->exit_reason; } /*! * \internal * \brief Return the exit status from an event notification * * \param[in] event Event * * \return Exit status from event */ int stonith__event_exit_status(const stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return CRM_EX_ERROR; } else { struct event_private *event_private = event->opaque; return event_private->result.exit_status; } } /*! * \internal * \brief Return the execution status from an event notification * * \param[in] event Event * * \return Execution status from event */ int stonith__event_execution_status(const stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return PCMK_EXEC_UNKNOWN; } else { struct event_private *event_private = event->opaque; return event_private->result.execution_status; } } /*! * \internal * \brief Return the exit reason from an event notification * * \param[in] event Event * * \return Exit reason from event */ const char * stonith__event_exit_reason(const stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return NULL; } else { struct event_private *event_private = event->opaque; return event_private->result.exit_reason; } } /*! * \internal * \brief Return a human-friendly description of a fencing event * * \param[in] event Event to describe * * \return Newly allocated string with description of \p event * \note The caller is responsible for freeing the return value. * This function asserts on memory errors and never returns NULL. */ char * stonith__event_description(const stonith_event_t *event) { // Use somewhat readable defaults const char *origin = pcmk__s(event->client_origin, "a client"); const char *origin_node = pcmk__s(event->origin, "a node"); const char *executioner = pcmk__s(event->executioner, "the cluster"); const char *device = pcmk__s(event->device, "unknown"); const char *action = pcmk__s(event->action, event->operation); const char *target = pcmk__s(event->target, "no node"); const char *reason = stonith__event_exit_reason(event); const char *status; if (action == NULL) { action = "(unknown)"; } if (stonith__event_execution_status(event) != PCMK_EXEC_DONE) { status = pcmk_exec_status_str(stonith__event_execution_status(event)); } else if (stonith__event_exit_status(event) != CRM_EX_OK) { status = pcmk_exec_status_str(PCMK_EXEC_ERROR); } else { status = crm_exit_str(CRM_EX_OK); } if (pcmk__str_eq(event->operation, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return crm_strdup_printf("Fencing history may have changed"); } else if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { return crm_strdup_printf("A fencing device (%s) was added", device); } else if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_DEL, pcmk__str_none)) { return crm_strdup_printf("A fencing device (%s) was removed", device); } else if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { return crm_strdup_printf("A fencing topology level (%s) was added", device); } else if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { return crm_strdup_printf("A fencing topology level (%s) was removed", device); } // event->operation should be PCMK__VALUE_ST_NOTIFY_FENCE at this point return crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s%s%s%s (ref=%s)", action, target, executioner, origin, origin_node, status, ((reason == NULL)? "" : " ("), pcmk__s(reason, ""), ((reason == NULL)? "" : ")"), pcmk__s(event->id, "(none)")); } diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index b47e0068ef..45f490d6fe 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2672 +1,2672 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // uint32_t, uint64_t #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // stonith__* #include #include #include #include #include #include #define MAX_TLS_RECV_WAIT 10000 CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); // GnuTLS client handshake timeout in seconds #define TLS_HANDSHAKE_TIMEOUT 5 static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; static void lrmd_tls_connection_destroy(gpointer userdata); static int add_tls_to_mainloop(lrmd_t *lrmd, bool do_api_handshake); typedef struct lrmd_private_s { uint64_t type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; pcmk__remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; char *server; int port; pcmk__tls_t *tls; /* while the async connection is occurring, this is the id * of the connection timeout timer. */ int async_timer; int sock; /* since tls requires a round trip across the network for a * request/reply, there are times where we just want to be able * to send a request from the client and not wait around (or even care * about) what the reply is. */ int expected_late_replies; GList *pending_notify; crm_trigger_t *process_notify; crm_trigger_t *handshake_trigger; lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; char *peer_version; } lrmd_private_t; static int process_lrmd_handshake_reply(xmlNode *reply, lrmd_private_t *native); static void report_async_connection_result(lrmd_t * lrmd, int rc); static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = pcmk__assert_alloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = pcmk__assert_alloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } /*! * \brief Create a new lrmd_event_data_t object * * \param[in] rsc_id ID of resource involved in event * \param[in] task Action name * \param[in] interval_ms Action interval * * \return Newly allocated and initialized lrmd_event_data_t * \note This functions asserts on memory errors, so the return value is * guaranteed to be non-NULL. The caller is responsible for freeing the * result with lrmd_free_event(). */ lrmd_event_data_t * lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms) { lrmd_event_data_t *event = pcmk__assert_alloc(1, sizeof(lrmd_event_data_t)); // lrmd_event_data_t has (const char *) members that lrmd_free_event() frees event->rsc_id = pcmk__str_copy(rsc_id); event->op_type = pcmk__str_copy(task); event->interval_ms = interval_ms; return event; } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = pcmk__assert_alloc(1, sizeof(lrmd_event_data_t)); copy->type = event->type; // lrmd_event_data_t has (const char *) members that lrmd_free_event() frees copy->rsc_id = pcmk__str_copy(event->rsc_id); copy->op_type = pcmk__str_copy(event->op_type); copy->user_data = pcmk__str_copy(event->user_data); copy->output = pcmk__str_copy(event->output); copy->remote_nodename = pcmk__str_copy(event->remote_nodename); copy->exit_reason = pcmk__str_copy(event->exit_reason); copy->call_id = event->call_id; copy->timeout = event->timeout; copy->interval_ms = event->interval_ms; copy->start_delay = event->start_delay; copy->rsc_deleted = event->rsc_deleted; copy->rc = event->rc; copy->op_status = event->op_status; copy->t_run = event->t_run; copy->t_rcchange = event->t_rcchange; copy->exec_time = event->exec_time; copy->queue_time = event->queue_time; copy->connection_rc = event->connection_rc; copy->params = pcmk__str_table_dup(event->params); return copy; } /*! * \brief Free an executor event * * \param[in,out] Executor event object to free */ void lrmd_free_event(lrmd_event_data_t *event) { if (event == NULL) { return; } // @TODO Why are these const char *? free((void *) event->rsc_id); free((void *) event->op_type); free((void *) event->user_data); free((void *) event->remote_nodename); lrmd__reset_result(event); if (event->params != NULL) { g_hash_table_destroy(event->params); } free(event); } static void lrmd_dispatch_internal(gpointer data, gpointer user_data) { xmlNode *msg = data; lrmd_t *lrmd = user_data; const char *type; const char *proxy_session = pcmk__xe_get(msg, PCMK__XA_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->lrmd_private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return; } else if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return; } event.remote_nodename = native->remote_nodename; type = pcmk__xe_get(msg, PCMK__XA_LRMD_OP); pcmk__xe_get_int(msg, PCMK__XA_LRMD_CALLID, &event.call_id); event.rsc_id = pcmk__xe_get(msg, PCMK__XA_LRMD_RSC_ID); if (pcmk__str_eq(type, LRMD_OP_RSC_REG, pcmk__str_none)) { event.type = lrmd_event_register; } else if (pcmk__str_eq(type, LRMD_OP_RSC_UNREG, pcmk__str_none)) { event.type = lrmd_event_unregister; } else if (pcmk__str_eq(type, LRMD_OP_RSC_EXEC, pcmk__str_none)) { int rc = 0; int exec_time = 0; int queue_time = 0; pcmk__xe_get_int(msg, PCMK__XA_LRMD_TIMEOUT, &event.timeout); pcmk__xe_get_guint(msg, PCMK__XA_LRMD_RSC_INTERVAL, &event.interval_ms); pcmk__xe_get_int(msg, PCMK__XA_LRMD_RSC_START_DELAY, &event.start_delay); pcmk__xe_get_int(msg, PCMK__XA_LRMD_EXEC_RC, &rc); event.rc = (enum ocf_exitcode) rc; pcmk__xe_get_int(msg, PCMK__XA_LRMD_EXEC_OP_STATUS, &event.op_status); pcmk__xe_get_int(msg, PCMK__XA_LRMD_RSC_DELETED, &event.rsc_deleted); pcmk__xe_get_time(msg, PCMK__XA_LRMD_RUN_TIME, &event.t_run); pcmk__xe_get_time(msg, PCMK__XA_LRMD_RCCHANGE_TIME, &event.t_rcchange); pcmk__xe_get_int(msg, PCMK__XA_LRMD_EXEC_TIME, &exec_time); CRM_LOG_ASSERT(exec_time >= 0); event.exec_time = QB_MAX(0, exec_time); pcmk__xe_get_int(msg, PCMK__XA_LRMD_QUEUE_TIME, &queue_time); CRM_LOG_ASSERT(queue_time >= 0); event.queue_time = QB_MAX(0, queue_time); event.op_type = pcmk__xe_get(msg, PCMK__XA_LRMD_RSC_ACTION); event.user_data = pcmk__xe_get(msg, PCMK__XA_LRMD_RSC_USERDATA_STR); event.type = lrmd_event_exec_complete; /* output and exit_reason may be freed by a callback */ event.output = pcmk__xe_get_copy(msg, PCMK__XA_LRMD_RSC_OUTPUT); lrmd__set_result(&event, event.rc, event.op_status, pcmk__xe_get(msg, PCMK__XA_LRMD_RSC_EXIT_REASON)); event.params = xml2list(msg); } else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) { event.type = lrmd_event_new_client; } else if (pcmk__str_eq(type, LRMD_OP_POKE, pcmk__str_none)) { event.type = lrmd_event_poke; } else { return; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } lrmd__reset_result(&event); } // \return Always 0, to indicate that IPC mainloop source should be kept static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; if (native->callback != NULL) { xmlNode *msg = pcmk__xml_parse(buffer); lrmd_dispatch_internal(msg, lrmd); pcmk__xml_free(msg); } return 0; } static void lrmd_free_xml(gpointer userdata) { pcmk__xml_free((xmlNode *) userdata); } static bool remote_executor_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; return (native->remote->tls_session != NULL); } static void handle_remote_msg(xmlNode *xml, lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; const char *msg_type = NULL; msg_type = pcmk__xe_get(xml, PCMK__XA_LRMD_REMOTE_MSG_TYPE); if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { lrmd_dispatch_internal(xml, lrmd); } else if (pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { const char *op = pcmk__xe_get(xml, PCMK__XA_LRMD_OP); if (native->expected_late_replies > 0) { native->expected_late_replies--; /* The register op message we get as a response to lrmd_handshake_async * is a reply, so we have to handle that here. */ if (pcmk__str_eq(op, "register", pcmk__str_casei)) { int rc = process_lrmd_handshake_reply(xml, native); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); } } else { int reply_id = 0; pcmk__xe_get_int(xml, PCMK__XA_LRMD_CALLID, &reply_id); /* if this happens, we want to know about it */ crm_err("Got outdated Pacemaker Remote reply %d", reply_id); } } } /*! * \internal * \brief Notify trigger handler * * \param[in,out] userdata API connection * * \return Always return G_SOURCE_CONTINUE to leave this trigger handler in the * mainloop */ static int process_pending_notifies(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; if (native->pending_notify == NULL) { return G_SOURCE_CONTINUE; } crm_trace("Processing pending notifies"); g_list_foreach(native->pending_notify, lrmd_dispatch_internal, lrmd); g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; return G_SOURCE_CONTINUE; } /*! * \internal * \brief TLS dispatch function for file descriptor sources * * \param[in,out] userdata API connection * * \return -1 on error to remove the source from the mainloop, or 0 otherwise * to leave it in the mainloop */ static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *xml = NULL; int rc = pcmk_rc_ok; if (!remote_executor_connected(lrmd)) { crm_trace("TLS dispatch triggered after disconnect"); return -1; } crm_trace("TLS dispatch triggered"); rc = pcmk__remote_ready(native->remote, 0); if (rc == pcmk_rc_ok) { rc = pcmk__read_remote_message(native->remote, -1); } if (rc != pcmk_rc_ok && rc != ETIME) { crm_info("Lost %s executor connection while reading data", (native->remote_nodename? native->remote_nodename : "local")); lrmd_tls_disconnect(lrmd); return -1; } /* If rc is ETIME, there was nothing to read but we may already have a * full message in the buffer */ xml = pcmk__remote_message_xml(native->remote); if (xml == NULL) { return 0; } handle_remote_msg(xml, lrmd); pcmk__xml_free(xml); return 0; } /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_ready(native->ipc); case pcmk__client_tls: if (native->pending_notify) { return 1; } else { int rc = pcmk__remote_ready(native->remote, 0); switch (rc) { case pcmk_rc_ok: return 1; case ETIME: return 0; default: return pcmk_rc2legacy(rc); } } default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return -EPROTONOSUPPORT; } } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; pcmk__assert(lrmd != NULL); private = lrmd->lrmd_private; switch (private->type) { case pcmk__client_ipc: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; case pcmk__client_tls: lrmd_tls_dispatch(lrmd); break; default: crm_err("Unsupported executor connection type (bug?): %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode *data, int timeout, enum lrmd_call_options options) { xmlNode *op_msg = NULL; CRM_CHECK(token != NULL, return NULL); op_msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_COMMAND); crm_xml_add(op_msg, PCMK__XA_T, PCMK__VALUE_LRMD); crm_xml_add(op_msg, PCMK__XA_LRMD_OP, op); - crm_xml_add_int(op_msg, PCMK__XA_LRMD_TIMEOUT, timeout); - crm_xml_add_int(op_msg, PCMK__XA_LRMD_CALLOPT, options); + pcmk__xe_set_int(op_msg, PCMK__XA_LRMD_TIMEOUT, timeout); + pcmk__xe_set_int(op_msg, PCMK__XA_LRMD_CALLOPT, options); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(op_msg, PCMK__XE_LRMD_CALLDATA); pcmk__xml_copy(wrapper, data); } crm_trace("Created executor %s command with call options %.8lx (%d)", op, (long)options, options); return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: crm_info("Disconnected from local executor"); break; case pcmk__client_tls: crm_info("Disconnected from remote executor on %s", native->remote_nodename); break; default: crm_err("Unsupported executor connection type %d (bug?)", native->type); } /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(native->remote->tls_session); native->remote->tls_session = NULL; } if (native->tls) { pcmk__free_tls(native->tls); native->tls = NULL; } if (native->sock >= 0) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } if (native->handshake_trigger != NULL) { mainloop_destroy_trigger(native->handshake_trigger); native->handshake_trigger = NULL; } free(native->remote->buffer); free(native->remote->start_state); native->remote->buffer = NULL; native->remote->start_state = NULL; native->source = 0; native->sock = -1; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } // \return Standard Pacemaker return code int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id, const char *msg_type) { - crm_xml_add_int(msg, PCMK__XA_LRMD_REMOTE_MSG_ID, id); + pcmk__xe_set_int(msg, PCMK__XA_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, PCMK__XA_LRMD_REMOTE_MSG_TYPE, msg_type); return pcmk__remote_send_xml(session, msg); } // \return Standard Pacemaker return code static int read_remote_reply(lrmd_t *lrmd, int total_timeout, int expected_reply_id, xmlNode **reply) { lrmd_private_t *native = lrmd->lrmd_private; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; int rc = pcmk_rc_ok; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) { total_timeout = MAX_TLS_RECV_WAIT; } for (*reply = NULL; *reply == NULL; ) { *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { return ETIME; } rc = pcmk__read_remote_message(native->remote, remaining_timeout); if (rc != pcmk_rc_ok) { return rc; } *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { return ENOMSG; } } pcmk__xe_get_int(*reply, PCMK__XA_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = pcmk__xe_get(*reply, PCMK__XA_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); pcmk__xml_free(*reply); *reply = NULL; } else if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, *reply); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } *reply = NULL; } else if (!pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); pcmk__xml_free(*reply); *reply = NULL; } else if (reply_id != expected_reply_id) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); } pcmk__xml_free(*reply); *reply = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return rc; } // \return Standard Pacemaker return code static int send_remote_message(lrmd_t *lrmd, xmlNode *msg) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd__remote_send_xml(native->remote, msg, global_remote_msg_id, "request"); if (rc != pcmk_rc_ok) { crm_err("Disconnecting because TLS message could not be sent to " "Pacemaker Remote: %s", pcmk_rc_str(rc)); lrmd_tls_disconnect(lrmd); } return rc; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; xmlNode *xml = NULL; if (!remote_executor_connected(lrmd)) { return -ENOTCONN; } rc = send_remote_message(lrmd, msg); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } rc = read_remote_reply(lrmd, timeout, global_remote_msg_id, &xml); if (rc != pcmk_rc_ok) { crm_err("Disconnecting remote after request %d reply not received: %s " QB_XS " rc=%d timeout=%dms", global_remote_msg_id, pcmk_rc_str(rc), rc, timeout); lrmd_tls_disconnect(lrmd); } if (reply) { *reply = xml; } else { pcmk__xml_free(xml); } return pcmk_rc2legacy(rc); } static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; case pcmk__client_tls: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL); break; case pcmk__client_tls: rc = send_remote_message(lrmd, msg); if (rc == pcmk_rc_ok) { /* we don't want to wait around for the reply, but * since the request/reply protocol needs to behave the same * as libqb, a reply will eventually come later anyway. */ native->expected_late_replies++; } rc = pcmk_rc2legacy(rc); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_connected(native->ipc); case pcmk__client_tls: return remote_executor_connected(lrmd); default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return 0; } } /*! * \internal * \brief Send a prepared API command to the executor * * \param[in,out] lrmd Existing connection to the executor * \param[in] op Name of API command to send * \param[in] data Command data XML to add to the sent command * \param[out] output_data If expecting a reply, it will be stored here * \param[in] timeout Timeout in milliseconds (if 0, defaults to * a sensible value per the type of connection, * standard vs. pacemaker remote); * also propagated to the command XML * \param[in] call_options Call options to pass to server when sending * \param[in] expect_reply If true, wait for a reply from the server; * must be true for IPC (as opposed to TLS) clients * * \return pcmk_ok on success, -errno on error */ static int lrmd_send_command(lrmd_t *lrmd, const char *op, xmlNode *data, xmlNode **output_data, int timeout, enum lrmd_call_options options, bool expect_reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_LOG_ASSERT(native->token != NULL); crm_trace("Sending %s op to executor", op); op_msg = lrmd_create_op(native->token, op, data, timeout, options); if (op_msg == NULL) { return -EINVAL; } if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); goto done; } else if (op_reply == NULL) { rc = -ENOMSG; goto done; } rc = pcmk_ok; crm_trace("%s op reply received", op); if (pcmk__xe_get_int(op_reply, PCMK__XA_LRMD_RC, &rc) != pcmk_rc_ok) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Executor disconnected"); } pcmk__xml_free(op_msg); pcmk__xml_free(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); pcmk__xml_free(data); return rc < 0 ? rc : pcmk_ok; } // \return Standard Pacemaker return code int lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash) { int rc = pcmk_rc_ok; const char *value; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = pcmk__xe_create(NULL, PCMK__XA_LRMD_OP); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); value = g_hash_table_lookup(hash, PCMK_OPT_STONITH_WATCHDOG_TIMEOUT); if ((value) && (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) { crm_xml_add(data, PCMK__XA_LRMD_WATCHDOG, value); } rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); pcmk__xml_free(data); return (rc < 0)? pcmk_legacy2rc(rc) : pcmk_rc_ok; } static xmlNode * lrmd_handshake_hello_msg(const char *name, bool is_proxy) { xmlNode *hello = pcmk__xe_create(NULL, PCMK__XE_LRMD_COMMAND); crm_xml_add(hello, PCMK__XA_T, PCMK__VALUE_LRMD); crm_xml_add(hello, PCMK__XA_LRMD_OP, CRM_OP_REGISTER); crm_xml_add(hello, PCMK__XA_LRMD_CLIENTNAME, name); crm_xml_add(hello, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (is_proxy) { pcmk__xe_set_bool_attr(hello, PCMK__XA_LRMD_IS_IPC_PROVIDER, true); } return hello; } static int process_lrmd_handshake_reply(xmlNode *reply, lrmd_private_t *native) { int rc = pcmk_rc_ok; const char *version = pcmk__xe_get(reply, PCMK__XA_LRMD_PROTOCOL_VERSION); const char *msg_type = pcmk__xe_get(reply, PCMK__XA_LRMD_OP); const char *tmp_ticket = pcmk__xe_get(reply, PCMK__XA_LRMD_CLIENTID); const char *start_state = pcmk__xe_get(reply, PCMK__XA_NODE_START_STATE); pcmk__xe_get_int(reply, PCMK__XA_LRMD_RC, &rc); rc = pcmk_legacy2rc(rc); /* The remote executor may add its uptime to the XML reply, which is useful * in handling transient attributes when the connection to the remote node * unexpectedly drops. If no parameter is given, just default to -1. */ native->remote->uptime = -1; pcmk__xe_get_time(reply, PCMK__XA_UPTIME, &native->remote->uptime); if (start_state) { native->remote->start_state = strdup(start_state); } if (rc == EPROTO) { crm_err("Executor protocol version mismatch between client (%s) and server (%s)", LRMD_PROTOCOL_VERSION, version); crm_log_xml_err(reply, "Protocol Error"); } else if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */ rc = pcmk_rc_ok; } return rc; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *reply = NULL; xmlNode *hello = lrmd_handshake_hello_msg(name, native->proxy_callback != NULL); rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the executor API: %d", rc); rc = ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = EPROTO; } else { rc = process_lrmd_handshake_reply(reply, native); } pcmk__xml_free(reply); pcmk__xml_free(hello); if (rc != pcmk_rc_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_handshake_async(lrmd_t * lrmd, const char *name) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *hello = lrmd_handshake_hello_msg(name, native->proxy_callback != NULL); rc = send_remote_message(lrmd, hello); if (rc == pcmk_rc_ok) { native->expected_late_replies++; } else { lrmd_api_disconnect(lrmd); } pcmk__xml_free(hello); return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to executor"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0); if (native->ipc != NULL) { rc = pcmk__connect_generic_ipc(native->ipc); if (rc == pcmk_rc_ok) { rc = pcmk__ipc_fd(native->ipc, fd); } if (rc != pcmk_rc_ok) { crm_err("Connection to executor failed: %s", pcmk_rc_str(rc)); rc = -ENOTCONN; } } } else { native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the executor API"); rc = -ENOTCONN; } return rc; } static void copy_gnutls_datum(gnutls_datum_t *dest, gnutls_datum_t *source) { pcmk__assert((dest != NULL) && (source != NULL) && (source->data != NULL)); dest->data = gnutls_malloc(source->size); pcmk__mem_assert(dest->data); memcpy(dest->data, source->data, source->size); dest->size = source->size; } static void clear_gnutls_datum(gnutls_datum_t *datum) { gnutls_free(datum->data); datum->data = NULL; datum->size = 0; } #define KEY_READ_LEN 256 // Chunk size for reading key from file // \return Standard Pacemaker return code static int read_gnutls_key(const char *location, gnutls_datum_t *key) { FILE *stream = NULL; size_t buf_len = KEY_READ_LEN; if ((location == NULL) || (key == NULL)) { return EINVAL; } stream = fopen(location, "r"); if (stream == NULL) { return errno; } key->data = gnutls_malloc(buf_len); key->size = 0; while (!feof(stream)) { int next = fgetc(stream); if (next == EOF) { if (!feof(stream)) { crm_warn("Pacemaker Remote key read was partially successful " "(copy in memory may be corrupted)"); } break; } if (key->size == buf_len) { buf_len = key->size + KEY_READ_LEN; key->data = gnutls_realloc(key->data, buf_len); pcmk__assert(key->data); } key->data[key->size++] = (unsigned char) next; } fclose(stream); if (key->size == 0) { clear_gnutls_datum(key); return ENOKEY; } return pcmk_rc_ok; } // Cache the most recently used Pacemaker Remote authentication key struct key_cache_s { time_t updated; // When cached key was read (valid for 1 minute) const char *location; // Where cached key was read from gnutls_datum_t key; // Cached key }; static bool key_is_cached(struct key_cache_s *key_cache) { return key_cache->updated != 0; } static bool key_cache_expired(struct key_cache_s *key_cache) { return (time(NULL) - key_cache->updated) >= 60; } static void clear_key_cache(struct key_cache_s *key_cache) { clear_gnutls_datum(&(key_cache->key)); if ((key_cache->updated != 0) || (key_cache->location != NULL)) { key_cache->updated = 0; key_cache->location = NULL; crm_debug("Cleared Pacemaker Remote key cache"); } } static void get_cached_key(struct key_cache_s *key_cache, gnutls_datum_t *key) { copy_gnutls_datum(key, &(key_cache->key)); crm_debug("Using cached Pacemaker Remote key from %s", pcmk__s(key_cache->location, "unknown location")); } static void cache_key(struct key_cache_s *key_cache, gnutls_datum_t *key, const char *location) { key_cache->updated = time(NULL); key_cache->location = location; copy_gnutls_datum(&(key_cache->key), key); crm_debug("Using (and cacheing) Pacemaker Remote key from %s", pcmk__s(location, "unknown location")); } /*! * \internal * \brief Get Pacemaker Remote authentication key from file or cache * * \param[in] location Path to key file to try (this memory must * persist across all calls of this function) * \param[out] key Key from location or cache * * \return Standard Pacemaker return code */ static int get_remote_key(const char *location, gnutls_datum_t *key) { static struct key_cache_s key_cache = { 0, }; int rc = pcmk_rc_ok; if ((location == NULL) || (key == NULL)) { return EINVAL; } if (key_is_cached(&key_cache)) { if (key_cache_expired(&key_cache)) { clear_key_cache(&key_cache); } else { get_cached_key(&key_cache, key); return pcmk_rc_ok; } } rc = read_gnutls_key(location, key); if (rc != pcmk_rc_ok) { return rc; } cache_key(&key_cache, key, location); return pcmk_rc_ok; } /*! * \internal * \brief Initialize the Pacemaker Remote authentication key * * Try loading the Pacemaker Remote authentication key from cache if available, * otherwise from these locations, in order of preference: * * - The value of the PCMK_authkey_location environment variable, if set * - The Pacemaker default key file location * * \param[out] key Where to store key * * \return Standard Pacemaker return code */ int lrmd__init_remote_key(gnutls_datum_t *key) { static const char *env_location = NULL; static bool need_env = true; int rc = pcmk_rc_ok; if (need_env) { env_location = pcmk__env_option(PCMK__ENV_AUTHKEY_LOCATION); need_env = false; } // Try location in environment variable, if set if (env_location != NULL) { rc = get_remote_key(env_location, key); if (rc == pcmk_rc_ok) { return pcmk_rc_ok; } crm_warn("Could not read Pacemaker Remote key from %s: %s", env_location, pcmk_rc_str(rc)); return ENOKEY; } // Try default location, if environment wasn't explicitly set to it rc = get_remote_key(DEFAULT_REMOTE_KEY_LOCATION, key); if (rc == pcmk_rc_ok) { return pcmk_rc_ok; } crm_warn("Could not read Pacemaker Remote key from default location %s: %s", DEFAULT_REMOTE_KEY_LOCATION, pcmk_rc_str(rc)); return ENOKEY; } static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->lrmd_private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } static void tls_handshake_failed(lrmd_t *lrmd, int tls_rc, int rc) { lrmd_private_t *native = lrmd->lrmd_private; crm_warn("Disconnecting after TLS handshake with " "Pacemaker Remote server %s:%d failed: %s", native->server, native->port, (rc == EPROTO)? gnutls_strerror(tls_rc) : pcmk_rc_str(rc)); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); gnutls_deinit(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); } static void tls_handshake_succeeded(lrmd_t *lrmd) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; /* Now that the handshake is done, see if any client TLS certificate is * close to its expiration date and log if so. If a TLS certificate is not * in use, this function will just return so we don't need to check for the * session type here. */ pcmk__tls_check_cert_expiration(native->remote->tls_session); crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded", native->server, native->port); rc = add_tls_to_mainloop(lrmd, true); /* If add_tls_to_mainloop failed, report that right now. Otherwise, we have * to wait until we read the async reply to report anything. */ if (rc != pcmk_rc_ok) { report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); } } /*! * \internal * \brief Perform a TLS client handshake with a Pacemaker Remote server * * \param[in] lrmd Newly established Pacemaker Remote executor connection * * \return Standard Pacemaker return code */ static int tls_client_handshake(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; int tls_rc = GNUTLS_E_SUCCESS; int rc = pcmk__tls_client_handshake(native->remote, TLS_HANDSHAKE_TIMEOUT, &tls_rc); if (rc != pcmk_rc_ok) { tls_handshake_failed(lrmd, tls_rc, rc); } return rc; } /*! * \internal * \brief Add trigger and file descriptor mainloop sources for TLS * * \param[in,out] lrmd API connection with established TLS session * \param[in] do_api_handshake Whether to perform executor handshake * * \return Standard Pacemaker return code */ static int add_tls_to_mainloop(lrmd_t *lrmd, bool do_api_handshake) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_rc_ok; char *name = crm_strdup_printf("pacemaker-remote-%s:%d", native->server, native->port); struct mainloop_fd_callbacks tls_fd_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, process_pending_notifies, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &tls_fd_callbacks); /* Async connections lose the client name provided by the API caller, so we * have to use our generated name here to perform the executor handshake. * * @TODO Keep track of the caller-provided name. Perhaps we should be using * that name in this function instead of generating one anyway. */ if (do_api_handshake) { rc = lrmd_handshake_async(lrmd, name); } free(name); return rc; } struct handshake_data_s { lrmd_t *lrmd; time_t start_time; int timeout_sec; }; static gboolean try_handshake_cb(gpointer user_data) { struct handshake_data_s *hs = user_data; lrmd_t *lrmd = hs->lrmd; lrmd_private_t *native = lrmd->lrmd_private; pcmk__remote_t *remote = native->remote; int rc = pcmk_rc_ok; int tls_rc = GNUTLS_E_SUCCESS; if (time(NULL) >= hs->start_time + hs->timeout_sec) { rc = ETIME; tls_handshake_failed(lrmd, GNUTLS_E_TIMEDOUT, rc); free(hs); return 0; } rc = pcmk__tls_client_try_handshake(remote, &tls_rc); if (rc == pcmk_rc_ok) { tls_handshake_succeeded(lrmd); free(hs); return 0; } else if (rc == EAGAIN) { mainloop_set_trigger(native->handshake_trigger); return 1; } else { rc = EKEYREJECTED; tls_handshake_failed(lrmd, tls_rc, rc); free(hs); return 0; } } static void lrmd_tcp_connect_cb(void *userdata, int rc, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; int tls_rc = GNUTLS_E_SUCCESS; bool use_cert = pcmk__x509_enabled(); native->async_timer = 0; if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " QB_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } /* The TCP connection was successful, so establish the TLS connection. */ native->sock = sock; if (native->tls == NULL) { rc = pcmk__init_tls(&native->tls, false, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_PSK); if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } } if (!use_cert) { gnutls_datum_t psk_key = { NULL, 0 }; rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " QB_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } pcmk__tls_add_psk_key(native->tls, &psk_key); gnutls_free(psk_key.data); } native->remote->tls_session = pcmk__new_tls_session(native->tls, sock); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EPROTO); return; } /* If the TLS handshake immediately succeeds or fails, we can handle that * now without having to deal with mainloops and retries. Otherwise, add a * trigger to keep trying until we get a result (or it times out). */ rc = pcmk__tls_client_try_handshake(native->remote, &tls_rc); if (rc == EAGAIN) { struct handshake_data_s *hs = NULL; if (native->handshake_trigger != NULL) { return; } hs = pcmk__assert_alloc(1, sizeof(struct handshake_data_s)); hs->lrmd = lrmd; hs->start_time = time(NULL); hs->timeout_sec = TLS_HANDSHAKE_TIMEOUT; native->handshake_trigger = mainloop_add_trigger(G_PRIORITY_LOW, try_handshake_cb, hs); mainloop_set_trigger(native->handshake_trigger); } else if (rc == pcmk_rc_ok) { tls_handshake_succeeded(lrmd); } else { tls_handshake_failed(lrmd, tls_rc, rc); } } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc = pcmk_rc_ok; int timer_id = 0; lrmd_private_t *native = lrmd->lrmd_private; native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, timeout, &timer_id, &(native->sock), lrmd, lrmd_tcp_connect_cb); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " QB_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); return rc; } native->async_timer = timer_id; return rc; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_rc_ok; bool use_cert = pcmk__x509_enabled(); lrmd_private_t *native = lrmd->lrmd_private; native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, 0, NULL, &(native->sock), NULL, NULL); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " QB_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); return ENOTCONN; } if (native->tls == NULL) { rc = pcmk__init_tls(&native->tls, false, use_cert ? GNUTLS_CRD_CERTIFICATE : GNUTLS_CRD_PSK); if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); return rc; } } if (!use_cert) { gnutls_datum_t psk_key = { NULL, 0 }; rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); return rc; } pcmk__tls_add_psk_key(native->tls, &psk_key); gnutls_free(psk_key.data); } native->remote->tls_session = pcmk__new_tls_session(native->tls, native->sock); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); return EPROTO; } if (tls_client_handshake(lrmd) != pcmk_rc_ok) { return EKEYREJECTED; } crm_info("Client TLS connection established with Pacemaker Remote server %s:%d", native->server, native->port); if (fd) { *fd = native->sock; } else { rc = add_tls_to_mainloop(lrmd, false); } return rc; } static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = lrmd_ipc_connect(lrmd, fd); break; case pcmk__client_tls: rc = lrmd_tls_connect(lrmd, fd); rc = pcmk_rc2legacy(rc); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); rc = pcmk_rc2legacy(rc); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; CRM_CHECK(native && native->callback, return -EINVAL); switch (native->type) { case pcmk__client_ipc: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; case pcmk__client_tls: rc = lrmd_tls_connect_async(lrmd, timeout); rc = pcmk_rc2legacy(rc); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote->tls_session) { gnutls_bye(native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(native->remote->tls_session); native->remote->tls_session = NULL; } if (native->async_timer) { g_source_remove(native->async_timer); native->async_timer = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock >= 0) { close(native->sock); native->sock = -1; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_ok; switch (native->type) { case pcmk__client_ipc: crm_debug("Disconnecting from local executor"); lrmd_ipc_disconnect(lrmd); break; case pcmk__client_tls: crm_debug("Disconnecting from remote executor on %s", native->remote_nodename); lrmd_tls_disconnect(lrmd); break; default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } free(native->token); native->token = NULL; free(native->peer_version); native->peer_version = NULL; return rc; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && (provider == NULL)) { return -EINVAL; } data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); crm_xml_add(data, PCMK__XA_LRMD_CLASS, class); crm_xml_add(data, PCMK__XA_LRMD_PROVIDER, provider); crm_xml_add(data, PCMK__XA_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, true); pcmk__xml_free(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, true); pcmk__xml_free(data); return rc; } lrmd_rsc_info_t * lrmd_new_rsc_info(const char *rsc_id, const char *standard, const char *provider, const char *type) { lrmd_rsc_info_t *rsc_info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t)); rsc_info->id = pcmk__str_copy(rsc_id); rsc_info->standard = pcmk__str_copy(standard); rsc_info->provider = pcmk__str_copy(provider); rsc_info->type = pcmk__str_copy(type); return rsc_info; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { return lrmd_new_rsc_info(rsc_info->id, rsc_info->standard, rsc_info->provider, rsc_info->type); } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->standard); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, true); pcmk__xml_free(data); if (!output) { return NULL; } class = pcmk__xe_get(output, PCMK__XA_LRMD_CLASS); provider = pcmk__xe_get(output, PCMK__XA_LRMD_PROVIDER); type = pcmk__xe_get(output, PCMK__XA_LRMD_TYPE); if (!class || !type) { pcmk__xml_free(output); return NULL; } else if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && !provider) { pcmk__xml_free(output); return NULL; } rsc_info = lrmd_new_rsc_info(rsc_id, class, provider, type); pcmk__xml_free(output); return rsc_info; } void lrmd_free_op_info(lrmd_op_info_t *op_info) { if (op_info) { free(op_info->rsc_id); free(op_info->action); free(op_info->interval_ms_s); free(op_info->timeout_ms_s); free(op_info); } } static int lrmd_api_get_recurring_ops(lrmd_t *lrmd, const char *rsc_id, int timeout_ms, enum lrmd_call_options options, GList **output) { xmlNode *data = NULL; xmlNode *output_xml = NULL; int rc = pcmk_ok; if (output == NULL) { return -EINVAL; } *output = NULL; // Send request if (rsc_id) { data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); } rc = lrmd_send_command(lrmd, LRMD_OP_GET_RECURRING, data, &output_xml, timeout_ms, options, true); if (data) { pcmk__xml_free(data); } // Process reply if ((rc != pcmk_ok) || (output_xml == NULL)) { return rc; } for (const xmlNode *rsc_xml = pcmk__xe_first_child(output_xml, PCMK__XE_LRMD_RSC, NULL, NULL); (rsc_xml != NULL) && (rc == pcmk_ok); rsc_xml = pcmk__xe_next(rsc_xml, PCMK__XE_LRMD_RSC)) { rsc_id = pcmk__xe_get(rsc_xml, PCMK__XA_LRMD_RSC_ID); if (rsc_id == NULL) { crm_err("Could not parse recurring operation information from executor"); continue; } for (const xmlNode *op_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC_OP, NULL, NULL); op_xml != NULL; op_xml = pcmk__xe_next(op_xml, PCMK__XE_LRMD_RSC_OP)) { lrmd_op_info_t *op_info = calloc(1, sizeof(lrmd_op_info_t)); if (op_info == NULL) { rc = -ENOMEM; break; } op_info->rsc_id = strdup(rsc_id); op_info->action = pcmk__xe_get_copy(op_xml, PCMK__XA_LRMD_RSC_ACTION); op_info->interval_ms_s = pcmk__xe_get_copy(op_xml, PCMK__XA_LRMD_RSC_INTERVAL); op_info->timeout_ms_s = pcmk__xe_get_copy(op_xml, PCMK__XA_LRMD_TIMEOUT); *output = g_list_prepend(*output, op_info); } } pcmk__xml_free(output_xml); return rc; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->lrmd_private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->lrmd_private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->lrmd_private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, PCMK__XA_LRMD_OP, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not get fence agent meta-data: API memory allocation failed"); return -ENOMEM; } rc = stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); if ((rc == pcmk_ok) && (*output == NULL)) { rc = -EIO; } stonith_api->cmds->free(stonith_api); return rc; } static int lrmd_api_get_metadata(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options) { return lrmd->cmds->get_metadata_params(lrmd, standard, provider, type, output, options, NULL); } static int lrmd_api_get_metadata_params(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options, lrmd_key_value_t *params) { svc_action_t *action = NULL; GHashTable *params_table = NULL; if (!standard || !type) { lrmd_key_value_freeall(params); return -EINVAL; } if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { lrmd_key_value_freeall(params); return stonith_get_metadata(provider, type, output); } params_table = pcmk__strkey_table(free, free); for (const lrmd_key_value_t *param = params; param; param = param->next) { pcmk__insert_dup(params_table, param->key, param->value); } action = services__create_resource_action(type, standard, provider, type, PCMK_ACTION_META_DATA, 0, PCMK_DEFAULT_ACTION_TIMEOUT_MS, params_table, 0); lrmd_key_value_freeall(params); if (action == NULL) { return -ENOMEM; } if (action->rc != PCMK_OCF_UNKNOWN) { services_action_free(action); return -EINVAL; } if (!services_action_sync(action)) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to receive meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_exec(lrmd_t *lrmd, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); xmlNode *args = pcmk__xe_create(data, PCMK__XE_ATTRIBUTES); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); crm_xml_add(data, PCMK__XA_LRMD_RSC_ACTION, action); crm_xml_add(data, PCMK__XA_LRMD_RSC_USERDATA_STR, userdata); pcmk__xe_set_guint(data, PCMK__XA_LRMD_RSC_INTERVAL, interval_ms); - crm_xml_add_int(data, PCMK__XA_LRMD_TIMEOUT, timeout); - crm_xml_add_int(data, PCMK__XA_LRMD_RSC_START_DELAY, start_delay); + pcmk__xe_set_int(data, PCMK__XA_LRMD_TIMEOUT, timeout); + pcmk__xe_set_int(data, PCMK__XA_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, true); pcmk__xml_free(data); lrmd_key_value_freeall(params); return rc; } /* timeout is in ms */ static int lrmd_api_exec_alert(lrmd_t *lrmd, const char *alert_id, const char *alert_path, int timeout, lrmd_key_value_t *params) { int rc = pcmk_ok; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_ALERT); xmlNode *args = pcmk__xe_create(data, PCMK__XE_ATTRIBUTES); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_ALERT_ID, alert_id); crm_xml_add(data, PCMK__XA_LRMD_ALERT_PATH, alert_path); - crm_xml_add_int(data, PCMK__XA_LRMD_TIMEOUT, timeout); + pcmk__xe_set_int(data, PCMK__XA_LRMD_TIMEOUT, timeout); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_ALERT_EXEC, data, NULL, timeout, lrmd_opt_notify_orig_only, true); pcmk__xml_free(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t *lrmd, const char *rsc_id, const char *action, guint interval_ms) { int rc = pcmk_ok; xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_LRMD_RSC); crm_xml_add(data, PCMK__XA_LRMD_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_LRMD_RSC_ACTION, action); crm_xml_add(data, PCMK__XA_LRMD_RSC_ID, rsc_id); pcmk__xe_set_guint(data, PCMK__XA_LRMD_RSC_INTERVAL, interval_ms); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, true); pcmk__xml_free(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith_api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if (stonith_api == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return -ENOMEM; } stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; int stonith_count = 0; // Initially, whether to include stonith devices if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stonith_count = 1; } else { GList *gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { stonith_count = 1; } } if (stonith_count) { // Now, if stonith devices are included, how many there are stonith_count = list_stonith_agents(resources); if (stonith_count > 0) { rc += stonith_count; } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static bool does_provider_have_agent(const char *agent, const char *provider, const char *class) { bool found = false; GList *agents = NULL; GList *gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (pcmk__str_eq(agent, gIter2->data, pcmk__str_casei)) { found = true; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GList *gIter = NULL; ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, PCMK_RESOURCE_CLASS_OCF)) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GList *gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH); rc++; } g_list_free_full(standards, free); return rc; } /*! * \internal * \brief Create an executor API object * * \param[out] api Will be set to newly created API object (it is the * caller's responsibility to free this value with * lrmd_api_delete() if this function succeeds) * \param[in] nodename If the object will be used for a remote connection, * the node name to use in cluster for remote executor * \param[in] server If the object will be used for a remote connection, * the resolvable host name to connect to * \param[in] port If the object will be used for a remote connection, * port number on \p server to connect to * * \return Standard Pacemaker return code * \note If the caller leaves one of \p nodename or \p server NULL, the other's * value will be used for both. If the caller leaves both NULL, an API * object will be created for a local executor connection. */ int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port) { lrmd_private_t *pvt = NULL; if (api == NULL) { return EINVAL; } *api = NULL; // Allocate all memory needed *api = calloc(1, sizeof(lrmd_t)); if (*api == NULL) { return ENOMEM; } pvt = calloc(1, sizeof(lrmd_private_t)); if (pvt == NULL) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } (*api)->lrmd_private = pvt; // @TODO Do we need to do this for local connections? pvt->remote = calloc(1, sizeof(pcmk__remote_t)); (*api)->cmds = calloc(1, sizeof(lrmd_api_operations_t)); if ((pvt->remote == NULL) || ((*api)->cmds == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } // Set methods (*api)->cmds->connect = lrmd_api_connect; (*api)->cmds->connect_async = lrmd_api_connect_async; (*api)->cmds->is_connected = lrmd_api_is_connected; (*api)->cmds->poke_connection = lrmd_api_poke_connection; (*api)->cmds->disconnect = lrmd_api_disconnect; (*api)->cmds->register_rsc = lrmd_api_register_rsc; (*api)->cmds->unregister_rsc = lrmd_api_unregister_rsc; (*api)->cmds->get_rsc_info = lrmd_api_get_rsc_info; (*api)->cmds->get_recurring_ops = lrmd_api_get_recurring_ops; (*api)->cmds->set_callback = lrmd_api_set_callback; (*api)->cmds->get_metadata = lrmd_api_get_metadata; (*api)->cmds->exec = lrmd_api_exec; (*api)->cmds->cancel = lrmd_api_cancel; (*api)->cmds->list_agents = lrmd_api_list_agents; (*api)->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; (*api)->cmds->list_standards = lrmd_api_list_standards; (*api)->cmds->exec_alert = lrmd_api_exec_alert; (*api)->cmds->get_metadata_params = lrmd_api_get_metadata_params; if ((nodename == NULL) && (server == NULL)) { pvt->type = pcmk__client_ipc; } else { if (nodename == NULL) { nodename = server; } else if (server == NULL) { server = nodename; } pvt->type = pcmk__client_tls; pvt->remote_nodename = strdup(nodename); pvt->server = strdup(server); if ((pvt->remote_nodename == NULL) || (pvt->server == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } pvt->port = port; if (pvt->port == 0) { pvt->port = crm_default_remote_port(); } } return pcmk_rc_ok; } lrmd_t * lrmd_api_new(void) { lrmd_t *api = NULL; pcmk__assert(lrmd__new(&api, NULL, NULL, 0) == pcmk_rc_ok); return api; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { lrmd_t *api = NULL; pcmk__assert(lrmd__new(&api, nodename, server, port) == pcmk_rc_ok); return api; } void lrmd_api_delete(lrmd_t * lrmd) { if (lrmd == NULL) { return; } if (lrmd->cmds != NULL) { // Never NULL, but make static analysis happy if (lrmd->cmds->disconnect != NULL) { // Also never really NULL lrmd->cmds->disconnect(lrmd); // No-op if already disconnected } free(lrmd->cmds); } if (lrmd->lrmd_private != NULL) { lrmd_private_t *native = lrmd->lrmd_private; free(native->server); free(native->remote_nodename); free(native->remote); free(native->token); free(native->peer_version); free(lrmd->lrmd_private); } free(lrmd); } struct metadata_cb { void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data); void *user_data; }; /*! * \internal * \brief Process asynchronous metadata completion * * \param[in,out] action Metadata action that completed */ static void metadata_complete(svc_action_t *action) { struct metadata_cb *metadata_cb = (struct metadata_cb *) action->cb_data; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; services__copy_result(action, &result); pcmk__set_result_output(&result, action->stdout_data, action->stderr_data); metadata_cb->callback(0, &result, metadata_cb->user_data); result.action_stdout = NULL; // Prevent free, because action owns it result.action_stderr = NULL; // Prevent free, because action owns it pcmk__reset_result(&result); free(metadata_cb); } /*! * \internal * \brief Retrieve agent metadata asynchronously * * \param[in] rsc Resource agent specification * \param[in] callback Function to call with result (this will always be * called, whether by this function directly or later * via the main loop, and on success the metadata will * be in its result argument's action_stdout) * \param[in,out] user_data User data to pass to callback * * \return Standard Pacemaker return code * \note This function is not a lrmd_api_operations_t method because it does not * need an lrmd_t object and does not go through the executor, but * executes the agent directly. */ int lrmd__metadata_async(const lrmd_rsc_info_t *rsc, void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data), void *user_data) { svc_action_t *action = NULL; struct metadata_cb *metadata_cb = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(callback != NULL, return EINVAL); if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) { pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL, "Invalid resource specification"); callback(0, &result, user_data); pcmk__reset_result(&result); return EINVAL; } if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) { return stonith__metadata_async(rsc->type, pcmk__timeout_ms2s(PCMK_DEFAULT_ACTION_TIMEOUT_MS), callback, user_data); } action = services__create_resource_action(pcmk__s(rsc->id, rsc->type), rsc->standard, rsc->provider, rsc->type, PCMK_ACTION_META_DATA, 0, PCMK_DEFAULT_ACTION_TIMEOUT_MS, NULL, 0); if (action == NULL) { pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Out of memory"); callback(0, &result, user_data); pcmk__reset_result(&result); return ENOMEM; } if (action->rc != PCMK_OCF_UNKNOWN) { services__copy_result(action, &result); callback(0, &result, user_data); pcmk__reset_result(&result); services_action_free(action); return EINVAL; } action->cb_data = calloc(1, sizeof(struct metadata_cb)); if (action->cb_data == NULL) { services_action_free(action); pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Out of memory"); callback(0, &result, user_data); pcmk__reset_result(&result); return ENOMEM; } metadata_cb = (struct metadata_cb *) action->cb_data; metadata_cb->callback = callback; metadata_cb->user_data = user_data; if (!services_action_async(action, metadata_complete)) { services_action_free(action); return pcmk_rc_error; // @TODO Derive from action->rc and ->status } // The services library has taken responsibility for action return pcmk_rc_ok; } /*! * \internal * \brief Set the result of an executor event * * \param[in,out] event Executor event to set * \param[in] rc OCF exit status of event * \param[in] op_status Executor status of event * \param[in] exit_reason Human-friendly description of event */ void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, int op_status, const char *exit_reason) { if (event == NULL) { return; } event->rc = rc; event->op_status = op_status; // lrmd_event_data_t has (const char *) members that lrmd_free_event() frees pcmk__str_update((char **) &event->exit_reason, exit_reason); } /*! * \internal * \brief Clear an executor event's exit reason, output, and error output * * \param[in,out] event Executor event to reset */ void lrmd__reset_result(lrmd_event_data_t *event) { if (event == NULL) { return; } free((void *) event->exit_reason); event->exit_reason = NULL; free((void *) event->output); event->output = NULL; } /*! * \internal * \brief Get the uptime of a remote resource connection * * When the cluster connects to a remote resource, part of that resource's * handshake includes the uptime of the remote resource's connection. This * uptime is stored in the lrmd_t object. * * \return The connection's uptime, or -1 if unknown */ time_t lrmd__uptime(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote == NULL) { return -1; } else { return native->remote->uptime; } } const char * lrmd__node_start_state(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote == NULL) { return NULL; } else { return native->remote->start_state; } } diff --git a/lib/lrmd/proxy_common.c b/lib/lrmd/proxy_common.c index b73e19a12a..660be40795 100644 --- a/lib/lrmd/proxy_common.c +++ b/lib/lrmd/proxy_common.c @@ -1,334 +1,334 @@ /* * Copyright 2015-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); GHashTable *proxy_table = NULL; static void remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) { /* sending to the remote node that an ipc connection has been destroyed */ xmlNode *msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_SESSION, session_id); lrmd_internal_proxy_send(lrmd, msg); pcmk__xml_free(msg); } /*! * \internal * \brief Acknowledge a remote proxy shutdown request * * \param[in,out] lrmd Connection to proxy */ void remote_proxy_ack_shutdown(lrmd_t *lrmd) { xmlNode *msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_ACK); lrmd_internal_proxy_send(lrmd, msg); pcmk__xml_free(msg); } /*! * \internal * \brief Reject a remote proxy shutdown request * * \param[in,out] lrmd Connection to proxy */ void remote_proxy_nack_shutdown(lrmd_t *lrmd) { xmlNode *msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_NACK); lrmd_internal_proxy_send(lrmd, msg); pcmk__xml_free(msg); } void remote_proxy_relay_event(remote_proxy_t *proxy, xmlNode *msg) { /* sending to the remote node an event msg. */ xmlNode *event = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); xmlNode *wrapper = NULL; crm_xml_add(event, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_EVENT); crm_xml_add(event, PCMK__XA_LRMD_IPC_SESSION, proxy->session_id); wrapper = pcmk__xe_create(event, PCMK__XE_LRMD_IPC_MSG); pcmk__xml_copy(wrapper, msg); crm_log_xml_explicit(event, "EventForProxy"); lrmd_internal_proxy_send(proxy->lrm, event); pcmk__xml_free(event); } void remote_proxy_relay_response(remote_proxy_t *proxy, xmlNode *msg, int msg_id) { /* sending to the remote node a response msg. */ xmlNode *response = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); xmlNode *wrapper = NULL; crm_xml_add(response, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_RESPONSE); crm_xml_add(response, PCMK__XA_LRMD_IPC_SESSION, proxy->session_id); - crm_xml_add_int(response, PCMK__XA_LRMD_IPC_MSG_ID, msg_id); + pcmk__xe_set_int(response, PCMK__XA_LRMD_IPC_MSG_ID, msg_id); wrapper = pcmk__xe_create(response, PCMK__XE_LRMD_IPC_MSG); pcmk__xml_copy(wrapper, msg); lrmd_internal_proxy_send(proxy->lrm, response); pcmk__xml_free(response); } static void remote_proxy_end_session(remote_proxy_t *proxy) { if (proxy == NULL) { return; } crm_trace("ending session ID %s", proxy->session_id); if (proxy->source) { mainloop_del_ipc_client(proxy->source); } } void remote_proxy_free(gpointer data) { remote_proxy_t *proxy = data; crm_trace("freed proxy session ID %s", proxy->session_id); free(proxy->node_name); free(proxy->session_id); free(proxy); } int remote_proxy_dispatch(const char *buffer, ssize_t length, gpointer userdata) { // Async responses from servers to clients via the remote executor xmlNode *xml = NULL; uint32_t flags = 0; remote_proxy_t *proxy = userdata; xml = pcmk__xml_parse(buffer); if (xml == NULL) { crm_warn("Received a NULL msg from IPC service."); return 1; } flags = crm_ipc_buffer_flags(proxy->ipc); if (flags & crm_ipc_proxied_relay_response) { crm_trace("Passing response back to %.8s on %s: %.200s - request id: %d", proxy->session_id, proxy->node_name, buffer, proxy->last_request_id); remote_proxy_relay_response(proxy, xml, proxy->last_request_id); proxy->last_request_id = 0; } else { crm_trace("Passing event back to %.8s on %s: %.200s", proxy->session_id, proxy->node_name, buffer); remote_proxy_relay_event(proxy, xml); } pcmk__xml_free(xml); return 1; } void remote_proxy_disconnected(gpointer userdata) { remote_proxy_t *proxy = userdata; crm_trace("destroying %p", proxy); proxy->source = NULL; proxy->ipc = NULL; if(proxy->lrm) { remote_proxy_notify_destroy(proxy->lrm, proxy->session_id); proxy->lrm = NULL; } g_hash_table_remove(proxy_table, proxy->session_id); } remote_proxy_t * remote_proxy_new(lrmd_t *lrmd, struct ipc_client_callbacks *proxy_callbacks, const char *node_name, const char *session_id, const char *channel) { remote_proxy_t *proxy = NULL; if(channel == NULL) { crm_err("No channel specified to proxy"); remote_proxy_notify_destroy(lrmd, session_id); return NULL; } proxy = pcmk__assert_alloc(1, sizeof(remote_proxy_t)); proxy->node_name = strdup(node_name); proxy->session_id = strdup(session_id); proxy->lrm = lrmd; if ((pcmk__parse_server(crm_system_name) == pcmk_ipc_controld) && (pcmk__parse_server(channel) == pcmk_ipc_controld)) { // The controller doesn't need to connect to itself proxy->is_local = TRUE; } else { proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 0, proxy, proxy_callbacks); proxy->ipc = mainloop_get_ipc_client(proxy->source); if (proxy->source == NULL) { remote_proxy_free(proxy); remote_proxy_notify_destroy(lrmd, session_id); return NULL; } } crm_trace("new remote proxy client established to %s on %s, session id %s", channel, node_name, session_id); g_hash_table_insert(proxy_table, proxy->session_id, proxy); return proxy; } void remote_proxy_cb(lrmd_t *lrmd, const char *node_name, xmlNode *msg) { const char *op = pcmk__xe_get(msg, PCMK__XA_LRMD_IPC_OP); const char *session = pcmk__xe_get(msg, PCMK__XA_LRMD_IPC_SESSION); remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); int msg_id = 0; /* sessions are raw ipc connections to IPC, * all we do is proxy requests/responses exactly * like they are given to us at the ipc level. */ CRM_CHECK(op != NULL, return); CRM_CHECK(session != NULL, return); pcmk__xe_get_int(msg, PCMK__XA_LRMD_IPC_MSG_ID, &msg_id); /* This is msg from remote ipc client going to real ipc server */ if (pcmk__str_eq(op, LRMD_IPC_OP_DESTROY, pcmk__str_casei)) { remote_proxy_end_session(proxy); } else if (pcmk__str_eq(op, LRMD_IPC_OP_REQUEST, pcmk__str_casei)) { uint32_t flags = 0U; int rc = pcmk_rc_ok; const char *name = pcmk__xe_get(msg, PCMK__XA_LRMD_IPC_CLIENT); xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_LRMD_IPC_MSG, NULL, NULL); xmlNode *request = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); CRM_CHECK(request != NULL, return); if (proxy == NULL) { /* proxy connection no longer exists */ remote_proxy_notify_destroy(lrmd, session); return; } // Controller requests MUST be handled by the controller, not us CRM_CHECK(proxy->is_local == FALSE, remote_proxy_end_session(proxy); return); if (!crm_ipc_connected(proxy->ipc)) { remote_proxy_end_session(proxy); return; } proxy->last_request_id = 0; crm_xml_add(request, PCMK_XE_ACL_ROLE, "pacemaker-remote"); rc = pcmk__xe_get_flags(msg, PCMK__XA_LRMD_IPC_MSG_FLAGS, &flags, 0U); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse controller flags from remote request: %s", pcmk_rc_str(rc)); } pcmk__assert(node_name != NULL); pcmk__update_acl_user(request, PCMK__XA_LRMD_IPC_USER, node_name); if (pcmk_is_set(flags, crm_ipc_proxied)) { const char *type = pcmk__xe_get(request, PCMK__XA_T); int rc = 0; if (pcmk__str_eq(type, PCMK__VALUE_ATTRD, pcmk__str_none) && (pcmk__xe_get(request, PCMK__XA_ATTR_HOST) == NULL) && pcmk__str_any_of(pcmk__xe_get(request, PCMK_XA_TASK), PCMK__ATTRD_CMD_UPDATE, PCMK__ATTRD_CMD_UPDATE_BOTH, PCMK__ATTRD_CMD_UPDATE_DELAY, NULL)) { crm_xml_add(request, PCMK__XA_ATTR_HOST, proxy->node_name); } rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL); if(rc < 0) { xmlNode *op_reply = pcmk__xe_create(NULL, PCMK__XE_NACK); crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc); /* Send a n'ack so the caller doesn't block */ crm_xml_add(op_reply, PCMK_XA_FUNCTION, __func__); - crm_xml_add_int(op_reply, PCMK__XA_LINE, __LINE__); - crm_xml_add_int(op_reply, PCMK_XA_RC, rc); + pcmk__xe_set_int(op_reply, PCMK__XA_LINE, __LINE__); + pcmk__xe_set_int(op_reply, PCMK_XA_RC, rc); remote_proxy_relay_response(proxy, op_reply, msg_id); pcmk__xml_free(op_reply); } else { crm_trace("Relayed %s request %d from %s to %s for %s", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); proxy->last_request_id = msg_id; } } else { int rc = pcmk_ok; xmlNode *op_reply = NULL; // @COMPAT pacemaker_remoted <= 1.1.10 crm_trace("Relaying %s request %d from %s to %s for %s", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); rc = crm_ipc_send(proxy->ipc, request, flags, 10000, &op_reply); if(rc < 0) { crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc); } else { crm_trace("Relayed %s request %d from %s to %s for %s", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); } if(op_reply) { remote_proxy_relay_response(proxy, op_reply, msg_id); pcmk__xml_free(op_reply); } } } else { crm_err("Unknown proxy operation: %s", op); } } diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index 16105172c7..2d4fffee08 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -1,1104 +1,1104 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "libpacemaker_private.h" // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pcmk__action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pcmk__action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pcmk__action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->priv->name) /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = pcmk__xe_create(xml, PCMK_XE_NODE); crm_xml_add(node_xml, PCMK_XA_ID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pcmk_node_t *node, void *xml) { add_node_to_xml_by_id(node->priv->id, (xmlNode *) xml); } /*! * \internal * \brief Count (optionally add to XML) nodes needing maintenance state update * * \param[in,out] xml Parent XML tag to add to, if any * \param[in] scheduler Scheduler data * * \return Count of nodes added * \note Only Pacemaker Remote nodes are considered currently */ static int add_maintenance_nodes(xmlNode *xml, const pcmk_scheduler_t *scheduler) { xmlNode *maintenance = NULL; int count = 0; if (xml != NULL) { maintenance = pcmk__xe_create(xml, PCMK__XE_MAINTENANCE); } for (const GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { const pcmk_node_t *node = iter->data; if (!pcmk__is_pacemaker_remote_node(node)) { continue; } if ((node->details->maintenance && !pcmk_is_set(node->priv->flags, pcmk__node_remote_maint)) || (!node->details->maintenance && pcmk_is_set(node->priv->flags, pcmk__node_remote_maint))) { if (maintenance != NULL) { crm_xml_add(add_node_to_xml_by_id(node->priv->id, maintenance), PCMK__XA_NODE_IN_MAINTENANCE, (node->details->maintenance? "1" : "0")); } count++; } } crm_trace("%s %d nodes in need of maintenance mode update in state", ((maintenance == NULL)? "Counted" : "Added"), count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] scheduler Scheduler data */ static void add_maintenance_update(pcmk_scheduler_t *scheduler) { pcmk_action_t *action = NULL; if (add_maintenance_nodes(NULL, scheduler) != 0) { action = get_pseudo_op(PCMK_ACTION_MAINTENANCE_NODES, scheduler); pcmk__set_action_flags(action, pcmk__action_always_in_graph); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. * * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes */ static void add_downed_nodes(xmlNode *xml, const pcmk_action_t *action) { CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL), return); if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { /* Shutdown makes the action's node down */ xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->node->priv->id, downed); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, PCMK__META_STONITH_ACTION); if (pcmk__is_fencing_action(fence)) { xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->node->priv->id, downed); pe_foreach_guest_node(action->node->priv->scheduler, action->node, add_node_to_xml, downed); } } else if ((action->rsc != NULL) && pcmk_is_set(action->rsc->flags, pcmk__rsc_is_remote_connection) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GList *iter; pcmk_action_t *input; bool migrating = false; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pcmk__related_action_t *) iter->data)->action; if ((input->rsc != NULL) && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none) && pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { migrating = true; break; } } if (!migrating) { xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } /*! * \internal * \brief Create a transition graph operation key for a clone action * * \param[in] action Clone action * \param[in] interval_ms Action interval in milliseconds * * \return Newly allocated string with transition graph operation key */ static char * clone_op_key(const pcmk_action_t *action, guint interval_ms) { if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); return pcmk__notify_key(action->rsc->priv->history_id, n_type, n_task); } return pcmk__op_key(action->rsc->priv->history_id, pcmk__s(action->cancel_task, action->task), interval_ms); } /*! * \internal * \brief Add node details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] xml Transition graph action XML for \p action */ static void add_node_details(const pcmk_action_t *action, xmlNode *xml) { pcmk_node_t *router_node = pcmk__connection_host_for_action(action); crm_xml_add(xml, PCMK__META_ON_NODE, action->node->priv->name); crm_xml_add(xml, PCMK__META_ON_NODE_UUID, action->node->priv->id); if (router_node != NULL) { crm_xml_add(xml, PCMK__XA_ROUTER_NODE, router_node->priv->name); } } /*! * \internal * \brief Add resource details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_resource_details(const pcmk_action_t *action, xmlNode *action_xml) { xmlNode *rsc_xml = NULL; const char *attr_list[] = { PCMK_XA_CLASS, PCMK_XA_PROVIDER, PCMK_XA_TYPE, }; /* If a resource is locked to a node via PCMK_OPT_SHUTDOWN_LOCK, mark its * actions so the controller can preserve the lock when the action * completes. */ if (pcmk__action_locks_rsc_to_node(action)) { pcmk__xe_set_time(action_xml, PCMK_OPT_SHUTDOWN_LOCK, action->rsc->priv->lock_time); } // List affected resource rsc_xml = pcmk__xe_create(action_xml, (const char *) action->rsc->priv->xml->name); if (pcmk_is_set(action->rsc->flags, pcmk__rsc_removed) && (action->rsc->priv->history_id != NULL)) { /* Use the numbered instance name here, because if there is more * than one instance on a node, we need to make sure the command * goes to the right one. * * This is important even for anonymous clones, because the clone's * unique meta-attribute might have just been toggled from on to * off. */ crm_debug("Using orphan clone name %s instead of history ID %s", action->rsc->id, action->rsc->priv->history_id); crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->priv->history_id); crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id); } else if (!pcmk_is_set(action->rsc->flags, pcmk__rsc_unique)) { const char *xml_id = pcmk__xe_id(action->rsc->priv->xml); crm_debug("Using anonymous clone name %s for %s (aka %s)", xml_id, action->rsc->id, action->rsc->priv->history_id); /* ID is what we'd like client to use * LONG_ID is what they might know it as instead * * LONG_ID is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). (@TODO The effect of removing this on * regression tests suggests that it is still needed for unique clones) * * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan * and fall into the clause above instead */ crm_xml_add(rsc_xml, PCMK_XA_ID, xml_id); if ((action->rsc->priv->history_id != NULL) && !pcmk__str_eq(xml_id, action->rsc->priv->history_id, pcmk__str_none)) { crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->priv->history_id); } else { crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id); } } else { pcmk__assert(action->rsc->priv->history_id == NULL); crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->id); } for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->priv->meta, attr_list[lpc])); } } /*! * \internal * \brief Add action attributes to transition graph action XML * * \param[in,out] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_action_attributes(pcmk_action_t *action, xmlNode *action_xml) { xmlNode *args_xml = NULL; pcmk_resource_t *rsc = action->rsc; /* We create free-standing XML to start, so we can sort the attributes * before adding it to action_xml, which keeps the scheduler regression * test graphs comparable. */ args_xml = pcmk__xe_create(action_xml, PCMK__XE_ATTRIBUTES); crm_xml_add(args_xml, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if ((rsc != NULL) && (action->node != NULL)) { // Get the resource instance attributes, evaluated properly for node GHashTable *params = pe_rsc_params(rsc, action->node, rsc->priv->scheduler); pcmk__substitute_remote_addr(rsc, params); g_hash_table_foreach(params, hash2smartfield, args_xml); } else if ((rsc != NULL) && (rsc->priv->variant <= pcmk__rsc_variant_primitive)) { GHashTable *params = pe_rsc_params(rsc, NULL, rsc->priv->scheduler); g_hash_table_foreach(params, hash2smartfield, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (rsc != NULL) { pcmk_resource_t *parent = rsc; while (parent != NULL) { parent->priv->cmds->add_graph_meta(parent, args_xml); parent = parent->priv->parent; } pcmk__add_guest_meta_to_xml(args_xml, action); } pcmk__xe_sort_attrs(args_xml); } /*! * \internal * \brief Create the transition graph XML for a scheduled action * * \param[in,out] parent Parent XML element to add action to * \param[in,out] action Scheduled action * \param[in] skip_details If false, add action details as sub-elements * \param[in] scheduler Scheduler data */ static void create_graph_action(xmlNode *parent, pcmk_action_t *action, bool skip_details, const pcmk_scheduler_t *scheduler) { bool needs_node_info = true; bool needs_maintenance_info = false; xmlNode *action_xml = NULL; if ((action == NULL) || (scheduler == NULL)) { return; } // Create the top-level element based on task if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* All fences need node info; guest node fences are pseudo-events */ if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT); } else { action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); } } else if (pcmk__str_any_of(action->task, PCMK_ACTION_DO_SHUTDOWN, PCMK_ACTION_CLEAR_FAILCOUNT, NULL)) { action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); } else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { // CIB-only clean-up for shutdown locks action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); crm_xml_add(action_xml, PCMK__XA_MODE, PCMK__VALUE_CIB); } else if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { if (pcmk__str_eq(action->task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_none)) { needs_maintenance_info = true; } action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT); needs_node_info = false; } else { action_xml = pcmk__xe_create(parent, PCMK__XE_RSC_OP); } - crm_xml_add_int(action_xml, PCMK_XA_ID, action->id); + pcmk__xe_set_int(action_xml, PCMK_XA_ID, action->id); crm_xml_add(action_xml, PCMK_XA_OPERATION, action->task); if ((action->rsc != NULL) && (action->rsc->priv->history_id != NULL)) { char *clone_key = NULL; guint interval_ms; if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } clone_key = clone_op_key(action, interval_ms); crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, clone_key); crm_xml_add(action_xml, "internal_" PCMK__XA_OPERATION_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, action->uuid); } if (needs_node_info && (action->node != NULL)) { add_node_details(action, action_xml); pcmk__insert_dup(action->meta, PCMK__META_ON_NODE, action->node->priv->name); pcmk__insert_dup(action->meta, PCMK__META_ON_NODE_UUID, action->node->priv->id); } if (skip_details) { return; } if ((action->rsc != NULL) && !pcmk_is_set(action->flags, pcmk__action_pseudo)) { // This is a real resource action, so add resource details add_resource_details(action, action_xml); } /* List any attributes in effect */ add_action_attributes(action, action_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, scheduler); } } /*! * \internal * \brief Check whether an action should be added to the transition graph * * \param[in,out] action Action to check * * \return true if action should be added to graph, otherwise false */ static bool should_add_action_to_graph(pcmk_action_t *action) { if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; } if (pcmk_is_set(action->flags, pcmk__action_optional) && !pcmk_is_set(action->flags, pcmk__action_always_in_graph)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; } /* Actions for unmanaged resources should be excluded from the graph, * with the exception of monitors and cancellation of recurring monitors. */ if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pcmk__rsc_managed) && !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { const char *interval_ms_s; /* A cancellation of a recurring monitor will get here because the task * is cancel rather than monitor, but the interval can still be used to * recognize it. The interval has been normalized to milliseconds by * this point, so a string comparison is sufficient. */ interval_ms_s = g_hash_table_lookup(action->meta, PCMK_META_INTERVAL); if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } /* Always add pseudo-actions, fence actions, and shutdown actions (already * determined to be required and runnable by this point) */ if (pcmk_is_set(action->flags, pcmk__action_pseudo) || pcmk__strcase_any_of(action->task, PCMK_ACTION_STONITH, PCMK_ACTION_DO_SHUTDOWN, NULL)) { return true; } if (action->node == NULL) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was not assigned to a node (bug?)", action->uuid, action->id); pcmk__log_action("Unassigned", action, false); return false; } if (pcmk_is_set(action->flags, pcmk__action_on_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, pcmk__node_name(action->node)); } else if (pcmk__is_guest_or_bundle_node(action->node) && !pcmk_is_set(action->node->priv->flags, pcmk__node_remote_reset)) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest %s", action->uuid, action->id, pcmk__node_name(action->node)); } else if (!action->node->details->online) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); pcmk__log_action("Offline node", action, false); return false; } else if (action->node->details->unclean) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); pcmk__log_action("Unclean node", action, false); return false; } return true; } /*! * \internal * \brief Check whether an ordering's flags can change an action * * \param[in] ordering Ordering to check * * \return true if ordering has flags that can change an action, false otherwise */ static bool ordering_can_change_actions(const pcmk__related_action_t *ordering) { return pcmk_any_flags_set(ordering->flags, ~(pcmk__ar_then_implies_first_graphed |pcmk__ar_first_implies_then_graphed |pcmk__ar_ordered)); } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool should_add_input_to_graph(const pcmk_action_t *action, pcmk__related_action_t *input) { if (input->graphed) { return true; } if (input->flags == pcmk__ar_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pcmk__action_runnable) && !ordering_can_change_actions(input)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pcmk__action_runnable) && pcmk_is_set(input->flags, pcmk__ar_min_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "minimum number of instances required but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->flags, pcmk__ar_unmigratable_then_blocks) && !pcmk_is_set(input->action->flags, pcmk__action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input blocked if 'then' unmigratable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->flags, pcmk__ar_if_first_unmigratable) && pcmk_is_set(input->action->flags, pcmk__action_migratable)) { crm_trace("Ignoring %s (%d) input %s (%d): ordering applies " "only if input is unmigratable, but it is migratable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->flags == pcmk__ar_ordered) && pcmk_is_set(input->action->flags, pcmk__action_migratable) && pcmk__ends_with(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->flags == pcmk__ar_if_on_same_node_or_target) { pcmk_node_t *input_node = input->action->node; if ((action->rsc != NULL) && pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { pcmk_node_t *assigned = action->rsc->priv->assigned_node; /* For load_stopped -> migrate_to orderings, we care about where * the resource has been assigned, not where migrate_to will be * executed. */ if (!pcmk__same_node(input_node, assigned)) { crm_trace("Ignoring %s (%d) input %s (%d): " "migration target %s is not same as input node %s", action->uuid, action->id, input->action->uuid, input->action->id, (assigned? assigned->priv->name : ""), (input_node? input_node->priv->name : "")); input->flags = pcmk__ar_none; return false; } } else if (!pcmk__same_node(input_node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "not on same node (%s vs %s)", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->priv->name : ""), (input_node? input_node->priv->name : "")); input->flags = pcmk__ar_none; return false; } else if (pcmk_is_set(input->action->flags, pcmk__action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering optional", action->uuid, action->id, input->action->uuid, input->action->id); input->flags = pcmk__ar_none; return false; } } else if (input->flags == pcmk__ar_if_required_on_same_node) { if (input->action->node && action->node && !pcmk__same_node(input->action->node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "not on same node (%s vs %s)", action->uuid, action->id, input->action->uuid, input->action->id, pcmk__node_name(action->node), pcmk__node_name(input->action->node)); input->flags = pcmk__ar_none; return false; } else if (pcmk_is_set(input->action->flags, pcmk__action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): optional", action->uuid, action->id, input->action->uuid, input->action->id); input->flags = pcmk__ar_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && pcmk_is_set(input->action->rsc->flags, pcmk__rsc_failed) && !pcmk_is_set(input->action->rsc->flags, pcmk__rsc_managed) && pcmk__ends_with(input->action->uuid, "_stop_0") && pcmk__is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (pcmk_is_set(input->action->flags, pcmk__action_optional) && !pcmk_any_flags_set(input->action->flags, pcmk__action_always_in_graph |pcmk__action_added_to_graph) && !should_add_action_to_graph(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x", action->uuid, action->id, action_type_str(input->action->flags), input->action->uuid, input->action->id, action_node_str(input->action), action_runnable_str(input->action->flags), action_optional_str(input->action->flags), input->flags); return true; } /*! * \internal * \brief Check whether an ordering creates an ordering loop * * \param[in] init_action "First" action in ordering * \param[in] action Callers should always set this the same as * \p init_action (this function may use a different * value for recursive calls) * \param[in,out] input Action wrapper for "then" action in ordering * * \return true if the ordering creates a loop, otherwise false */ bool pcmk__graph_has_loop(const pcmk_action_t *init_action, const pcmk_action_t *action, pcmk__related_action_t *input) { bool has_loop = false; if (pcmk_is_set(input->action->flags, pcmk__action_detect_loop)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->priv->name : "", action->uuid, action->node? action->node->priv->name : "", input->flags); return false; } // Don't need to check inputs that won't be used if (!should_add_input_to_graph(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->priv->name : "", init_action->uuid, init_action->node? init_action->node->priv->name : ""); return true; } pcmk__set_action_flags(input->action, pcmk__action_detect_loop); crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->priv->name : "", input->action->uuid, input->action->node? input->action->node->priv->name : "", input->flags, init_action->uuid, init_action->node? init_action->node->priv->name : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (pcmk__graph_has_loop(init_action, input->action, (pcmk__related_action_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; break; } } pcmk__clear_action_flags(input->action, pcmk__action_detect_loop); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->priv->name : "", action->uuid, action->node? action->node->priv->name : "", input->flags); } return has_loop; } /*! * \internal * \brief Create a synapse XML element for a transition graph * * \param[in] action Action that synapse is for * \param[in,out] scheduler Scheduler data containing graph * * \return Newly added XML element for new graph synapse */ static xmlNode * create_graph_synapse(const pcmk_action_t *action, pcmk_scheduler_t *scheduler) { int synapse_priority = 0; xmlNode *syn = pcmk__xe_create(scheduler->priv->graph, PCMK__XE_SYNAPSE); - crm_xml_add_int(syn, PCMK_XA_ID, scheduler->priv->synapse_count++); + pcmk__xe_set_int(syn, PCMK_XA_ID, scheduler->priv->synapse_count++); if (action->rsc != NULL) { synapse_priority = action->rsc->priv->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { - crm_xml_add_int(syn, PCMK__XA_PRIORITY, synapse_priority); + pcmk__xe_set_int(syn, PCMK__XA_PRIORITY, synapse_priority); } return syn; } /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in,out] data Action to possibly add * \param[in,out] user_data Scheduler data * * \note This will de-duplicate the action inputs, meaning that the * pcmk__related_action_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after * pcmk__apply_orderings() is complete, and nothing after this should rely * on those type flags. (For example, some code looks for type equal to * some flag rather than whether the flag is set, and some code looks for * particular combinations of flags -- such code must be done before * pcmk__create_graph().) */ static void add_action_to_graph(gpointer data, gpointer user_data) { pcmk_action_t *action = (pcmk_action_t *) data; pcmk_scheduler_t *scheduler = (pcmk_scheduler_t *) user_data; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; /* If we haven't already, de-duplicate inputs (even if we won't be adding * the action to the graph, so that crm_simulate's dot graphs don't have * duplicates). */ if (!pcmk_is_set(action->flags, pcmk__action_inputs_deduplicated)) { pcmk__deduplicate_action_inputs(action); pcmk__set_action_flags(action, pcmk__action_inputs_deduplicated); } if (pcmk_is_set(action->flags, pcmk__action_added_to_graph) || !should_add_action_to_graph(action)) { return; // Already added, or shouldn't be } pcmk__set_action_flags(action, pcmk__action_added_to_graph); crm_trace("Adding action %d (%s%s%s) to graph", action->id, action->uuid, ((action->node == NULL)? "" : " on "), ((action->node == NULL)? "" : action->node->priv->name)); syn = create_graph_synapse(action, scheduler); set = pcmk__xe_create(syn, PCMK__XE_ACTION_SET); in = pcmk__xe_create(syn, PCMK__XE_INPUTS); create_graph_action(set, action, false, scheduler); for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *input = lpc->data; if (should_add_input_to_graph(action, input)) { xmlNode *input_xml = pcmk__xe_create(in, PCMK__XE_TRIGGER); input->graphed = true; create_graph_action(input_xml, input->action, true, scheduler); } } } static int transition_id = 0; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] scheduler Scheduler data * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const pcmk_scheduler_t *scheduler, const char *filename) { if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_error) || pcmk__config_has_error) { crm_err("Calculated transition %d (with errors)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_warning) || pcmk__config_has_warning) { crm_warn("Calculated transition %d (with warnings)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else { crm_notice("Calculated transition %d%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } if (pcmk__config_has_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /*! * \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__add_rsc_actions_to_graph(pcmk_resource_t *rsc) { GList *iter = NULL; pcmk__assert(rsc != NULL); pcmk__rsc_trace(rsc, "Adding actions for %s to graph", rsc->id); // First add the resource's own actions g_list_foreach(rsc->priv->actions, add_action_to_graph, rsc->priv->scheduler); // Then recursively add its children's actions (appropriate to variant) for (iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data; child_rsc->priv->cmds->add_actions_to_graph(child_rsc); } } /*! * \internal * \brief Create a transition graph with all cluster actions needed * * \param[in,out] scheduler Scheduler data */ void pcmk__create_graph(pcmk_scheduler_t *scheduler) { GList *iter = NULL; const char *value = NULL; long long limit = 0LL; GHashTable *config_hash = scheduler->priv->options; int rc = pcmk_rc_ok; transition_id++; crm_trace("Creating transition graph %d", transition_id); scheduler->priv->graph = pcmk__xe_create(NULL, PCMK__XE_TRANSITION_GRAPH); value = pcmk__cluster_option(config_hash, PCMK_OPT_CLUSTER_DELAY); crm_xml_add(scheduler->priv->graph, PCMK_OPT_CLUSTER_DELAY, value); value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT); crm_xml_add(scheduler->priv->graph, PCMK_OPT_STONITH_TIMEOUT, value); crm_xml_add(scheduler->priv->graph, PCMK__XA_FAILED_STOP_OFFSET, PCMK_VALUE_INFINITY); if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) { crm_xml_add(scheduler->priv->graph, PCMK__XA_FAILED_START_OFFSET, PCMK_VALUE_INFINITY); } else { crm_xml_add(scheduler->priv->graph, PCMK__XA_FAILED_START_OFFSET, "1"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_BATCH_LIMIT); crm_xml_add(scheduler->priv->graph, PCMK_OPT_BATCH_LIMIT, value); - crm_xml_add_int(scheduler->priv->graph, "transition_id", transition_id); + pcmk__xe_set_int(scheduler->priv->graph, "transition_id", transition_id); value = pcmk__cluster_option(config_hash, PCMK_OPT_MIGRATION_LIMIT); rc = pcmk__scan_ll(value, &limit, 0LL); if (rc != pcmk_rc_ok) { crm_warn("Ignoring invalid value '%s' for " PCMK_OPT_MIGRATION_LIMIT ": %s", value, pcmk_rc_str(rc)); } else if (limit > 0) { crm_xml_add(scheduler->priv->graph, PCMK_OPT_MIGRATION_LIMIT, value); } if (scheduler->priv->recheck_by > 0) { pcmk__xe_set_time(scheduler->priv->graph, "recheck-by", scheduler->priv->recheck_by); } /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ // Add resource actions to graph for (iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; pcmk__rsc_trace(rsc, "Processing actions for %s", rsc->id); rsc->priv->cmds->add_actions_to_graph(rsc); } // Add pseudo-action for list of nodes with maintenance state update add_maintenance_update(scheduler); // Add non-resource (node) actions for (iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; if ((action->rsc != NULL) && (action->node != NULL) && action->node->details->shutdown && !pcmk_is_set(action->rsc->flags, pcmk__rsc_maintenance) && !pcmk_any_flags_set(action->flags, pcmk__action_optional|pcmk__action_runnable) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Eventually we should just ignore the 'fence' case, but for now * it's the best way to detect (in CTS) when CIB resource updates * are being lost. */ if (pcmk_is_set(scheduler->flags, pcmk__sched_quorate) || (scheduler->no_quorum_policy == pcmk_no_quorum_ignore)) { const bool managed = pcmk_is_set(action->rsc->flags, pcmk__rsc_managed); const bool failed = pcmk_is_set(action->rsc->flags, pcmk__rsc_failed); crm_crit("Cannot %s %s because of %s:%s%s (%s)", action->node->details->unclean? "fence" : "shut down", pcmk__node_name(action->node), action->rsc->id, (managed? " blocked" : " unmanaged"), (failed? " failed" : ""), action->uuid); } } add_action_to_graph((gpointer) action, (gpointer) scheduler); } crm_log_xml_trace(scheduler->priv->graph, "graph"); } diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c index 01896567e7..419ff759c9 100644 --- a/lib/pacemaker/pcmk_sched_actions.c +++ b/lib/pacemaker/pcmk_sched_actions.c @@ -1,1955 +1,1955 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // bool, true, false #include #include #include #include // xmlNode #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Get the action flags relevant to ordering constraints * * \param[in,out] action Action to check * \param[in] node Node that *other* action in the ordering is on * (used only for clone resource actions) * * \return Action flags that should be used for orderings */ static uint32_t action_flags_for_ordering(pcmk_action_t *action, const pcmk_node_t *node) { bool runnable = false; uint32_t flags; // For non-resource actions, return the action flags if (action->rsc == NULL) { return action->flags; } /* For non-clone resources, or a clone action not assigned to a node, * return the flags as determined by the resource method without a node * specified. */ flags = action->rsc->priv->cmds->action_flags(action, NULL); if ((node == NULL) || !pcmk__is_clone(action->rsc)) { return flags; } /* Otherwise (i.e., for clone resource actions on a specific node), first * remember whether the non-node-specific action is runnable. */ runnable = pcmk_is_set(flags, pcmk__action_runnable); // Then recheck the resource method with the node flags = action->rsc->priv->cmds->action_flags(action, node); /* For clones in ordering constraints, the node-specific "runnable" doesn't * matter, just the non-node-specific setting (i.e., is the action runnable * anywhere). * * This applies only to runnable, and only for ordering constraints. This * function shouldn't be used for other types of constraints without * changes. Not very satisfying, but it's logical and appears to work well. */ if (runnable && !pcmk_is_set(flags, pcmk__action_runnable)) { pcmk__set_raw_action_flags(flags, action->rsc->id, pcmk__action_runnable); } return flags; } /*! * \internal * \brief Get action UUID that should be used with a resource ordering * * When an action is ordered relative to an action for a collective resource * (clone, group, or bundle), it actually needs to be ordered after all * instances of the collective have completed the relevant action (for example, * given "start CLONE then start RSC", RSC must wait until all instances of * CLONE have started). Given the UUID and resource of the first action in an * ordering, this returns the UUID of the action that should actually be used * for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0"). * * \param[in] first_uuid UUID of first action in ordering * \param[in] first_rsc Resource of first action in ordering * * \return Newly allocated copy of UUID to use with ordering * \note It is the caller's responsibility to free the return value. */ static char * action_uuid_for_ordering(const char *first_uuid, const pcmk_resource_t *first_rsc) { guint interval_ms = 0; char *uuid = NULL; char *rid = NULL; char *first_task_str = NULL; enum pcmk__action_type first_task = pcmk__action_unspecified; enum pcmk__action_type remapped_task = pcmk__action_unspecified; // Only non-notify actions for collective resources need remapping if ((strstr(first_uuid, PCMK_ACTION_NOTIFY) != NULL) || (first_rsc->priv->variant < pcmk__rsc_variant_group)) { goto done; } // Only non-recurring actions need remapping pcmk__assert(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms)); if (interval_ms > 0) { goto done; } first_task = pcmk__parse_action(first_task_str); switch (first_task) { case pcmk__action_stop: case pcmk__action_start: case pcmk__action_notify: case pcmk__action_promote: case pcmk__action_demote: remapped_task = first_task + 1; break; case pcmk__action_stopped: case pcmk__action_started: case pcmk__action_notified: case pcmk__action_promoted: case pcmk__action_demoted: remapped_task = first_task; break; case pcmk__action_monitor: case pcmk__action_shutdown: case pcmk__action_fence: break; default: crm_err("Unknown action '%s' in ordering", first_task_str); break; } if (remapped_task != pcmk__action_unspecified) { /* If a clone or bundle has notifications enabled, the ordering will be * relative to when notifications have been sent for the remapped task. */ if (pcmk_is_set(first_rsc->flags, pcmk__rsc_notify) && (pcmk__is_clone(first_rsc) || pcmk__is_bundled(first_rsc))) { uuid = pcmk__notify_key(rid, "confirmed-post", pcmk__action_text(remapped_task)); } else { uuid = pcmk__op_key(rid, pcmk__action_text(remapped_task), 0); } pcmk__rsc_trace(first_rsc, "Remapped action UUID %s to %s for ordering purposes", first_uuid, uuid); } done: free(first_task_str); free(rid); return (uuid != NULL)? uuid : pcmk__str_copy(first_uuid); } /*! * \internal * \brief Get actual action that should be used with an ordering * * When an action is ordered relative to an action for a collective resource * (clone, group, or bundle), it actually needs to be ordered after all * instances of the collective have completed the relevant action (for example, * given "start CLONE then start RSC", RSC must wait until all instances of * CLONE have started). Given the first action in an ordering, this returns the * the action that should actually be used for ordering (for example, the * started action instead of the start action). * * \param[in] action First action in an ordering * * \return Actual action that should be used for the ordering */ static pcmk_action_t * action_for_ordering(pcmk_action_t *action) { pcmk_action_t *result = action; pcmk_resource_t *rsc = action->rsc; if (rsc == NULL) { return result; } if ((rsc->priv->variant >= pcmk__rsc_variant_group) && (action->uuid != NULL)) { char *uuid = action_uuid_for_ordering(action->uuid, rsc); result = find_first_action(rsc->priv->actions, uuid, NULL, NULL); if (result == NULL) { crm_warn("Not remapping %s to %s because %s does not have " "remapped action", action->uuid, uuid, rsc->id); result = action; } free(uuid); } return result; } /*! * \internal * \brief Wrapper for update_ordered_actions() method for readability * * \param[in,out] rsc Resource to call method for * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this * node (only used when interleaving instances) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates * (may include pcmk__action_optional to affect only * mandatory actions, and pe_action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags indicating what was updated */ static inline uint32_t update(pcmk_resource_t *rsc, pcmk_action_t *first, pcmk_action_t *then, const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pcmk_scheduler_t *scheduler) { return rsc->priv->cmds->update_ordered_actions(first, then, node, flags, filter, type, scheduler); } /*! * \internal * \brief Update flags for ordering's actions appropriately for ordering's flags * * \param[in,out] first First action in an ordering * \param[in,out] then Then action in an ordering * \param[in] first_flags Action flags for \p first for ordering purposes * \param[in] then_flags Action flags for \p then for ordering purposes * \param[in,out] order Action wrapper for \p first in ordering * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags */ static uint32_t update_action_for_ordering_flags(pcmk_action_t *first, pcmk_action_t *then, uint32_t first_flags, uint32_t then_flags, pcmk__related_action_t *order, pcmk_scheduler_t *scheduler) { uint32_t changed = pcmk__updated_none; /* The node will only be used for clones. If interleaved, node will be NULL, * otherwise the ordering scope will be limited to the node. Normally, the * whole 'then' clone should restart if 'first' is restarted, so then->node * is needed. */ pcmk_node_t *node = then->node; if (pcmk_is_set(order->flags, pcmk__ar_first_implies_same_node_then)) { /* For unfencing, only instances of 'then' on the same node as 'first' * (the unfencing operation) should restart, so reset node to * first->node, at which point this case is handled like a normal * pcmk__ar_first_implies_then. */ pcmk__clear_relation_flags(order->flags, pcmk__ar_first_implies_same_node_then); pcmk__set_relation_flags(order->flags, pcmk__ar_first_implies_then); node = first->node; pcmk__rsc_trace(then->rsc, "%s then %s: mapped " "pcmk__ar_first_implies_same_node_then to " "pcmk__ar_first_implies_then on %s", first->uuid, then->uuid, pcmk__node_name(node)); } if (pcmk_is_set(order->flags, pcmk__ar_first_implies_then)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags & pcmk__action_optional, pcmk__action_optional, pcmk__ar_first_implies_then, scheduler); } else if (!pcmk_is_set(first_flags, pcmk__action_optional) && pcmk_is_set(then->flags, pcmk__action_optional)) { pcmk__clear_action_flags(then, pcmk__action_optional); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_first_implies_then", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_intermediate_stop) && (then->rsc != NULL)) { enum pcmk__action_flags restart = pcmk__action_optional |pcmk__action_runnable; changed |= update(then->rsc, first, then, node, first_flags, restart, pcmk__ar_intermediate_stop, scheduler); pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_intermediate_stop", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_then_implies_first)) { if (first->rsc != NULL) { changed |= update(first->rsc, first, then, node, first_flags, pcmk__action_optional, pcmk__ar_then_implies_first, scheduler); } else if (!pcmk_is_set(first_flags, pcmk__action_optional) && pcmk_is_set(first->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(first, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_first); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_then_implies_first", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_promoted_then_implies_first)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags & pcmk__action_optional, pcmk__action_optional, pcmk__ar_promoted_then_implies_first, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after " "pcmk__ar_promoted_then_implies_first", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_min_runnable)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_min_runnable, scheduler); } else if (pcmk_is_set(first_flags, pcmk__action_runnable)) { // We have another runnable instance of "first" then->runnable_before++; /* Mark "then" as runnable if it requires a certain number of * "before" instances to be runnable, and they now are. */ if ((then->runnable_before >= then->required_runnable_before) && !pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__set_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_min_runnable", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_nested_remote_probe) && (then->rsc != NULL)) { if (!pcmk_is_set(first_flags, pcmk__action_runnable) && (first->rsc != NULL) && (first->rsc->priv->active_nodes != NULL)) { pcmk__rsc_trace(then->rsc, "%s then %s: ignoring because first is stopping", first->uuid, then->uuid); order->flags = pcmk__ar_none; } else { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_unrunnable_first_blocks, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_nested_remote_probe", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_unrunnable_first_blocks)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_unrunnable_first_blocks, scheduler); } else if (!pcmk_is_set(first_flags, pcmk__action_runnable) && pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_unrunnable_first_blocks", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_unmigratable_then_blocks)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_optional, pcmk__ar_unmigratable_then_blocks, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after " "pcmk__ar_unmigratable_then_blocks", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_first_else_then)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_optional, pcmk__ar_first_else_then, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_first_else_then", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_ordered)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_ordered, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_ordered", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->flags, pcmk__ar_asymmetric)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk__action_runnable, pcmk__ar_asymmetric, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_asymmetric", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(first->flags, pcmk__action_runnable) && pcmk_is_set(order->flags, pcmk__ar_first_implies_then_graphed) && !pcmk_is_set(first_flags, pcmk__action_optional)) { pcmk__rsc_trace(then->rsc, "%s will be in graph because %s is required", then->uuid, first->uuid); pcmk__set_action_flags(then, pcmk__action_always_in_graph); // Don't bother marking 'then' as changed just for this } if (pcmk_is_set(order->flags, pcmk__ar_then_implies_first_graphed) && !pcmk_is_set(then_flags, pcmk__action_optional)) { pcmk__rsc_trace(then->rsc, "%s will be in graph because %s is required", first->uuid, then->uuid); pcmk__set_action_flags(first, pcmk__action_always_in_graph); // Don't bother marking 'first' as changed just for this } if (pcmk_any_flags_set(order->flags, pcmk__ar_first_implies_then |pcmk__ar_then_implies_first |pcmk__ar_intermediate_stop) && (first->rsc != NULL) && !pcmk_is_set(first->rsc->flags, pcmk__rsc_managed) && pcmk_is_set(first->rsc->flags, pcmk__rsc_blocked) && !pcmk_is_set(first->flags, pcmk__action_runnable) && pcmk__str_eq(first->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* @TODO This seems odd; why wouldn't an unrunnable "first" already * block "then" before this? Note that the unmanaged-stop-{1,2} * scheduler regression tests and the test CIB for T209 have tests for * "stop then stop" relations that would be good for checking any * changes. */ if (pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__clear_action_flags(then, pcmk__action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after checking whether first " "is blocked, unmanaged, unrunnable stop", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } return changed; } // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pcmk__action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pcmk__action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pcmk__action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->priv->name) /*! * \internal * \brief Update an action's flags for all orderings where it is "then" * * \param[in,out] then Action to update * \param[in,out] scheduler Scheduler data */ void pcmk__update_action_for_orderings(pcmk_action_t *then, pcmk_scheduler_t *scheduler) { GList *lpc = NULL; uint32_t changed = pcmk__updated_none; int last_flags = then->flags; pcmk__rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s", action_type_str(then->flags), then->uuid, action_optional_str(then->flags), action_runnable_str(then->flags), action_node_str(then)); if (then->required_runnable_before > 0) { /* Initialize current known "runnable before" actions. As * update_action_for_ordering_flags() is called for each of then's * before actions, this number will increment as runnable 'first' * actions are encountered. */ then->runnable_before = 0; /* The pcmk__ar_min_runnable clause of * update_action_for_ordering_flags() (called below) * will reset runnable if appropriate. */ pcmk__clear_action_flags(then, pcmk__action_runnable); } for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *other = lpc->data; pcmk_action_t *first = other->action; pcmk_node_t *then_node = then->node; pcmk_node_t *first_node = first->node; const uint32_t target = pcmk__rsc_node_assigned; if ((first->rsc != NULL) && pcmk__is_group(first->rsc) && pcmk__str_eq(first->task, PCMK_ACTION_START, pcmk__str_none)) { first_node = first->rsc->priv->fns->location(first->rsc, NULL, target); if (first_node != NULL) { pcmk__rsc_trace(first->rsc, "Found %s for 'first' %s", pcmk__node_name(first_node), first->uuid); } } if (pcmk__is_group(then->rsc) && pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none)) { then_node = then->rsc->priv->fns->location(then->rsc, NULL, target); if (then_node != NULL) { pcmk__rsc_trace(then->rsc, "Found %s for 'then' %s", pcmk__node_name(then_node), then->uuid); } } // Disable constraint if it only applies when on same node, but isn't if (pcmk_is_set(other->flags, pcmk__ar_if_on_same_node) && (first_node != NULL) && (then_node != NULL) && !pcmk__same_node(first_node, then_node)) { pcmk__rsc_trace(then->rsc, "Disabled ordering %s on %s then %s on %s: " "not same node", other->action->uuid, pcmk__node_name(first_node), then->uuid, pcmk__node_name(then_node)); other->flags = pcmk__ar_none; continue; } pcmk__clear_updated_flags(changed, then, pcmk__updated_first); if ((first->rsc != NULL) && pcmk_is_set(other->flags, pcmk__ar_then_cancels_first) && !pcmk_is_set(then->flags, pcmk__action_optional)) { /* 'then' is required, so we must abandon 'first' * (e.g. a required stop cancels any agent reload). */ pcmk__set_action_flags(other->action, pcmk__action_optional); if (!strcmp(first->task, PCMK_ACTION_RELOAD_AGENT)) { pcmk__clear_rsc_flags(first->rsc, pcmk__rsc_reload); } } if ((first->rsc != NULL) && (then->rsc != NULL) && (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) { first = action_for_ordering(first); } if (first != other->action) { pcmk__rsc_trace(then->rsc, "Ordering %s after %s instead of %s", then->uuid, first->uuid, other->action->uuid); } pcmk__rsc_trace(then->rsc, "%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s", first->uuid, first->flags, then->uuid, then->flags, other->flags, action_node_str(first)); if (first == other->action) { /* 'first' was not remapped (e.g. from 'start' to 'running'), which * could mean it is a non-resource action, a primitive resource * action, or already expanded. */ uint32_t first_flags, then_flags; first_flags = action_flags_for_ordering(first, then_node); then_flags = action_flags_for_ordering(then, first_node); changed |= update_action_for_ordering_flags(first, then, first_flags, then_flags, other, scheduler); /* 'first' was for a complex resource (clone, group, etc), * create a new dependency if necessary */ } else if (order_actions(first, then, other->flags)) { /* This was the first time 'first' and 'then' were associated, * start again to get the new actions_before list */ pcmk__set_updated_flags(changed, then, pcmk__updated_then); pcmk__rsc_trace(then->rsc, "Disabled ordering %s then %s in favor of %s " "then %s", other->action->uuid, then->uuid, first->uuid, then->uuid); other->flags = pcmk__ar_none; } if (pcmk_is_set(changed, pcmk__updated_first)) { crm_trace("Re-processing %s and its 'after' actions " "because it changed", first->uuid); for (GList *lpc2 = first->actions_after; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__related_action_t *other = lpc2->data; pcmk__update_action_for_orderings(other->action, scheduler); } pcmk__update_action_for_orderings(first, scheduler); } } if (then->required_runnable_before > 0) { if (last_flags == then->flags) { pcmk__clear_updated_flags(changed, then, pcmk__updated_then); } else { pcmk__set_updated_flags(changed, then, pcmk__updated_then); } } if (pcmk_is_set(changed, pcmk__updated_then)) { crm_trace("Re-processing %s and its 'after' actions because it changed", then->uuid); if (pcmk_is_set(last_flags, pcmk__action_runnable) && !pcmk_is_set(then->flags, pcmk__action_runnable)) { pcmk__block_colocation_dependents(then); } pcmk__update_action_for_orderings(then, scheduler); for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *other = lpc->data; pcmk__update_action_for_orderings(other->action, scheduler); } } } static inline bool is_primitive_action(const pcmk_action_t *action) { return (action != NULL) && pcmk__is_primitive(action->rsc); } /*! * \internal * \brief Clear a single action flag and set reason text * * \param[in,out] action Action whose flag should be cleared * \param[in] flag Action flag that should be cleared * \param[in] reason Action that is the reason why flag is being cleared */ #define clear_action_flag_because(action, flag, reason) do { \ if (pcmk_is_set((action)->flags, (flag))) { \ pcmk__clear_action_flags(action, flag); \ if ((action)->rsc != (reason)->rsc) { \ char *reason_text = pe__action2reason((reason), (flag)); \ pe_action_set_reason((action), reason_text, false); \ free(reason_text); \ } \ } \ } while (0) /*! * \internal * \brief Update actions in an asymmetric ordering * * If the "first" action in an asymmetric ordering is unrunnable, make the * "second" action unrunnable as well, if appropriate. * * \param[in] first 'First' action in an asymmetric ordering * \param[in,out] then 'Then' action in an asymmetric ordering */ static void handle_asymmetric_ordering(const pcmk_action_t *first, pcmk_action_t *then) { /* Only resource actions after an unrunnable 'first' action need updates for * asymmetric ordering. */ if ((then->rsc == NULL) || pcmk_is_set(first->flags, pcmk__action_runnable)) { return; } // Certain optional 'then' actions are unaffected by unrunnable 'first' if (pcmk_is_set(then->flags, pcmk__action_optional)) { enum rsc_role_e then_rsc_role; then_rsc_role = then->rsc->priv->fns->state(then->rsc, true); if ((then_rsc_role == pcmk_role_stopped) && pcmk__str_eq(then->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* If 'then' should stop after 'first' but is already stopped, the * ordering is irrelevant. */ return; } else if ((then_rsc_role >= pcmk_role_started) && pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none) && pe__rsc_running_on_only(then->rsc, then->node)) { /* Similarly if 'then' should start after 'first' but is already * started on a single node. */ return; } } // 'First' can't run, so 'then' can't either clear_action_flag_because(then, pcmk__action_optional, first); clear_action_flag_because(then, pcmk__action_runnable, first); } /*! * \internal * \brief Set action bits appropriately when pcmk__ar_intermediate_stop is used * * \param[in,out] first 'First' action in ordering * \param[in,out] then 'Then' action in ordering * \param[in] filter What action flags to care about * * \note pcmk__ar_intermediate_stop is set for "stop resource before starting * it" and "stop later group member before stopping earlier group member" */ static void handle_restart_ordering(pcmk_action_t *first, pcmk_action_t *then, uint32_t filter) { const char *reason = NULL; pcmk__assert(is_primitive_action(first) && is_primitive_action(then)); // We need to update the action in two cases: // ... if 'then' is required if (pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(then->flags, pcmk__action_optional)) { reason = "restart"; } /* ... if 'then' is unrunnable action on same resource (if a resource * should restart but can't start, we still want to stop) */ if (pcmk_is_set(filter, pcmk__action_runnable) && !pcmk_is_set(then->flags, pcmk__action_runnable) && pcmk_is_set(then->rsc->flags, pcmk__rsc_managed) && (first->rsc == then->rsc)) { reason = "stop"; } if (reason == NULL) { return; } pcmk__rsc_trace(first->rsc, "Handling %s -> %s for %s", first->uuid, then->uuid, reason); // Make 'first' required if it is runnable if (pcmk_is_set(first->flags, pcmk__action_runnable)) { clear_action_flag_because(first, pcmk__action_optional, then); } // Make 'first' required if 'then' is required if (!pcmk_is_set(then->flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); } // Make 'first' unmigratable if 'then' is unmigratable if (!pcmk_is_set(then->flags, pcmk__action_migratable)) { clear_action_flag_because(first, pcmk__action_migratable, then); } // Make 'then' unrunnable if 'first' is required but unrunnable if (!pcmk_is_set(first->flags, pcmk__action_optional) && !pcmk_is_set(first->flags, pcmk__action_runnable)) { clear_action_flag_because(then, pcmk__action_runnable, first); } } /*! * \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two actions, update the actions' flags * (and runnable_before members if appropriate) as appropriate for the ordering. * Effects may cascade to other orderings involving the actions as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * (ignored) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pcmk__action_optional to affect only * mandatory actions, and pcmk__action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t pcmk__update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then, const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pcmk_scheduler_t *scheduler) { uint32_t changed = pcmk__updated_none; uint32_t then_flags = 0U; uint32_t first_flags = 0U; pcmk__assert((first != NULL) && (then != NULL) && (scheduler != NULL)); then_flags = then->flags; first_flags = first->flags; if (pcmk_is_set(type, pcmk__ar_asymmetric)) { handle_asymmetric_ordering(first, then); } if (pcmk_is_set(type, pcmk__ar_then_implies_first) && !pcmk_is_set(then_flags, pcmk__action_optional)) { // Then is required, and implies first should be, too if (pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(flags, pcmk__action_optional) && pcmk_is_set(first_flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); } if (pcmk_is_set(flags, pcmk__action_migratable) && !pcmk_is_set(then->flags, pcmk__action_migratable)) { clear_action_flag_because(first, pcmk__action_migratable, then); } } if (pcmk_is_set(type, pcmk__ar_promoted_then_implies_first) && (then->rsc != NULL) && (then->rsc->priv->orig_role == pcmk_role_promoted) && pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(then->flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); if (pcmk_is_set(first->flags, pcmk__action_migratable) && !pcmk_is_set(then->flags, pcmk__action_migratable)) { clear_action_flag_because(first, pcmk__action_migratable, then); } } if (pcmk_is_set(type, pcmk__ar_unmigratable_then_blocks) && pcmk_is_set(filter, pcmk__action_optional)) { if (!pcmk_all_flags_set(then->flags, pcmk__action_migratable |pcmk__action_runnable)) { clear_action_flag_because(first, pcmk__action_runnable, then); } if (!pcmk_is_set(then->flags, pcmk__action_optional)) { clear_action_flag_because(first, pcmk__action_optional, then); } } if (pcmk_is_set(type, pcmk__ar_first_else_then) && pcmk_is_set(filter, pcmk__action_optional) && !pcmk_is_set(first->flags, pcmk__action_runnable)) { clear_action_flag_because(then, pcmk__action_migratable, first); pcmk__clear_action_flags(then, pcmk__action_pseudo); } if (pcmk_is_set(type, pcmk__ar_unrunnable_first_blocks) && pcmk_is_set(filter, pcmk__action_runnable) && pcmk_is_set(then->flags, pcmk__action_runnable) && !pcmk_is_set(flags, pcmk__action_runnable)) { clear_action_flag_because(then, pcmk__action_runnable, first); clear_action_flag_because(then, pcmk__action_migratable, first); } if (pcmk_is_set(type, pcmk__ar_first_implies_then) && pcmk_is_set(filter, pcmk__action_optional) && pcmk_is_set(then->flags, pcmk__action_optional) && !pcmk_is_set(flags, pcmk__action_optional) && !pcmk_is_set(first->flags, pcmk__action_migratable)) { clear_action_flag_because(then, pcmk__action_optional, first); } if (pcmk_is_set(type, pcmk__ar_intermediate_stop)) { handle_restart_ordering(first, then, filter); } if (then_flags != then->flags) { pcmk__set_updated_flags(changed, first, pcmk__updated_then); pcmk__rsc_trace(then->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'first' %s (%#.6x)", then->uuid, pcmk__node_name(then->node), then->flags, then_flags, first->uuid, first->flags); if ((then->rsc != NULL) && (then->rsc->priv->parent != NULL)) { // Required to handle "X_stop then X_start" for cloned groups pcmk__update_action_for_orderings(then, scheduler); } } if (first_flags != first->flags) { pcmk__set_updated_flags(changed, first, pcmk__updated_first); pcmk__rsc_trace(first->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'then' %s (%#.6x)", first->uuid, pcmk__node_name(first->node), first->flags, first_flags, then->uuid, then->flags); } return changed; } /*! * \internal * \brief Trace-log an action (optionally with its dependent actions) * * \param[in] pre_text If not NULL, prefix the log with this plus ": " * \param[in] action Action to log * \param[in] details If true, recursively log dependent actions */ void pcmk__log_action(const char *pre_text, const pcmk_action_t *action, bool details) { const char *node_uname = NULL; const char *node_uuid = NULL; const char *desc = NULL; CRM_CHECK(action != NULL, return); if (!pcmk_is_set(action->flags, pcmk__action_pseudo)) { if (action->node != NULL) { node_uname = action->node->priv->name; node_uuid = action->node->priv->id; } else { node_uname = ""; } } switch (pcmk__parse_action(action->task)) { case pcmk__action_fence: case pcmk__action_shutdown: if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { desc = "Pseudo "; } else if (pcmk_is_set(action->flags, pcmk__action_optional)) { desc = "Optional "; } else if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { desc = "!!Non-Startable!! "; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; default: if (pcmk_is_set(action->flags, pcmk__action_optional)) { desc = "Optional "; } else if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { desc = "Pseudo "; } else if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { desc = "!!Non-Startable!! "; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (action->rsc? action->rsc->id : ""), (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; } if (details) { const GList *iter = NULL; const pcmk__related_action_t *other = NULL; crm_trace("\t\t====== Preceding Actions"); for (iter = action->actions_before; iter != NULL; iter = iter->next) { other = (const pcmk__related_action_t *) iter->data; pcmk__log_action("\t\t", other->action, false); } crm_trace("\t\t====== Subsequent Actions"); for (iter = action->actions_after; iter != NULL; iter = iter->next) { other = (const pcmk__related_action_t *) iter->data; pcmk__log_action("\t\t", other->action, false); } crm_trace("\t\t====== End"); } else { crm_trace("\t\t(before=%d, after=%d)", g_list_length(action->actions_before), g_list_length(action->actions_after)); } } /*! * \internal * \brief Create a new shutdown action for a node * * \param[in,out] node Node being shut down * * \return Newly created shutdown action for \p node */ pcmk_action_t * pcmk__new_shutdown_action(pcmk_node_t *node) { char *shutdown_id = NULL; pcmk_action_t *shutdown_op = NULL; pcmk__assert(node != NULL); shutdown_id = crm_strdup_printf("%s-%s", PCMK_ACTION_DO_SHUTDOWN, node->priv->name); shutdown_op = custom_action(NULL, shutdown_id, PCMK_ACTION_DO_SHUTDOWN, node, FALSE, node->priv->scheduler); pcmk__order_stops_before_shutdown(node, shutdown_op); pcmk__insert_meta(shutdown_op, PCMK__META_OP_NO_WAIT, PCMK_VALUE_TRUE); return shutdown_op; } /*! * \internal * \brief Calculate and add an operation digest to XML * * Calculate an operation digest, which enables us to later determine when a * restart is needed due to the resource's parameters being changed, and add it * to given XML. * * \param[in] op Operation result from executor * \param[in,out] update XML to add digest to */ static void add_op_digest_to_xml(const lrmd_event_data_t *op, xmlNode *update) { char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = pcmk__xe_create(NULL, PCMK_XE_PARAMETERS); g_hash_table_foreach(op->params, hash2field, args_xml); pcmk__filter_op_for_digest(args_xml); digest = pcmk__digest_operation(args_xml); crm_xml_add(update, PCMK__XA_OP_DIGEST, digest); pcmk__xml_free(args_xml); free(digest); } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" /*! * \internal * \brief Create XML for resource operation history update * * \param[in,out] parent Parent XML node to add to * \param[in,out] op Operation event data * \param[in] caller_version DC feature set * \param[in] target_rc Expected result of operation * \param[in] node Name of node on which operation was performed * \param[in] origin Arbitrary description of update source * * \return Newly created XML node for history update */ xmlNode * pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op, const char *caller_version, int target_rc, const char *node, const char *origin) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; CRM_CHECK(op != NULL, return NULL); crm_trace("Creating history XML for %s-interval %s action for %s on %s " "(DC version: %s, origin: %s)", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, ((node == NULL)? "no node" : node), caller_version, origin); task = op->op_type; /* Record a successful agent reload as a start, and a failed one as a * monitor, to make life easier for the scheduler when determining the * current state. * * @COMPAT We should check "reload" here only if the operation was for a * pre-OCF-1.1 resource agent, but we don't know that here, and we should * only ever get results for actions scheduled by us, so we can reasonably * assume any "reload" is actually a pre-1.1 agent reload. * * @TODO This remapping can make log messages with task confusing for users * (for example, an "Initiating reload ..." followed by "... start ... * confirmed"). Either do this remapping in the scheduler if possible, or * store the original task in a new XML attribute for later logging. */ if (pcmk__str_any_of(task, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { if (op->op_status == PCMK_EXEC_DONE) { task = PCMK_ACTION_START; } else { task = PCMK_ACTION_MONITOR; } } key = pcmk__op_key(op->rsc_id, task, op->interval_ms); if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = pcmk__notify_key(op->rsc_id, n_type, n_task); if (op->op_status != PCMK_EXEC_PENDING) { /* Ignore notify errors. * * @TODO It might be better to keep the correct result here, and * ignore it in process_graph_event(). */ lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } /* Migration history is preserved separately, which usually matters for * multiple nodes and is important for future cluster transitions. */ } else if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { op_id = strdup(key); } else if (did_rsc_op_fail(op, target_rc)) { op_id = pcmk__op_key(op->rsc_id, "last_failure", 0); if (op->interval_ms == 0) { /* Ensure 'last' gets updated, in case PCMK_META_RECORD_PENDING is * true */ op_id_additional = pcmk__op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval_ms > 0) { op_id = strdup(key); } else { op_id = pcmk__op_key(op->rsc_id, "last", 0); } again: xml_op = pcmk__xe_first_child(parent, PCMK__XE_LRM_RSC_OP, PCMK_XA_ID, op_id); if (xml_op == NULL) { xml_op = pcmk__xe_create(parent, PCMK__XE_LRM_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for: " PCMK__OP_FMT " %d from %s", op->rsc_id, op->op_type, op->interval_ms, op->call_id, origin); local_user_data = pcmk__transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if (magic == NULL) { magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc, (const char *) op->user_data); } crm_xml_add(xml_op, PCMK_XA_ID, op_id); crm_xml_add(xml_op, PCMK__XA_OPERATION_KEY, key); crm_xml_add(xml_op, PCMK_XA_OPERATION, task); crm_xml_add(xml_op, PCMK_XA_CRM_DEBUG_ORIGIN, origin); crm_xml_add(xml_op, PCMK_XA_CRM_FEATURE_SET, caller_version); crm_xml_add(xml_op, PCMK__XA_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, PCMK__XA_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, PCMK_XA_EXIT_REASON, pcmk__s(exit_reason, "")); crm_xml_add(xml_op, PCMK__META_ON_NODE, node); // For context during triage - crm_xml_add_int(xml_op, PCMK__XA_CALL_ID, op->call_id); - crm_xml_add_int(xml_op, PCMK__XA_RC_CODE, op->rc); - crm_xml_add_int(xml_op, PCMK__XA_OP_STATUS, op->op_status); + pcmk__xe_set_int(xml_op, PCMK__XA_CALL_ID, op->call_id); + pcmk__xe_set_int(xml_op, PCMK__XA_RC_CODE, op->rc); + pcmk__xe_set_int(xml_op, PCMK__XA_OP_STATUS, op->op_status); pcmk__xe_set_guint(xml_op, PCMK_META_INTERVAL, op->interval_ms); if ((op->t_run > 0) || (op->t_rcchange > 0) || (op->exec_time > 0) || (op->queue_time > 0)) { crm_trace("Timing data (" PCMK__OP_FMT "): " "last=%lld change=%lld exec=%u queue=%u", op->rsc_id, op->op_type, op->interval_ms, (long long) op->t_run, (long long) op->t_rcchange, op->exec_time, op->queue_time); if ((op->interval_ms > 0) && (op->t_rcchange > 0)) { // Recurring ops may have changed rc after initial run pcmk__xe_set_time(xml_op, PCMK_XA_LAST_RC_CHANGE, op->t_rcchange); } else { pcmk__xe_set_time(xml_op, PCMK_XA_LAST_RC_CHANGE, op->t_run); } - crm_xml_add_int(xml_op, PCMK_XA_EXEC_TIME, op->exec_time); - crm_xml_add_int(xml_op, PCMK_XA_QUEUE_TIME, op->queue_time); + pcmk__xe_set_int(xml_op, PCMK_XA_EXEC_TIME, op->exec_time); + pcmk__xe_set_int(xml_op, PCMK_XA_QUEUE_TIME, op->queue_time); } if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* Record PCMK__META_MIGRATE_SOURCE and PCMK__META_MIGRATE_TARGET always * for migrate ops. */ const char *name = PCMK__META_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = PCMK__META_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } add_op_digest_to_xml(op, xml_op); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; } /*! * \internal * \brief Check whether an action shutdown-locks a resource to a node * * If the PCMK_OPT_SHUTDOWN_LOCK cluster property is set, resources will not be * recovered on a different node if cleanly stopped, and may start only on that * same node. This function checks whether that applies to a given action, so * that the transition graph can be marked appropriately. * * \param[in] action Action to check * * \return true if \p action locks its resource to the action's node, * otherwise false */ bool pcmk__action_locks_rsc_to_node(const pcmk_action_t *action) { // Only resource actions taking place on resource's lock node are locked if ((action == NULL) || (action->rsc == NULL) || !pcmk__same_node(action->node, action->rsc->priv->lock_node)) { return false; } /* During shutdown, only stops are locked (otherwise, another action such as * a demote would cause the controller to clear the lock) */ if (action->node->details->shutdown && (action->task != NULL) && (strcmp(action->task, PCMK_ACTION_STOP) != 0)) { return false; } return true; } /* lowest to highest */ static gint sort_action_id(gconstpointer a, gconstpointer b) { const pcmk__related_action_t *action_wrapper2 = a; const pcmk__related_action_t *action_wrapper1 = b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } if (action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } return 0; } /*! * \internal * \brief Remove any duplicate action inputs, merging action flags * * \param[in,out] action Action whose inputs should be checked */ void pcmk__deduplicate_action_inputs(pcmk_action_t *action) { GList *item = NULL; GList *next = NULL; pcmk__related_action_t *last_input = NULL; action->actions_before = g_list_sort(action->actions_before, sort_action_id); for (item = action->actions_before; item != NULL; item = next) { pcmk__related_action_t *input = item->data; next = item->next; if ((last_input != NULL) && (input->action->id == last_input->action->id)) { crm_trace("Input %s (%d) duplicate skipped for action %s (%d)", input->action->uuid, input->action->id, action->uuid, action->id); /* For the purposes of scheduling, the ordering flags no longer * matter, but crm_simulate looks at certain ones when creating a * dot graph. Combining the flags is sufficient for that purpose. */ pcmk__set_relation_flags(last_input->flags, input->flags); if (input->graphed) { last_input->graphed = true; } free(item->data); action->actions_before = g_list_delete_link(action->actions_before, item); } else { last_input = input; input->graphed = false; } } } /*! * \internal * \brief Output all scheduled actions * * \param[in,out] scheduler Scheduler data */ void pcmk__output_actions(pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv->out; // Output node (non-resource) actions for (GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { char *node_name = NULL; char *task = NULL; pcmk_action_t *action = (pcmk_action_t *) iter->data; if (action->rsc != NULL) { continue; // Resource actions will be output later } else if (pcmk_is_set(action->flags, pcmk__action_optional)) { continue; // This action was not scheduled } if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { task = strdup("Shutdown"); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { const char *op = g_hash_table_lookup(action->meta, PCMK__META_STONITH_ACTION); task = crm_strdup_printf("Fence (%s)", op); } else { continue; // Don't display other node action types } if (pcmk__is_guest_or_bundle_node(action->node)) { const pcmk_resource_t *remote = action->node->priv->remote; node_name = crm_strdup_printf("%s (resource: %s)", pcmk__node_name(action->node), remote->priv->launcher->id); } else if (action->node != NULL) { node_name = crm_strdup_printf("%s", pcmk__node_name(action->node)); } out->message(out, "node-action", task, node_name, action->reason); free(node_name); free(task); } // Output resource actions for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; rsc->priv->cmds->output_actions(rsc); } } /*! * \internal * \brief Get action name needed to compare digest for configuration changes * * \param[in] task Action name from history * \param[in] interval_ms Action interval (in milliseconds) * * \return Action name whose digest should be compared */ static const char * task_for_digest(const char *task, guint interval_ms) { /* Certain actions need to be compared against the parameters used to start * the resource. */ if ((interval_ms == 0) && pcmk__str_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_MIGRATE_FROM, PCMK_ACTION_PROMOTE, NULL)) { task = PCMK_ACTION_START; } return task; } /*! * \internal * \brief Check whether only sanitized parameters to an action changed * * When collecting CIB files for troubleshooting, crm_report will mask * sensitive resource parameters. If simulations were run using that, affected * resources would appear to need a restart, which would complicate * troubleshooting. To avoid that, we save a "secure digest" of non-sensitive * parameters. This function used that digest to check whether only masked * parameters are different. * * \param[in] xml_op Resource history entry with secure digest * \param[in] digest_data Operation digest information being compared * \param[in] scheduler Scheduler data * * \return true if only sanitized parameters changed, otherwise false */ static bool only_sanitized_changed(const xmlNode *xml_op, const pcmk__op_digest_t *digest_data, const pcmk_scheduler_t *scheduler) { const char *digest_secure = NULL; if (!pcmk_is_set(scheduler->flags, pcmk__sched_sanitized)) { // The scheduler is not being run as a simulation return false; } digest_secure = pcmk__xe_get(xml_op, PCMK__XA_OP_SECURE_DIGEST); return (digest_data->rc != pcmk__digest_match) && (digest_secure != NULL) && (digest_data->digest_secure_calc != NULL) && (strcmp(digest_data->digest_secure_calc, digest_secure) == 0); } /*! * \internal * \brief Force a restart due to a configuration change * * \param[in,out] rsc Resource that action is for * \param[in] task Name of action whose configuration changed * \param[in] interval_ms Action interval (in milliseconds) * \param[in,out] node Node where resource should be restarted */ static void force_restart(pcmk_resource_t *rsc, const char *task, guint interval_ms, pcmk_node_t *node) { char *key = pcmk__op_key(rsc->id, task, interval_ms); pcmk_action_t *required = custom_action(rsc, key, task, NULL, FALSE, rsc->priv->scheduler); pe_action_set_reason(required, "resource definition change", true); trigger_unfencing(rsc, node, "Device parameters changed", NULL, rsc->priv->scheduler); } /*! * \internal * \brief Schedule a reload of a resource on a node * * \param[in,out] data Resource to reload * \param[in] user_data Where resource should be reloaded */ static void schedule_reload(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; const pcmk_node_t *node = user_data; pcmk_action_t *reload = NULL; // For collective resources, just call recursively for children if (rsc->priv->variant > pcmk__rsc_variant_primitive) { g_list_foreach(rsc->priv->children, schedule_reload, user_data); return; } // Skip the reload in certain situations if ((node == NULL) || !pcmk_is_set(rsc->flags, pcmk__rsc_managed) || pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { pcmk__rsc_trace(rsc, "Skip reload of %s:%s%s %s", rsc->id, pcmk_is_set(rsc->flags, pcmk__rsc_managed)? "" : " unmanaged", pcmk_is_set(rsc->flags, pcmk__rsc_failed)? " failed" : "", (node == NULL)? "inactive" : node->priv->name); return; } /* If a resource's configuration changed while a start was pending, * force a full restart instead of a reload. */ if (pcmk_is_set(rsc->flags, pcmk__rsc_start_pending)) { pcmk__rsc_trace(rsc, "%s: preventing agent reload because start pending", rsc->id); custom_action(rsc, stop_key(rsc), PCMK_ACTION_STOP, node, FALSE, rsc->priv->scheduler); return; } // Schedule the reload pcmk__set_rsc_flags(rsc, pcmk__rsc_reload); reload = custom_action(rsc, reload_key(rsc), PCMK_ACTION_RELOAD_AGENT, node, FALSE, rsc->priv->scheduler); pe_action_set_reason(reload, "resource definition change", FALSE); // Set orderings so that a required stop or demote cancels the reload pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL, pcmk__ar_ordered|pcmk__ar_then_cancels_first, rsc->priv->scheduler); pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL, pcmk__ar_ordered|pcmk__ar_then_cancels_first, rsc->priv->scheduler); } /*! * \internal * \brief Handle any configuration change for an action * * Given an action from resource history, if the resource's configuration * changed since the action was done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, etc.). * * \param[in,out] rsc Resource that action is for * \param[in,out] node Node that action was on * \param[in] xml_op Action XML from resource history * * \return true if action configuration changed, otherwise false */ bool pcmk__check_action_config(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *xml_op) { guint interval_ms = 0; const char *task = NULL; const pcmk__op_digest_t *digest_data = NULL; CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL), return false); task = pcmk__xe_get(xml_op, PCMK_XA_OPERATION); CRM_CHECK(task != NULL, return false); pcmk__xe_get_guint(xml_op, PCMK_META_INTERVAL, &interval_ms); // If this is a recurring action, check whether it has been orphaned if (interval_ms > 0) { if (pcmk__find_action_config(rsc, task, interval_ms, false) != NULL) { pcmk__rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); } else if (pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_cancel_removed_actions)) { pcmk__schedule_cancel(rsc, pcmk__xe_get(xml_op, PCMK__XA_CALL_ID), task, interval_ms, node, "orphan"); return true; } else { pcmk__rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); return true; } } crm_trace("Checking %s-interval %s for %s on %s for configuration changes", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); task = task_for_digest(task, interval_ms); digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->priv->scheduler); if (only_sanitized_changed(xml_op, digest_data, rsc->priv->scheduler)) { if (!pcmk__is_daemon && (rsc->priv->scheduler->priv->out != NULL)) { pcmk__output_t *out = rsc->priv->scheduler->priv->out; out->info(out, "Only 'private' parameters to %s-interval %s for %s " "on %s changed: %s", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node), pcmk__xe_get(xml_op, PCMK__XA_TRANSITION_MAGIC)); } return false; } switch (digest_data->rc) { case pcmk__digest_restart: crm_log_xml_debug(digest_data->params_restart, "params:restart"); force_restart(rsc, task, interval_ms, node); return true; case pcmk__digest_unknown: case pcmk__digest_mismatch: // Changes that can potentially be handled by an agent reload if (interval_ms > 0) { /* Recurring actions aren't reloaded per se, they are just * re-scheduled so the next run uses the new parameters. * The old instance will be cancelled automatically. */ crm_log_xml_debug(digest_data->params_all, "params:reschedule"); pcmk__reschedule_recurring(rsc, task, interval_ms, node); } else if (pcmk__xe_get(xml_op, PCMK__XA_OP_RESTART_DIGEST) != NULL) { // Agent supports reload, so use it trigger_unfencing(rsc, node, "Device parameters changed (reload)", NULL, rsc->priv->scheduler); crm_log_xml_debug(digest_data->params_all, "params:reload"); schedule_reload((gpointer) rsc, (gpointer) node); } else { pcmk__rsc_trace(rsc, "Restarting %s " "because agent doesn't support reload", rsc->id); crm_log_xml_debug(digest_data->params_restart, "params:restart"); force_restart(rsc, task, interval_ms, node); } return true; default: break; } return false; } /*! * \internal * \brief Create a list of resource's action history entries, sorted by call ID * * \param[in] rsc_entry Resource's \c PCMK__XE_LRM_RSC_OP status XML * \param[out] start_index Where to store index of start-like action, if any * \param[out] stop_index Where to store index of stop action, if any */ static GList * rsc_history_as_list(const xmlNode *rsc_entry, int *start_index, int *stop_index) { GList *ops = NULL; for (xmlNode *rsc_op = pcmk__xe_first_child(rsc_entry, PCMK__XE_LRM_RSC_OP, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, PCMK__XE_LRM_RSC_OP)) { ops = g_list_prepend(ops, rsc_op); } ops = g_list_sort(ops, sort_op_by_callid); calculate_active_ops(ops, start_index, stop_index); return ops; } /*! * \internal * \brief Process a resource's action history from the CIB status * * Given a resource's action history, if the resource's configuration * changed since the actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in] rsc_entry Resource's \c PCMK__XE_LRM_RSC_OP status XML * \param[in,out] rsc Resource whose history is being processed * \param[in,out] node Node whose history is being processed */ static void process_rsc_history(const xmlNode *rsc_entry, pcmk_resource_t *rsc, pcmk_node_t *node) { int offset = -1; int stop_index = 0; int start_index = 0; GList *sorted_op_list = NULL; if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { if (pcmk__is_anonymous_clone(pe__const_top_resource(rsc, false))) { /* @TODO Should this be done for bundled primitives as well? Added * by 2ac43ae31 */ pcmk__rsc_trace(rsc, "Skipping configuration check " "for orphaned clone instance %s", rsc->id); } else { pcmk__rsc_trace(rsc, "Skipping configuration check and scheduling " "clean-up for orphaned resource %s", rsc->id); pcmk__schedule_cleanup(rsc, node, false); } return; } if (pe_find_node_id(rsc->priv->active_nodes, node->priv->id) == NULL) { if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) { pcmk__schedule_cleanup(rsc, node, false); } pcmk__rsc_trace(rsc, "Skipping configuration check for %s " "because no longer active on %s", rsc->id, pcmk__node_name(node)); return; } pcmk__rsc_trace(rsc, "Checking for configuration changes for %s on %s", rsc->id, pcmk__node_name(node)); if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) { pcmk__schedule_cleanup(rsc, node, false); } sorted_op_list = rsc_history_as_list(rsc_entry, &start_index, &stop_index); if (start_index < stop_index) { return; // Resource is stopped } for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { xmlNode *rsc_op = (xmlNode *) iter->data; const char *task = NULL; guint interval_ms = 0; if (++offset < start_index) { // Skip actions that happened before a start continue; } task = pcmk__xe_get(rsc_op, PCMK_XA_OPERATION); pcmk__xe_get_guint(rsc_op, PCMK_META_INTERVAL, &interval_ms); if ((interval_ms > 0) && (pcmk_is_set(rsc->flags, pcmk__rsc_maintenance) || node->details->maintenance)) { // Maintenance mode cancels recurring operations pcmk__schedule_cancel(rsc, pcmk__xe_get(rsc_op, PCMK__XA_CALL_ID), task, interval_ms, node, "maintenance mode"); } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_PROMOTE, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* If a resource operation failed, and the operation's definition * has changed, clear any fail count so they can be retried fresh. */ if (pe__bundle_needs_remote_name(rsc)) { /* We haven't assigned resources to nodes yet, so if the * REMOTE_CONTAINER_HACK is used, we may calculate the digest * based on the literal "#uname" value rather than the properly * substituted value. That would mistakenly make the action * definition appear to have been changed. Defer the check until * later in this case. */ pcmk__add_param_check(rsc_op, rsc, node, pcmk__check_active); } else if (pcmk__check_action_config(rsc, node, rsc_op) && (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL) != 0)) { pe__clear_failcount(rsc, node, "action definition changed", rsc->priv->scheduler); } } } g_list_free(sorted_op_list); } /*! * \internal * \brief Process a node's action history from the CIB status * * Given a node's resource history, if the resource's configuration changed * since the actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in,out] node Node whose history is being processed * \param[in] lrm_rscs Node's \c PCMK__XE_LRM_RESOURCES from CIB status XML */ static void process_node_history(pcmk_node_t *node, const xmlNode *lrm_rscs) { crm_trace("Processing node history for %s", pcmk__node_name(node)); for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rscs, PCMK__XE_LRM_RESOURCE, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry, PCMK__XE_LRM_RESOURCE)) { if (rsc_entry->children != NULL) { GList *result = pcmk__rscs_matching_id(pcmk__xe_id(rsc_entry), node->priv->scheduler); for (GList *iter = result; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (pcmk__is_primitive(rsc)) { process_rsc_history(rsc_entry, rsc, node); } } g_list_free(result); } } } // XPath to find a node's resource history #define XPATH_NODE_HISTORY "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_UNAME "='%s']" \ "/" PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES /*! * \internal * \brief Process any resource configuration changes in the CIB status * * Go through all nodes' resource history, and if a resource's configuration * changed since its actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in,out] scheduler Scheduler data */ void pcmk__handle_rsc_config_changes(pcmk_scheduler_t *scheduler) { crm_trace("Check resource and action configuration for changes"); /* Rather than iterate through the status section, iterate through the nodes * and search for the appropriate status subsection for each. This skips * orphaned nodes and lets us eliminate some cases before searching the XML. */ for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; /* Don't bother checking actions for a node that can't run actions ... * unless it's in maintenance mode, in which case we still need to * cancel any existing recurring monitors. */ if (node->details->maintenance || pcmk__node_available(node, false, false)) { char *xpath = NULL; xmlNode *history = NULL; xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->priv->name); history = pcmk__xpath_find_one(scheduler->input->doc, xpath, LOG_NEVER); free(xpath); process_node_history(node, history); } } } diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c index 4f86621cbd..551c82c239 100644 --- a/lib/pacemaker/pcmk_sched_clone.c +++ b/lib/pacemaker/pcmk_sched_clone.c @@ -1,746 +1,746 @@ /* - * Copyright 2004-2024 the Pacemaker project contributors + * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Assign a clone resource's instances to nodes * * \param[in,out] rsc Clone resource to assign * \param[in] prefer Node to prefer, if all else is equal * \param[in] stop_if_fail If \c true and a primitive descendant of \p rsc * can't be assigned to a node, set the * descendant's next role to stopped and update * existing actions * * \return NULL (clones are not assigned to a single node) * * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can * completely undo the assignment. A successful assignment can be either * undone or left alone as final. A failed assignment has the same effect * as calling pcmk__unassign_resource(); there are no side effects on * roles or actions. */ pcmk_node_t * pcmk__clone_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer, bool stop_if_fail) { GList *colocations = NULL; pcmk__assert(pcmk__is_clone(rsc)); if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) { return NULL; // Assignment has already been done } // Detect assignment loops if (pcmk_is_set(rsc->flags, pcmk__rsc_assigning)) { pcmk__rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id); return NULL; } pcmk__set_rsc_flags(rsc, pcmk__rsc_assigning); // If this clone is promotable, consider nodes' promotion scores if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) { pcmk__add_promotion_scores(rsc); } // If this clone is colocated with any other resources, assign those first colocations = pcmk__this_with_colocations(rsc); for (GList *iter = colocations; iter != NULL; iter = iter->next) { pcmk__colocation_t *constraint = (pcmk__colocation_t *) iter->data; pcmk_resource_t *primary = constraint->primary; pcmk__rsc_trace(rsc, "%s: Assigning colocation %s primary %s first", rsc->id, constraint->id, primary->id); primary->priv->cmds->assign(primary, prefer, stop_if_fail); } g_list_free(colocations); // If any resources are colocated with this one, consider their preferences colocations = pcmk__with_this_colocations(rsc); g_list_foreach(colocations, pcmk__add_dependent_scores, rsc); g_list_free(colocations); pe__show_node_scores(!pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_output_scores), rsc, __func__, rsc->priv->allowed_nodes, rsc->priv->scheduler); rsc->priv->children = g_list_sort(rsc->priv->children, pcmk__cmp_instance); pcmk__assign_instances(rsc, rsc->priv->children, pe__clone_max(rsc), pe__clone_node_max(rsc)); if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) { pcmk__set_instance_roles(rsc); } pcmk__clear_rsc_flags(rsc, pcmk__rsc_unassigned|pcmk__rsc_assigning); pcmk__rsc_trace(rsc, "Assigned clone %s", rsc->id); return NULL; } /*! * \internal * \brief Create all actions needed for a given clone resource * * \param[in,out] rsc Clone resource to create actions for */ void pcmk__clone_create_actions(pcmk_resource_t *rsc) { pcmk__assert(pcmk__is_clone(rsc)); pcmk__rsc_trace(rsc, "Creating actions for clone %s", rsc->id); pcmk__create_instance_actions(rsc, rsc->priv->children); if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) { pcmk__create_promotable_actions(rsc); } } /*! * \internal * \brief Create implicit constraints needed for a clone resource * * \param[in,out] rsc Clone resource to create implicit constraints for */ void pcmk__clone_internal_constraints(pcmk_resource_t *rsc) { bool ordered = false; pcmk__assert(pcmk__is_clone(rsc)); pcmk__rsc_trace(rsc, "Creating internal constraints for clone %s", rsc->id); // Restart ordering: Stop -> stopped -> start -> started pcmk__order_resource_actions(rsc, PCMK_ACTION_STOPPED, rsc, PCMK_ACTION_START, pcmk__ar_ordered); pcmk__order_resource_actions(rsc, PCMK_ACTION_START, rsc, PCMK_ACTION_RUNNING, pcmk__ar_unrunnable_first_blocks); pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP, rsc, PCMK_ACTION_STOPPED, pcmk__ar_unrunnable_first_blocks); // Demoted -> stop and started -> promote if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) { pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTED, rsc, PCMK_ACTION_STOP, pcmk__ar_ordered); pcmk__order_resource_actions(rsc, PCMK_ACTION_RUNNING, rsc, PCMK_ACTION_PROMOTE, pcmk__ar_unrunnable_first_blocks); } ordered = pe__clone_is_ordered(rsc); if (ordered) { /* Ordered clone instances must start and stop by instance number. The * instances might have been previously shuffled for assignment or * promotion purposes, so re-sort them. */ rsc->priv->children = g_list_sort(rsc->priv->children, pcmk__cmp_instance_number); } for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *instance = (pcmk_resource_t *) iter->data; instance->priv->cmds->internal_constraints(instance); // Start clone -> start instance -> clone started pcmk__order_starts(rsc, instance, pcmk__ar_unrunnable_first_blocks |pcmk__ar_then_implies_first_graphed); pcmk__order_resource_actions(instance, PCMK_ACTION_START, rsc, PCMK_ACTION_RUNNING, pcmk__ar_first_implies_then_graphed); // Stop clone -> stop instance -> clone stopped pcmk__order_stops(rsc, instance, pcmk__ar_then_implies_first_graphed); pcmk__order_resource_actions(instance, PCMK_ACTION_STOP, rsc, PCMK_ACTION_STOPPED, pcmk__ar_first_implies_then_graphed); /* Instances of ordered clones must be started and stopped by instance * number. Since only some instances may be starting or stopping, order * each instance relative to every later instance. */ if (ordered) { for (GList *later = iter->next; later != NULL; later = later->next) { pcmk__order_starts(instance, (pcmk_resource_t *) later->data, pcmk__ar_ordered); pcmk__order_stops((pcmk_resource_t *) later->data, instance, pcmk__ar_ordered); } } } if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) { pcmk__order_promotable_instances(rsc); } } /*! * \internal * \brief Check whether colocated resources can be interleaved * * \param[in] colocation Colocation constraint with clone as primary * * \return true if colocated resources can be interleaved, otherwise false */ static bool can_interleave(const pcmk__colocation_t *colocation) { const pcmk_resource_t *primary = colocation->primary; const pcmk_resource_t *dependent = colocation->dependent; // Only colocations between clone or bundle resources use interleaving if (dependent->priv->variant <= pcmk__rsc_variant_group) { return false; } // Only the dependent needs to be marked for interleaving if (!crm_is_true(g_hash_table_lookup(dependent->priv->meta, PCMK_META_INTERLEAVE))) { return false; } /* @TODO Do we actually care about multiple primary instances sharing a * dependent instance? */ if (dependent->priv->fns->max_per_node(dependent) != primary->priv->fns->max_per_node(primary)) { pcmk__config_err("Cannot interleave %s and %s because they do not " "support the same number of instances per node", dependent->id, primary->id); return false; } return true; } /*! * \internal * \brief Apply a colocation's score to node scores or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node scores (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent * * \return The score added to the dependent's priority */ int pcmk__clone_apply_coloc_score(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { const GList *iter = NULL; int priority_delta = 0; /* This should never be called for the clone itself as a dependent. Instead, * we add its colocation constraints to its instances and call the * apply_coloc_score() method for the instances as dependents. */ pcmk__assert(!for_dependent && (colocation != NULL) && pcmk__is_clone(primary) && pcmk__is_primitive(dependent)); if (pcmk_is_set(primary->flags, pcmk__rsc_unassigned)) { pcmk__rsc_trace(primary, "Delaying processing colocation %s " "because cloned primary %s is still provisional", colocation->id, primary->id); return 0; } pcmk__rsc_trace(primary, "Processing colocation %s (%s with clone %s @%s)", colocation->id, dependent->id, primary->id, pcmk_readable_score(colocation->score)); // Apply role-specific colocations if (pcmk_is_set(primary->flags, pcmk__rsc_promotable) && (colocation->primary_role != pcmk_role_unknown)) { if (pcmk_is_set(dependent->flags, pcmk__rsc_unassigned)) { // We're assigning the dependent to a node pcmk__update_dependent_with_promotable(primary, dependent, colocation); return 0; } if (colocation->dependent_role == pcmk_role_promoted) { // We're choosing a role for the dependent return pcmk__update_promotable_dependent_priority(primary, dependent, colocation); } } // Apply interleaved colocations if (can_interleave(colocation)) { const pcmk_resource_t *primary_instance = NULL; primary_instance = pcmk__find_compatible_instance(dependent, primary, pcmk_role_unknown, false); if (primary_instance != NULL) { pcmk__rsc_debug(primary, "Interleaving %s with %s", dependent->id, primary_instance->id); return dependent->priv->cmds->apply_coloc_score(dependent, primary_instance, colocation, true); } if (colocation->score >= PCMK_SCORE_INFINITY) { crm_notice("%s cannot run because it cannot interleave with " "any instance of %s", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true, true); } else { pcmk__rsc_debug(primary, "%s will not colocate with %s " "because no instance can interleave with it", dependent->id, primary->id); } return 0; } // Apply mandatory colocations if (colocation->score >= PCMK_SCORE_INFINITY) { GList *primary_nodes = NULL; // Dependent can run only where primary will have unblocked instances for (iter = primary->priv->children; iter != NULL; iter = iter->next) { const pcmk_resource_t *instance = iter->data; pcmk_node_t *chosen = NULL; chosen = instance->priv->fns->location(instance, NULL, pcmk__rsc_node_assigned); if ((chosen != NULL) && !is_set_recursive(instance, pcmk__rsc_blocked, TRUE)) { pcmk__rsc_trace(primary, "Allowing %s: %s %d", colocation->id, pcmk__node_name(chosen), chosen->assign->score); primary_nodes = g_list_prepend(primary_nodes, chosen); } } pcmk__colocation_intersect_nodes(dependent, primary, colocation, primary_nodes, false); g_list_free(primary_nodes); return 0; } // Apply optional colocations for (iter = primary->priv->children; iter != NULL; iter = iter->next) { const pcmk_resource_t *instance = iter->data; int instance_delta = instance->priv->cmds->apply_coloc_score(dependent, instance, colocation, false); priority_delta = pcmk__add_scores(priority_delta, instance_delta); } return priority_delta; } // Clone implementation of pcmk__assignment_methods_t:with_this_colocations() void pcmk__with_clone_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list) { const pcmk_resource_t *parent = NULL; CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return); parent = rsc->priv->parent; pcmk__add_with_this_list(list, rsc->priv->with_this_colocations, orig_rsc); if (parent != NULL) { parent->priv->cmds->with_this_colocations(parent, orig_rsc, list); } } // Clone implementation of pcmk__assignment_methods_t:this_with_colocations() void pcmk__clone_with_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list) { const pcmk_resource_t *parent = NULL; CRM_CHECK((rsc != NULL) && (orig_rsc != NULL) && (list != NULL), return); parent = rsc->priv->parent; pcmk__add_this_with_list(list, rsc->priv->this_with_colocations, orig_rsc); if (parent != NULL) { parent->priv->cmds->this_with_colocations(parent, orig_rsc, list); } } /*! * \internal * \brief Return action flags for a given clone resource action * * \param[in,out] action Action to get flags for * \param[in] node If not NULL, limit effects to this node * * \return Flags appropriate to \p action on \p node */ uint32_t pcmk__clone_action_flags(pcmk_action_t *action, const pcmk_node_t *node) { pcmk__assert((action != NULL) && pcmk__is_clone(action->rsc)); return pcmk__collective_action_flags(action, action->rsc->priv->children, node); } /*! * \internal * \brief Apply a location constraint to a clone resource's allowed node scores * * \param[in,out] rsc Clone resource to apply constraint to * \param[in,out] location Location constraint to apply */ void pcmk__clone_apply_location(pcmk_resource_t *rsc, pcmk__location_t *location) { CRM_CHECK((location != NULL) && pcmk__is_clone(rsc), return); pcmk__apply_location(rsc, location); for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *instance = (pcmk_resource_t *) iter->data; instance->priv->cmds->apply_location(instance, location); } } // GFunc wrapper for calling the action_flags() resource method static void call_action_flags(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = user_data; rsc->priv->cmds->action_flags((pcmk_action_t *) data, NULL); } /*! * \internal * \brief Add a clone resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__clone_add_actions_to_graph(pcmk_resource_t *rsc) { pcmk__assert(pcmk__is_clone(rsc)); g_list_foreach(rsc->priv->actions, call_action_flags, rsc); pe__create_clone_notifications(rsc); for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data; child_rsc->priv->cmds->add_actions_to_graph(child_rsc); } pcmk__add_rsc_actions_to_graph(rsc); pe__free_clone_notification_data(rsc); } /*! * \internal * \brief Check whether a resource or any children have been probed on a node * * \param[in] rsc Resource to check * \param[in] node Node to check * * \return true if \p node is in the probed_nodes table of \p rsc or any of its * children, otherwise false */ static bool rsc_probed_on(const pcmk_resource_t *rsc, const pcmk_node_t *node) { if (rsc->priv->children != NULL) { for (GList *child_iter = rsc->priv->children; child_iter != NULL; child_iter = child_iter->next) { pcmk_resource_t *child = (pcmk_resource_t *) child_iter->data; if (rsc_probed_on(child, node)) { return true; } } return false; } if (rsc->priv->probed_nodes != NULL) { GHashTableIter iter; pcmk_node_t *known_node = NULL; g_hash_table_iter_init(&iter, rsc->priv->probed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) { if (pcmk__same_node(node, known_node)) { return true; } } } return false; } /*! * \internal * \brief Find clone instance that has been probed on given node * * \param[in] clone Clone resource to check * \param[in] node Node to check * * \return Instance of \p clone that has been probed on \p node if any, * otherwise NULL */ static pcmk_resource_t * find_probed_instance_on(const pcmk_resource_t *clone, const pcmk_node_t *node) { for (GList *iter = clone->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *instance = (pcmk_resource_t *) iter->data; if (rsc_probed_on(instance, node)) { return instance; } } return NULL; } /*! * \internal * \brief Probe an anonymous clone on a node * * \param[in,out] clone Anonymous clone to probe * \param[in,out] node Node to probe \p clone on */ static bool probe_anonymous_clone(pcmk_resource_t *clone, pcmk_node_t *node) { // Check whether we already probed an instance on this node pcmk_resource_t *child = find_probed_instance_on(clone, node); // Otherwise, check if we plan to start an instance on this node for (GList *iter = clone->priv->children; (iter != NULL) && (child == NULL); iter = iter->next) { pcmk_resource_t *instance = (pcmk_resource_t *) iter->data; const pcmk_node_t *instance_node = NULL; instance_node = instance->priv->fns->location(instance, NULL, pcmk__rsc_node_assigned); if (pcmk__same_node(instance_node, node)) { child = instance; } } // Otherwise, use the first clone instance if (child == NULL) { child = clone->priv->children->data; } // Anonymous clones only need to probe a single instance return child->priv->cmds->create_probe(child, node); } /*! * \internal * \brief Schedule any probes needed for a resource on a node * * \param[in,out] rsc Resource to create probe for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool pcmk__clone_create_probe(pcmk_resource_t *rsc, pcmk_node_t *node) { pcmk__assert((node != NULL) && pcmk__is_clone(rsc)); if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes)) { /* The clone is configured to be probed only where a location constraint * exists with PCMK_XA_RESOURCE_DISCOVERY set to exclusive. * * This check is not strictly necessary here since the instance's * create_probe() method would also check, but doing it here is more * efficient (especially for unique clones with a large number of * instances), and affects the CRM_meta_notify_available_uname variable * passed with notify actions. */ pcmk_node_t *allowed = g_hash_table_lookup(rsc->priv->allowed_nodes, node->priv->id); if ((allowed == NULL) || (allowed->assign->probe_mode != pcmk__probe_exclusive)) { /* This node is not marked for resource discovery. Remove it from * allowed nodes so that notifications contain only nodes that the * clone can possibly run on. */ pcmk__rsc_trace(rsc, "Skipping probe for %s on %s because resource has " "exclusive discovery but is not allowed on node", rsc->id, pcmk__node_name(node)); g_hash_table_remove(rsc->priv->allowed_nodes, node->priv->id); return false; } } rsc->priv->children = g_list_sort(rsc->priv->children, pcmk__cmp_instance_number); if (pcmk_is_set(rsc->flags, pcmk__rsc_unique)) { return pcmk__probe_resource_list(rsc->priv->children, node); } else { return probe_anonymous_clone(rsc, node); } } /*! * \internal * \brief Add meta-attributes relevant to transition graph actions to XML * * Add clone-specific meta-attributes needed for transition graph actions. * * \param[in] rsc Clone resource whose meta-attributes should be added * \param[in,out] xml Transition graph action attributes XML to add to */ void pcmk__clone_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml) { char *name = NULL; pcmk__assert(pcmk__is_clone(rsc) && (xml != NULL)); name = crm_meta_name(PCMK_META_GLOBALLY_UNIQUE); crm_xml_add(xml, name, pcmk__flag_text(rsc->flags, pcmk__rsc_unique)); free(name); name = crm_meta_name(PCMK_META_NOTIFY); crm_xml_add(xml, name, pcmk__flag_text(rsc->flags, pcmk__rsc_notify)); free(name); name = crm_meta_name(PCMK_META_CLONE_MAX); - crm_xml_add_int(xml, name, pe__clone_max(rsc)); + pcmk__xe_set_int(xml, name, pe__clone_max(rsc)); free(name); name = crm_meta_name(PCMK_META_CLONE_NODE_MAX); - crm_xml_add_int(xml, name, pe__clone_node_max(rsc)); + pcmk__xe_set_int(xml, name, pe__clone_node_max(rsc)); free(name); if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) { int promoted_max = pe__clone_promoted_max(rsc); int promoted_node_max = pe__clone_promoted_node_max(rsc); name = crm_meta_name(PCMK_META_PROMOTED_MAX); - crm_xml_add_int(xml, name, promoted_max); + pcmk__xe_set_int(xml, name, promoted_max); free(name); name = crm_meta_name(PCMK_META_PROMOTED_NODE_MAX); - crm_xml_add_int(xml, name, promoted_node_max); + pcmk__xe_set_int(xml, name, promoted_node_max); free(name); /* @COMPAT Maintain backward compatibility with resource agents that * expect the old names (deprecated since 2.0.0). */ name = crm_meta_name(PCMK__META_PROMOTED_MAX_LEGACY); - crm_xml_add_int(xml, name, promoted_max); + pcmk__xe_set_int(xml, name, promoted_max); free(name); name = crm_meta_name(PCMK__META_PROMOTED_NODE_MAX_LEGACY); - crm_xml_add_int(xml, name, promoted_node_max); + pcmk__xe_set_int(xml, name, promoted_node_max); free(name); } } // Clone implementation of pcmk__assignment_methods_t:add_utilization() void pcmk__clone_add_utilization(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { bool existing = false; pcmk_resource_t *child = NULL; pcmk__assert(pcmk__is_clone(rsc) && (orig_rsc != NULL) && (utilization != NULL)); if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) { return; } // Look for any child already existing in the list for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { child = (pcmk_resource_t *) iter->data; if (g_list_find(all_rscs, child)) { existing = true; // Keep checking remaining children } else { // If this is a clone of a group, look for group's members for (GList *member_iter = child->priv->children; member_iter != NULL; member_iter = member_iter->next) { pcmk_resource_t *member = (pcmk_resource_t *) member_iter->data; if (g_list_find(all_rscs, member) != NULL) { // Add *child's* utilization, not group member's child->priv->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); existing = true; break; } } } } if (!existing && (rsc->priv->children != NULL)) { // If nothing was found, still add first child's utilization child = (pcmk_resource_t *) rsc->priv->children->data; child->priv->cmds->add_utilization(child, orig_rsc, all_rscs, utilization); } } // Clone implementation of pcmk__assignment_methods_t:shutdown_lock() void pcmk__clone_shutdown_lock(pcmk_resource_t *rsc) { pcmk__assert(pcmk__is_clone(rsc)); return; // Clones currently don't support shutdown locks }