diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index a92bfe223b..d1ce2b20ab 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -1,713 +1,713 @@ /* * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include // cib__* #include #include #include #include #include // pcmk__get_node() #include "pacemaker-attrd.h" static int last_cib_op_done = 0; static void write_attribute(attribute_t *a, bool ignore_delay); static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *cib = user_data; cib->cmds->signoff(cib); if (attrd_shutting_down(false)) { crm_info("Disconnected from the CIB manager"); } else { // @TODO This should trigger a reconnect, not a shutdown - crm_crit("Lost connection to the CIB manager, shutting down"); + pcmk__crit("Lost connection to the CIB manager, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } static void attrd_cib_updated_cb(const char *event, xmlNode *msg) { const xmlNode *patchset = NULL; const char *client_name = NULL; bool status_changed = false; if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { return; } if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_ALERTS)) { if (attrd_shutting_down(true)) { crm_debug("Ignoring alerts change in CIB during shutdown"); } else { mainloop_set_trigger(attrd_config_read); } } status_changed = pcmk__cib_element_in_patchset(patchset, PCMK_XE_STATUS); client_name = pcmk__xe_get(msg, PCMK__XA_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { /* This change came from a source that ensured the CIB is consistent * with our attributes table, so we don't need to write anything out. */ return; } if (!attrd_election_won()) { // Don't write attributes if we're not the writer return; } if (status_changed || pcmk__cib_element_in_patchset(patchset, PCMK_XE_NODES)) { if (attrd_shutting_down(true)) { crm_debug("Ignoring node change in CIB during shutdown"); return; } /* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS * section. Write transient attributes to ensure they're up-to-date in * the CIB. */ if (client_name == NULL) { client_name = pcmk__xe_get(msg, PCMK__XA_CIB_CLIENTID); } crm_notice("Updating all attributes after %s event triggered by %s", event, pcmk__s(client_name, "unidentified client")); attrd_write_attributes(attrd_write_all); } } int attrd_cib_connect(int max_retry) { static int attempts = 0; int rc = -ENOTCONN; the_cib = cib_new(); if (the_cib == NULL) { return -ENOTCONN; } do { if (attempts > 0) { sleep(attempts); } attempts++; crm_debug("Connection attempt %d to the CIB manager", attempts); rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); } while ((rc != pcmk_ok) && (attempts < max_retry)); if (rc != pcmk_ok) { crm_err("Connection to the CIB manager failed: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); goto cleanup; } crm_debug("Connected to the CIB manager after %d attempts", attempts); rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); if (rc != pcmk_ok) { crm_err("Could not set disconnection callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, PCMK__VALUE_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); goto cleanup; } return pcmk_ok; cleanup: cib__clean_up_connection(&the_cib); return -ENOTCONN; } void attrd_cib_disconnect(void) { CRM_CHECK(the_cib != NULL, return); the_cib->cmds->del_notify_callback(the_cib, PCMK__VALUE_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); cib__clean_up_connection(&the_cib); mainloop_destroy_trigger(attrd_config_read); } static void attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *node = pcmk__s((const char *) user_data, "a node"); if (rc == pcmk_ok) { crm_info("Cleared transient node attributes for %s from CIB", node); } else { crm_err("Unable to clear transient node attributes for %s from CIB: %s", node, pcmk_strerror(rc)); } } #define XPATH_TRANSIENT "//" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_UNAME "='%s']" \ "/" PCMK__XE_TRANSIENT_ATTRIBUTES /*! * \internal * \brief Wipe all transient node attributes for a node from the CIB * * \param[in] node Node to clear attributes for */ void attrd_cib_erase_transient_attrs(const char *node) { int call_id = 0; char *xpath = NULL; CRM_CHECK(node != NULL, return); xpath = pcmk__assert_asprintf(XPATH_TRANSIENT, node); crm_debug("Clearing transient node attributes for %s from CIB using %s", node, xpath); call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); free(xpath); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, pcmk__str_copy(node), "attrd_erase_cb", attrd_erase_cb, free); } /*! * \internal * \brief Prepare the CIB after cluster is connected */ void attrd_cib_init(void) { /* We have no attribute values in memory, so wipe the CIB to match. This is * normally done by the DC's controller when this node leaves the cluster, but * this handles the case where the node restarted so quickly that the * cluster layer didn't notice. * * \todo If the attribute manager respawns after crashing (see * PCMK_ENV_RESPAWNED), ideally we'd skip this and sync our attributes * from the writer. However, currently we reject any values for us * that the writer has, in attrd_peer_update(). */ attrd_cib_erase_transient_attrs(attrd_cluster->priv->node_name); // Set a trigger for reading the CIB (for the alerts section) attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); // Always read the CIB at start-up mainloop_set_trigger(attrd_config_read); } static gboolean attribute_timer_cb(gpointer data) { attribute_t *a = data; crm_trace("Dampen interval expired for %s", a->id); attrd_write_or_elect_attribute(a); return FALSE; } static void attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { int level = LOG_ERR; GHashTableIter iter; const char *peer = NULL; attribute_value_t *v = NULL; char *name = user_data; attribute_t *a = g_hash_table_lookup(attributes, name); if(a == NULL) { crm_info("Attribute %s no longer exists", name); return; } a->update = 0; if (rc == pcmk_ok && call_id < 0) { rc = call_id; } switch (rc) { case pcmk_ok: level = LOG_INFO; last_cib_op_done = call_id; if (a->timer && !a->timeout_ms) { // Remove temporary dampening for failed writes mainloop_timer_del(a->timer); a->timer = NULL; } break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ level = LOG_WARNING; break; } do_crm_log(level, "CIB update %d result for %s: %s " QB_XS " rc=%d", call_id, a->id, pcmk_strerror(rc), rc); g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { if (rc == pcmk_ok) { crm_info("* Wrote %s[%s]=%s", a->id, peer, pcmk__s(v->requested, "(unset)")); pcmk__str_update(&(v->requested), NULL); } else { do_crm_log(level, "* Could not write %s[%s]=%s", a->id, peer, pcmk__s(v->requested, "(unset)")); /* Reattempt write below if we are still the writer */ attrd_set_attr_flags(a, attrd_attr_changed); } } if (pcmk__is_set(a->flags, attrd_attr_changed) && attrd_election_won()) { if (rc == pcmk_ok) { /* We deferred a write of a new update because this update was in * progress. Write out the new value without additional delay. */ crm_debug("Pending update for %s can be written now", a->id); write_attribute(a, false); /* We're re-attempting a write because the original failed; delay * the next attempt so we don't potentially flood the CIB manager * and logs with a zillion attempts per second. * * @TODO We could elect a new writer instead. However, we'd have to * somehow downgrade our vote, and we'd still need something like this * if all peers similarly fail to write this attribute (which may * indicate a corrupted attribute entry rather than a CIB issue). */ } else if (a->timer) { // Attribute has a dampening value, so use that as delay if (!mainloop_timer_running(a->timer)) { crm_trace("Delayed re-attempted write for %s by %s", name, pcmk__readable_interval(a->timeout_ms)); mainloop_timer_start(a->timer); } } else { /* Set a temporary dampening of 2 seconds (timer will continue * to exist until the attribute's dampening gets set or the * write succeeds). */ a->timer = attrd_add_timer(a->id, 2000, a); mainloop_timer_start(a->timer); } } } /*! * \internal * \brief Add a set-attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] attr_id ID of attribute to update * \param[in] node_id ID of node for which to update attribute value * \param[in] set_id ID of attribute set * \param[in] value New value for attribute * * \return Standard Pacemaker return code */ static int add_set_attr_update(const attribute_t *attr, const char *attr_id, const char *node_id, const char *set_id, const char *value) { xmlNode *update = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE); xmlNode *child = update; int rc = ENOMEM; pcmk__xe_set(child, PCMK_XA_ID, node_id); child = pcmk__xe_create(child, PCMK__XE_TRANSIENT_ATTRIBUTES); pcmk__xe_set(child, PCMK_XA_ID, node_id); child = pcmk__xe_create(child, attr->set_type); pcmk__xe_set(child, PCMK_XA_ID, set_id); child = pcmk__xe_create(child, PCMK_XE_NVPAIR); pcmk__xe_set(child, PCMK_XA_ID, attr_id); pcmk__xe_set(child, PCMK_XA_NAME, attr->id); pcmk__xe_set(child, PCMK_XA_VALUE, value); rc = the_cib->cmds->modify(the_cib, PCMK_XE_STATUS, update, cib_can_create|cib_transaction); rc = pcmk_legacy2rc(rc); pcmk__xml_free(update); return rc; } /*! * \internal * \brief Add an unset-attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] attr_id ID of attribute to update * \param[in] node_id ID of node for which to update attribute value * \param[in] set_id ID of attribute set * * \return Standard Pacemaker return code */ static int add_unset_attr_update(const attribute_t *attr, const char *attr_id, const char *node_id, const char *set_id) { char *xpath = pcmk__assert_asprintf("/" PCMK_XE_CIB "/" PCMK_XE_STATUS "/" PCMK__XE_NODE_STATE "[@" PCMK_XA_ID "='%s']" "/" PCMK__XE_TRANSIENT_ATTRIBUTES "[@" PCMK_XA_ID "='%s']" "/%s[@" PCMK_XA_ID "='%s']" "/" PCMK_XE_NVPAIR "[@" PCMK_XA_ID "='%s' " "and @" PCMK_XA_NAME "='%s']", node_id, node_id, attr->set_type, set_id, attr_id, attr->id); int rc = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath|cib_transaction); free(xpath); return pcmk_legacy2rc(rc); } /*! * \internal * \brief Add an attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] value New value for attribute * \param[in] node_id ID of node for which to update attribute value * * \return Standard Pacemaker return code */ static int add_attr_update(const attribute_t *attr, const char *value, const char *node_id) { char *set_id = attrd_set_id(attr, node_id); char *nvpair_id = attrd_nvpair_id(attr, node_id); int rc = pcmk_rc_ok; if (value == NULL) { rc = add_unset_attr_update(attr, nvpair_id, node_id, set_id); } else { rc = add_set_attr_update(attr, nvpair_id, node_id, set_id, value); } free(set_id); free(nvpair_id); return rc; } static void send_alert_attributes_value(attribute_t *a, GHashTable *t) { int rc = 0; attribute_value_t *at = NULL; GHashTableIter vIter; g_hash_table_iter_init(&vIter, t); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { const char *node_xml_id = attrd_get_node_xml_id(at->nodename); rc = attrd_send_attribute_alert(at->nodename, node_xml_id, a->id, at->current); crm_trace("Sent alerts for %s[%s]=%s with node XML ID %s " "(%s agents failed)", a->id, at->nodename, at->current, pcmk__s(node_xml_id, "unknown"), ((rc == 0)? "no" : ((rc == -1)? "some" : "all"))); } } static void set_alert_attribute_value(GHashTable *t, attribute_value_t *v) { attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t)); a_v->nodename = pcmk__str_copy(v->nodename); a_v->current = pcmk__str_copy(v->current); g_hash_table_replace(t, a_v->nodename, a_v); } mainloop_timer_t * attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr) { return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr); } /*! * \internal * \brief Write an attribute's values to the CIB if appropriate * * \param[in,out] a Attribute to write * \param[in] ignore_delay If true, write attribute now regardless of any * configured delay */ static void write_attribute(attribute_t *a, bool ignore_delay) { int private_updates = 0, cib_updates = 0; attribute_value_t *v = NULL; GHashTableIter iter; GHashTable *alert_attribute_value = NULL; int rc = pcmk_ok; bool should_write = true; if (a == NULL) { return; } // Private attributes (or any in standalone mode) are not written to the CIB if (stand_alone || pcmk__is_set(a->flags, attrd_attr_is_private)) { should_write = false; } /* If this attribute will be written to the CIB ... */ if (should_write) { /* Defer the write if now's not a good time */ if (a->update && (a->update < last_cib_op_done)) { crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); a->update = 0; // Don't log this message again } else if (a->update) { crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); goto done; } else if (mainloop_timer_running(a->timer)) { if (ignore_delay) { mainloop_timer_stop(a->timer); crm_debug("Overriding '%s' write delay", a->id); } else { crm_info("Delaying write of '%s'", a->id); goto done; } } // Initiate a transaction for all the peer value updates CRM_CHECK(the_cib != NULL, goto done); the_cib->cmds->set_user(the_cib, a->user); rc = the_cib->cmds->init_transaction(the_cib); if (rc != pcmk_ok) { crm_err("Failed to write %s (set %s): Could not initiate " "CIB transaction", a->id, pcmk__s(a->set_id, "unspecified")); goto done; } } /* The changed and force-write flags apply only to the next write, * which this is, so clear them now. Also clear the "node unknown" flag * because we will check whether it is known below and reset if appopriate. */ attrd_clear_attr_flags(a, attrd_attr_changed |attrd_attr_force_write |attrd_attr_node_unknown); /* Make the table for the attribute trap */ alert_attribute_value = pcmk__strikey_table(NULL, attrd_free_attribute_value); /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { const char *node_xml_id = NULL; const char *prev_xml_id = NULL; if (!should_write) { private_updates++; continue; } /* We need the node's CIB XML ID to write out its attributes, so look * for it now. Check the node caches first, even if the ID was * previously known (in case it changed), but use any previous value as * a fallback. */ prev_xml_id = attrd_get_node_xml_id(v->nodename); if (pcmk__is_set(v->flags, attrd_value_remote)) { // A Pacemaker Remote node's XML ID is the same as its name node_xml_id = v->nodename; } else { // This creates a cluster node cache entry if none exists pcmk__node_status_t *peer = pcmk__get_node(0, v->nodename, prev_xml_id, pcmk__node_search_any); node_xml_id = pcmk__cluster_get_xml_id(peer); if (node_xml_id == NULL) { node_xml_id = prev_xml_id; } } // Defer write if this is a cluster node that's never been seen if (node_xml_id == NULL) { attrd_set_attr_flags(a, attrd_attr_node_unknown); crm_notice("Cannot write %s[%s]='%s' to CIB because node's XML ID " "is unknown (will retry if learned)", a->id, v->nodename, v->current); continue; } if (!pcmk__str_eq(prev_xml_id, node_xml_id, pcmk__str_none)) { crm_trace("Setting %s[%s] node XML ID to %s (was %s)", a->id, v->nodename, node_xml_id, pcmk__s(prev_xml_id, "unknown")); attrd_set_node_xml_id(v->nodename, node_xml_id); } // Update this value as part of the CIB transaction we're building rc = add_attr_update(a, v->current, node_xml_id); if (rc != pcmk_rc_ok) { crm_err("Couldn't add %s[%s]='%s' to CIB transaction: %s " QB_XS " node XML ID %s", a->id, v->nodename, v->current, pcmk_rc_str(rc), node_xml_id); continue; } crm_debug("Added %s[%s]=%s to CIB transaction (node XML ID %s)", a->id, v->nodename, pcmk__s(v->current, "(unset)"), node_xml_id); cib_updates++; /* Preservation of the attribute to transmit alert */ set_alert_attribute_value(alert_attribute_value, v); // Save this value so we can log it when write completes pcmk__str_update(&(v->requested), v->current); } if (private_updates) { crm_info("Processed %d private change%s for %s (set %s)", private_updates, pcmk__plural_s(private_updates), a->id, pcmk__s(a->set_id, "unspecified")); } if (cib_updates > 0) { char *id = pcmk__str_copy(a->id); // Commit transaction a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none); crm_info("Sent CIB request %d with %d change%s for %s (set %s)", a->update, cib_updates, pcmk__plural_s(cib_updates), a->id, pcmk__s(a->set_id, "unspecified")); if (the_cib->cmds->register_callback_full(the_cib, a->update, CIB_OP_TIMEOUT_S, FALSE, id, "attrd_cib_callback", attrd_cib_callback, free)) { // Transmit alert of the attribute send_alert_attributes_value(a, alert_attribute_value); } } done: // Discard transaction (if any) if (the_cib != NULL) { the_cib->cmds->end_transaction(the_cib, false, cib_none); the_cib->cmds->set_user(the_cib, NULL); } if (alert_attribute_value != NULL) { g_hash_table_destroy(alert_attribute_value); } } /*! * \internal * \brief Write out attributes * * \param[in] options Group of enum attrd_write_options */ void attrd_write_attributes(uint32_t options) { GHashTableIter iter; attribute_t *a = NULL; crm_debug("Writing out %s attributes", pcmk__is_set(options, attrd_write_all)? "all" : "changed"); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { if (!pcmk__is_set(options, attrd_write_all) && pcmk__is_set(a->flags, attrd_attr_node_unknown)) { // Try writing this attribute again, in case peer ID was learned attrd_set_attr_flags(a, attrd_attr_changed); } else if (pcmk__is_set(a->flags, attrd_attr_force_write)) { /* If the force_write flag is set, write the attribute. */ attrd_set_attr_flags(a, attrd_attr_changed); } if (pcmk__is_set(options, attrd_write_all) || pcmk__is_set(a->flags, attrd_attr_changed)) { bool ignore_delay = pcmk__is_set(options, attrd_write_no_delay); if (pcmk__is_set(a->flags, attrd_attr_force_write)) { // Always ignore delay when forced write flag is set ignore_delay = true; } write_attribute(a, ignore_delay); } else { crm_trace("Skipping unchanged attribute %s", a->id); } } } void attrd_write_or_elect_attribute(attribute_t *a) { if (attrd_election_won()) { write_attribute(a, false); } else { attrd_start_election_if_needed(); } } diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index b9ae186f74..27c9a58e53 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -1,616 +1,616 @@ /* * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static xmlNode * attrd_confirmation(int callid) { xmlNode *node = pcmk__xe_create(NULL, __func__); pcmk__xe_set(node, PCMK__XA_T, PCMK__VALUE_ATTRD); pcmk__xe_set(node, PCMK__XA_SRC, pcmk__cluster_local_node_name()); pcmk__xe_set(node, PCMK_XA_TASK, PCMK__ATTRD_CMD_CONFIRM); pcmk__xe_set_int(node, PCMK__XA_CALL_ID, callid); return node; } static void attrd_peer_message(pcmk__node_status_t *peer, xmlNode *xml) { const char *election_op = pcmk__xe_get(xml, PCMK__XA_CRM_TASK); if (election_op) { attrd_handle_election_op(peer, xml); return; } if (attrd_shutting_down(false)) { /* If we're shutting down, we want to continue responding to election * ops as long as we're a cluster member (because our vote may be * needed). Ignore all other messages. */ return; } else { pcmk__request_t request = { .ipc_client = NULL, .ipc_id = 0, .ipc_flags = 0, .peer = peer->name, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = pcmk__xe_get_copy(request.xml, PCMK_XA_TASK); CRM_CHECK(request.op != NULL, return); attrd_handle_request(&request); /* Having finished handling the request, check to see if the originating * peer requested confirmation. If so, send that confirmation back now. */ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { int callid = 0; xmlNode *reply = NULL; /* Add the confirmation ID for the message we are confirming to the * response so the originating peer knows what they're a confirmation * for. */ pcmk__xe_get_int(xml, PCMK__XA_CALL_ID, &callid); reply = attrd_confirmation(callid); /* And then send the confirmation back to the originating peer. This * ends up right back in this same function (attrd_peer_message) on the * peer where it will have to do something with a PCMK__XA_CONFIRM type * message. */ crm_debug("Sending %s a confirmation", peer->name); attrd_send_message(peer, reply, false); pcmk__xml_free(reply); } pcmk__reset_request(&request); } } static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Bad message received from %s[%u]: '%.120s'", from, nodeid, data); } else { attrd_peer_message(pcmk__get_node(nodeid, from, NULL, pcmk__node_search_cluster_member), xml); } pcmk__xml_free(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down(false)) { crm_info("Disconnected from Corosync process group"); } else { - crm_crit("Lost connection to Corosync process group, shutting down"); + pcmk__crit("Lost connection to Corosync process group, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } /*! * \internal * \brief Broadcast an update for a single attribute value * * \param[in] a Attribute to broadcast * \param[in] v Attribute value to broadcast */ void attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v) { xmlNode *op = pcmk__xe_create(NULL, PCMK_XE_OP); pcmk__xe_set(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); attrd_add_value_xml(op, a, v, false); attrd_send_message(NULL, op, false); pcmk__xml_free(op); } #define state_text(state) pcmk__s((state), "in unknown state") static void attrd_peer_change_cb(enum pcmk__node_update kind, pcmk__node_status_t *peer, const void *data) { bool gone = false; bool is_remote = pcmk__is_set(peer->flags, pcmk__node_status_remote); switch (kind) { case pcmk__node_update_name: crm_debug("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), peer->name, state_text(peer->state)); break; case pcmk__node_update_processes: if (!pcmk__is_set(peer->processes, crm_get_cluster_proc())) { gone = true; } crm_debug("Node %s is %s a peer", peer->name, (gone? "no longer" : "now")); break; case pcmk__node_update_state: crm_debug("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), peer->name, state_text(peer->state), state_text(data)); if (pcmk__str_eq(peer->state, PCMK_VALUE_MEMBER, pcmk__str_none)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if (attrd_election_won() && !pcmk__is_set(peer->flags, pcmk__node_status_remote)) { attrd_peer_sync(peer); } } else { // Remove all attribute values associated with lost nodes attrd_peer_remove(peer->name, false, "loss"); gone = true; } break; } // Remove votes from cluster nodes that leave, in case election in progress if (gone && !is_remote) { attrd_remove_voter(peer); attrd_remove_peer_protocol_ver(peer->name); attrd_do_not_expect_from_peer(peer->name); } } #define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)") #define readable_peer(p) \ (((p) == NULL)? "all peers" : pcmk__s((p)->name, "unknown peer")) static void update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, const xmlNode *xml, const char *attr, const char *value, const char *host, bool filter) { int is_remote = 0; bool changed = false; attribute_value_t *v = NULL; const char *prev_xml_id = NULL; const char *node_xml_id = pcmk__xe_get(xml, PCMK__XA_ATTR_HOST_ID); // Create entry for value if not already existing v = g_hash_table_lookup(a->values, host); if (v == NULL) { v = pcmk__assert_alloc(1, sizeof(attribute_value_t)); v->nodename = pcmk__str_copy(host); g_hash_table_replace(a->values, v->nodename, v); } /* If update doesn't contain the node XML ID, fall back to any previously * known value (for logging) */ prev_xml_id = attrd_get_node_xml_id(v->nodename); if (node_xml_id == NULL) { node_xml_id = prev_xml_id; } // If value is for a Pacemaker Remote node, remember that pcmk__xe_get_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); if (is_remote) { attrd_set_value_flags(v, attrd_value_remote); pcmk__assert(pcmk__cluster_lookup_remote_node(host) != NULL); } // Check whether the value changed changed = !pcmk__str_eq(v->current, value, pcmk__str_casei); if (changed && filter && pcmk__str_eq(host, attrd_cluster->priv->node_name, pcmk__str_casei)) { /* Broadcast the local value for an attribute that differs from the * value provided in a peer's attribute synchronization response. This * ensures a node's values for itself take precedence and all peers are * kept in sync. */ v = g_hash_table_lookup(a->values, attrd_cluster->priv->node_name); crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", attr, host, readable_value(v), value, peer->name); attrd_broadcast_value(a, v); } else if (changed) { crm_notice("Setting %s[%s]%s%s: %s -> %s " QB_XS " from %s with %s write delay and node XML ID %s", attr, host, a->set_type ? " in " : "", pcmk__s(a->set_type, ""), readable_value(v), pcmk__s(value, "(unset)"), peer->name, (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms), pcmk__s(node_xml_id, "unknown")); pcmk__str_update(&v->current, value); attrd_set_attr_flags(a, attrd_attr_changed); if (pcmk__str_eq(host, attrd_cluster->priv->node_name, pcmk__str_casei) && pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) { if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { attrd_set_requesting_shutdown(); } else { attrd_clear_requesting_shutdown(); } } // Write out new value or start dampening timer if (a->timeout_ms && a->timer) { crm_trace("Delaying write of %s %s for dampening", attr, pcmk__readable_interval(a->timeout_ms)); mainloop_timer_start(a->timer); } else { attrd_write_or_elect_attribute(a); } } else { int is_force_write = 0; pcmk__xe_get_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, &is_force_write); if (is_force_write == 1 && a->timeout_ms && a->timer) { /* Save forced writing and set change flag. */ /* The actual attribute is written by Writer after election. */ crm_trace("%s[%s] from %s is unchanged (%s), forcing write", attr, host, peer->name, pcmk__s(value, "unset")); attrd_set_attr_flags(a, attrd_attr_force_write); } else { crm_trace("%s[%s] from %s is unchanged (%s)", attr, host, peer->name, pcmk__s(value, "unset")); } } // This allows us to later detect local values that peer doesn't know about attrd_set_value_flags(v, attrd_value_from_peer); // Remember node's XML ID if we're just learning it if ((node_xml_id != NULL) && !pcmk__str_eq(node_xml_id, prev_xml_id, pcmk__str_none)) { crm_trace("Learned %s[%s] node XML ID is %s (was %s)", a->id, v->nodename, node_xml_id, pcmk__s(prev_xml_id, "unknown")); attrd_set_node_xml_id(v->nodename, node_xml_id); if (attrd_election_won()) { // In case we couldn't write a value missing the XML ID before attrd_write_attributes(attrd_write_changed); } } } static void attrd_peer_update_one(const pcmk__node_status_t *peer, xmlNode *xml, bool filter) { attribute_t *a = NULL; const char *attr = pcmk__xe_get(xml, PCMK__XA_ATTR_NAME); const char *value = pcmk__xe_get(xml, PCMK__XA_ATTR_VALUE); const char *host = pcmk__xe_get(xml, PCMK__XA_ATTR_HOST); if (attr == NULL) { crm_warn("Could not update attribute: peer did not specify name"); return; } a = attrd_populate_attribute(xml, attr); if (a == NULL) { return; } if (host == NULL) { // If no host was specified, update all hosts GHashTableIter vIter; crm_debug("Setting %s for all hosts to %s", attr, value); pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_HOST_ID); g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { update_attr_on_host(a, peer, xml, attr, value, host, filter); } } else { // Update attribute value for the given host update_attr_on_host(a, peer, xml, attr, value, host, filter); } /* If this is a message from some attrd instance broadcasting its protocol * version, check to see if it's a new minimum version. */ if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { attrd_update_minimum_protocol_ver(peer->name, value); } } static void broadcast_unseen_local_values(void) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { if (!pcmk__is_set(v->flags, attrd_value_from_peer) && pcmk__str_eq(v->nodename, attrd_cluster->priv->node_name, pcmk__str_casei)) { crm_trace("* %s[%s]='%s' is local-only", a->id, v->nodename, readable_value(v)); if (sync == NULL) { sync = pcmk__xe_create(NULL, __func__); pcmk__xe_set(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); } attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer); } } } if (sync != NULL) { crm_debug("Broadcasting local-only values"); attrd_send_message(NULL, sync, false); pcmk__xml_free(sync); } } int attrd_cluster_connect(void) { int rc = pcmk_rc_ok; attrd_cluster = pcmk_cluster_new(); pcmk_cluster_set_destroy_fn(attrd_cluster, attrd_cpg_destroy); pcmk_cpg_set_deliver_fn(attrd_cluster, attrd_cpg_dispatch); pcmk_cpg_set_confchg_fn(attrd_cluster, pcmk__cpg_confchg_cb); pcmk__cluster_set_status_callback(&attrd_peer_change_cb); rc = pcmk_cluster_connect(attrd_cluster); rc = pcmk_rc2legacy(rc); if (rc != pcmk_ok) { crm_err("Cluster connection failed"); return rc; } return pcmk_ok; } void attrd_peer_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc = pcmk__xe_get(xml, PCMK__XA_ATTR_RESOURCE); const char *host = pcmk__xe_get(xml, PCMK__XA_ATTR_HOST); const char *op = pcmk__xe_get(xml, PCMK__XA_ATTR_CLEAR_OPERATION); const char *interval_spec = pcmk__xe_get(xml, PCMK__XA_ATTR_CLEAR_INTERVAL); guint interval_ms = 0U; char *attr = NULL; GHashTableIter iter; regex_t regex; pcmk__node_status_t *peer = pcmk__get_node(0, request->peer, NULL, pcmk__node_search_cluster_member); pcmk_parse_interval_spec(interval_spec, &interval_ms); if (attrd_failure_regex(®ex, rsc, op, interval_ms) != pcmk_ok) { crm_info("Ignoring invalid request to clear failures for %s", pcmk__s(rsc, "all resources")); return; } pcmk__xe_set(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Make sure value is not set, so we delete */ pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { if (regexec(®ex, attr, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", attr, pcmk__s(rsc, "all resources")); pcmk__xe_set(xml, PCMK__XA_ATTR_NAME, attr); attrd_peer_update(peer, xml, host, false); } } regfree(®ex); } /*! * \internal * \brief Load attributes from a peer sync response * * \param[in] peer Peer that sent sync response * \param[in] peer_won Whether peer is the attribute writer * \param[in,out] xml Request XML */ void attrd_peer_sync_response(const pcmk__node_status_t *peer, bool peer_won, xmlNode *xml) { crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", peer->name); if (peer_won) { /* Initialize the "seen" flag for all attributes to cleared, so we can * detect attributes that local node has but the writer doesn't. */ attrd_clear_value_seen(); } // Process each attribute update in the sync response for (xmlNode *child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL; child = pcmk__xe_next(child, NULL)) { attrd_peer_update(peer, child, pcmk__xe_get(child, PCMK__XA_ATTR_HOST), true); } if (peer_won) { /* If any attributes are still not marked as seen, the writer doesn't * know about them, so send all peers an update with them. */ broadcast_unseen_local_values(); } } /*! * \internal * \brief Remove all attributes and optionally peer cache entries for a node * * \param[in] host Name of node to purge * \param[in] uncache If true, remove node from peer caches * \param[in] source Who requested removal (only used for logging) */ void attrd_peer_remove(const char *host, bool uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; CRM_CHECK(host != NULL, return); crm_notice("Removing all %s attributes for node %s " QB_XS " %s reaping node from cache", host, source, (uncache? "and" : "without")); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { if(g_hash_table_remove(a->values, host)) { crm_debug("Removed %s[%s] for peer %s", a->id, host, source); } } if (uncache) { pcmk__purge_node_from_cache(host, 0); attrd_forget_node_xml_id(host); } } /*! * \internal * \brief Send all known attributes and values to a peer * * \param[in] peer Peer to send sync to (if NULL, broadcast to all peers) */ void attrd_peer_sync(pcmk__node_status_t *peer) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = pcmk__xe_create(NULL, __func__); pcmk__xe_set(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s]='%s' to %s", a->id, v->nodename, readable_value(v), readable_peer(peer)); attrd_add_value_xml(sync, a, v, false); } } crm_debug("Syncing values to %s", readable_peer(peer)); attrd_send_message(peer, sync, false); pcmk__xml_free(sync); } void attrd_peer_update(const pcmk__node_status_t *peer, xmlNode *xml, const char *host, bool filter) { bool handle_sync_point = false; CRM_CHECK((peer != NULL) && (xml != NULL), return); if (xml->children != NULL) { for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL); child != NULL; child = pcmk__xe_next(child, PCMK_XE_OP)) { pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite); attrd_peer_update_one(peer, child, filter); if (attrd_request_has_sync_point(child)) { handle_sync_point = true; } } } else { attrd_peer_update_one(peer, xml, filter); if (attrd_request_has_sync_point(xml)) { handle_sync_point = true; } } /* If the update XML specified that the client wanted to wait for a sync * point, process that now. */ if (handle_sync_point) { crm_trace("Hit local sync point for attribute update"); attrd_ack_waitlist_clients(attrd_sync_point_local, xml); } } diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c index 3c31bcd932..b4d3583dad 100644 --- a/daemons/attrd/pacemaker-attrd.c +++ b/daemons/attrd/pacemaker-attrd.c @@ -1,225 +1,225 @@ /* - * Copyright 2013-2024 the Pacemaker project contributors + * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" #define SUMMARY "daemon for managing Pacemaker node attributes" gboolean stand_alone = FALSE; gchar **log_files = NULL; static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, "(Advanced use only) Run in stand-alone mode", NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &log_files, "Send logs to the additional named logfile", NULL }, { NULL } }; static pcmk__output_t *out = NULL; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; lrmd_t *the_lrmd = NULL; pcmk_cluster_t *attrd_cluster = NULL; crm_trigger_t *attrd_config_read = NULL; crm_exit_t attrd_exit_status = CRM_EX_OK; static bool ipc_already_running(void) { pcmk_ipc_api_t *old_instance = NULL; int rc = pcmk_rc_ok; rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_attrd); if (rc != pcmk_rc_ok) { return false; } rc = pcmk__connect_ipc(old_instance, pcmk_ipc_dispatch_sync, 2); if (rc != pcmk_rc_ok) { crm_debug("No existing %s manager instance found: %s", pcmk_ipc_name(old_instance, true), pcmk_rc_str(rc)); pcmk_free_ipc_api(old_instance); return false; } pcmk_disconnect_ipc(old_instance); pcmk_free_ipc_api(old_instance); return true; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; GError *error = NULL; bool initialized = false; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, &output_group); attrd_init_mainloop(); crm_log_preinit(NULL, argc, argv); mainloop_add_signal(SIGTERM, attrd_shutdown); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { attrd_exit_status = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { attrd_exit_status = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } // Open additional log files pcmk__add_logfiles(log_files, out); crm_log_init(PCMK__VALUE_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker node attribute manager%s", stand_alone ? " in standalone mode" : ""); if (ipc_already_running()) { attrd_exit_status = CRM_EX_OK; g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "Aborting start-up because an attribute manager " "instance is already active"); - crm_crit("%s", error->message); + pcmk__crit("%s", error->message); goto done; } initialized = true; attributes = pcmk__strkey_table(NULL, attrd_free_attribute); /* Connect to the CIB before connecting to the cluster or listening for IPC. * This allows us to assume the CIB is connected whenever we process a * cluster or IPC message (which also avoids start-up race conditions). */ if (!stand_alone) { if (attrd_cib_connect(30) != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "Could not connect to the CIB"); goto done; } crm_info("CIB connection active"); } if (attrd_cluster_connect() != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "Could not connect to the cluster"); goto done; } crm_info("Cluster connection active"); // Initialization that requires the cluster to be connected attrd_election_init(); if (!stand_alone) { attrd_cib_init(); } /* Set a private attribute for ourselves with the protocol version we * support. This lets all nodes determine the minimum supported version * across all nodes. It also ensures that the writer learns our node name, * so it can send our attributes to the CIB. */ attrd_broadcast_protocol(); attrd_init_ipc(); crm_notice("Pacemaker node attribute manager successfully started and accepting connections"); attrd_run_mainloop(); done: if (initialized) { crm_info("Shutting down attribute manager"); attrd_ipc_fini(); attrd_lrmd_disconnect(); if (!stand_alone) { attrd_cib_disconnect(); } attrd_free_waitlist(); pcmk_cluster_disconnect(attrd_cluster); pcmk_cluster_free(attrd_cluster); g_hash_table_destroy(attributes); } attrd_cleanup_xml_ids(); g_strfreev(processed_args); pcmk__free_arg_context(context); g_strfreev(log_files); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, attrd_exit_status, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(attrd_exit_status); } diff --git a/daemons/based/based_io.c b/daemons/based/based_io.c index e00b14c727..0d611a0f70 100644 --- a/daemons/based/based_io.c +++ b/daemons/based/based_io.c @@ -1,423 +1,424 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include crm_trigger_t *cib_writer = NULL; int write_cib_contents(gpointer p); static void cib_rename(const char *old) { int new_fd; char *new = pcmk__assert_asprintf("%s/cib.auto.XXXXXX", cib_root); umask(S_IWGRP | S_IWOTH | S_IROTH); new_fd = mkstemp(new); if ((new_fd < 0) || (rename(old, new) < 0)) { crm_err("Couldn't archive unusable file %s (disabling disk writes and continuing)", old); cib_writes_enabled = FALSE; } else { crm_err("Archived unusable file %s as %s", old, new); } if (new_fd > 0) { close(new_fd); } free(new); } /* * It is the callers responsibility to free the output of this function */ static xmlNode * retrieveCib(const char *filename, const char *sigfile) { xmlNode *root = NULL; int rc = cib_file_read_and_verify(filename, sigfile, &root); if (rc == pcmk_ok) { crm_info("Loaded CIB from %s (with digest %s)", filename, sigfile); } else { crm_warn("Continuing but NOT using CIB from %s (with digest %s): %s", filename, sigfile, pcmk_strerror(rc)); if (rc == -pcmk_err_cib_modified) { // Archive the original files so the contents are not lost cib_rename(filename); cib_rename(sigfile); } } return root; } static int cib_archive_filter(const struct dirent * a) { int rc = 0; // Looking for regular files starting with "cib-" and not ending in .sig struct stat s; char *a_path = pcmk__assert_asprintf("%s/%s", cib_root, a->d_name); if(stat(a_path, &s) != 0) { rc = errno; crm_trace("%s - stat failed: %s (%d)", a->d_name, pcmk_rc_str(rc), rc); rc = 0; } else if (!S_ISREG(s.st_mode)) { crm_trace("%s - wrong type (%#o)", a->d_name, (unsigned int) (s.st_mode & S_IFMT)); } else if(strstr(a->d_name, "cib-") != a->d_name) { crm_trace("%s - wrong prefix", a->d_name); } else if (pcmk__ends_with_ext(a->d_name, ".sig")) { crm_trace("%s - wrong suffix", a->d_name); } else { crm_debug("%s - candidate", a->d_name); rc = 1; } free(a_path); return rc; } static int cib_archive_sort(const struct dirent ** a, const struct dirent **b) { /* Order by creation date - most recently created file first */ int rc = 0; struct stat buf; time_t a_age = 0; time_t b_age = 0; char *a_path = pcmk__assert_asprintf("%s/%s", cib_root, a[0]->d_name); char *b_path = pcmk__assert_asprintf("%s/%s", cib_root, b[0]->d_name); if(stat(a_path, &buf) == 0) { a_age = buf.st_ctime; } if(stat(b_path, &buf) == 0) { b_age = buf.st_ctime; } free(a_path); free(b_path); if(a_age > b_age) { rc = 1; } else if(a_age < b_age) { rc = -1; } crm_trace("%s (%lu) vs. %s (%lu) : %d", a[0]->d_name, (unsigned long)a_age, b[0]->d_name, (unsigned long)b_age, rc); return rc; } xmlNode * readCibXmlFile(const char *dir, const char *file, gboolean discard_status) { struct dirent **namelist = NULL; int lpc = 0; char *sigfile = NULL; char *sigfilepath = NULL; char *filename = NULL; const char *name = NULL; const char *value = NULL; const char *use_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); xmlNode *root = NULL; xmlNode *status = NULL; sigfile = pcmk__assert_asprintf("%s.sig", file); if (pcmk__daemon_can_write(dir, file) == FALSE || pcmk__daemon_can_write(dir, sigfile) == FALSE) { cib_status = -EACCES; return NULL; } filename = pcmk__assert_asprintf("%s/%s", dir, file); sigfilepath = pcmk__assert_asprintf("%s/%s", dir, sigfile); free(sigfile); cib_status = pcmk_ok; root = retrieveCib(filename, sigfilepath); free(filename); free(sigfilepath); if (root == NULL) { lpc = scandir(cib_root, &namelist, cib_archive_filter, cib_archive_sort); if (lpc < 0) { crm_err("Could not check for CIB backups in %s: %s", cib_root, pcmk_rc_str(errno)); } } while (root == NULL && lpc > 1) { int rc = pcmk_ok; lpc--; filename = pcmk__assert_asprintf("%s/%s", cib_root, namelist[lpc]->d_name); sigfile = pcmk__assert_asprintf("%s.sig", filename); rc = cib_file_read_and_verify(filename, sigfile, &root); if (rc == pcmk_ok) { crm_notice("Loaded CIB from last valid backup %s (with digest %s)", filename, sigfile); } else { crm_warn("Not using next most recent CIB backup from %s " "(with digest %s): %s", filename, sigfile, pcmk_strerror(rc)); } free(namelist[lpc]); free(filename); free(sigfile); } free(namelist); if (root == NULL) { root = createEmptyCib(0); crm_warn("Continuing with an empty configuration"); } if (cib_writes_enabled && (use_valgrind != NULL) && (pcmk__is_true(use_valgrind) || (strstr(use_valgrind, PCMK__SERVER_BASED) != NULL))) { cib_writes_enabled = FALSE; crm_err("*** Disabling disk writes to avoid confusing Valgrind ***"); } status = pcmk__xe_first_child(root, PCMK_XE_STATUS, NULL, NULL); if (discard_status && status != NULL) { // Strip out the PCMK_XE_STATUS section if there is one pcmk__xml_free(status); status = NULL; } if (status == NULL) { pcmk__xe_create(root, PCMK_XE_STATUS); } /* Do this before schema validation happens */ /* fill in some defaults */ value = pcmk__xe_get(root, PCMK_XA_ADMIN_EPOCH); if (value == NULL) { // Not possible with schema validation enabled crm_warn("Defaulting missing " PCMK_XA_ADMIN_EPOCH " to 0, but " "cluster may get confused about which node's configuration " "is most recent"); pcmk__xe_set_int(root, PCMK_XA_ADMIN_EPOCH, 0); } name = PCMK_XA_EPOCH; value = pcmk__xe_get(root, name); if (value == NULL) { pcmk__xe_set_int(root, name, 0); } name = PCMK_XA_NUM_UPDATES; value = pcmk__xe_get(root, name); if (value == NULL) { pcmk__xe_set_int(root, name, 0); } // Unset (DC should set appropriate value) pcmk__xe_remove_attr(root, PCMK_XA_DC_UUID); if (discard_status) { crm_log_xml_trace(root, "[on-disk]"); } if (!pcmk__configured_schema_validates(root)) { cib_status = -pcmk_err_schema_validation; } return root; } gboolean uninitializeCib(void) { xmlNode *tmp_cib = the_cib; if (tmp_cib == NULL) { return FALSE; } the_cib = NULL; pcmk__xml_free(tmp_cib); return TRUE; } /* * This method will free the old CIB pointer on success and the new one * on failure. */ int activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op) { if (new_cib) { xmlNode *saved_cib = the_cib; pcmk__assert(new_cib != saved_cib); the_cib = new_cib; pcmk__xml_free(saved_cib); if (cib_writes_enabled && cib_status == pcmk_ok && to_disk) { crm_debug("Triggering CIB write for %s op", op); mainloop_set_trigger(cib_writer); } return pcmk_ok; } crm_err("Ignoring invalid CIB"); if (the_cib) { crm_warn("Reverting to last known CIB"); } else { - crm_crit("Could not write out new CIB and no saved version to revert to"); + pcmk__crit("Could not write out new CIB and no saved version to revert " + "to"); } return -ENODATA; } static void cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { const char *errmsg = "Could not write CIB to disk"; if ((exitcode != 0) && cib_writes_enabled) { cib_writes_enabled = FALSE; errmsg = "Disabling CIB disk writes after failure"; } if ((signo == 0) && (exitcode == 0)) { crm_trace("Disk write [%d] succeeded", (int) pid); } else if (signo == 0) { crm_err("%s: process %d exited %d", errmsg, (int) pid, exitcode); } else { crm_err("%s: process %d terminated with signal %d (%s)%s", errmsg, (int) pid, signo, strsignal(signo), (core? " and dumped core" : "")); } mainloop_trigger_complete(cib_writer); } int write_cib_contents(gpointer p) { int exit_rc = pcmk_ok; xmlNode *cib_local = NULL; /* Make a copy of the CIB to write (possibly in a forked child) */ if (p) { /* Synchronous write out */ cib_local = pcmk__xml_copy(NULL, p); } else { int pid = 0; int bb_state = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0); /* Turn it off before the fork() to avoid: * - 2 processes writing to the same shared mem * - the child needing to disable it * (which would close it from underneath the parent) * This way, the shared mem files are already closed */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); pid = fork(); if (pid < 0) { crm_err("Disabling disk writes after fork failure: %s", pcmk_rc_str(errno)); cib_writes_enabled = FALSE; return FALSE; } if (pid) { /* Parent */ mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete); if (bb_state == QB_LOG_STATE_ENABLED) { /* Re-enable now that it it safe */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); } return -1; /* -1 means 'still work to do' */ } /* Asynchronous write-out after a fork() */ /* In theory, we can scribble on the_cib here and not affect the parent, * but let's be safe anyway. */ cib_local = pcmk__xml_copy(NULL, the_cib); } /* Write the CIB */ exit_rc = cib_file_write_with_digest(cib_local, cib_root, "cib.xml"); /* A nonzero exit code will cause further writes to be disabled */ pcmk__xml_free(cib_local); if (p == NULL) { crm_exit_t exit_code = CRM_EX_OK; switch (exit_rc) { case pcmk_ok: exit_code = CRM_EX_OK; break; case pcmk_err_cib_modified: exit_code = CRM_EX_DIGEST; // Existing CIB doesn't match digest break; case pcmk_err_cib_backup: // Existing CIB couldn't be backed up case pcmk_err_cib_save: // New CIB couldn't be saved exit_code = CRM_EX_CANTCREAT; break; default: exit_code = CRM_EX_ERROR; break; } /* Use _exit() because exit() could affect the parent adversely */ pcmk_common_cleanup(); _exit(exit_code); } return exit_rc; } diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c index c556d758c2..4e7deb4aa4 100644 --- a/daemons/based/pacemaker-based.c +++ b/daemons/based/pacemaker-based.c @@ -1,434 +1,434 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for managing the configuration of a Pacemaker cluster" extern int init_remote_listener(int port, gboolean encrypted); gboolean cib_shutdown_flag = FALSE; int cib_status = pcmk_ok; pcmk_cluster_t *crm_cluster = NULL; GMainLoop *mainloop = NULL; gchar *cib_root = NULL; static gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; gboolean stand_alone = FALSE; int remote_fd = 0; int remote_tls_fd = 0; GHashTable *config_hash = NULL; static void cib_init(void); void cib_shutdown(int nsig); static bool startCib(const char *filename); extern int write_cib_contents(gpointer p); static crm_exit_t exit_code = CRM_EX_OK; static void cib_enable_writes(int nsig) { crm_info("(Re)enabling disk writes"); cib_writes_enabled = TRUE; } /*! * \internal * \brief Set up options, users, and groups for stand-alone mode * * \param[out] error GLib error object * * \return Standard Pacemaker return code */ static int setup_stand_alone(GError **error) { uid_t uid = 0; gid_t gid = 0; int rc = pcmk_rc_ok; preserve_status = TRUE; cib_writes_enabled = FALSE; rc = pcmk__daemon_user(&uid, &gid); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Error getting password DB entry for '%s': %s", CRM_DAEMON_USER, pcmk_rc_str(rc)); return rc; } rc = setgid(gid); if (rc < 0) { rc = errno; exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Could not set group to %lld: %s", (long long) gid, pcmk_rc_str(rc)); return rc; } rc = initgroups(CRM_DAEMON_USER, gid); if (rc < 0) { rc = errno; exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Could not set up groups for user %lld: %s", (long long) uid, pcmk_rc_str(rc)); return rc; } rc = setuid(uid); if (rc < 0) { rc = errno; exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Could not set user to %lld: %s", (long long) uid, pcmk_rc_str(rc)); return rc; } return pcmk_rc_ok; } /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_cluster_options() or * crm_attribute --list-options=cluster instead of querying daemon metadata. * * NOTE: pcs (as of at least 0.11.8) uses this */ static int based_metadata(pcmk__output_t *out) { return pcmk__daemon_metadata(out, PCMK__SERVER_BASED, "Cluster Information Base manager options", "Cluster options used by Pacemaker's Cluster " "Information Base manager", pcmk__opt_based); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, "(Advanced use only) Run in stand-alone mode", NULL }, { "disk-writes", 'w', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &cib_writes_enabled, "(Advanced use only) Enable disk writes (enabled by default unless in " "stand-alone mode)", NULL }, { "cib-root", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME, &cib_root, "(Advanced use only) Directory where the CIB XML file should be located " "(default: " CRM_CONFIG_DIR ")", NULL }, { NULL } }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_ipc_t *old_instance = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "r"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } mainloop_add_signal(SIGTERM, cib_shutdown); mainloop_add_signal(SIGPIPE, cib_enable_writes); cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL); if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = based_metadata(out); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } pcmk__cli_init_logging(PCMK__SERVER_BASED, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker CIB manager"); old_instance = crm_ipc_new(PCMK__SERVER_BASED_RO, 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); - crm_crit("Aborting start-up because another CIB manager instance is " - "already active"); + pcmk__crit("Aborting start-up because another CIB manager instance is " + "already active"); goto done; } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } if (stand_alone) { rc = setup_stand_alone(&error); if (rc != pcmk_rc_ok) { goto done; } } if (cib_root == NULL) { cib_root = g_strdup(CRM_CONFIG_DIR); } else { crm_notice("Using custom config location: %s", cib_root); } if (!pcmk__daemon_can_write(cib_root, NULL)) { exit_code = CRM_EX_FATAL; crm_err("Terminating due to bad permissions on %s", cib_root); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Bad permissions on %s (see logs for details)", cib_root); goto done; } pcmk__cluster_init_node_caches(); // Read initial CIB, connect to cluster, and start IPC servers cib_init(); // Run the main loop mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker CIB manager successfully started and accepting connections"); g_main_loop_run(mainloop); /* If main loop returned, clean up and exit. We disconnect in case * terminate_cib(-1) was called. */ pcmk_cluster_disconnect(crm_cluster); pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); done: g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__cluster_destroy_node_caches(); if (config_hash != NULL) { g_hash_table_destroy(config_hash); } pcmk__client_cleanup(); pcmk_cluster_free(crm_cluster); g_free(cib_root); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } #if SUPPORT_COROSYNC static void cib_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } pcmk__xe_set(xml, PCMK__XA_SRC, from); cib_peer_callback(xml, NULL); pcmk__xml_free(xml); free(data); } static void cib_cs_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Corosync disconnection complete"); } else { - crm_crit("Exiting immediately after losing connection " - "to cluster layer"); + pcmk__crit("Exiting immediately after losing connection to cluster " + "layer"); terminate_cib(CRM_EX_DISCONNECT); } } #endif static void cib_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, const void *data) { switch (type) { case pcmk__node_update_name: case pcmk__node_update_state: if (cib_shutdown_flag && (pcmk__cluster_num_active_nodes() < 2) && (pcmk__ipc_client_count() == 0)) { crm_info("Exiting after no more peers or clients remain"); terminate_cib(-1); } break; default: break; } } static void cib_init(void) { crm_cluster = pcmk_cluster_new(); #if SUPPORT_COROSYNC if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { pcmk_cluster_set_destroy_fn(crm_cluster, cib_cs_destroy); pcmk_cpg_set_deliver_fn(crm_cluster, cib_cs_dispatch); pcmk_cpg_set_confchg_fn(crm_cluster, pcmk__cpg_confchg_cb); } #endif // SUPPORT_COROSYNC config_hash = pcmk__strkey_table(free, free); if (startCib("cib.xml") == FALSE) { - crm_crit("Cannot start CIB... terminating"); + pcmk__crit("Cannot start CIB... terminating"); crm_exit(CRM_EX_NOINPUT); } if (!stand_alone) { pcmk__cluster_set_status_callback(&cib_peer_update_callback); if (pcmk_cluster_connect(crm_cluster) != pcmk_rc_ok) { - crm_crit("Cannot sign in to the cluster... terminating"); + pcmk__crit("Cannot sign in to the cluster... terminating"); crm_exit(CRM_EX_FATAL); } } pcmk__serve_based_ipc(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks, &ipc_rw_callbacks); if (stand_alone) { based_is_primary = true; } } static bool startCib(const char *filename) { gboolean active = FALSE; xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status); if (activateCibXml(cib, TRUE, "start") == 0) { int port = 0; active = TRUE; cib_read_config(config_hash, cib); pcmk__scan_port(pcmk__xe_get(cib, PCMK_XA_REMOTE_TLS_PORT), &port); if (port >= 0) { remote_tls_fd = init_remote_listener(port, TRUE); } pcmk__scan_port(pcmk__xe_get(cib, PCMK_XA_REMOTE_CLEAR_PORT), &port); if (port >= 0) { remote_fd = init_remote_listener(port, FALSE); } } return active; } diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c index cba30eddc8..58c79fd9cf 100644 --- a/daemons/controld/controld_cib.c +++ b/daemons/controld/controld_cib.c @@ -1,1062 +1,1062 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* sleep */ #include #include // crm_meta_value() #include #include #include #include // Call ID of the most recent in-progress CIB resource update (or 0 if none) static int pending_rsc_update = 0; /*! * \internal * \brief Respond to a dropped CIB connection * * \param[in] user_data CIB connection that dropped */ static void handle_cib_disconnect(gpointer user_data) { CRM_LOG_ASSERT(user_data == controld_globals.cib_conn); controld_trigger_fsa(); controld_globals.cib_conn->state = cib_disconnected; if (pcmk__is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { // @TODO This should trigger a reconnect, not a shutdown - crm_crit("Lost connection to the CIB manager, shutting down"); + pcmk__crit("Lost connection to the CIB manager, shutting down"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_CIB_CONNECTED); } else { // Expected crm_info("Disconnected from the CIB manager"); } } static void do_cib_updated(const char *event, xmlNode * msg) { const xmlNode *patchset = NULL; const char *client_name = NULL; crm_debug("Received CIB diff notification: DC=%s", pcmk__btoa(AM_I_DC)); if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { return; } if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_ALERTS) || pcmk__cib_element_in_patchset(patchset, PCMK_XE_CRM_CONFIG)) { controld_trigger_config(); } if (!AM_I_DC) { // We're not in control of the join sequence return; } client_name = pcmk__xe_get(msg, PCMK__XA_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { // The CIB is still accurate return; } if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_NODES) || pcmk__cib_element_in_patchset(patchset, PCMK_XE_STATUS)) { /* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS * section. Ensure the node list is up-to-date, and start the join * process again so we get everyone's current resource history. */ if (client_name == NULL) { client_name = pcmk__xe_get(msg, PCMK__XA_CIB_CLIENTID); } crm_notice("Populating nodes and starting an election after %s event " "triggered by %s", event, pcmk__s(client_name, "(unidentified client)")); populate_cib_nodes(node_update_quick|node_update_all, __func__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } void controld_disconnect_cib_manager(void) { cib_t *cib_conn = controld_globals.cib_conn; pcmk__assert(cib_conn != NULL); crm_debug("Disconnecting from the CIB manager"); controld_clear_fsa_input_flags(R_CIB_CONNECTED); cib_conn->cmds->del_notify_callback(cib_conn, PCMK__VALUE_CIB_DIFF_NOTIFY, do_cib_updated); cib_free_callbacks(cib_conn); if (cib_conn->state != cib_disconnected) { cib_conn->cmds->set_secondary(cib_conn, cib_discard_reply); cib_conn->cmds->signoff(cib_conn); } } /* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static int cib_retries = 0; cib_t *cib_conn = controld_globals.cib_conn; void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; int rc = pcmk_ok; pcmk__assert(cib_conn != NULL); if (pcmk__is_set(action, A_CIB_STOP)) { if ((cib_conn->state != cib_disconnected) && (pending_rsc_update != 0)) { crm_info("Waiting for resource update %d to complete", pending_rsc_update); crmd_fsa_stall(FALSE); return; } controld_disconnect_cib_manager(); } if (!pcmk__is_set(action, A_CIB_START)) { return; } if (cur_state == S_STOPPING) { crm_err("Ignoring request to connect to the CIB manager after " "shutdown"); return; } rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command_nonblocking); if (rc != pcmk_ok) { // A short wait that usually avoids stalling the FSA sleep(1); rc = cib_conn->cmds->signon(cib_conn, crm_system_name, cib_command_nonblocking); } if (rc != pcmk_ok) { crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); } else if (cib_conn->cmds->set_connection_dnotify(cib_conn, dnotify_fn) != pcmk_ok) { crm_err("Could not set dnotify callback"); } else if (cib_conn->cmds->add_notify_callback(cib_conn, PCMK__VALUE_CIB_DIFF_NOTIFY, update_cb) != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); } else { controld_set_fsa_input_flags(R_CIB_CONNECTED); cib_retries = 0; } if (!pcmk__is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { cib_retries++; if (cib_retries < 30) { crm_warn("Couldn't complete CIB registration %d times... " "pause and retry", cib_retries); controld_start_wait_timer(); crmd_fsa_stall(FALSE); } else { crm_err("Could not complete CIB registration %d times... " "hard error", cib_retries); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } } #define MIN_CIB_OP_TIMEOUT (30) /*! * \internal * \brief Get the timeout (in seconds) that should be used with CIB operations * * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout * environment variable, or 10 seconds times one more than the number of * nodes in the cluster. */ unsigned int cib_op_timeout(void) { unsigned int calculated_timeout = 10U * (pcmk__cluster_num_active_nodes() + pcmk__cluster_num_remote_nodes() + 1U); calculated_timeout = QB_MAX(calculated_timeout, MIN_CIB_OP_TIMEOUT); crm_trace("Calculated timeout: %s", pcmk__readable_interval(calculated_timeout * 1000)); if (controld_globals.cib_conn) { controld_globals.cib_conn->call_timeout = calculated_timeout; } return calculated_timeout; } /*! * \internal * \brief Get CIB call options to use local scope if primary is unavailable * * \return CIB call options */ int crmd_cib_smart_opt(void) { int call_opt = cib_none; if ((controld_globals.fsa_state == S_ELECTION) || (controld_globals.fsa_state == S_PENDING)) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(controld_globals.fsa_state)); cib__set_call_options(call_opt, "update", cib_none); } return call_opt; } static void cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *desc = user_data; if (rc == 0) { crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); } else { crm_warn("Deletion of %s (via CIB call %d) failed: %s " QB_XS " rc=%d", desc, call_id, pcmk_strerror(rc), rc); } } // Searches for various portions of PCMK__XE_NODE_STATE to delete // Match a particular node's PCMK__XE_NODE_STATE (takes node name 1x) #define XPATH_NODE_STATE "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" // Node's lrm section (name 1x) #define XPATH_NODE_LRM XPATH_NODE_STATE "/" PCMK__XE_LRM /* Node's PCMK__XE_LRM_RSC_OP entries and PCMK__XE_LRM_RESOURCE entries without * unexpired lock * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x) */ #define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" PCMK__XE_LRM_RSC_OP \ "|" XPATH_NODE_STATE \ "//" PCMK__XE_LRM_RESOURCE \ "[not(@" PCMK_OPT_SHUTDOWN_LOCK ") " \ "or " PCMK_OPT_SHUTDOWN_LOCK "<%lld]" // Node's PCMK__XE_TRANSIENT_ATTRIBUTES section (name 1x) #define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" PCMK__XE_TRANSIENT_ATTRIBUTES // Everything under PCMK__XE_NODE_STATE (name 1x) #define XPATH_NODE_ALL XPATH_NODE_STATE "/*" /* Unlocked history + transient attributes * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x, name 1x) */ #define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS /*! * \internal * \brief Get the XPath and description of a node state section to be deleted * * \param[in] uname Desired node * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to be deleted * \param[out] xpath Where to store XPath of \p section * \param[out] desc If not \c NULL, where to store description of \p section */ void controld_node_state_deletion_strings(const char *uname, enum controld_section_e section, char **xpath, char **desc) { const char *desc_pre = NULL; // Shutdown locks that started before this time are expired long long expire = (long long) time(NULL) - controld_globals.shutdown_lock_limit; switch (section) { case controld_section_lrm: *xpath = pcmk__assert_asprintf(XPATH_NODE_LRM, uname); desc_pre = "resource history"; break; case controld_section_lrm_unlocked: *xpath = pcmk__assert_asprintf(XPATH_NODE_LRM_UNLOCKED, uname, uname, expire); desc_pre = "resource history (other than shutdown locks)"; break; case controld_section_attrs: *xpath = pcmk__assert_asprintf(XPATH_NODE_ATTRS, uname); desc_pre = "transient attributes"; break; case controld_section_all: *xpath = pcmk__assert_asprintf(XPATH_NODE_ALL, uname); desc_pre = "all state"; break; case controld_section_all_unlocked: *xpath = pcmk__assert_asprintf(XPATH_NODE_ALL_UNLOCKED, uname, uname, expire, uname); desc_pre = "all state (other than shutdown locks)"; break; default: // We called this function incorrectly pcmk__assert(false); break; } if (desc != NULL) { *desc = pcmk__assert_asprintf("%s for node %s", desc_pre, uname); } } /*! * \internal * \brief Delete subsection of a node's CIB \c PCMK__XE_NODE_STATE * * \param[in] uname Desired node * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to delete * \param[in] options CIB call options to use */ void controld_delete_node_state(const char *uname, enum controld_section_e section, int options) { cib_t *cib = controld_globals.cib_conn; char *xpath = NULL; char *desc = NULL; int cib_rc = pcmk_ok; pcmk__assert((uname != NULL) && (cib != NULL)); controld_node_state_deletion_strings(uname, section, &xpath, &desc); cib__set_call_options(options, "node state deletion", cib_xpath|cib_multiple); cib_rc = cib->cmds->remove(cib, xpath, NULL, options); fsa_register_cib_callback(cib_rc, desc, cib_delete_callback); crm_info("Deleting %s (via CIB call %d) " QB_XS " xpath=%s", desc, cib_rc, xpath); // CIB library handles freeing desc free(xpath); } // Takes node name and resource ID #define XPATH_RESOURCE_HISTORY "//" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_UNAME "='%s']/" \ PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \ "/" PCMK__XE_LRM_RESOURCE \ "[@" PCMK_XA_ID "='%s']" // @TODO could add "and @PCMK_OPT_SHUTDOWN_LOCK" to limit to locks /*! * \internal * \brief Clear resource history from CIB for a given resource and node * * \param[in] rsc_id ID of resource to be cleared * \param[in] node Node whose resource history should be cleared * \param[in] user_name ACL user name to use * \param[in] call_options CIB call options * * \return Standard Pacemaker return code */ int controld_delete_resource_history(const char *rsc_id, const char *node, const char *user_name, int call_options) { char *desc = NULL; char *xpath = NULL; int rc = pcmk_rc_ok; cib_t *cib = controld_globals.cib_conn; CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); desc = pcmk__assert_asprintf("resource history for %s on %s", rsc_id, node); if (cib == NULL) { crm_err("Unable to clear %s: no CIB connection", desc); free(desc); return ENOTCONN; } // Ask CIB to delete the entry xpath = pcmk__assert_asprintf(XPATH_RESOURCE_HISTORY, node, rsc_id); cib->cmds->set_user(cib, user_name); rc = cib->cmds->remove(cib, xpath, NULL, call_options|cib_xpath); cib->cmds->set_user(cib, NULL); if (rc < 0) { rc = pcmk_legacy2rc(rc); crm_err("Could not delete resource status of %s on %s%s%s: %s " QB_XS " rc=%d", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : ""), pcmk_rc_str(rc), rc); free(desc); free(xpath); return rc; } if (pcmk__is_set(call_options, cib_sync_call)) { if (pcmk__is_set(call_options, cib_dryrun)) { crm_debug("Deletion of %s would succeed", desc); } else { crm_debug("Deletion of %s succeeded", desc); } free(desc); } else { crm_info("Clearing %s (via CIB call %d) " QB_XS " xpath=%s", desc, rc, xpath); fsa_register_cib_callback(rc, desc, cib_delete_callback); // CIB library handles freeing desc } free(xpath); return pcmk_rc_ok; } /*! * \internal * \brief Build XML and string of parameters meeting some criteria, for digest * * \param[in] op Executor event with parameter table to use * \param[in] metadata Parsed meta-data for executed resource agent * \param[in] param_type Flag used for selection criteria * \param[out] result Will be set to newly created XML with selected * parameters as attributes * * \return Newly allocated space-separated string of parameter names * \note Selection criteria varies by param_type: for the restart digest, we * want parameters that are *not* marked reloadable (OCF 1.1) or that * *are* marked unique (pre-1.1), for both string and XML results; for the * secure digest, we want parameters that *are* marked private for the * string, but parameters that are *not* marked private for the XML. * \note It is the caller's responsibility to free the string return value with * \p g_string_free() and the XML result with \p pcmk__xml_free(). */ static GString * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, enum ra_param_flags_e param_type, xmlNode **result) { GString *list = NULL; *result = pcmk__xe_create(NULL, PCMK_XE_PARAMETERS); /* Consider all parameters only except private ones to be consistent with * what scheduler does with calculate_secure_digest(). */ if ((param_type == ra_param_private) && (pcmk__compare_versions(controld_globals.dc_version, "3.16.0") >= 0)) { g_hash_table_foreach(op->params, hash2field, *result); pcmk__filter_op_for_digest(*result); } for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept_for_list = false; bool accept_for_xml = false; switch (param_type) { case ra_param_reloadable: accept_for_list = !pcmk__is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_unique: accept_for_list = pcmk__is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_private: accept_for_list = pcmk__is_set(param->rap_flags, param_type); accept_for_xml = !accept_for_list; break; } if (accept_for_list) { crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); if (list == NULL) { // We will later search for " WORD ", so start list with a space pcmk__add_word(&list, 256, " "); } pcmk__add_word(&list, 0, param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (accept_for_xml) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); pcmk__xe_set(*result, param->rap_name, v); } } else { crm_trace("Removing attr %s from the xml result", param->rap_name); pcmk__xe_remove_attr(*result, param->rap_name); } } if (list != NULL) { // We will later search for " WORD ", so end list with a space pcmk__add_word(&list, 0, " "); } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (pcmk__is_set(metadata->ra_flags, ra_supports_reload_agent)) { /* Add parameters not marked reloadable to the PCMK__XA_OP_FORCE_RESTART * list */ list = build_parameter_list(op, metadata, ra_param_reloadable, &restart); } else if (pcmk__is_set(metadata->ra_flags, ra_supports_legacy_reload)) { /* @COMPAT pre-OCF-1.1 resource agents * * Before OCF 1.1, Pacemaker abused "unique=0" to indicate * reloadability. Add any parameters with unique="1" to the * PCMK__XA_OP_FORCE_RESTART list. */ list = build_parameter_list(op, metadata, ra_param_unique, &restart); } else { // Resource does not support agent reloads return; } digest = pcmk__digest_operation(restart); /* Add PCMK__XA_OP_FORCE_RESTART and PCMK__XA_OP_RESTART_DIGEST to indicate * the resource supports reload, no matter if it actually supports any * reloadable parameters */ pcmk__xe_set(update, PCMK__XA_OP_FORCE_RESTART, (list == NULL)? "" : (const char *) list->str); pcmk__xe_set(update, PCMK__XA_OP_RESTART_DIGEST, digest); if ((list != NULL) && (list->len > 0)) { crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); } else { crm_trace("%s: %s", op->rsc_id, digest); } if (list != NULL) { g_string_free(list, TRUE); } pcmk__xml_free(restart); free(digest); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* To keep PCMK__XA_OP_SECURE_PARAMS short, we want it to contain the secure * parameters but PCMK__XA_OP_SECURE_DIGEST to be based on the insecure ones */ list = build_parameter_list(op, metadata, ra_param_private, &secure); if (list != NULL) { digest = pcmk__digest_operation(secure); pcmk__xe_set(update, PCMK__XA_OP_SECURE_PARAMS, (const char *) list->str); pcmk__xe_set(update, PCMK__XA_OP_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); g_string_free(list, TRUE); } else { crm_trace("%s: no secure parameters", op->rsc_id); } pcmk__xml_free(secure); free(digest); } /*! * \internal * \brief Create XML for a resource history entry * * \param[in] func Function name of caller * \param[in,out] parent XML to add entry to * \param[in] rsc Affected resource * \param[in,out] op Action to add an entry for (or NULL to do nothing) * \param[in] node_name Node where action occurred */ void controld_add_resource_history_xml_as(const char *func, xmlNode *parent, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *node_name) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return; } target_rc = rsc_op_expected_rc(op); caller_version = g_hash_table_lookup(op->params, PCMK_XA_CRM_FEATURE_SET); CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, controld_globals.cluster->priv->node_name, func); if (xml_op == NULL) { return; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return; } lrm_state = controld_get_executor_state(node_name, false); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return; } /* Ideally the metadata is cached, and the agent is just a fallback. * * @TODO Go through all callers and ensure they get metadata asynchronously * first. */ metadata = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_agent |controld_metadata_from_cache); if (metadata == NULL) { return; } crm_trace("Including additional digests for %s:%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return; } /*! * \internal * \brief Record an action as pending in the CIB, if appropriate * * \param[in] node_name Node where the action is pending * \param[in] rsc Resource that action is for * \param[in,out] op Pending action * * \return true if action was recorded in CIB, otherwise false */ bool controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL), return false); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return false; } // Check action's PCMK_META_RECORD_PENDING meta-attribute (defaults to true) record_pending = crm_meta_value(op->params, PCMK_META_RECORD_PENDING); if ((record_pending != NULL) && !pcmk__is_true(record_pending)) { pcmk__warn_once(pcmk__wo_record_pending, "The " PCMK_META_RECORD_PENDING " option (for example, " "for the %s resource's %s operation) is deprecated and " "will be removed in a future release", rsc->id, op->op_type); return false; } op->call_id = -1; op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); crm_debug("Recording pending %s-interval %s for %s on %s in the CIB", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, node_name); controld_update_resource_history(node_name, rsc, op, 0); return true; } static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource history update completed (call=%d rc=%d)", call_id, rc); break; default: if (call_id > 0) { crm_warn("Resource history update %d failed: %s " QB_XS " rc=%d", call_id, pcmk_strerror(rc), rc); } else { crm_warn("Resource history update failed: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); } } if (call_id == pending_rsc_update) { pending_rsc_update = 0; controld_trigger_fsa(); } } /* Only successful stops, and probes that found the resource inactive, get locks * recorded in the history. This ensures the resource stays locked to the node * until it is active there again after the node comes back up. */ static bool should_preserve_lock(lrmd_event_data_t *op) { if (!pcmk__is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { return false; } if (!strcmp(op->op_type, PCMK_ACTION_STOP) && (op->rc == PCMK_OCF_OK)) { return true; } if (!strcmp(op->op_type, PCMK_ACTION_MONITOR) && (op->rc == PCMK_OCF_NOT_RUNNING)) { return true; } return false; } /*! * \internal * \brief Request a CIB update * * \param[in] section Section of CIB to update * \param[in] data New XML of CIB section to update * \param[in] options CIB call options * \param[in] callback If not \c NULL, set this as the operation callback * * \return Standard Pacemaker return code * * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is * stored in \p pending_rsc_update on success. */ int controld_update_cib(const char *section, xmlNode *data, int options, void (*callback)(xmlNode *, int, int, xmlNode *, void *)) { cib_t *cib = controld_globals.cib_conn; int cib_rc = -ENOTCONN; pcmk__assert(data != NULL); if (cib != NULL) { cib_rc = cib->cmds->modify(cib, section, data, options); if (cib_rc >= 0) { crm_debug("Submitted CIB update %d for %s section", cib_rc, section); } } if (callback == NULL) { if (cib_rc < 0) { crm_err("Failed to update CIB %s section: %s", section, pcmk_rc_str(pcmk_legacy2rc(cib_rc))); } } else { if ((cib_rc >= 0) && (callback == cib_rsc_callback)) { /* Checking for a particular callback is a little hacky, but it * didn't seem worth adding an output argument for cib_rc for just * one use case. */ pending_rsc_update = cib_rc; } fsa_register_cib_callback(cib_rc, NULL, callback); } return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc); } /*! * \internal * \brief Update resource history entry in CIB * * \param[in] node_name Node where action occurred * \param[in] rsc Resource that action is for * \param[in,out] op Action to record * \param[in] lock_time If nonzero, when resource was locked to node * * \note On success, the CIB update's call ID will be stored in * pending_rsc_update. */ void controld_update_resource_history(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time) { xmlNode *update = NULL; xmlNode *xml = NULL; int call_opt = crmd_cib_smart_opt(); const char *node_id = NULL; const char *container = NULL; CRM_CHECK((node_name != NULL) && (op != NULL), return); if (rsc == NULL) { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); return; } // update = pcmk__xe_create(NULL, PCMK_XE_STATUS); // xml = pcmk__xe_create(update, PCMK__XE_NODE_STATE); if (controld_is_local_node(node_name)) { node_id = controld_globals.our_uuid; } else { node_id = node_name; pcmk__xe_set_bool_attr(xml, PCMK_XA_REMOTE_NODE, true); } pcmk__xe_set(xml, PCMK_XA_ID, node_id); pcmk__xe_set(xml, PCMK_XA_UNAME, node_name); pcmk__xe_set(xml, PCMK_XA_CRM_DEBUG_ORIGIN, __func__); // xml = pcmk__xe_create(xml, PCMK__XE_LRM); pcmk__xe_set(xml, PCMK_XA_ID, node_id); // xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCES); // xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCE); pcmk__xe_set(xml, PCMK_XA_ID, op->rsc_id); pcmk__xe_set(xml, PCMK_XA_CLASS, rsc->standard); pcmk__xe_set(xml, PCMK_XA_PROVIDER, rsc->provider); pcmk__xe_set(xml, PCMK_XA_TYPE, rsc->type); if (lock_time != 0) { /* Actions on a locked resource should either preserve the lock by * recording it with the action result, or clear it. */ if (!should_preserve_lock(op)) { lock_time = 0; } pcmk__xe_set_time(xml, PCMK_OPT_SHUTDOWN_LOCK, lock_time); } if (op->params != NULL) { container = g_hash_table_lookup(op->params, CRM_META "_" PCMK__META_CONTAINER); if (container != NULL) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); pcmk__xe_set(xml, PCMK__META_CONTAINER, container); } } // (possibly more than one) controld_add_resource_history_xml(xml, rsc, op, node_name); /* Update CIB asynchronously. Even if it fails, the resource state should be * discovered during the next election. Worst case, the node is wrongly * fenced for running a resource it isn't. */ crm_log_xml_trace(update, __func__); controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback); pcmk__xml_free(update); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] op Operation whose history should be deleted */ void controld_delete_action_history(const lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = pcmk__xe_create(NULL, PCMK__XE_LRM_RSC_OP); pcmk__xe_set_int(xml_top, PCMK__XA_CALL_ID, op->call_id); pcmk__xe_set(xml_top, PCMK__XA_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ pcmk__xe_set(xml_top, PCMK_XA_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, PCMK_XE_STATUS, xml_top, cib_none); crm_log_xml_trace(xml_top, "op:cancel"); pcmk__xml_free(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" \ "/" PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \ "/" PCMK__XE_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']" \ "/" PCMK__XE_LRM_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY "[@" PCMK_XA_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" PCMK_XA_ID "='%s' and @" PCMK__XA_CALL_ID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" PCMK_XA_ID "='%s' and @" PCMK__XA_OPERATION_KEY "='%s']" /*! * \internal * \brief Delete a last_failure resource history entry from the CIB * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] action If specified, delete only if this was failed action * \param[in] interval_ms If \p action is specified, it has this interval */ void controld_cib_delete_last_failure(const char *rsc_id, const char *node, const char *action, guint interval_ms) { char *xpath = NULL; char *last_failure_key = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL), return); // Generate XPath to match desired entry last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0); if (action == NULL) { xpath = pcmk__assert_asprintf(XPATH_HISTORY_ID, node, rsc_id, last_failure_key); } else { char *action_key = pcmk__op_key(rsc_id, action, interval_ms); xpath = pcmk__assert_asprintf(XPATH_HISTORY_ORIG, node, rsc_id, last_failure_key, action_key); free(action_key); } free(last_failure_key); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } /*! * \internal * \brief Delete resource history entry from the CIB, given operation key * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] call_id If specified, delete entry only if it has this call ID */ void controld_delete_action_history_by_key(const char *rsc_id, const char *node, const char *key, int call_id) { char *xpath = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return); if (call_id > 0) { xpath = pcmk__assert_asprintf(XPATH_HISTORY_CALL, node, rsc_id, key, call_id); } else { xpath = pcmk__assert_asprintf(XPATH_HISTORY_ID, node, rsc_id, key); } controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c index d5eef3710f..d04a47e7ee 100644 --- a/daemons/controld/controld_corosync.c +++ b/daemons/controld/controld_corosync.c @@ -1,163 +1,163 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // PRIu32 #include // NULL #include // free(), etc. #include #include #include #include #include #include #include #if SUPPORT_COROSYNC extern void post_cache_update(int seq); /* A_HA_CONNECT */ static void crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); pcmk__node_status_t *peer = NULL; xmlNode *xml = NULL; if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Could not parse message content: %.100s", data); free(data); return; } pcmk__xe_set(xml, PCMK__XA_SRC, from); peer = pcmk__get_node(0, from, NULL, pcmk__node_search_cluster_member); if (!pcmk__is_set(peer->processes, crm_proc_cpg)) { /* If we can still talk to our peer process on that node, then it must * be part of the corosync membership */ crm_warn("Receiving messages from a node we think is dead: " "%s[%" PRIu32 "]", peer->name, peer->cluster_layer_id); crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE); } crmd_ha_msg_filter(xml); pcmk__xml_free(xml); free(data); } static gboolean crmd_quorum_callback(unsigned long long seq, gboolean quorate) { crm_update_quorum(quorate, FALSE); post_cache_update(seq); return TRUE; } static void crmd_cs_destroy(gpointer user_data) { if (!pcmk__is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { - crm_crit("Lost connection to cluster layer, shutting down"); + pcmk__crit("Lost connection to cluster layer, shutting down"); crmd_exit(CRM_EX_DISCONNECT); } } /*! * \brief Handle a Corosync notification of a CPG configuration change * * \param[in] handle CPG connection * \param[in] cpg_name CPG group name * \param[in] member_list List of current CPG members * \param[in] member_list_entries Number of entries in \p member_list * \param[in] left_list List of CPG members that left * \param[in] left_list_entries Number of entries in \p left_list * \param[in] joined_list List of CPG members that joined * \param[in] joined_list_entries Number of entries in \p joined_list */ static void cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* When nodes leave CPG, the DC clears their transient node attributes. * * However if there is no DC, or the DC is among the nodes that left, each * remaining node needs to do the clearing, to ensure it gets done. * Otherwise, the attributes would persist when the nodes rejoin, which * could have serious consequences for unfencing, agents that use attributes * for internal logic, etc. * * Here, we set a global boolean if the DC is among the nodes that left, for * use by the peer callback. */ if (controld_globals.dc_name != NULL) { pcmk__node_status_t *peer = NULL; peer = pcmk__search_node_caches(0, controld_globals.dc_name, NULL, pcmk__node_search_cluster_member); if (peer != NULL) { for (int i = 0; i < left_list_entries; ++i) { if (left_list[i].nodeid == peer->cluster_layer_id) { controld_set_global_flags(controld_dc_left); break; } } } } // Process the change normally, which will call the peer callback as needed pcmk__cpg_confchg_cb(handle, cpg_name, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries); controld_clear_global_flags(controld_dc_left); } extern gboolean crm_connect_corosync(pcmk_cluster_t *cluster); gboolean crm_connect_corosync(pcmk_cluster_t *cluster) { if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { pcmk__cluster_set_status_callback(&peer_update_callback); pcmk_cluster_set_destroy_fn(cluster, crmd_cs_destroy); pcmk_cpg_set_deliver_fn(cluster, crmd_cs_dispatch); pcmk_cpg_set_confchg_fn(cluster, cpg_membership_callback); if (pcmk_cluster_connect(cluster) == pcmk_rc_ok) { pcmk__corosync_quorum_connect(crmd_quorum_callback, crmd_cs_destroy); return TRUE; } } return FALSE; } #endif diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 8892a56887..dfff2082ed 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2406 +1,2406 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_rsc_info_t, etc. #include #include // crm_meta_name() #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static void lrm_connection_destroy(void) { if (pcmk__is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { - crm_crit("Lost connection to local executor"); + pcmk__crit("Lost connection to local executor"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_LRM_CONNECTED); } } static char * make_stop_id(const char *rsc, int call_id) { return pcmk__assert_asprintf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { pcmk__insert_dup(user_data, (const char *) key, (const char *) value); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { pcmk__insert_dup(user_data, (const char *) key, (const char *) value); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none) && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); controld_delete_resource_history(op->rsc_id, lrm_state->node_name, NULL, crmd_cib_smart_opt()); return; } if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = pcmk__assert_alloc(1, sizeof(rsc_history_t)); entry->id = pcmk__str_copy(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = pcmk__str_copy(rsc->type); entry->rsc.standard = pcmk__str_copy(rsc->standard); entry->rsc.provider = pcmk__str_copy(rsc->provider); } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_EXEC_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, PCMK_ACTION_MONITOR, NULL)) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = pcmk__strkey_table(free, free); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if ((entry->recurring_op_list != NULL) && !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR, pcmk__str_casei)) { crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input, const char *rsc_id, const lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } static inline const char * op_node_name(lrmd_event_data_t *op) { return pcmk__s(op->remote_nodename, controld_globals.cluster->priv->node_name); } void lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: if (op->remote_nodename == NULL) { /* If this is the local executor IPC connection, set the right * bits in the controller when the connection goes down. */ lrm_connection_destroy(); } break; case lrmd_event_exec_complete: { lrm_state_t *lrm_state = controld_get_executor_state(op_node_name(op), false); pcmk__assert(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL, NULL); } break; default: break; } } static void try_local_executor_connect(long long action, fsa_data_t *msg_data, lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; crm_debug("Connecting to the local executor"); // If we can connect, great rc = controld_connect_local_executor(lrm_state); if (rc == pcmk_rc_ok) { controld_set_fsa_input_flags(R_LRM_CONNECTED); crm_info("Connection to the local executor established"); return; } // Otherwise, if we can try again, set a timer to do so if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the local executor %d time%s " "(%d max): %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS, pcmk_rc_str(rc)); controld_start_wait_timer(); crmd_fsa_stall(FALSE); return; } // Otherwise give up crm_err("Failed to connect to the executor the max allowed " "%d time%s: %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), pcmk_rc_str(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if (controld_globals.cluster->priv->node_name == NULL) { return; // Shouldn't be possible } lrm_state = controld_get_executor_state(NULL, true); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } controld_clear_fsa_input_flags(R_LRM_CONNECTED); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); } if (action & A_LRM_CONNECT) { try_local_executor_connect(action, msg_data, lrm_state); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; active_op_t *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) { guint removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->active_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, pcmk__plural_s(removed), when, nremaining); } } if (lrm_state->active_ops != NULL) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, pcmk__plural_s(counter), when); if ((cur_state == S_TERMINATE) || !pcmk__is_set(controld_globals.fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->active_ops != NULL) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->active_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if ((entry->last->rc == PCMK_OCF_OK) && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP, pcmk__str_casei)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO, pcmk__str_casei)) { // A stricter check is too complex ... leave that to the scheduler return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = pcmk__xe_create(rsc_list, PCMK__XE_LRM_RESOURCE); pcmk__xe_set(xml_rsc, PCMK_XA_ID, entry->id); pcmk__xe_set(xml_rsc, PCMK_XA_TYPE, entry->rsc.type); pcmk__xe_set(xml_rsc, PCMK_XA_CLASS, entry->rsc.standard); pcmk__xe_set(xml_rsc, PCMK_XA_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { static const char *name = CRM_META "_" PCMK__META_CONTAINER; const char *container = g_hash_table_lookup(entry->last->params, name); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); pcmk__xe_set(xml_rsc, PCMK__META_CONTAINER, container); } } controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name); controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name); } } return FALSE; } xmlNode * controld_query_executor_state(void) { // @TODO Ensure all callers handle NULL returns xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; pcmk__node_status_t *peer = NULL; lrm_state_t *lrm_state = controld_get_executor_state(NULL, false); if (!lrm_state) { crm_err("Could not get executor state for local node"); return NULL; } peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, node_update_cluster|node_update_peer, NULL, __func__); if (xml_state == NULL) { return NULL; } xml_data = pcmk__xe_create(xml_state, PCMK__XE_LRM); pcmk__xe_set(xml_data, PCMK_XA_ID, peer->xml_id); rsc_list = pcmk__xe_create(xml_data, PCMK__XE_LRM_RESOURCES); // Build a list of active (not necessarily running) resources build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } /*! * \internal * \brief Map standard Pacemaker return code to operation status and OCF code * * \param[out] event Executor event whose status and return code should be set * \param[in] rc Standard Pacemaker return code */ void controld_rc2event(lrmd_event_data_t *event, int rc) { /* This is called for cleanup requests from controller peers/clients, not * for resource actions, so no exit reason is needed. */ switch (rc) { case pcmk_rc_ok: lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); break; case EACCES: lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV, PCMK_EXEC_ERROR, NULL); break; default: lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, NULL); break; } } /*! * \internal * \brief Trigger a new transition after CIB status was deleted * * If a CIB status delete was not expected (as part of the transition graph), * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" * cluster property. * * \param[in] from_sys IPC name that requested the delete * \param[in] rsc_id Resource whose status was deleted (for logging only) */ void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { char *now_s = pcmk__assert_asprintf("%lld", (long long) time(NULL)); crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_none, PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, NULL, NULL); free(now_s); } } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = pcmk__xe_get(input->msg, PCMK__XA_CRM_SYS_FROM); const char *from_host = pcmk__xe_get(input->msg, PCMK__XA_SRC); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE); controld_rc2event(op, pcmk_legacy2rc(rc)); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; active_op_t *pending = value; if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } static void delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, GHashTableIter *rsc_iter, int rc, const char *user_name, bool from_cib) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = pcmk__str_copy(rsc_id); if (rsc_iter) { g_hash_table_iter_remove(rsc_iter); } else { g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); } if (from_cib) { controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, user_name, crmd_cib_smart_opt()); } g_hash_table_foreach_remove(lrm_state->active_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { lrm_state_t *lrm_state = controld_get_executor_state(node_name, false); if (lrm_state == NULL) { return; } if (lrm_state->resource_history != NULL) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; active_op_t *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->active_ops, key); if (pending) { if (remove && !pcmk__is_set(pending->flags, active_op_remove)) { controld_set_active_op_flags(pending, active_op_remove); crm_debug("Scheduling %s for removal", key); } if (pcmk__is_set(pending->flags, active_op_cancelled)) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } controld_set_active_op_flags(pending, active_op_cancelled); } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the active operations list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from active operations will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; active_op_t *op = value; if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->active_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in,out] lrm_state Executor connection state to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource if not already * \param[out] rsc_info Where to store information obtained from executor * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = pcmk__xe_id(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = pcmk__xe_get(rsc_xml, PCMK__XA_LONG_ID); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = pcmk__xe_get(rsc_xml, PCMK_XA_CLASS); const char *provider = pcmk__xe_get(rsc_xml, PCMK_XA_PROVIDER); const char *type = pcmk__xe_get(rsc_xml, PCMK_XA_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " QB_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc, GHashTableIter *iter, const char *sys, const char *user, ha_msg_input_t *request, bool unregister, bool from_cib) { int rc = pcmk_ok; crm_info("Removing resource %s from executor for %s%s%s", id, sys, (user? " as " : ""), (user? user : "")); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource %s deleted from executor", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' from executor is pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = pcmk__xe_get_copy(request->msg, PCMK_XA_REFERENCE); op = pcmk__assert_alloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = pcmk__str_copy(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Could not delete '%s' from executor for %s%s%s: %s " QB_XS " rc=%d", id, sys, (user? " as " : ""), (user? user : ""), pcmk_strerror(rc), rc); } delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode, const char *exit_reason) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, op_exitcode, op_status, exit_reason); } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ bool unregister = true; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { unregister = false; if (reprobe_all_nodes) { lrm_state_t *remote_lrm_state = controld_get_executor_state(entry->id, false); if (remote_lrm_state != NULL) { /* If reprobing all nodes, be sure to reprobe the remote * node before clearing its connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE, reprobe_all_nodes); } } } /* Don't delete from the CIB, since we'll delete the whole node's LRM * state from the CIB soon */ delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, user_name, NULL, unregister, false); } /* Now delete the copy in the CIB */ controld_delete_node_state(lrm_state->node_name, controld_section_lrm, cib_none); } /*! * \internal * \brief Fail a requested action without actually executing it * * For an action that can't be executed, process it similarly to an actual * execution result, with specified error status (except for notify actions, * which will always be treated as successful). * * \param[in,out] lrm_state Executor connection that action is for * \param[in] action Action XML from request * \param[in] rc Desired return code to use * \param[in] op_status Desired operation status to use * \param[in] exit_reason Human-friendly detail, if error */ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action, int op_status, enum ocf_exitcode rc, const char *exit_reason) { lrmd_event_data_t *op = NULL; const char *operation = pcmk__xe_get(action, PCMK_XA_OPERATION); const char *target_node = pcmk__xe_get(action, PCMK__META_ON_NODE); xmlNode *xml_rsc = pcmk__xe_first_child(action, PCMK_XE_PRIMITIVE, NULL, NULL); if ((xml_rsc == NULL) || (pcmk__xe_id(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", pcmk__xe_get(action, PCMK__XA_OPERATION_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", pcmk__xe_id(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, pcmk__xe_id(xml_rsc), operation); if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL); } else { fake_op_status(lrm_state, op, op_status, rc, exit_reason); } crm_info("Faking " PCMK__OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); // Process the result as if it came from the LRM process_lrm_event(lrm_state, op, NULL, action); lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation (replacing \p NULL with local node * name) * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(const xmlNode *xml) { const char *target = NULL; if (xml) { target = pcmk__xe_get(xml, PCMK__META_ON_NODE); } if (target == NULL) { target = controld_globals.cluster->priv->node_name; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = pcmk__xe_first_child(xml, PCMK_XE_PRIMITIVE, NULL, NULL); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, pcmk__xe_id(xml_rsc), "asyncmon"); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; if (user_name && !pcmk__is_privileged(user_name)) { crm_err("%s does not have permission to fail %s", user_name, pcmk__xe_id(xml_rsc)); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_INSUFFICIENT_PRIV, "Unprivileged user cannot fail resources"); controld_ack_event_directly(from_host, from_sys, NULL, op, pcmk__xe_id(xml_rsc)); lrmd_free_event(op); return; } if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR, "Simulated failure"); process_lrm_event(lrm_state, op, NULL, xml); op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR, "Cannot fail unknown resource"); } controld_ack_event_directly(from_host, from_sys, NULL, op, pcmk__xe_id(xml_rsc)); lrmd_free_event(op); } static void handle_reprobe_op(lrm_state_t *lrm_state, xmlNode *msg, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node, reprobe_all_nodes); if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { xmlNode *reply = pcmk__new_reply(msg, NULL); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } pcmk__xml_free(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; guint interval_ms = 0; gboolean in_progress = FALSE; xmlNode *params = pcmk__xe_first_child(input->xml, PCMK__XE_ATTRIBUTES, NULL, NULL); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(PCMK_XA_OPERATION); op_task = pcmk__xe_get(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(PCMK_META_INTERVAL); if (pcmk__xe_get_guint(params, meta_key, &interval_ms) != pcmk_rc_ok) { free(meta_key); return FALSE; } free(meta_key); op_key = pcmk__op_key(rsc->id, op_task, interval_ms); meta_key = crm_meta_name(PCMK__XA_CALL_ID); call_id = pcmk__xe_get(params, meta_key); free(meta_key); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); pcmk__scan_min_int(call_id, &call, 0); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } controld_delete_action_history_by_key(rsc->id, lrm_state->node_name, op_key, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ if (lrm_state->active_ops != NULL) { g_hash_table_remove(lrm_state->active_ops, op_id); } free(op_id); } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { bool unregister = true; int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, user_name, cib_dryrun|cib_sync_call); if (cib_rc != pcmk_rc_ok) { lrmd_event_data_t *op = NULL; op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE); /* These are resource clean-ups, not actions, so no exit reason is * needed. */ lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = false; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, user_name, input, unregister, true); } // User data for asynchronous metadata execution struct metadata_cb_data { lrmd_rsc_info_t *rsc; // Copy of resource information xmlNode *input_xml; // Copy of FSA input XML }; static struct metadata_cb_data * new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml) { struct metadata_cb_data *data = NULL; data = pcmk__assert_alloc(1, sizeof(struct metadata_cb_data)); data->input_xml = pcmk__xml_copy(NULL, input_xml); data->rsc = lrmd_copy_rsc_info(rsc); return data; } static void free_metadata_cb_data(struct metadata_cb_data *data) { lrmd_free_rsc_info(data->rsc); pcmk__xml_free(data->input_xml); free(data); } /*! * \internal * \brief Execute an action after metadata has been retrieved * * \param[in] pid Ignored * \param[in] result Result of metadata action * \param[in] user_data Metadata callback data */ static void metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) { struct metadata_cb_data *data = (struct metadata_cb_data *) user_data; struct ra_metadata_s *md = NULL; lrm_state_t *lrm_state = controld_get_executor_state(lrm_op_target(data->input_xml), false); if ((lrm_state != NULL) && pcmk__result_ok(result)) { md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, result->action_stdout); } if (!pcmk__is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); } free_metadata_cb_data(data); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = lrm_op_target(input->xml); gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; // Message routed to the local node is targeting a specific, non-local node is_remote_node = !controld_is_local_node(target_node); lrm_state = controld_get_executor_state(target_node, false); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Local node has no connection to remote"); return; } pcmk__assert(lrm_state != NULL); user_name = pcmk__update_acl_user(input->msg, PCMK__XA_CRM_USER, NULL); crm_op = pcmk__xe_get(input->msg, PCMK__XA_CRM_TASK); from_sys = pcmk__xe_get(input->msg, PCMK__XA_CRM_SYS_FROM); if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { from_host = pcmk__xe_get(input->msg, PCMK__XA_SRC); } if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { crm_rsc_delete = TRUE; // from crm_resource } operation = PCMK_ACTION_DELETE; } else if (input->xml != NULL) { operation = pcmk__xe_get(input->xml, PCMK_XA_OPERATION); } CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return); crm_trace("'%s' execution request from %s as %s user", pcmk__s(crm_op, operation), pcmk__s(from_sys, "unknown subsystem"), pcmk__s(user_name, "current")); if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none) || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { const char *raw_target = NULL; if (input->xml != NULL) { // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes raw_target = pcmk__xe_get(input->xml, PCMK__META_ON_NODE); } handle_reprobe_op(lrm_state, input->msg, from_sys, from_host, user_name, is_remote_node, (raw_target == NULL)); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = pcmk__xe_first_child(input->xml, PCMK_XE_PRIMITIVE, NULL, NULL); gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK((xml_rsc != NULL) && (pcmk__xe_id(xml_rsc) != NULL), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Not connected to remote executor"); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_debug("Not registering resource '%s' for a %s event " QB_XS " get-rc=%d (%s) transition-key=%s", pcmk__xe_id(xml_rsc), operation, rc, pcmk_strerror(rc), pcmk__xe_id(input->xml)); delete_rsc_entry(lrm_state, input, pcmk__xe_id(xml_rsc), NULL, pcmk_ok, user_name, true); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", pcmk__xe_id(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_NOT_CONFIGURED, // fatal error "Invalid resource definition"); return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " QB_XS " rc=%d", pcmk__xe_id(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_INVALID_PARAM, // hard error "Could not register resource with executor"); return; } if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else { struct ra_metadata_s *md = NULL; /* Getting metadata from cache is OK except for start actions -- * always refresh from the agent for those, in case the resource * agent was updated. * * @TODO Only refresh metadata for starts if the agent actually * changed (using something like inotify, or a hash or modification * time of the agent executable). */ if (strcmp(operation, PCMK_ACTION_START) != 0) { md = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); } if ((md == NULL) && crm_op_needs_metadata(rsc->standard, operation)) { /* Most likely, we'll need the agent metadata to record the * pending operation and the operation result. Get it now rather * than wait until then, so the metadata action doesn't eat into * the real action's timeout. * * @TODO Metadata is retrieved via direct execution of the * agent, which has a couple of related issues: the executor * should execute agents, not the controller; and metadata for * Pacemaker Remote nodes should be collected on those nodes, * not locally. */ struct metadata_cb_data *data = NULL; data = new_metadata_cb_data(rsc, input->xml); crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously", rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), ((rsc->provider == NULL)? "" : rsc->provider), rsc->type); (void) lrmd__metadata_async(rsc, metadata_complete, (void *) data); } else { do_lrm_rsc_op(lrm_state, rsc, input->xml, md); } } lrmd_free_rsc_info(rsc); } else { crm_err("Invalid execution request: unknown command '%s' (bug?)", crm_op); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; GHashTable *params = NULL; xmlNode *primitive = NULL; const char *class = NULL; const char *transition = NULL; pcmk__assert((rsc_id != NULL) && (operation != NULL)); op = lrmd_new_event(rsc_id, operation, 0); op->type = lrmd_event_exec_complete; op->timeout = 0; op->start_delay = 0; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); if (rsc_op == NULL) { CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = pcmk__strkey_table(free, free); pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_" PCMK__META_OP_TARGET_RC); op_delay = crm_meta_value(params, PCMK_META_START_DELAY); pcmk__scan_min_int(op_delay, &op->start_delay, 0); op_timeout = crm_meta_value(params, PCMK_META_TIMEOUT); pcmk__scan_min_int(op_timeout, &op->timeout, 0); if (pcmk__guint_from_hash(params, CRM_META "_" PCMK_META_INTERVAL, 0, &(op->interval_ms)) != pcmk_rc_ok) { op->interval_ms = 0; } /* Use pcmk_monitor_timeout instead of meta timeout for stonith recurring monitor, if set */ primitive = pcmk__xe_first_child(rsc_op, PCMK_XE_PRIMITIVE, NULL, NULL); class = pcmk__xe_get(primitive, PCMK_XA_CLASS); if (pcmk__is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) && pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei) && (op->interval_ms > 0)) { op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (op_timeout != NULL) { long long timeout_ms = 0; if ((pcmk__parse_ms(op_timeout, &timeout_ms) == pcmk_rc_ok) && (timeout_ms >= 0)) { op->timeout = (int) QB_MIN(timeout_ms, INT_MAX); } } } if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = pcmk__strkey_table(free, free); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = pcmk__xe_get(rsc_op, PCMK__XA_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = pcmk__str_copy(transition); if (op->interval_ms != 0) { if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP, NULL)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } /*! * \internal * \brief Send a (synthesized) event result * * Reply with a synthesized event result directly, as opposed to going through * the executor. * * \param[in] to_host Host to send result to * \param[in] to_sys IPC name to send result (NULL for transition engine) * \param[in] rsc Type information about resource the result is for * \param[in,out] op Event with result to send * \param[in] rsc_id ID of resource the result is for */ void controld_ack_event_directly(const char *to_host, const char *to_sys, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; pcmk__node_status_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { // op->rsc_id is a (const char *) but lrmd_free_event() frees it pcmk__assert(rsc_id != NULL); op->rsc_id = pcmk__str_copy(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = controld_get_local_node_status(); update = create_node_state_update(peer, node_update_none, NULL, __func__); iter = pcmk__xe_create(update, PCMK__XE_LRM); pcmk__xe_set(iter, PCMK_XA_ID, controld_globals.our_uuid); iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCES); iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCE); pcmk__xe_set(iter, PCMK_XA_ID, op->rsc_id); controld_add_resource_history_xml(iter, rsc, op, controld_globals.cluster->priv->node_name); /* We don't have the original message ID, so use "direct-ack" (we just need * something non-NULL for this to create a reply) * * @TODO It would be better to use the server, message ID, and task from the * original request when callers have it available */ reply = pcmk__new_message(pcmk_ipc_controld, "direct-ack", CRM_SYSTEM_LRMD, to_host, to_sys, CRM_OP_INVOKE_LRM, update); crm_log_xml_trace(update, "[direct ACK]"); crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, pcmk__xe_get(reply, PCMK_XA_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } pcmk__xml_free(update); pcmk__xml_free(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } controld_set_fsa_input_flags(R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; active_op_t *op = value; if ((op->interval_ms != 0) && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; active_op_t *op = value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } /*! * \internal * \brief Check whether recurring actions should be cancelled before an action * * \param[in] rsc_id Resource that action is for * \param[in] action Action being performed * \param[in] interval_ms Operation interval of \p action (in milliseconds) * * \return true if recurring actions should be cancelled, otherwise false */ static bool should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms) { if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0) && (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) { /* Don't stop monitoring a migrating Pacemaker Remote connection * resource until the entire migration has completed. We must detect if * the connection is unexpectedly severed, even during a migration. */ return false; } // Cancel recurring actions before changing resource state return (interval_ms == 0) && !pcmk__str_any_of(action, PCMK_ACTION_MONITOR, PCMK_ACTION_NOTIFY, NULL); } /*! * \internal * \brief Check whether an action should not be performed at this time * * \param[in] operation Action to be performed * * \return Readable description of why action should not be performed, * or NULL if it should be performed */ static const char * should_nack_action(const char *action) { if (pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN) && pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); return "Not attempting start due to shutdown in progress"; } switch (controld_globals.fsa_state) { case S_NOT_DC: case S_POLICY_ENGINE: // Recalculating case S_TRANSITION_ENGINE: break; default: if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) { return "Controller cannot attempt actions at this time"; } break; } return NULL; } static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md) { int rc; int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; const char *operation = NULL; const char *nack_reason = NULL; CRM_CHECK((rsc != NULL) && (msg != NULL), return); operation = pcmk__xe_get(msg, PCMK_XA_OPERATION); CRM_CHECK(!pcmk__str_empty(operation), return); transition = pcmk__xe_get(msg, PCMK__XA_TRANSITION_KEY); if (pcmk__str_empty(transition)) { crm_log_xml_err(msg, "Missing transition number"); } if (lrm_state == NULL) { // This shouldn't be possible, but provide a failsafe just in case crm_err("Cannot execute %s of %s: No executor connection " QB_XS " transition_key=%s", operation, rsc->id, pcmk__s(transition, "")); synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID, PCMK_OCF_UNKNOWN_ERROR, "No executor connection"); return; } if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs * will schedule reload-agent actions only. In either case, we need * to map that to whatever the resource agent actually supports. * Default to the OCF 1.1 name. */ if ((md != NULL) && pcmk__is_set(md->ra_flags, ra_supports_legacy_reload)) { operation = PCMK_ACTION_RELOAD; } else { operation = PCMK_ACTION_RELOAD_AGENT; } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " PCMK__OP_FMT, removed, pcmk__plural_s(removed), rsc->id, operation, op->interval_ms); } } nack_reason = should_nack_action(operation); if (nack_reason != NULL) { crm_notice("Not requesting local execution of %s operation for %s on %s" " in state %s: %s", pcmk__readable_action(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, fsa_state2string(controld_globals.fsa_state), nack_reason); lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID, nack_reason); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } crm_notice("Requesting local execution of %s operation for %s on %s " QB_XS " transition %s", pcmk__readable_action(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, pcmk__s(transition, "")); controld_record_pending_op(lrm_state->node_name, rsc, op); op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, op->params, &call_id); if (rc == pcmk_rc_ok) { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); active_op_t *pending = NULL; pending = pcmk__assert_alloc(1, sizeof(active_op_t)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = pcmk__str_copy(operation); pending->op_key = pcmk__str_copy(op_id); pending->rsc_id = pcmk__str_copy(rsc->id); pending->start_time = time(NULL); pending->user_data = pcmk__str_copy(op->user_data); pcmk__xe_get_time(msg, PCMK_OPT_SHUTDOWN_LOCK, &(pending->lock_time)); g_hash_table_replace(lrm_state->active_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { int target_rc = PCMK_OCF_OK; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } else if (lrm_state_is_local(lrm_state)) { crm_err("Could not initiate %s action for resource %s locally: %s " QB_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { crm_err("Could not initiate %s action for resource %s remotely on %s: " "%s " QB_XS " rc=%d", operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); } free(op_id); lrmd_free_event(op); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = pcmk__str_copy(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* Replace newline escape pattern with actual newline (and a space so we * don't have to shuffle the rest of the buffer) */ pch[0] = '\n'; pch[1] = ' '; pch = strstr(pch, escaped_newline); } return ret; } static bool did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, const char * op_type, guint interval_ms) { rsc_history_t *entry = NULL; CRM_CHECK(lrm_state != NULL, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); CRM_CHECK(op_type != NULL, return FALSE); entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->failed == NULL) { return FALSE; } if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none) && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei) && entry->failed->interval_ms == interval_ms) { return TRUE; } return FALSE; } /*! * \internal * \brief Log the result of an executor action (actual or synthesized) * * \param[in] op Executor action to log result for * \param[in] op_key Operation key for action * \param[in] node_name Name of node action was performed on, if known * \param[in] confirmed Whether to log that graph action was confirmed */ static void log_executor_event(const lrmd_event_data_t *op, const char *op_key, const char *node_name, gboolean confirmed) { int log_level = LOG_ERR; GString *str = g_string_sized_new(100); // reasonable starting size pcmk__g_strcat(str, "Result of ", pcmk__readable_action(op->op_type, op->interval_ms), " operation for ", op->rsc_id, NULL); if (node_name != NULL) { pcmk__g_strcat(str, " on ", node_name, NULL); } switch (op->op_status) { case PCMK_EXEC_DONE: log_level = LOG_NOTICE; pcmk__g_strcat(str, ": ", crm_exit_str((crm_exit_t) op->rc), NULL); break; case PCMK_EXEC_TIMEOUT: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), " after ", pcmk__readable_interval(op->timeout), NULL); break; case PCMK_EXEC_CANCELLED: log_level = LOG_INFO; pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), NULL); break; default: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), NULL); break; } if ((op->exit_reason != NULL) && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) { pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL); } g_string_append(str, " " QB_XS); g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s", (confirmed? "" : "un"), op->call_id, op_key); if (op->op_status == PCMK_EXEC_DONE) { g_string_append_printf(str, " rc=%d", op->rc); } do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); /* The services library has already logged the output at info or debug * level, so just raise to notice if it looks like a failure. */ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { char *prefix = pcmk__assert_asprintf(PCMK__OP_FMT "@%s output", op->rsc_id, op->op_type, op->interval_ms, node_name); crm_log_output(LOG_NOTICE, prefix, op->output); free(prefix); } } void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, active_op_t *pending, const xmlNode *action_xml) { char *op_id = NULL; char *op_key = NULL; gboolean remove = FALSE; gboolean removed = FALSE; bool need_direct_ack = FALSE; lrmd_rsc_info_t *rsc = NULL; const char *node_name = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(op->rsc_id != NULL, return); // Remap new status codes for older DCs if (pcmk__compare_versions(controld_globals.dc_version, "3.2.0") < 0) { switch (op->op_status) { case PCMK_EXEC_NOT_CONNECTED: lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED, PCMK_EXEC_ERROR, op->exit_reason); break; case PCMK_EXEC_INVALID: lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR, op->exit_reason); break; default: break; } } op_id = make_stop_id(op->rsc_id, op->call_id); op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); // Get resource info if available (from executor state or action XML) if (lrm_state) { rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if ((rsc == NULL) && action_xml) { xmlNode *xml = pcmk__xe_first_child(action_xml, PCMK_XE_PRIMITIVE, NULL, NULL); const char *standard = pcmk__xe_get(xml, PCMK_XA_CLASS); const char *provider = pcmk__xe_get(xml, PCMK_XA_PROVIDER); const char *type = pcmk__xe_get(xml, PCMK_XA_TYPE); if (standard && type) { crm_info("%s agent information not cached, using %s%s%s:%s from action XML", op->rsc_id, standard, (provider? ":" : ""), (provider? provider : ""), type); rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); } else { crm_err("Can't process %s result because %s agent information not cached or in XML", op_key, op->rsc_id); } } // Get node name if available (from executor state or action XML) if (lrm_state) { node_name = lrm_state->node_name; } else if (action_xml) { node_name = pcmk__xe_get(action_xml, PCMK__META_ON_NODE); } if(pending == NULL) { remove = TRUE; if (lrm_state) { pending = g_hash_table_lookup(lrm_state->active_ops, op_id); } } if (op->op_status == PCMK_EXEC_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_EXEC_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_EXEC_CANCELLED) { /* We might not record the result, so directly acknowledge it to the * originator instead, so it doesn't time out waiting for the result * (especially important if part of a transition). */ need_direct_ack = TRUE; if (controld_action_is_recordable(op->op_type)) { if (node_name && rsc) { // We should record the result, and happily, we can time_t lock_time = (pending == NULL)? 0 : pending->lock_time; controld_update_resource_history(node_name, rsc, op, lock_time); need_direct_ack = FALSE; } else if (op->rsc_deleted) { /* We shouldn't record the result (likely the resource was * refreshed, cleaned, or removed while this operation was * in flight). */ crm_notice("Not recording %s result in CIB because " "resource information was removed since it was initiated", op_key); } else { /* This shouldn't be possible; the executor didn't consider the * resource deleted, but we couldn't find resource or node * information. */ crm_err("Unable to record %s result in CIB: %s", op_key, (node_name? "No resource information" : "No node name")); } } } else if (op->interval_ms == 0) { /* A non-recurring operation was cancelled. Most likely, the * never-initiated action was removed from the executor's pending * operations list upon resource removal. */ need_direct_ack = TRUE; } else if (pending == NULL) { /* This recurring operation was cancelled, but was not pending. No * transition actions are waiting on it, nothing needs to be done. */ } else if (op->user_data == NULL) { /* This recurring operation was cancelled and pending, but we don't * have a transition key. This should never happen. */ crm_err("Recurring operation %s was cancelled without transition information", op_key); } else if (pcmk__is_set(pending->flags, active_op_remove)) { /* This recurring operation was cancelled (by us) and pending, and we * have been waiting for it to finish. */ if (lrm_state) { controld_delete_action_history(op); } /* Directly acknowledge failed recurring actions here. The above call to * controld_delete_action_history() will not erase any corresponding * last_failure entry, which means that the DC won't confirm the * cancellation via process_op_deletion(), and the transition would * otherwise wait for the action timer to pop. */ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms)) { need_direct_ack = TRUE; } } else if (op->rsc_deleted) { /* This recurring operation was cancelled (but not by us, and the * executor does not have resource information, likely due to resource * cleanup, refresh, or removal) and pending. */ crm_debug("Recurring op %s was cancelled due to resource deletion", op_key); need_direct_ack = TRUE; } else { /* This recurring operation was cancelled (but not by us, likely by the * executor before stopping the resource) and pending. We don't need to * do anything special. */ } if (need_direct_ack) { controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if (lrm_state && ((op->interval_ms == 0) || (op->op_status == PCMK_EXEC_CANCELLED))) { gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id); if (op->interval_ms != 0) { removed = TRUE; } else if (found) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->active_ops)); } } log_executor_event(op, op_key, node_name, removed); if (lrm_state) { if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA, pcmk__str_casei)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (rsc && (op->rc == PCMK_OCF_OK)) { char *metadata = unescape_newlines(op->output); controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata); free(metadata); } } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); if (lrm_state) { delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL, true); } } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ controld_trigger_fsa(); if (lrm_state && rsc) { update_history_cache(lrm_state, rsc, op); } lrmd_free_rsc_info(rsc); free(op_key); free(op_id); } diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index b9947453cb..e87256a7f5 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -1,1114 +1,1114 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // crm_meta_value() #include // PCMK_SCORE_INFINITY #include #include #include #include static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); /* * stonith failure counting * * We don't want to get stuck in a permanent fencing loop. Keep track of the * number of fencing failures for each target node, and the most we'll restart a * transition for. */ struct st_fail_rec { int count; }; #define DEFAULT_STONITH_MAX_ATTEMPTS 10 static bool fence_reaction_panic = false; static unsigned long int stonith_max_attempts = DEFAULT_STONITH_MAX_ATTEMPTS; static GHashTable *stonith_failures = NULL; /*! * \internal * \brief Update max fencing attempts before giving up * * \param[in] value New max fencing attempts */ static void update_stonith_max_attempts(const char *value) { int score = 0; int rc = pcmk_parse_score(value, &score, DEFAULT_STONITH_MAX_ATTEMPTS); // The option validator ensures invalid values shouldn't be possible CRM_CHECK((rc == pcmk_rc_ok) && (score > 0), return); if (stonith_max_attempts != score) { crm_debug("Maximum fencing attempts per transition is now %d (was %lu)", score, stonith_max_attempts); } stonith_max_attempts = score; } /*! * \internal * \brief Configure reaction to notification of local node being fenced * * \param[in] reaction_s Reaction type */ static void set_fence_reaction(const char *reaction_s) { if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) { fence_reaction_panic = true; } else { if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) { crm_warn("Invalid value '%s' for %s, using 'stop'", reaction_s, PCMK_OPT_FENCE_REACTION); } fence_reaction_panic = false; } } /*! * \internal * \brief Configure fencing options based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_fencing(GHashTable *options) { const char *value = NULL; value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION); set_fence_reaction(value); value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS); update_stonith_max_attempts(value); } static gboolean too_many_st_failures(const char *target) { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *value = NULL; if (stonith_failures == NULL) { return FALSE; } if (target == NULL) { g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (value->count >= stonith_max_attempts) { target = (const char*)key; goto too_many; } } } else { value = g_hash_table_lookup(stonith_failures, target); if ((value != NULL) && (value->count >= stonith_max_attempts)) { goto too_many; } } return FALSE; too_many: crm_warn("Too many failures (%d) to fence %s, giving up", value->count, target); return TRUE; } /*! * \internal * \brief Reset a stonith fail count * * \param[in] target Name of node to reset, or NULL for all */ void st_fail_count_reset(const char *target) { if (stonith_failures == NULL) { return; } if (target) { struct st_fail_rec *rec = NULL; rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count = 0; } } else { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *rec = NULL; g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rec)) { rec->count = 0; } } } static void st_fail_count_increment(const char *target) { struct st_fail_rec *rec = NULL; if (stonith_failures == NULL) { stonith_failures = pcmk__strkey_table(free, free); } rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count++; } else { rec = malloc(sizeof(struct st_fail_rec)); if(rec == NULL) { return; } rec->count = 1; g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec); } } /* end stonith fail count functions */ static void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } } /*! * \internal * \brief Update a fencing target's node state * * \param[in] target Node that was successfully fenced * \param[in] target_xml_id CIB XML ID of target */ static void update_node_state_after_fencing(const char *target, const char *target_xml_id) { int rc = pcmk_ok; pcmk__node_status_t *peer = NULL; xmlNode *node_state = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node * might have already rejoined before we get the stonith result here. */ int flags = node_update_join | node_update_expected; CRM_CHECK((target != NULL) && (target_xml_id != NULL), return); // Ensure target is cached peer = pcmk__get_node(0, target, target_xml_id, pcmk__node_search_any); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. */ flags |= node_update_cluster; } if (peer->xml_id == NULL) { crm_info("Recording XML ID '%s' for node '%s'", target_xml_id, target); peer->xml_id = pcmk__str_copy(target_xml_id); } crmd_peer_down(peer, TRUE); node_state = create_node_state_update(peer, flags, NULL, __func__); pcmk__xe_set(node_state, PCMK_XA_ID, target_xml_id); if (pcmk__is_set(peer->flags, pcmk__node_status_remote)) { char *now_s = pcmk__ttoa(time(NULL)); pcmk__xe_set(node_state, PCMK__XA_NODE_FENCED, now_s); free(now_s); } rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, PCMK_XE_STATUS, node_state, cib_can_create); pcmk__xml_free(node_state); crm_debug("Updating node state for %s after fencing (call %d)", target, rc); fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated); controld_delete_node_state(peer->name, controld_section_all, cib_none); } /*! * \internal * \brief Abort transition due to stonith failure * * \param[in] abort_action Whether to restart or stop transition * \param[in] target Don't restart if this (NULL for any) has too many failures * \param[in] reason Log this stonith action XML as abort reason (or NULL) */ static void abort_for_stonith_failure(enum pcmk__graph_next abort_action, const char *target, const xmlNode *reason) { /* If stonith repeatedly fails, we eventually give up on starting a new * transition for that reason. */ if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) { abort_action = pcmk__graph_wait; } abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed", reason); } /* * stonith cleanup list * * If the DC is shot, proper notifications might not go out. * The stonith cleanup list allows the cluster to (re-)send * notifications once a new DC is elected. */ static GList *stonith_cleanup_list = NULL; /*! * \internal * \brief Add a node to the stonith cleanup list * * \param[in] target Name of node to add */ void add_stonith_cleanup(const char *target) { stonith_cleanup_list = g_list_append(stonith_cleanup_list, pcmk__str_copy(target)); } /*! * \internal * \brief Remove a node from the stonith cleanup list * * \param[in] Name of node to remove */ void remove_stonith_cleanup(const char *target) { GList *iter = stonith_cleanup_list; while (iter != NULL) { GList *tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) { crm_trace("Removing %s from the cleanup list", iter_name); stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the stonith cleanup list */ void purge_stonith_cleanup(void) { if (stonith_cleanup_list) { GList *iter = NULL; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } } /*! * \internal * \brief Send stonith updates for all entries in cleanup list, then purge it */ void execute_stonith_cleanup(void) { GList *iter; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; pcmk__node_status_t *target_node = pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member); const char *uuid = pcmk__cluster_get_xml_id(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); update_node_state_after_fencing(target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } /* end stonith cleanup list functions */ /* stonith API client * * Functions that need to interact directly with the fencer via its API */ static stonith_t *stonith_api = NULL; static mainloop_timer_t *controld_fencer_connect_timer = NULL; static char *te_client_id = NULL; static gboolean fail_incompletable_stonith(pcmk__graph_t *graph) { GList *lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GList *lpc2 = NULL; pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; if (pcmk__is_set(synapse->flags, pcmk__synapse_confirmed)) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data; if ((action->type != pcmk__cluster_graph_action) || pcmk__is_set(action->flags, pcmk__graph_action_confirmed)) { continue; } task = pcmk__xe_get(action->xml, PCMK_XA_OPERATION); if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); last_action = action->xml; pcmk__update_graph(graph, action); crm_notice("Failing action %d (%s): fencer terminated", action->id, pcmk__xe_id(action->xml)); } } } if (last_action != NULL) { crm_warn("Fencer failure resulted in unrunnable actions"); abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e) { te_cleanup_stonith_history_sync(st, FALSE); if (pcmk__is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_err("Lost fencer connection (will attempt to reconnect)"); if (!mainloop_timer_running(controld_fencer_connect_timer)) { mainloop_timer_start(controld_fencer_connect_timer); } } else { crm_info("Disconnected from fencer"); } if (stonith_api) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(st); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (AM_I_DC) { fail_incompletable_stonith(controld_globals.transition_graph); trigger_graph(); } } /*! * \internal * \brief Handle an event notification from the fencing API * * \param[in] st Fencing API connection (ignored) * \param[in] event Fencing API event notification */ static void handle_fence_notification(stonith_t *st, stonith_event_t *event) { bool succeeded = true; const char *executioner = "the cluster"; const char *client = "a client"; const char *reason = NULL; int exec_status; if (te_client_id == NULL) { te_client_id = pcmk__assert_asprintf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (event == NULL) { crm_err("Notify data not found"); return; } if (event->executioner != NULL) { executioner = event->executioner; } if (event->client_origin != NULL) { client = event->client_origin; } exec_status = stonith__event_execution_status(event); if ((stonith__event_exit_status(event) != CRM_EX_OK) || (exec_status != PCMK_EXEC_DONE)) { succeeded = false; if (exec_status == PCMK_EXEC_DONE) { exec_status = PCMK_EXEC_ERROR; } } reason = stonith__event_exit_reason(event); crmd_alert_fencing_op(event); if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) { // Unfencing doesn't need special handling, just a log message if (succeeded) { crm_notice("%s was unfenced by %s at the request of %s@%s", event->target, executioner, client, event->origin); } else { crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d", event->target, executioner, pcmk_exec_status_str(exec_status), ((reason == NULL)? "" : ": "), ((reason == NULL)? "" : reason), stonith__event_exit_status(event)); } return; } if (succeeded && controld_is_local_node(event->target)) { /* We were notified of our own fencing. Most likely, either fencing was * misconfigured, or fabric fencing that doesn't cut cluster * communication is in use. * * Either way, shutting down the local host is a good idea, to require * administrator intervention. Also, other nodes would otherwise likely * set our status to lost because of the fencing callback and discard * our subsequent election votes as "not part of our cluster". */ - crm_crit("We were allegedly just fenced by %s for %s!", - executioner, event->origin); // Dumps blackbox if enabled + pcmk__crit("We were allegedly just fenced by %s for %s!", executioner, + event->origin); // Dumps blackbox if enabled if (fence_reaction_panic) { pcmk__panic("Notified of own fencing"); } else { crm_exit(CRM_EX_FATAL); } return; // Should never get here } /* Update the count of fencing failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC) { if (succeeded) { st_fail_count_reset(event->target); } else { st_fail_count_increment(event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: " "%s%s%s%s " QB_XS " event=%s", event->target, (succeeded? "" : " not"), event->action, executioner, client, event->origin, (succeeded? "OK" : pcmk_exec_status_str(exec_status)), ((reason == NULL)? "" : " ("), ((reason == NULL)? "" : reason), ((reason == NULL)? "" : ")"), event->id); if (succeeded) { const uint32_t flags = pcmk__node_search_any |pcmk__node_search_cluster_cib; pcmk__node_status_t *peer = pcmk__search_node_caches(0, event->target, NULL, flags); const char *uuid = NULL; if (peer == NULL) { return; } uuid = pcmk__cluster_get_xml_id(peer); if (AM_I_DC) { /* The DC always sends updates */ update_node_state_after_fencing(event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the * peer cache via refresh_remote_nodes(). For now, we rely on the * scheduler creating fence pseudo-events for the guests. */ if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) { /* Abort the current transition if it wasn't the cluster that * initiated fencing. */ crm_info("External fencing operation from %s fenced %s", client, event->target); abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "External Fencing Operation", NULL); } } else if (pcmk__str_eq(controld_globals.dc_name, event->target, pcmk__str_null_matches|pcmk__str_casei) && !pcmk__is_set(peer->flags, pcmk__node_status_remote)) { // Assume the target was our DC if we don't currently have one if (controld_globals.dc_name != NULL) { crm_notice("Fencing target %s was our DC", event->target); } else { crm_notice("Fencing target %s may have been our DC", event->target); } /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (controld_is_local_node(event->executioner)) { update_node_state_after_fencing(event->target, uuid); } add_stonith_cleanup(event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (pcmk__is_set(peer->flags, pcmk__node_status_remote)) { remote_ra_fail(event->target); } crmd_peer_down(peer, TRUE); } } /*! * \brief Connect to fencer * * \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer * * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. */ gboolean controld_timer_fencer_connect(gpointer user_data) { int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return G_SOURCE_REMOVE; } } if (stonith_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); return G_SOURCE_REMOVE; } if (user_data == NULL) { // Blocking (retry failures now until successful) rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 30 attempts: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); } } else { // Non-blocking (retry failures later in main loop) rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (controld_fencer_connect_timer == NULL) { controld_fencer_connect_timer = mainloop_timer_add("controld_fencer_connect", 1000, TRUE, controld_timer_fencer_connect, GINT_TO_POINTER(TRUE)); } if (rc != pcmk_ok) { if (pcmk__is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_notice("Fencer connection failed (will retry): %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); if (!mainloop_timer_running(controld_fencer_connect_timer)) { mainloop_timer_start(controld_fencer_connect_timer); } return G_SOURCE_CONTINUE; } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); } return G_SOURCE_REMOVE; } } if (rc == pcmk_ok) { stonith_api_operations_t *cmds = stonith_api->cmds; cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE, handle_fence_notification); cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, tengine_stonith_history_synced); te_trigger_stonith_history_sync(TRUE); crm_notice("Fencer successfully connected"); } return G_SOURCE_REMOVE; } void controld_disconnect_fencer(bool destroy) { if (stonith_api) { // Prevent fencer connection from coming up again controld_clear_fsa_input_flags(R_ST_REQUIRED); if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(stonith_api); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (destroy) { if (stonith_api) { stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (controld_fencer_connect_timer) { mainloop_timer_del(controld_fencer_connect_timer); controld_fencer_connect_timer = NULL; } if (te_client_id) { free(te_client_id); te_client_id = NULL; } } } static gboolean do_stonith_history_sync(gpointer user_data) { if (stonith_api && (stonith_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; te_cleanup_stonith_history_sync(stonith_api, FALSE); stonith_api->cmds->history(stonith_api, st_opt_sync_call | st_opt_broadcast, NULL, &history, 5); stonith_history_free(history); return TRUE; } else { crm_info("Skip triggering stonith history-sync as stonith is disconnected"); return FALSE; } } static void tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) { char *uuid = NULL; int stonith_id = -1; int transition_id = -1; pcmk__graph_action_t *action = NULL; const char *target = NULL; if ((data == NULL) || (data->userdata == NULL)) { crm_err("Ignoring fence operation %d result: " "No transition key given (bug?)", ((data == NULL)? -1 : data->call_id)); return; } if (!AM_I_DC) { const char *reason = stonith__exit_reason(data); if (reason == NULL) { reason = pcmk_exec_status_str(stonith__execution_status(data)); } crm_notice("Result of fence operation %d: %d (%s) " QB_XS " key=%s", data->call_id, stonith__exit_status(data), reason, (const char *) data->userdata); return; } CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id, &stonith_id, NULL), goto bail); if (controld_globals.transition_graph->complete || (stonith_id < 0) || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none) || (controld_globals.transition_graph->id != transition_id)) { crm_info("Ignoring fence operation %d result: " "Not from current transition " QB_XS " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)", data->call_id, pcmk__btoa(controld_globals.transition_graph->complete), stonith_id, uuid, controld_globals.te_uuid, transition_id, controld_globals.transition_graph->id); goto bail; } action = controld_get_action(stonith_id); if (action == NULL) { crm_err("Ignoring fence operation %d result: " "Action %d not found in transition graph (bug?) " QB_XS " uuid=%s transition=%d", data->call_id, stonith_id, uuid, transition_id); goto bail; } target = pcmk__xe_get(action->xml, PCMK__META_ON_NODE); if (target == NULL) { crm_err("Ignoring fence operation %d result: No target given (bug?)", data->call_id); goto bail; } stop_te_timer(action); if (stonith__exit_status(data) == CRM_EX_OK) { const char *uuid = pcmk__xe_get(action->xml, PCMK__META_ON_NODE_UUID); const char *op = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); crm_info("Fence operation %d for %s succeeded", data->call_id, target); if (!(pcmk__is_set(action->flags, pcmk__graph_action_confirmed))) { te_action_confirmed(action, NULL); if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) { const char *value = NULL; char *now = pcmk__ttoa(time(NULL)); gboolean is_remote_node = FALSE; /* This check is not 100% reliable, since this node is not * guaranteed to have the remote node cached. However, it * doesn't have to be reliable, since the attribute manager can * learn a node's "remoteness" by other means sooner or later. * This allows it to learn more quickly if this node does have * the information. */ if (g_hash_table_lookup(pcmk__remote_peer_cache, uuid) != NULL) { is_remote_node = TRUE; } update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, is_remote_node); free(now); value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL); update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, is_remote_node); value = crm_meta_value(action->params, PCMK__META_DIGESTS_SECURE); update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, is_remote_node); } else if (!pcmk__is_set(action->flags, pcmk__graph_action_sent_update)) { update_node_state_after_fencing(target, uuid); pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); } } st_fail_count_reset(target); } else { enum pcmk__graph_next abort_action = pcmk__graph_restart; int status = stonith__execution_status(data); const char *reason = stonith__exit_reason(data); if (reason == NULL) { if (status == PCMK_EXEC_DONE) { reason = "Agent returned error"; } else { reason = pcmk_exec_status_str(status); } } pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); /* If no fence devices were available, there's no use in immediately * checking again, so don't start a new transition in that case. */ if (status == PCMK_EXEC_NO_FENCE_DEVICE) { crm_warn("Fence operation %d for %s failed: %s " "(aborting transition and giving up for now)", data->call_id, target, reason); abort_action = pcmk__graph_wait; } else { crm_notice("Fence operation %d for %s failed: %s " "(aborting transition)", data->call_id, target, reason); } /* Increment the fail count now, so abort_for_stonith_failure() can * check it. Non-DC nodes will increment it in * handle_fence_notification(). */ st_fail_count_increment(target); abort_for_stonith_failure(abort_action, target, NULL); } pcmk__update_graph(controld_globals.transition_graph, action); trigger_graph(); bail: free(data->userdata); free(uuid); return; } static int fence_with_delay(const char *target, const char *type, int delay) { uint32_t options = st_opt_none; // Group of enum stonith_call_options int timeout_sec = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout); if (crmd_join_phase_count(controld_join_confirmed) == 1) { stonith__set_call_options(options, target, st_opt_allow_self_fencing); } return stonith_api->cmds->fence_with_delay(stonith_api, options, target, type, timeout_sec, 0, delay); } /*! * \internal * \brief Execute a fencing action from a transition graph * * \param[in] graph Transition graph being executed (ignored) * \param[in] action Fencing action to execute * * \return Standard Pacemaker return code */ int controld_execute_fence_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { int rc = 0; const char *id = pcmk__xe_id(action->xml); const char *uuid = pcmk__xe_get(action->xml, PCMK__META_ON_NODE_UUID); const char *target = pcmk__xe_get(action->xml, PCMK__META_ON_NODE); const char *type = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); char *transition_key = NULL; const char *priority_delay = NULL; int delay_i = 0; gboolean invalid_action = FALSE; int stonith_timeout = pcmk__timeout_ms2s(controld_globals.transition_graph->stonith_timeout); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return EPROTO; } priority_delay = crm_meta_value(action->params, PCMK_OPT_PRIORITY_FENCING_DELAY); crm_notice("Requesting fencing (%s) targeting node %s " QB_XS " action=%s timeout=%i%s%s", type, target, id, stonith_timeout, priority_delay ? " priority_delay=" : "", priority_delay ? priority_delay : ""); /* Passing NULL means block until we can connect... */ controld_timer_fencer_connect(NULL); pcmk__scan_min_int(priority_delay, &delay_i, 0); rc = fence_with_delay(target, type, delay_i); transition_key = pcmk__transition_key(controld_globals.transition_graph->id, action->id, 0, controld_globals.te_uuid), stonith_api->cmds->register_callback(stonith_api, rc, (stonith_timeout + (delay_i > 0 ? delay_i : 0)), st_opt_timeout_updates, transition_key, "tengine_stonith_callback", tengine_stonith_callback); return pcmk_rc_ok; } bool controld_verify_stonith_watchdog_timeout(const char *value) { const char *our_nodename = controld_globals.cluster->priv->node_name; if ((stonith_api == NULL) || (stonith_api->state == stonith_disconnected) || !stonith__watchdog_fencing_enabled_for_node_api(stonith_api, our_nodename)) { // Anything is valid since it won't be used return true; } return pcmk__valid_stonith_watchdog_timeout(value); } /* end stonith API client functions */ /* * stonith history synchronization * * Each node's fencer keeps track of a cluster-wide fencing history. When a node * joins or leaves, we need to synchronize the history across all nodes. */ static crm_trigger_t *stonith_history_sync_trigger = NULL; static mainloop_timer_t *stonith_history_sync_timer_short = NULL; static mainloop_timer_t *stonith_history_sync_timer_long = NULL; void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) { if (free_timers) { mainloop_timer_del(stonith_history_sync_timer_short); stonith_history_sync_timer_short = NULL; mainloop_timer_del(stonith_history_sync_timer_long); stonith_history_sync_timer_long = NULL; } else { mainloop_timer_stop(stonith_history_sync_timer_short); mainloop_timer_stop(stonith_history_sync_timer_long); } if (st) { st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED); } } static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) { te_cleanup_stonith_history_sync(st, FALSE); crm_debug("Fence-history synced - cancel all timers"); } static gboolean stonith_history_sync_set_trigger(gpointer user_data) { mainloop_set_trigger(stonith_history_sync_trigger); return FALSE; } void te_trigger_stonith_history_sync(bool long_timeout) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic * * the long timeout of 30s is there as a fallback * so that after a successful connection to fenced * we will wait for 30s for the DC to trigger a * history-sync * if this doesn't happen we trigger a sync locally * (e.g. fenced segfaults and is restarted by pacemakerd) */ /* as we are finally checking the stonith-connection * in do_stonith_history_sync we should be fine * leaving stonith_history_sync_time & stonith_history_sync_trigger * around */ if (stonith_history_sync_trigger == NULL) { stonith_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_stonith_history_sync, NULL); } if (long_timeout) { if(stonith_history_sync_timer_long == NULL) { stonith_history_sync_timer_long = mainloop_timer_add("history_sync_long", 30000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); mainloop_timer_start(stonith_history_sync_timer_long); } else { if(stonith_history_sync_timer_short == NULL) { stonith_history_sync_timer_short = mainloop_timer_add("history_sync_short", 5000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); mainloop_timer_start(stonith_history_sync_timer_short); } } /* end stonith history synchronization functions */ diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index 043e6873fd..f3412fab9b 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -1,1098 +1,1098 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // PRIu32 #include // bool, true, false #include // NULL #include // free(), etc. #include // gboolean, etc. #include // xmlNode #include #include // PCMK_SCORE_INFINITY #include #include #include static char *max_generation_from = NULL; static xmlNodePtr max_generation_xml = NULL; /*! * \internal * \brief Nodes from which a CIB sync has failed since the peer joined * * This table is of the form (node_name -> join_id). \p node_name is * the name of a client node from which a CIB \p sync_from() call has failed in * \p do_dc_join_finalize() since the client joined the cluster as a peer. * \p join_id is the ID of the join round in which the \p sync_from() failed, * and is intended for use in nack log messages. */ static GHashTable *failed_sync_nodes = NULL; void finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); /* Numeric counter used to identify join rounds (an unsigned int would be * appropriate, except we get and set it in XML as int) */ static int current_join_id = 0; /*! * \internal * \brief Get log-friendly string equivalent of a controller group join phase * * \param[in] phase Join phase * * \return Log-friendly string equivalent of \p phase */ static const char * join_phase_text(enum controld_join_phase phase) { switch (phase) { case controld_join_nack: return "nack"; case controld_join_none: return "none"; case controld_join_welcomed: return "welcomed"; case controld_join_integrated: return "integrated"; case controld_join_finalized: return "finalized"; case controld_join_confirmed: return "confirmed"; default: return "invalid"; } } /*! * \internal * \brief Destroy the hash table containing failed sync nodes */ void controld_destroy_failed_sync_table(void) { if (failed_sync_nodes != NULL) { g_hash_table_destroy(failed_sync_nodes); failed_sync_nodes = NULL; } } /*! * \internal * \brief Remove a node from the failed sync nodes table if present * * \param[in] node_name Node name to remove */ void controld_remove_failed_sync_node(const char *node_name) { if (failed_sync_nodes != NULL) { g_hash_table_remove(failed_sync_nodes, (gchar *) node_name); } } /*! * \internal * \brief Add to a hash table a node whose CIB failed to sync * * \param[in] node_name Name of node whose CIB failed to sync * \param[in] join_id Join round when the failure occurred */ static void record_failed_sync_node(const char *node_name, gint join_id) { if (failed_sync_nodes == NULL) { failed_sync_nodes = pcmk__strikey_table(g_free, NULL); } /* If the node is already in the table then we failed to nack it during the * filter offer step */ CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name), GINT_TO_POINTER(join_id))); } /*! * \internal * \brief Look up a node name in the failed sync table * * \param[in] node_name Name of node to look up * \param[out] join_id Where to store the join ID of when the sync failed * * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the * node name was found, or \p pcmk_rc_node_unknown otherwise. * \note \p *join_id is set to -1 if the node is not found. */ static int lookup_failed_sync_node(const char *node_name, gint *join_id) { *join_id = -1; if (failed_sync_nodes != NULL) { gpointer result = g_hash_table_lookup(failed_sync_nodes, (gchar *) node_name); if (result != NULL) { *join_id = GPOINTER_TO_INT(result); return pcmk_rc_ok; } } return pcmk_rc_node_unknown; } void crm_update_peer_join(const char *source, pcmk__node_status_t *node, enum controld_join_phase phase) { enum controld_join_phase last = controld_get_join_phase(node); CRM_CHECK(node != NULL, return); /* Remote nodes do not participate in joins */ if (pcmk__is_set(node->flags, pcmk__node_status_remote)) { return; } if (phase == last) { crm_trace("Node %s join-%d phase is still %s " QB_XS " nodeid=%" PRIu32 " source=%s", node->name, current_join_id, join_phase_text(last), node->cluster_layer_id, source); return; } if ((phase <= controld_join_none) || (phase == (last + 1))) { struct controld_node_status_data *data = NULL; if (node->user_data == NULL) { node->user_data = pcmk__assert_alloc(1, sizeof(struct controld_node_status_data)); } data = node->user_data; data->join_phase = phase; crm_trace("Node %s join-%d phase is now %s (was %s) " QB_XS " nodeid=%" PRIu32 " source=%s", node->name, current_join_id, join_phase_text(phase), join_phase_text(last), node->cluster_layer_id, source); return; } crm_warn("Rejecting join-%d phase update for node %s because can't go from " "%s to %s " QB_XS " nodeid=%" PRIu32 " source=%s", current_join_id, node->name, join_phase_text(last), join_phase_text(phase), node->cluster_layer_id, source); } static void start_join_round(void) { GHashTableIter iter; pcmk__node_status_t *peer = NULL; crm_debug("Starting new join round join-%d", current_join_id); g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { crm_update_peer_join(__func__, peer, controld_join_none); } if (max_generation_from != NULL) { free(max_generation_from); max_generation_from = NULL; } if (max_generation_xml != NULL) { pcmk__xml_free(max_generation_xml); max_generation_xml = NULL; } controld_clear_fsa_input_flags(R_HAVE_CIB); } /*! * \internal * \brief Create a join message from the DC * * \param[in] join_op Join operation name * \param[in] host_to Recipient of message */ static xmlNode * create_dc_message(const char *join_op, const char *host_to) { xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_DC, host_to, CRM_SYSTEM_CRMD, join_op, NULL); /* Identify which election this is a part of */ pcmk__xe_set_int(msg, PCMK__XA_JOIN_ID, current_join_id); /* Add a field specifying whether the DC is shutting down. This keeps the * joining node from fencing the old DC if it becomes the new DC. */ pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING, pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN)); return msg; } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { /* @TODO We don't use user_data except to distinguish one particular call * from others. Make this clearer. */ xmlNode *offer = NULL; pcmk__node_status_t *member = (pcmk__node_status_t *) value; pcmk__assert(member != NULL); if (!pcmk__cluster_is_node_active(member)) { crm_info("Not making join-%d offer to inactive node %s", current_join_id, pcmk__s(member->name, "with unknown name")); if ((member->expected == NULL) && pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) { /* You would think this unsafe, but in fact this plus an * active resource is what causes it to be fenced. * * Yes, this does mean that any node that dies at the same * time as the old DC and is not running resource (still) * won't be fenced. * * I'm not happy about this either. */ pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN); } return; } if (member->name == NULL) { crm_info("Not making join-%d offer to node uuid %s with unknown name", current_join_id, member->xml_id); return; } if (controld_globals.membership_id != controld_globals.peer_seq) { controld_globals.membership_id = controld_globals.peer_seq; crm_info("Making join-%d offers based on membership event %llu", current_join_id, controld_globals.peer_seq); } if (user_data != NULL) { enum controld_join_phase phase = controld_get_join_phase(member); if (phase > controld_join_none) { crm_info("Not making join-%d offer to already known node %s (%s)", current_join_id, member->name, join_phase_text(phase)); return; } } crm_update_peer_join(__func__, (pcmk__node_status_t*) member, controld_join_none); offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name); // Advertise our feature set so the joining node can bail if not compatible pcmk__xe_set(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); crm_info("Sending join-%d offer to %s", current_join_id, member->name); pcmk__cluster_send_message(member, pcmk_ipc_controld, offer); pcmk__xml_free(offer); crm_update_peer_join(__func__, member, controld_join_welcomed); } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int count; /* Reset everyone's status back to down or in_ccm in the CIB. * Any nodes that are active in the CIB but not in the cluster membership * will be seen as offline by the scheduler anyway. */ current_join_id++; start_join_round(); update_dc(NULL); if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL); count = crmd_join_phase_count(controld_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { pcmk__node_status_t *member = NULL; ha_msg_input_t *welcome = NULL; int count; const char *join_to = NULL; if (msg_data->data == NULL) { crm_info("Making join-%d offers to any unconfirmed nodes " "because an unknown node joined", current_join_id); g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member); check_join_state(cur_state, __func__); return; } welcome = fsa_typed_data(fsa_dt_ha_msg); if (welcome == NULL) { // fsa_typed_data() already logged an error return; } join_to = pcmk__xe_get(welcome->msg, PCMK__XA_SRC); if (join_to == NULL) { crm_err("Can't make join-%d offer to unknown node", current_join_id); return; } member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member); /* It is possible that a node will have been sick or starting up when the * original offer was made. However, it will either re-announce itself in * due course, or we can re-store the original offer on the client. */ crm_update_peer_join(__func__, member, controld_join_none); join_make_offer(NULL, member, NULL); /* If the offer isn't to the local node, make an offer to the local node as * well, to ensure the correct value for max_generation_from. */ if (!controld_is_local_node(join_to)) { member = controld_get_local_node_status(); join_make_offer(NULL, member, NULL); } /* This was a genuine join request; cancel any existing transition and * invoke the scheduler. */ abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join", NULL); count = crmd_join_phase_count(controld_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } static int compare_int_fields(xmlNode * left, xmlNode * right, const char *field) { const char *elem_l = pcmk__xe_get(left, field); const char *elem_r = pcmk__xe_get(right, field); long long int_elem_l; long long int_elem_r; int rc = pcmk_rc_ok; rc = pcmk__scan_ll(elem_l, &int_elem_l, -1LL); if (rc != pcmk_rc_ok) { // Shouldn't be possible crm_warn("Comparing current CIB %s as -1 " "because '%s' is not an integer", field, elem_l); } rc = pcmk__scan_ll(elem_r, &int_elem_r, -1LL); if (rc != pcmk_rc_ok) { // Shouldn't be possible crm_warn("Comparing joining node's CIB %s as -1 " "because '%s' is not an integer", field, elem_r); } if (int_elem_l < int_elem_r) { return -1; } else if (int_elem_l > int_elem_r) { return 1; } return 0; } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; int count = 0; gint value = 0; gboolean ack_nack_bool = TRUE; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = pcmk__xe_get(join_ack->msg, PCMK__XA_SRC); const char *ref = pcmk__xe_get(join_ack->msg, PCMK_XA_REFERENCE); const char *join_version = pcmk__xe_get(join_ack->msg, PCMK_XA_CRM_FEATURE_SET); pcmk__node_status_t *join_node = NULL; if (join_from == NULL) { crm_err("Ignoring invalid join request without node name"); return; } join_node = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member); pcmk__xe_get_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id); if (join_id != current_join_id) { crm_debug("Ignoring join-%d request from %s because we are on join-%d", join_id, join_from, current_join_id); check_join_state(cur_state, __func__); return; } generation = join_ack->xml; if (max_generation_xml != NULL && generation != NULL) { int lpc = 0; const char *attributes[] = { PCMK_XA_ADMIN_EPOCH, PCMK_XA_EPOCH, PCMK_XA_NUM_UPDATES, }; /* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE * element from the join client. The "if" guard is for clarity. */ if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) { for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) { cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); } } else { // Should always be PCMK__XE_GENERATION_TUPLE CRM_LOG_ASSERT(false); } } if (ref == NULL) { ref = "none"; // for logging only } if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) { crm_err("Rejecting join-%d request from node %s because we failed to " "sync its CIB in join-%d " QB_XS " ref=%s", join_id, join_from, value, ref); ack_nack_bool = FALSE; } else if (!pcmk__cluster_is_node_active(join_node)) { if (match_down_event(join_from) != NULL) { /* The join request was received after the node was fenced or * otherwise shutdown in a way that we're aware of. No need to log * an error in this rare occurrence; we know the client was recently * shut down, and receiving a lingering in-flight request is not * cause for alarm. */ crm_debug("Rejecting join-%d request from inactive node %s " QB_XS " ref=%s", join_id, join_from, ref); } else { crm_err("Rejecting join-%d request from inactive node %s " QB_XS " ref=%s", join_id, join_from, ref); } ack_nack_bool = FALSE; } else if (generation == NULL) { crm_err("Rejecting invalid join-%d request from node %s " "missing CIB generation " QB_XS " ref=%s", join_id, join_from, ref); ack_nack_bool = FALSE; } else if ((join_version == NULL) || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { crm_err("Rejecting join-%d request from node %s because feature set %s" " is incompatible with ours (%s) " QB_XS " ref=%s", join_id, join_from, (join_version? join_version : "pre-3.1.0"), CRM_FEATURE_SET, ref); ack_nack_bool = FALSE; } else if (max_generation_xml == NULL) { const char *validation = pcmk__xe_get(generation, PCMK_XA_VALIDATE_WITH); if (pcmk__get_schema(validation) == NULL) { crm_err("Rejecting join-%d request from %s (with first CIB " "generation) due to %s schema version %s " QB_XS " ref=%s", join_id, join_from, ((validation == NULL)? "missing" : "unknown"), pcmk__s(validation, ""), ref); ack_nack_bool = FALSE; } else { crm_debug("Accepting join-%d request from %s (with first CIB " "generation) " QB_XS " ref=%s", join_id, join_from, ref); max_generation_xml = pcmk__xml_copy(NULL, generation); pcmk__str_update(&max_generation_from, join_from); } } else if ((cmp < 0) || ((cmp == 0) && controld_is_local_node(join_from))) { const char *validation = pcmk__xe_get(generation, PCMK_XA_VALIDATE_WITH); if (pcmk__get_schema(validation) == NULL) { crm_err("Rejecting join-%d request from %s (with better CIB " "generation than current best from %s) due to %s " "schema version %s " QB_XS " ref=%s", join_id, join_from, max_generation_from, ((validation == NULL)? "missing" : "unknown"), pcmk__s(validation, ""), ref); ack_nack_bool = FALSE; } else { crm_debug("Accepting join-%d request from %s (with better CIB " "generation than current best from %s) " QB_XS " ref=%s", join_id, join_from, max_generation_from, ref); crm_log_xml_debug(max_generation_xml, "Old max generation"); crm_log_xml_debug(generation, "New max generation"); pcmk__xml_free(max_generation_xml); max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml); pcmk__str_update(&max_generation_from, join_from); } } else { crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s", join_id, join_from, ref); } if (!ack_nack_bool) { crm_update_peer_join(__func__, join_node, controld_join_nack); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK); } else { crm_update_peer_join(__func__, join_node, controld_join_integrated); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); } count = crmd_join_phase_count(controld_join_integrated); crm_debug("%d node%s currently integrated in join-%d", count, pcmk__plural_s(count), join_id); if (check_join_state(cur_state, __func__) == FALSE) { // Don't waste time by invoking the scheduler yet count = crmd_join_phase_count(controld_join_welcomed); crm_debug("Waiting on join-%d requests from %d outstanding node%s", join_id, count, pcmk__plural_s(count)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { char *sync_from = NULL; int rc = pcmk_ok; int count_welcomed = crmd_join_phase_count(controld_join_welcomed); int count_finalizable = crmd_join_phase_count(controld_join_integrated) + crmd_join_phase_count(controld_join_nack); /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ if (count_welcomed != 0) { crm_debug("Waiting on join-%d requests from %d outstanding node%s " "before finalizing join", current_join_id, count_welcomed, pcmk__plural_s(count_welcomed)); crmd_join_phase_log(LOG_DEBUG); /* crmd_fsa_stall(FALSE); Needed? */ return; } else if (count_finalizable == 0) { crm_debug("Finalization not needed for join-%d at the current time", current_join_id); crmd_join_phase_log(LOG_DEBUG); check_join_state(controld_globals.fsa_state, __func__); return; } controld_clear_fsa_input_flags(R_HAVE_CIB); if ((max_generation_from == NULL) || controld_is_local_node(max_generation_from)) { controld_set_fsa_input_flags(R_HAVE_CIB); } if (!controld_globals.transition_graph->complete) { crm_warn("Delaying join-%d finalization while transition in progress", current_join_id); crmd_join_phase_log(LOG_DEBUG); crmd_fsa_stall(FALSE); return; } if (pcmk__is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { // Send our CIB out to everyone sync_from = pcmk__str_copy(controld_globals.cluster->priv->node_name); } else { // Ask for the agreed best CIB sync_from = pcmk__str_copy(max_generation_from); } crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB %s.%s.%s " "with schema %s and feature set %s from %s)", current_join_id, count_finalizable, pcmk__plural_s(count_finalizable), pcmk__xe_get(max_generation_xml, PCMK_XA_ADMIN_EPOCH), pcmk__xe_get(max_generation_xml, PCMK_XA_EPOCH), pcmk__xe_get(max_generation_xml, PCMK_XA_NUM_UPDATES), pcmk__xe_get(max_generation_xml, PCMK_XA_VALIDATE_WITH), pcmk__xe_get(max_generation_xml, PCMK_XA_CRM_FEATURE_SET), sync_from); crmd_join_phase_log(LOG_DEBUG); rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn, sync_from, NULL, cib_none); fsa_register_cib_callback(rc, sync_from, finalize_sync_callback); } void free_max_generation(void) { free(max_generation_from); max_generation_from = NULL; pcmk__xml_free(max_generation_xml); max_generation_xml = NULL; } void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { CRM_LOG_ASSERT(-EPERM != rc); if (rc != pcmk_ok) { const char *sync_from = (const char *) user_data; do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR), "Could not sync CIB from %s in join-%d: %s", sync_from, current_join_id, pcmk_strerror(rc)); if (rc != -pcmk_err_old_data) { record_failed_sync_node(sync_from, current_join_id); } /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __func__); } else if (!AM_I_DC) { crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) { crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN " "(%s)", current_join_id, fsa_state2string(controld_globals.fsa_state)); } else { controld_set_fsa_input_flags(R_HAVE_CIB); /* make sure dc_uuid is re-set to us */ if (!check_join_state(controld_globals.fsa_state, __func__)) { int count_finalizable = 0; count_finalizable = crmd_join_phase_count(controld_join_integrated) + crmd_join_phase_count(controld_join_nack); crm_debug("Notifying %d node%s of join-%d results", count_finalizable, pcmk__plural_s(count_finalizable), current_join_id); g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL); } } } static void join_node_state_commit_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *node = user_data; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; // for register_fsa_error() macro - crm_crit("join-%d node history update (via CIB call %d) for node %s " - "failed: %s", - current_join_id, call_id, node, pcmk_strerror(rc)); + pcmk__crit("join-%d node history update (via CIB call %d) for node %s " + "failed: %s", + current_join_id, call_id, node, pcmk_strerror(rc)); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } crm_debug("join-%d node history update (via CIB call %d) for node %s " "complete", current_join_id, call_id, node); check_join_state(controld_globals.fsa_state, __func__); } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int join_id = -1; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *op = pcmk__xe_get(join_ack->msg, PCMK__XA_CRM_TASK); char *join_from = pcmk__xe_get_copy(join_ack->msg, PCMK__XA_SRC); pcmk__node_status_t *peer = NULL; enum controld_join_phase phase = controld_join_none; enum controld_section_e section = controld_section_lrm; char *xpath = NULL; xmlNode *state = join_ack->xml; xmlNode *execd_state = NULL; cib_t *cib = controld_globals.cib_conn; int rc = pcmk_ok; // Sanity checks if (join_from == NULL) { crm_warn("Ignoring message received without node identification"); goto done; } if (op == NULL) { crm_warn("Ignoring message received from %s without task", join_from); goto done; } if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring '%s' message from %s while waiting for '%s'", op, join_from, CRM_OP_JOIN_CONFIRM); goto done; } if (pcmk__xe_get_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != pcmk_rc_ok) { crm_warn("Ignoring join confirmation from %s without valid join ID", join_from); goto done; } peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member); phase = controld_get_join_phase(peer); if (phase != controld_join_finalized) { crm_info("Ignoring out-of-sequence join-%d confirmation from %s " "(currently %s not %s)", join_id, join_from, join_phase_text(phase), join_phase_text(controld_join_finalized)); goto done; } if (join_id != current_join_id) { crm_err("Rejecting join-%d confirmation from %s " "because currently on join-%d", join_id, join_from, current_join_id); crm_update_peer_join(__func__, peer, controld_join_nack); goto done; } crm_update_peer_join(__func__, peer, controld_join_confirmed); /* Update CIB with node's current executor state. A new transition will be * triggered later, when the CIB manager notifies us of the change. * * The delete and modify requests are part of an atomic transaction. */ rc = cib->cmds->init_transaction(cib); if (rc != pcmk_ok) { goto done; } // Delete relevant parts of node's current executor state from CIB if (pcmk__is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { section = controld_section_lrm_unlocked; } controld_node_state_deletion_strings(join_from, section, &xpath, NULL); rc = cib->cmds->remove(cib, xpath, NULL, cib_xpath|cib_multiple|cib_transaction); if (rc != pcmk_ok) { goto done; } // Update CIB with node's latest known executor state if (controld_is_local_node(join_from)) { // Use the latest possible state if processing our own join ack execd_state = controld_query_executor_state(); if (execd_state != NULL) { crm_debug("Updating local node history for join-%d from query " "result", current_join_id); state = execd_state; } else { crm_warn("Updating local node history from join-%d confirmation " "because query failed", current_join_id); } } else { crm_debug("Updating node history for %s from join-%d confirmation", join_from, current_join_id); } rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state, cib_can_create|cib_transaction); pcmk__xml_free(execd_state); if (rc != pcmk_ok) { goto done; } // Commit the transaction rc = cib->cmds->end_transaction(cib, true, cib_none); fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback); if (rc > 0) { // join_from will be freed after callback join_from = NULL; rc = pcmk_ok; } done: if (rc != pcmk_ok) { - crm_crit("join-%d node history update for node %s failed: %s", - current_join_id, join_from, pcmk_strerror(rc)); + pcmk__crit("join-%d node history update for node %s failed: %s", + current_join_id, join_from, pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free(join_from); free(xpath); } void finalize_join_for(gpointer key, gpointer value, gpointer user_data) { xmlNode *acknak = NULL; xmlNode *tmp1 = NULL; pcmk__node_status_t *join_node = value; const char *join_to = join_node->name; enum controld_join_phase phase = controld_get_join_phase(join_node); bool integrated = false; switch (phase) { case controld_join_integrated: integrated = true; break; case controld_join_nack: break; default: crm_trace("Not updating non-integrated and non-nacked node %s (%s) " "for join-%d", join_to, join_phase_text(phase), current_join_id); return; } /* Update the element with the node's name and UUID, in case they * weren't known before */ crm_trace("Updating node name and UUID in CIB for %s", join_to); tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE); pcmk__xe_set(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node)); pcmk__xe_set(tmp1, PCMK_XA_UNAME, join_to); fsa_cib_anon_update(PCMK_XE_NODES, tmp1); pcmk__xml_free(tmp1); join_node = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member); if (!pcmk__cluster_is_node_active(join_node)) { /* * NACK'ing nodes that the membership layer doesn't know about yet * simply creates more churn * * Better to leave them waiting and let the join restart when * the new membership event comes in * * All other NACKs (due to versions etc) should still be processed */ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING); return; } // Acknowledge or nack node's join request crm_debug("%sing join-%d request from %s", integrated? "Acknowledg" : "Nack", current_join_id, join_to); acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated); if (integrated) { // No change needed for a nacked node crm_update_peer_join(__func__, join_node, controld_join_finalized); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); /* Iterate through the remote peer cache and add information on which * node hosts each to the ACK message. This keeps new controllers in * sync with what has already happened. */ if (pcmk__cluster_num_remote_nodes() > 0) { GHashTableIter iter; pcmk__node_status_t *node = NULL; xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES); g_hash_table_iter_init(&iter, pcmk__remote_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *remote = NULL; if (!node->conn_host) { continue; } remote = pcmk__xe_create(remotes, PCMK_XE_NODE); pcmk__xe_set_props(remote, PCMK_XA_ID, node->name, PCMK__XA_NODE_STATE, node->state, PCMK__XA_CONNECTION_HOST, node->conn_host, NULL); } } } pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak); pcmk__xml_free(acknak); return; } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { static unsigned long long highest_seq = 0; if (controld_globals.membership_id != controld_globals.peer_seq) { crm_debug("join-%d: Membership changed from %llu to %llu " QB_XS " highest=%llu state=%s for=%s", current_join_id, controld_globals.membership_id, controld_globals.peer_seq, highest_seq, fsa_state2string(cur_state), source); if (highest_seq < controld_globals.peer_seq) { /* Don't spam the FSA with duplicates */ highest_seq = controld_globals.peer_seq; register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } else if (cur_state == S_INTEGRATION) { if (crmd_join_phase_count(controld_join_welcomed) == 0) { int count = crmd_join_phase_count(controld_join_integrated); crm_debug("join-%d: Integration of %d peer%s complete " QB_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (!pcmk__is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { crm_debug("join-%d: Delaying finalization until we have CIB " QB_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); return TRUE; } else if (crmd_join_phase_count(controld_join_welcomed) != 0) { int count = crmd_join_phase_count(controld_join_welcomed); crm_debug("join-%d: Still waiting on %d welcomed node%s " QB_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(controld_join_integrated) != 0) { int count = crmd_join_phase_count(controld_join_integrated); crm_debug("join-%d: Still waiting on %d integrated node%s " QB_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(controld_join_finalized) != 0) { int count = crmd_join_phase_count(controld_join_finalized); crm_debug("join-%d: Still waiting on %d finalized node%s " QB_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else { crm_debug("join-%d: Complete " QB_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); return TRUE; } } return FALSE; } void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); crm_update_quorum(pcmk__cluster_has_quorum(), TRUE); } int crmd_join_phase_count(enum controld_join_phase phase) { int count = 0; pcmk__node_status_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { if (controld_get_join_phase(peer) == phase) { count++; } } return count; } void crmd_join_phase_log(int level) { pcmk__node_status_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->name, join_phase_text(controld_get_join_phase(peer))); } } diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c index 32a43d0188..bb0d7d0a9b 100644 --- a/daemons/controld/controld_messages.c +++ b/daemons/controld/controld_messages.c @@ -1,1384 +1,1385 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include // PRIx64 #include // uint64_t #include #include #include #include #include #include #include #include #include static enum crmd_fsa_input handle_message(xmlNode *msg, enum crmd_fsa_cause cause); static xmlNode* create_ping_reply(const xmlNode *msg); static void handle_response(xmlNode *stored_msg); static enum crmd_fsa_input handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause); static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg); static void send_msg_via_ipc(xmlNode * msg, const char *sys, const char *src); /* debug only, can wrap all it likes */ static int last_data_id = 0; void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if (controld_globals.fsa_actions != A_NOTHING) { register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL, I_NULL, cur_data ? cur_data->data : NULL, controld_globals.fsa_actions, TRUE, __func__); } /* reset the action list */ crm_info("Resetting the current action list"); fsa_dump_actions(controld_globals.fsa_actions, "Drop"); controld_globals.fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); } void register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, uint64_t with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(controld_globals.fsa_message_queue); fsa_data_t *fsa_data = NULL; if (raised_from == NULL) { raised_from = ""; } if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return; } if (input == I_WAIT_FOR_EVENT) { controld_set_global_flags(controld_fsa_is_stalled); crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d", raised_from, fsa_cause2string(cause), data, old_len); if (old_len > 0) { fsa_dump_queue(LOG_TRACE); prepend = FALSE; } if (data == NULL) { controld_set_fsa_action_flags(with_actions); fsa_dump_actions(with_actions, "Restored"); return; } /* Store everything in the new event and reset * controld_globals.fsa_actions */ with_actions |= controld_globals.fsa_actions; controld_globals.fsa_actions = A_NOTHING; } last_data_id++; crm_trace("%s %s FSA input %d (%s) due to %s, %s data", raised_from, (prepend? "prepended" : "appended"), last_data_id, fsa_input2string(input), fsa_cause2string(cause), (data? "with" : "without")); fsa_data = pcmk__assert_alloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if (with_actions != A_NOTHING) { crm_trace("Adding actions %.16" PRIx64 " to input", with_actions); } if (data != NULL) { switch (cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); crm_trace("Copying %s data from %s as cluster message data", fsa_cause2string(cause), raised_from); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_trace("Copying %s data from %s as lrmd_event_data_t", fsa_cause2string(cause), raised_from); fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); fsa_data->data_type = fsa_dt_lrm; break; case C_TIMER_POPPED: case C_SHUTDOWN: case C_UNKNOWN: case C_STARTUP: - crm_crit("Copying %s data (from %s) is not yet implemented", - fsa_cause2string(cause), raised_from); + pcmk__crit("Copying %s data (from %s) is not yet implemented", + fsa_cause2string(cause), raised_from); crmd_exit(CRM_EX_SOFTWARE); break; } } /* make sure to free it properly later */ if (prepend) { controld_globals.fsa_message_queue = g_list_prepend(controld_globals.fsa_message_queue, fsa_data); } else { controld_globals.fsa_message_queue = g_list_append(controld_globals.fsa_message_queue, fsa_data); } crm_trace("FSA message queue length is %d", g_list_length(controld_globals.fsa_message_queue)); /* fsa_dump_queue(LOG_TRACE); */ if (old_len == g_list_length(controld_globals.fsa_message_queue)) { crm_err("Couldn't add message to the queue"); } if (input != I_WAIT_FOR_EVENT) { controld_trigger_fsa(); } } void fsa_dump_queue(int log_level) { int offset = 0; for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; iter = iter->next) { fsa_data_t *data = (fsa_data_t *) iter->data; do_crm_log_unlikely(log_level, "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)", offset++, data->id, fsa_input2string(data->fsa_input), data->origin, data->data, data->data_type, fsa_cause2string(data->fsa_cause)); } } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { xmlNode *wrapper = NULL; ha_msg_input_t *copy = pcmk__assert_alloc(1, sizeof(ha_msg_input_t)); copy->msg = (orig != NULL)? pcmk__xml_copy(NULL, orig->msg) : NULL; wrapper = pcmk__xe_first_child(copy->msg, PCMK__XE_CRM_XML, NULL, NULL); copy->xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); return copy; } void delete_fsa_input(fsa_data_t * fsa_data) { lrmd_event_data_t *op = NULL; xmlNode *foo = NULL; if (fsa_data == NULL) { return; } crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if (fsa_data->data != NULL) { switch (fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; pcmk__xml_free(foo); break; case fsa_dt_lrm: op = (lrmd_event_data_t *) fsa_data->data; lrmd_free_event(op); break; case fsa_dt_none: if (fsa_data->data != NULL) { crm_err("Don't know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); crmd_exit(CRM_EX_SOFTWARE); } break; } crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t *message = (fsa_data_t *) controld_globals.fsa_message_queue->data; controld_globals.fsa_message_queue = g_list_remove(controld_globals.fsa_message_queue, message); crm_trace("Processing input %d", message->id); return message; } void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { - crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s", - caller, fsa_data->data_type, a_type, fsa_data->origin); + pcmk__crit("%s: Message data was the wrong type! %d vs. requested=%d. " + "Origin: %s", + caller, fsa_data->data_type, a_type, fsa_data->origin); pcmk__assert(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode * input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if (relay_message(input, cause == C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input, cause); /* done or process later? */ switch (result) { case I_NULL: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Defering local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if (result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode * msg, gboolean originated_locally) { enum pcmk_ipc_server dest = pcmk_ipc_unknown; bool is_for_dc = false; bool is_for_dcib = false; bool is_for_te = false; bool is_for_crm = false; bool is_for_cib = false; bool is_local = false; bool broadcast = false; const char *host_to = NULL; const char *sys_to = NULL; const char *sys_from = NULL; const char *type = NULL; const char *task = NULL; const char *ref = NULL; pcmk__node_status_t *node_to = NULL; CRM_CHECK(msg != NULL, return TRUE); host_to = pcmk__xe_get(msg, PCMK__XA_CRM_HOST_TO); sys_to = pcmk__xe_get(msg, PCMK__XA_CRM_SYS_TO); sys_from = pcmk__xe_get(msg, PCMK__XA_CRM_SYS_FROM); type = pcmk__xe_get(msg, PCMK__XA_T); task = pcmk__xe_get(msg, PCMK__XA_CRM_TASK); ref = pcmk__xe_get(msg, PCMK_XA_REFERENCE); broadcast = pcmk__str_empty(host_to); if (ref == NULL) { ref = "without reference ID"; } if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) { crm_trace("Received hello %s from %s (no processing needed)", ref, pcmk__s(sys_from, "unidentified source")); crm_log_xml_trace(msg, "hello"); return TRUE; } // Require message type (set by pcmk__new_request()) if (!pcmk__str_eq(type, PCMK__VALUE_CRMD, pcmk__str_none)) { crm_warn("Ignoring invalid message %s with type '%s' " "(not '" PCMK__VALUE_CRMD "')", ref, pcmk__s(type, "")); crm_log_xml_trace(msg, "ignored"); return TRUE; } // Require a destination subsystem (also set by pcmk__new_request()) if (sys_to == NULL) { crm_warn("Ignoring invalid message %s with no " PCMK__XA_CRM_SYS_TO, ref); crm_log_xml_trace(msg, "ignored"); return TRUE; } // Get the message type appropriate to the destination subsystem if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { dest = pcmk__parse_server(sys_to); if (dest == pcmk_ipc_unknown) { /* Unrecognized value, use a sane default * * @TODO Maybe we should bail instead */ dest = pcmk_ipc_controld; } } is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); // Check whether message should be processed locally is_local = false; if (broadcast) { if (is_for_dc || is_for_te) { is_local = false; } else if (is_for_crm) { if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO, PCMK__CONTROLD_CMD_NODES, NULL)) { /* Node info requests do not specify a host, which is normally * treated as "all hosts", because the whole point is that the * client may not know the local node name. Always handle these * requests locally. */ is_local = true; } else { is_local = !originated_locally; } } else { is_local = true; } } else if (controld_is_local_node(host_to)) { is_local = true; } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); const char *mode = pcmk__xe_get(msg_data, PCMK__XA_MODE); if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) { // Local delete of an offline node's resource history is_local = true; } } // If is for DC and DC is not yet selected if (is_for_dc && pcmk__str_eq(task, CRM_OP_PING, pcmk__str_casei) && (controld_globals.dc_name == NULL)) { xmlNode *reply = create_ping_reply(msg); sys_to = pcmk__xe_get(reply, PCMK__XA_CRM_SYS_TO); // Explicitly leave src empty. It indicates that dc is "not yet selected" send_msg_via_ipc(reply, sys_to, NULL); pcmk__xml_free(reply); return TRUE; } // Check whether message should be relayed if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC) { if (is_for_te) { crm_trace("Route message %s locally as transition request", ref); crm_log_xml_trace(msg, sys_to); send_msg_via_ipc(msg, sys_to, controld_globals.cluster->priv->node_name); return TRUE; // No further processing of message is needed } crm_trace("Route message %s locally as DC request", ref); return FALSE; // More to be done by caller } if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { crm_trace("Relay message %s to DC (via %s)", ref, pcmk__s(host_to, "broadcast")); crm_log_xml_trace(msg, "relayed"); if (!broadcast) { node_to = pcmk__get_node(0, host_to, NULL, pcmk__node_search_cluster_member); } pcmk__cluster_send_message(node_to, dest, msg); return TRUE; } /* Transition engine and scheduler messages are sent only to the DC on * the same node. If we are no longer the DC, discard this message. */ crm_trace("Ignoring message %s because we are no longer DC", ref); crm_log_xml_trace(msg, "ignored"); return TRUE; // No further processing of message is needed } if (is_local) { if (is_for_crm || is_for_cib) { crm_trace("Route message %s locally as controller request", ref); return FALSE; // More to be done by caller } crm_trace("Relay message %s locally to %s", ref, sys_to); crm_log_xml_trace(msg, "IPC-relay"); send_msg_via_ipc(msg, sys_to, controld_globals.cluster->priv->node_name); return TRUE; } if (!broadcast) { node_to = pcmk__search_node_caches(0, host_to, NULL, pcmk__node_search_cluster_member); if (node_to == NULL) { crm_warn("Ignoring message %s because node %s is unknown", ref, host_to); crm_log_xml_trace(msg, "ignored"); return TRUE; } } crm_trace("Relay message %s to %s", ref, pcmk__s(host_to, "all peers")); crm_log_xml_trace(msg, "relayed"); pcmk__cluster_send_message(node_to, dest, msg); return TRUE; } // Return true if field contains a positive integer static bool authorize_version(xmlNode *message_data, const char *field, const char *client_name, const char *ref, const char *uuid) { const char *version = pcmk__xe_get(message_data, field); long long version_num; if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok) || (version_num < 0LL)) { crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s " QB_XS " ref=%s uuid=%s", client_name, ((version == NULL)? "" : version), field, (ref? ref : "none"), uuid); return false; } return true; } /*! * \internal * \brief Check whether a client IPC message is acceptable * * If a given client IPC message is a hello, "authorize" it by ensuring it has * valid information such as a protocol version, and return false indicating * that nothing further needs to be done with the message. If the message is not * a hello, just return true to indicate it needs further processing. * * \param[in] client_msg XML of IPC message * \param[in,out] curr_client If IPC is not proxied, client that sent message * \param[in] proxy_session If IPC is proxied, the session ID * * \return true if message needs further processing, false if it doesn't */ bool controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client, const char *proxy_session) { xmlNode *wrapper = NULL; xmlNode *message_data = NULL; const char *client_name = NULL; const char *op = pcmk__xe_get(client_msg, PCMK__XA_CRM_TASK); const char *ref = pcmk__xe_get(client_msg, PCMK_XA_REFERENCE); const char *uuid = (curr_client? curr_client->id : proxy_session); if (uuid == NULL) { crm_warn("IPC message from client rejected: No client identifier " QB_XS " ref=%s", (ref? ref : "none")); goto rejected; } if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) { // Only hello messages need to be authorized return true; } wrapper = pcmk__xe_first_child(client_msg, PCMK__XE_CRM_XML, NULL, NULL); message_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); client_name = pcmk__xe_get(message_data, PCMK__XA_CLIENT_NAME); if (pcmk__str_empty(client_name)) { crm_warn("IPC hello from client rejected: No client name", QB_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid); goto rejected; } if (!authorize_version(message_data, PCMK__XA_MAJOR_VERSION, client_name, ref, uuid)) { goto rejected; } if (!authorize_version(message_data, PCMK__XA_MINOR_VERSION, client_name, ref, uuid)) { goto rejected; } crm_trace("Validated IPC hello from client %s", client_name); crm_log_xml_trace(client_msg, "hello"); if (curr_client) { curr_client->userdata = pcmk__str_copy(client_name); } controld_trigger_fsa(); return false; rejected: crm_log_xml_trace(client_msg, "rejected"); if (curr_client) { qb_ipcs_disconnect(curr_client->ipcs); } return false; } static enum crmd_fsa_input handle_message(xmlNode *msg, enum crmd_fsa_cause cause) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = pcmk__xe_get(msg, PCMK__XA_SUBT); if (pcmk__str_eq(type, PCMK__VALUE_REQUEST, pcmk__str_none)) { return handle_request(msg, cause); } if (pcmk__str_eq(type, PCMK__VALUE_RESPONSE, pcmk__str_none)) { handle_response(msg); return I_NULL; } crm_warn("Ignoring message with unknown " PCMK__XA_SUBT" '%s'", pcmk__s(type, "")); crm_log_xml_trace(msg, "bad"); return I_NULL; } static enum crmd_fsa_input handle_failcount_op(xmlNode * stored_msg) { const char *rsc = NULL; const char *uname = NULL; const char *op = NULL; char *interval_spec = NULL; guint interval_ms = 0; gboolean is_remote_node = FALSE; xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *xml_op = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); if (xml_op) { xmlNode *xml_rsc = pcmk__xe_first_child(xml_op, PCMK_XE_PRIMITIVE, NULL, NULL); xmlNode *xml_attrs = pcmk__xe_first_child(xml_op, PCMK__XE_ATTRIBUTES, NULL, NULL); if (xml_rsc) { rsc = pcmk__xe_id(xml_rsc); } if (xml_attrs) { op = pcmk__xe_get(xml_attrs, CRM_META "_" PCMK__META_CLEAR_FAILURE_OP); pcmk__xe_get_guint(xml_attrs, CRM_META "_" PCMK__META_CLEAR_FAILURE_INTERVAL, &interval_ms); } } uname = pcmk__xe_get(xml_op, PCMK__META_ON_NODE); if ((rsc == NULL) || (uname == NULL)) { crm_log_xml_warn(stored_msg, "invalid failcount op"); return I_NULL; } if (pcmk__xe_get(xml_op, PCMK__XA_ROUTER_NODE)) { is_remote_node = TRUE; } crm_debug("Clearing failures for %s-interval %s on %s " "from attribute manager, CIB, and executor state", pcmk__readable_interval(interval_ms), rsc, uname); if (interval_ms) { interval_spec = pcmk__assert_asprintf("%ums", interval_ms); } update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node); free(interval_spec); controld_cib_delete_last_failure(rsc, uname, op, interval_ms); lrm_clear_last_failure(rsc, uname, op, interval_ms); return I_NULL; } static enum crmd_fsa_input handle_lrm_delete(xmlNode *stored_msg) { const char *mode = NULL; xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); CRM_CHECK(msg_data != NULL, return I_NULL); /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to * relay the operation to the affected node, which will unregister the * resource from the local executor, clear the resource's history from the * CIB, and do some bookkeeping in the controller. * * However, if the affected node is offline, the client will specify * mode=PCMK__VALUE_CIB which means the controller receiving the operation * should clear the resource's history from the CIB and nothing else. This * is used to clear shutdown locks. */ mode = pcmk__xe_get(msg_data, PCMK__XA_MODE); if (!pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) { // Relay to affected node pcmk__xe_set(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else { // Delete CIB history locally (compare with do_lrm_delete()) const char *from_sys = NULL; const char *user_name = NULL; const char *rsc_id = NULL; const char *node = NULL; xmlNode *rsc_xml = NULL; int rc = pcmk_rc_ok; rsc_xml = pcmk__xe_first_child(msg_data, PCMK_XE_PRIMITIVE, NULL, NULL); CRM_CHECK(rsc_xml != NULL, return I_NULL); rsc_id = pcmk__xe_id(rsc_xml); from_sys = pcmk__xe_get(stored_msg, PCMK__XA_CRM_SYS_FROM); node = pcmk__xe_get(msg_data, PCMK__META_ON_NODE); user_name = pcmk__update_acl_user(stored_msg, PCMK__XA_CRM_USER, NULL); crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s " "(clearing CIB resource history only)", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : "")); rc = controld_delete_resource_history(rsc_id, node, user_name, cib_dryrun|cib_sync_call); if (rc == pcmk_rc_ok) { rc = controld_delete_resource_history(rsc_id, node, user_name, crmd_cib_smart_opt()); } /* Notify client. Also notify tengine if mode=PCMK__VALUE_CIB and * op=CRM_OP_LRM_DELETE. */ if (from_sys) { lrmd_event_data_t *op = NULL; const char *from_host = pcmk__xe_get(stored_msg, PCMK__XA_SRC); const char *transition; if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) { transition = pcmk__xe_get(msg_data, PCMK__XA_TRANSITION_KEY); } else { transition = pcmk__xe_get(stored_msg, PCMK__XA_TRANSITION_KEY); } crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "local node"), rsc_id, ((rc == pcmk_rc_ok)? "" : " not")); op = lrmd_new_event(rsc_id, PCMK_ACTION_DELETE, 0); op->type = lrmd_event_exec_complete; op->user_data = pcmk__str_copy(pcmk__s(transition, FAKE_TE_ID)); op->params = pcmk__strkey_table(free, free); pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); controld_rc2event(op, rc); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } return I_NULL; } } /*! * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_remote_state(const xmlNode *msg) { const char *conn_host = NULL; const char *remote_uname = pcmk__xe_id(msg); pcmk__node_status_t *remote_peer; bool remote_is_up = false; int rc = pcmk_rc_ok; rc = pcmk__xe_get_bool_attr(msg, PCMK__XA_IN_CCM, &remote_is_up); CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL); remote_peer = pcmk__cluster_lookup_remote_node(remote_uname); CRM_CHECK(remote_peer, return I_NULL); pcmk__update_peer_state(__func__, remote_peer, remote_is_up ? PCMK_VALUE_MEMBER : PCMK__VALUE_LOST, 0); conn_host = pcmk__xe_get(msg, PCMK__XA_CONNECTION_HOST); if (conn_host) { pcmk__str_update(&remote_peer->conn_host, conn_host); } else if (remote_peer->conn_host) { free(remote_peer->conn_host); remote_peer->conn_host = NULL; } return I_NULL; } /*! * \brief Handle a CRM_OP_PING message * * \param[in] msg Message XML * * \return Next FSA input */ static xmlNode* create_ping_reply(const xmlNode *msg) { const char *value = NULL; xmlNode *ping = NULL; xmlNode *reply = NULL; // Build reply ping = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE); value = pcmk__xe_get(msg, PCMK__XA_CRM_SYS_TO); pcmk__xe_set(ping, PCMK__XA_CRM_SUBSYSTEM, value); // Add controller state value = fsa_state2string(controld_globals.fsa_state); pcmk__xe_set(ping, PCMK__XA_CRMD_STATE, value); crm_notice("Current ping state: %s", value); // CTS needs this // Add controller health // @TODO maybe do some checks to determine meaningful status pcmk__xe_set(ping, PCMK_XA_RESULT, "ok"); reply = pcmk__new_reply(msg, ping); pcmk__xml_free(ping); return reply; } static enum crmd_fsa_input handle_ping(const xmlNode *msg) { xmlNode *reply = create_ping_reply(msg); if (reply != NULL) { (void) relay_message(reply, TRUE); pcmk__xml_free(reply); } // Nothing further to do return I_NULL; } /*! * \brief Handle a PCMK__CONTROLD_CMD_NODES message * * \param[in] request Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_node_list(const xmlNode *request) { GHashTableIter iter; pcmk__node_status_t *node = NULL; xmlNode *reply = NULL; xmlNode *reply_data = NULL; // Create message data for reply reply_data = pcmk__xe_create(NULL, PCMK_XE_NODES); g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { xmlNode *xml = pcmk__xe_create(reply_data, PCMK_XE_NODE); pcmk__xe_set_ll(xml, PCMK_XA_ID, (long long) node->cluster_layer_id); // uint32_t pcmk__xe_set(xml, PCMK_XA_UNAME, node->name); pcmk__xe_set(xml, PCMK__XA_IN_CCM, node->state); } // Create and send reply reply = pcmk__new_reply(request, reply_data); pcmk__xml_free(reply_data); if (reply) { (void) relay_message(reply, TRUE); pcmk__xml_free(reply); } // Nothing further to do return I_NULL; } /*! * \brief Handle a CRM_OP_NODE_INFO request * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_node_info_request(const xmlNode *msg) { const char *value = NULL; pcmk__node_status_t *node = NULL; int node_id = 0; xmlNode *reply = NULL; xmlNode *reply_data = NULL; // Build reply reply_data = pcmk__xe_create(NULL, PCMK_XE_NODE); pcmk__xe_set(reply_data, PCMK__XA_CRM_SUBSYSTEM, CRM_SYSTEM_CRMD); // Add whether current partition has quorum pcmk__xe_set_bool_attr(reply_data, PCMK_XA_HAVE_QUORUM, pcmk__is_set(controld_globals.flags, controld_has_quorum)); /* Check whether client requested node info by ID and/or name * * @TODO A Corosync-layer node ID is of type uint32_t. We should be able to * handle legitimate node IDs greater than INT_MAX, but currently we do not. */ pcmk__xe_get_int(msg, PCMK_XA_ID, &node_id); if (node_id < 0) { node_id = 0; } value = pcmk__xe_get(msg, PCMK_XA_UNAME); // Default to local node if none given if ((node_id == 0) && (value == NULL)) { value = controld_globals.cluster->priv->node_name; } node = pcmk__search_node_caches(node_id, value, NULL, pcmk__node_search_any); if (node) { pcmk__xe_set(reply_data, PCMK_XA_ID, node->xml_id); pcmk__xe_set(reply_data, PCMK_XA_UNAME, node->name); pcmk__xe_set(reply_data, PCMK_XA_CRMD, node->state); pcmk__xe_set_bool_attr(reply_data, PCMK_XA_REMOTE_NODE, pcmk__is_set(node->flags, pcmk__node_status_remote)); } // Send reply reply = pcmk__new_reply(msg, reply_data); pcmk__xml_free(reply_data); if (reply != NULL) { (void) relay_message(reply, TRUE); pcmk__xml_free(reply); } // Nothing further to do return I_NULL; } static void verify_feature_set(xmlNode *msg) { const char *dc_version = pcmk__xe_get(msg, PCMK_XA_CRM_FEATURE_SET); if (dc_version == NULL) { /* All we really know is that the DC feature set is older than 3.1.0, * but that's also all that really matters. */ dc_version = "3.0.14"; } if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) { crm_trace("Local feature set (%s) is compatible with DC's (%s)", CRM_FEATURE_SET, dc_version); } else { crm_err("Local feature set (%s) is incompatible with DC's (%s)", CRM_FEATURE_SET, dc_version); // Nothing is likely to improve without administrator involvement controld_set_fsa_input_flags(R_STAYDOWN); crmd_exit(CRM_EX_FATAL); } } // DC gets own shutdown all-clear static enum crmd_fsa_input handle_shutdown_self_ack(xmlNode *stored_msg) { const char *host_from = pcmk__xe_get(stored_msg, PCMK__XA_SRC); if (pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { // The expected case -- we initiated own shutdown sequence crm_info("Shutting down controller"); return I_STOP; } if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) { // Must be logic error -- DC confirming its own unrequested shutdown crm_err("Shutting down controller immediately due to " "unexpected shutdown confirmation"); return I_TERMINATE; } if (controld_globals.fsa_state != S_STOPPING) { // Shouldn't happen -- non-DC confirming unrequested shutdown crm_err("Starting new DC election because %s is " "confirming shutdown we did not request", (host_from? host_from : "another node")); return I_ELECTION; } // Shouldn't happen, but we are already stopping anyway crm_debug("Ignoring unexpected shutdown confirmation from %s", (host_from? host_from : "another node")); return I_NULL; } // Non-DC gets shutdown all-clear from DC static enum crmd_fsa_input handle_shutdown_ack(xmlNode *stored_msg) { const char *host_from = pcmk__xe_get(stored_msg, PCMK__XA_SRC); if (host_from == NULL) { crm_warn("Ignoring shutdown request without origin specified"); return I_NULL; } if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_null_matches|pcmk__str_casei)) { if (pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting down controller after confirmation from %s", host_from); } else { crm_err("Shutting down controller after unexpected " "shutdown request from %s", host_from); controld_set_fsa_input_flags(R_STAYDOWN); } return I_STOP; } crm_warn("Ignoring shutdown request from %s because DC is %s", host_from, controld_globals.dc_name); return I_NULL; } static enum crmd_fsa_input handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) { xmlNode *msg = NULL; const char *op = pcmk__xe_get(stored_msg, PCMK__XA_CRM_TASK); /* Optimize this for the DC - it has the most to do */ crm_log_xml_trace(stored_msg, "request"); if (op == NULL) { crm_warn("Ignoring request without " PCMK__XA_CRM_TASK); return I_NULL; } if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { const char *from = pcmk__xe_get(stored_msg, PCMK__XA_SRC); pcmk__node_status_t *node = pcmk__search_node_caches(0, from, NULL, pcmk__node_search_cluster_member); pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); if(AM_I_DC == FALSE) { return I_NULL; /* Done */ } } /*========== DC-Only Actions ==========*/ if (AM_I_DC) { if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { return handle_shutdown_self_ack(stored_msg); } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { // Another controller wants to shut down its node return handle_shutdown_request(stored_msg); } } /*========== common actions ==========*/ if (strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __func__); } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) { /* a remote connection host is letting us know the node state */ return handle_remote_state(stored_msg); } else if (strcmp(op, CRM_OP_THROTTLE) == 0) { throttle_update(stored_msg); if (AM_I_DC && (controld_globals.transition_graph != NULL) && !controld_globals.transition_graph->complete) { crm_debug("The throttle changed. Trigger a graph."); trigger_graph(); } return I_NULL; } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { return handle_failcount_op(stored_msg); } else if (strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __func__); /* Sometimes we _must_ go into S_ELECTION */ if (controld_globals.fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { verify_feature_set(stored_msg); crm_debug("Raising I_JOIN_OFFER: join-%s", pcmk__xe_get(stored_msg, PCMK__XA_JOIN_ID)); return I_JOIN_OFFER; } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", pcmk__xe_get(stored_msg, PCMK__XA_JOIN_ID)); return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { return handle_lrm_delete(stored_msg); } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) || (strcmp(op, CRM_OP_REPROBE) == 0)) { pcmk__xe_set(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if (strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if (strcmp(op, CRM_OP_PING) == 0) { return handle_ping(stored_msg); } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) { return handle_node_info_request(stored_msg); } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { int id = 0; const char *name = NULL; pcmk__xe_get_int(stored_msg, PCMK_XA_ID, &id); name = pcmk__xe_get(stored_msg, PCMK_XA_UNAME); if(cause == C_IPC_MESSAGE) { msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL, CRM_SYSTEM_CRMD, CRM_OP_RM_NODE_CACHE, NULL); if (!pcmk__cluster_send_message(NULL, pcmk_ipc_controld, msg)) { crm_err("Could not instruct peers to remove references to node %s/%u", name, id); } else { crm_notice("Instructing peers to remove references to node %s/%u", name, id); } pcmk__xml_free(msg); } else { pcmk__cluster_forget_cluster_node(id, name); /* If we're forgetting this node, also forget any failures to fence * it, so we don't carry that over to any node added later with the * same name. */ st_fail_count_reset(name); } } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) { xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); remote_ra_process_maintenance_nodes(xml); } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) { return handle_node_list(stored_msg); /*========== (NOT_DC)-Only Actions ==========*/ } else if (!AM_I_DC) { if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { return handle_shutdown_ack(stored_msg); } } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); } return I_NULL; } static void handle_response(xmlNode *stored_msg) { const char *op = pcmk__xe_get(stored_msg, PCMK__XA_CRM_TASK); crm_log_xml_trace(stored_msg, "reply"); if (op == NULL) { crm_warn("Ignoring reply without " PCMK__XA_CRM_TASK); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { // Check whether scheduler answer been superseded by subsequent request const char *msg_ref = pcmk__xe_get(stored_msg, PCMK_XA_REFERENCE); if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, pcmk__str_none)) { ha_msg_input_t fsa_input; controld_stop_sched_timer(); fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if (strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = pcmk__xe_get(stored_msg, PCMK__XA_SRC); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC ? "DC" : "controller"); } } static enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/procedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; const char *host_from = pcmk__xe_get(stored_msg, PCMK__XA_SRC); if (host_from == NULL) { /* we're shutting down and the DC */ host_from = controld_globals.cluster->priv->node_name; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(controld_globals.fsa_state)); crm_log_xml_trace(stored_msg, "message"); now_s = pcmk__ttoa(time(NULL)); update_attrd(host_from, PCMK__NODE_ATTR_SHUTDOWN, now_s, NULL, FALSE); free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } static void send_msg_via_ipc(xmlNode * msg, const char *sys, const char *src) { pcmk__client_t *client_channel = NULL; CRM_CHECK(sys != NULL, return); client_channel = pcmk__find_client_by_id(sys); if (pcmk__xe_get(msg, PCMK__XA_SRC) == NULL) { pcmk__xe_set(msg, PCMK__XA_SRC, src); } if (client_channel != NULL) { /* Transient clients such as crmadmin */ pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event); } else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); process_te_message(msg, data); } else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; xmlNode *wrapper = NULL; fsa_input.msg = msg; wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); fsa_input.xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __func__; fsa_data.data_type = fsa_dt_ha_msg; do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state, I_MESSAGE, &fsa_data); } else if (crmd_is_proxy_session(sys)) { crmd_proxy_send(sys, msg); } else { crm_info("Received invalid request: unknown subsystem '%s'", sys); } } void delete_ha_msg_input(ha_msg_input_t * orig) { if (orig == NULL) { return; } pcmk__xml_free(orig->msg); free(orig); } /*! * \internal * \brief Notify the cluster of a remote node state change * * \param[in] node_name Node's name * \param[in] node_up true if node is up, false if down */ void broadcast_remote_state_message(const char *node_name, bool node_up) { xmlNode *msg = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL, CRM_SYSTEM_CRMD, CRM_OP_REMOTE_STATE, NULL); crm_info("Notifying cluster of Pacemaker Remote node %s %s", node_name, node_up? "coming up" : "going down"); pcmk__xe_set(msg, PCMK_XA_ID, node_name); pcmk__xe_set_bool_attr(msg, PCMK__XA_IN_CCM, node_up); if (node_up) { pcmk__xe_set(msg, PCMK__XA_CONNECTION_HOST, controld_globals.cluster->priv->node_name); } pcmk__cluster_send_message(NULL, pcmk_ipc_controld, msg); pcmk__xml_free(msg); } diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c index 76279ab2ca..1d6c9667a6 100644 --- a/daemons/controld/controld_schedulerd.c +++ b/daemons/controld/controld_schedulerd.c @@ -1,521 +1,522 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* pid_t, sleep, ssize_t */ #include #include #include #include #include #include #include #include // xmlXPathObject, etc. #include static void handle_disconnect(void); static pcmk_ipc_api_t *schedulerd_api = NULL; /*! * \internal * \brief Close any scheduler connection and free associated memory */ void controld_shutdown_schedulerd_ipc(void) { controld_clear_fsa_input_flags(R_PE_REQUIRED); pcmk_disconnect_ipc(schedulerd_api); handle_disconnect(); pcmk_free_ipc_api(schedulerd_api); schedulerd_api = NULL; } /*! * \internal * \brief Save CIB query result to file, raising FSA error * * \param[in] msg Ignored * \param[in] call_id Call ID of CIB query * \param[in] rc Return code of CIB query * \param[in] output Result of CIB query * \param[in] user_data Unique identifier for filename * * \note This is intended to be called after a scheduler connection fails. */ static void save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { char *filename = pcmk__assert_asprintf(PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2", id); if (pcmk__xml_write_file(output, filename, true) != pcmk_rc_ok) { crm_err("Could not save Cluster Information Base to %s after scheduler crash", filename); } else { crm_notice("Saved Cluster Information Base to %s after scheduler crash", filename); } free(filename); } } /*! * \internal * \brief Respond to scheduler connection failure */ static void handle_disconnect(void) { // If we aren't connected to the scheduler, we can't expect a reply controld_expect_sched_reply(NULL); if (pcmk__is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) { int rc = pcmk_ok; char *uuid_str = pcmk__generate_uuid(); - crm_crit("Lost connection to the scheduler " - QB_XS " CIB will be saved to " PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2", - uuid_str); + pcmk__crit("Lost connection to the scheduler " + QB_XS " CIB will be saved to " + PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2", + uuid_str); /* * The scheduler died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn, NULL, NULL, cib_none); fsa_register_cib_callback(rc, uuid_str, save_cib_contents); } controld_clear_fsa_input_flags(R_PE_CONNECTED); controld_trigger_fsa(); return; } static void handle_reply(pcmk_schedulerd_api_reply_t *reply) { const char *msg_ref = NULL; if (!AM_I_DC) { return; } msg_ref = reply->data.graph.reference; if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC); } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, pcmk__str_none)) { ha_msg_input_t fsa_input; xmlNode *crm_data_node; controld_stop_sched_timer(); /* do_te_invoke (which will eventually process the fsa_input we are constructing * here) requires that fsa_input.xml be non-NULL. That will only happen if * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function * gave us the values we need, we just need to put them into XML. * * The name of the top level element here is irrelevant. Nothing checks it. */ fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply"); pcmk__xe_set(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref); pcmk__xe_set(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN, reply->data.graph.input); crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML); pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph); register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); pcmk__xml_free(fsa_input.msg); } else { crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref); } } static void scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { pcmk_schedulerd_api_reply_t *reply = event_data; switch (event_type) { case pcmk_ipc_event_disconnect: handle_disconnect(); break; case pcmk_ipc_event_reply: handle_reply(reply); break; default: break; } } static bool new_schedulerd_ipc_connection(void) { int rc; controld_set_fsa_input_flags(R_PE_REQUIRED); if (schedulerd_api == NULL) { rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd); if (rc != pcmk_rc_ok) { crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); return false; } } pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL); rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3); if (rc != pcmk_rc_ok) { crm_err("Error connecting to %s: %s", pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc)); return false; } controld_set_fsa_input_flags(R_PE_CONNECTED); return true; } static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); /* A_PE_START, A_PE_STOP, O_PE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (pcmk__is_set(action, A_PE_STOP)) { controld_clear_fsa_input_flags(R_PE_REQUIRED); pcmk_disconnect_ipc(schedulerd_api); handle_disconnect(); } if (pcmk__is_set(action, A_PE_START) && !pcmk__is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { if (cur_state == S_STOPPING) { crm_info("Ignoring request to connect to scheduler while shutting down"); } else if (!new_schedulerd_ipc_connection()) { crm_warn("Could not connect to scheduler"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } static int fsa_pe_query = 0; static mainloop_timer_t *controld_sched_timer = NULL; // @TODO Make this a configurable cluster option if there's demand for it #define SCHED_TIMEOUT_MS (120000) /*! * \internal * \brief Handle a timeout waiting for scheduler reply * * \param[in] user_data Ignored * * \return FALSE (indicating that timer should not be restarted) */ static gboolean controld_sched_timeout(gpointer user_data) { if (AM_I_DC) { /* If this node is the DC but can't communicate with the scheduler, just * exit (and likely get fenced) so this node doesn't interfere with any * further DC elections. * * @TODO We could try something less drastic first, like disconnecting * and reconnecting to the scheduler, but something is likely going * seriously wrong, so perhaps it's better to just fail as quickly as * possible. */ crmd_exit(CRM_EX_FATAL); } return FALSE; } void controld_stop_sched_timer(void) { if ((controld_sched_timer != NULL) && (controld_globals.fsa_pe_ref != NULL)) { crm_trace("Stopping timer for scheduler reply %s", controld_globals.fsa_pe_ref); } mainloop_timer_stop(controld_sched_timer); } /*! * \internal * \brief Set the scheduler request currently being waited on * * \param[in] ref Request to expect reply to (or NULL for none) * * \note This function takes ownership of \p ref. */ void controld_expect_sched_reply(char *ref) { if (ref) { if (controld_sched_timer == NULL) { controld_sched_timer = mainloop_timer_add("scheduler_reply_timer", SCHED_TIMEOUT_MS, FALSE, controld_sched_timeout, NULL); } mainloop_timer_start(controld_sched_timer); } else { controld_stop_sched_timer(); } free(controld_globals.fsa_pe_ref); controld_globals.fsa_pe_ref = ref; } /*! * \internal * \brief Free the scheduler reply timer */ void controld_free_sched_timer(void) { if (controld_sched_timer != NULL) { mainloop_timer_del(controld_sched_timer); controld_sched_timer = NULL; } } /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { cib_t *cib_conn = controld_globals.cib_conn; if (AM_I_DC == FALSE) { crm_err("Not invoking scheduler because not DC: %s", fsa_action2string(action)); return; } if (!pcmk__is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { if (pcmk__is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the scheduler"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the scheduler to connect"); crmd_fsa_stall(FALSE); controld_set_fsa_action_flags(A_PE_START); controld_trigger_fsa(); } return; } if (cur_state != S_POLICY_ENGINE) { crm_notice("Not invoking scheduler because in state %s", fsa_state2string(cur_state)); return; } if (!pcmk__is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_none); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(controld_globals.fsa_state)); controld_expect_sched_reply(NULL); fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback); } static void force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) { int max = 0; int lpc = 0; const char *xpath_base = NULL; char *xpath_string = NULL; xmlXPathObject *xpathObj = NULL; xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG); if (xpath_base == NULL) { crm_err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)"); return; } xpath_string = pcmk__assert_asprintf("%s//%s//nvpair[@name='%s']", xpath_base, PCMK_XE_CLUSTER_PROPERTY_SET, attr_name); xpathObj = pcmk__xpath_search(xml->doc, xpath_string); max = pcmk__xpath_num_results(xpathObj); free(xpath_string); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = pcmk__xpath_result(xpathObj, lpc); if (match == NULL) { continue; } crm_trace("Forcing %s/%s = %s", pcmk__xe_id(match), attr_name, attr_value); pcmk__xe_set(match, PCMK_XA_VALUE, attr_value); } if(max == 0) { xmlNode *configuration = NULL; xmlNode *crm_config = NULL; xmlNode *cluster_property_set = NULL; crm_trace("Creating %s-%s for %s=%s", PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name, attr_name, attr_value); configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL, NULL); if (configuration == NULL) { configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION); } crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG, NULL, NULL); if (crm_config == NULL) { crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG); } cluster_property_set = pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET, NULL, NULL); if (cluster_property_set == NULL) { cluster_property_set = pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET); pcmk__xe_set(cluster_property_set, PCMK_XA_ID, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS); } xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR); pcmk__xe_set_id(xml, "%s-%s", PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name); pcmk__xe_set(xml, PCMK_XA_NAME, attr_name); pcmk__xe_set(xml, PCMK_XA_VALUE, attr_value); } xmlXPathFreeObject(xpathObj); } static void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *ref = NULL; pid_t watchdog = pcmk__locate_sbd(); if (rc != pcmk_ok) { crm_err("Could not retrieve the Cluster Information Base: %s " QB_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (!AM_I_DC || !pcmk__is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { crm_debug("No need to invoke the scheduler anymore"); return; } else if (controld_globals.fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding scheduler request in state: %s", fsa_state2string(controld_globals.fsa_state)); return; /* this callback counts as 1 */ } else if (num_cib_op_callbacks() > 1) { crm_debug("Re-asking for the CIB: %d other peer updates still pending", (num_cib_op_callbacks() - 1)); sleep(1); controld_set_fsa_action_flags(A_PE_INVOKE); controld_trigger_fsa(); return; } CRM_LOG_ASSERT(output != NULL); /* Refresh the remote node cache and the known node cache when the * scheduler is invoked */ pcmk__refresh_node_caches_from_cib(output); pcmk__xe_set(output, PCMK_XA_DC_UUID, controld_globals.our_uuid); pcmk__xe_set_bool_attr(output, PCMK_XA_HAVE_QUORUM, pcmk__is_set(controld_globals.flags, controld_has_quorum)); force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog)); if (pcmk__is_set(controld_globals.flags, controld_ever_had_quorum) && !pcmk__cluster_has_quorum()) { pcmk__xe_set_int(output, PCMK_XA_NO_QUORUM_PANIC, 1); } rc = pcmk_schedulerd_api_graph(schedulerd_api, output, &ref); if (rc != pcmk_rc_ok) { free(ref); crm_err("Could not contact the scheduler: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); } else { pcmk__assert(ref != NULL); controld_expect_sched_reply(ref); crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, " "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref, controld_globals.peer_seq, pcmk__flag_text(controld_globals.flags, controld_has_quorum)); } } diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c index 1891d4d1d6..73ede8db09 100644 --- a/daemons/controld/pacemaker-controld.c +++ b/daemons/controld/pacemaker-controld.c @@ -1,207 +1,207 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for coordinating a Pacemaker cluster's response " \ "to events" controld_globals_t controld_globals = { // Automatic initialization to 0, false, or NULL is fine for most members .fsa_state = S_STARTING, .fsa_actions = A_NOTHING, }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_cluster_options() or * crm_attribute --list-options=cluster instead of querying daemon metadata. * * NOTE: pcs (as of at least 0.11.8) uses this */ static int controld_metadata(pcmk__output_t *out) { return pcmk__daemon_metadata(out, PCMK__SERVER_CONTROLD, "Pacemaker controller options", "Cluster options used by Pacemaker's " "controller", pcmk__opt_controld); } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { return pcmk__build_arg_context(args, "text (default), xml", group, NULL); } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; bool initialize = true; enum crmd_fsa_state state; crm_ipc_t *old_instance = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); initialize = false; goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { initialize = false; rc = controld_metadata(out); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } pcmk__cli_init_logging(PCMK__SERVER_CONTROLD, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker controller"); old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0); if (old_instance == NULL) { /* crm_ipc_new will have already printed an error message with crm_err. */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); - crm_crit("Aborting start-up because another controller instance is " - "already active"); + pcmk__crit("Aborting start-up because another controller instance is " + "already active"); initialize = false; goto done; } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } if (pcmk__daemon_can_write(PCMK_SCHEDULER_INPUT_DIR, NULL) == FALSE) { exit_code = CRM_EX_FATAL; crm_err("Terminating due to bad permissions on " PCMK_SCHEDULER_INPUT_DIR); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Bad permissions on " PCMK_SCHEDULER_INPUT_DIR " (see logs for details)"); goto done; } else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) { exit_code = CRM_EX_FATAL; crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Bad permissions on " CRM_CONFIG_DIR " (see logs for details)"); goto done; } if (pcmk__log_output_new(&(controld_globals.logger_out)) != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; goto done; } pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE); done: g_strfreev(processed_args); pcmk__free_arg_context(context); // We no longer need output pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); // Exit on error or command-line queries if ((exit_code != CRM_EX_OK) || !initialize) { crm_exit(exit_code); } // Initialize FSA register_fsa_input(C_STARTUP, I_STARTUP, NULL); pcmk__cluster_init_node_caches(); state = s_crmd_fsa(C_STARTUP); if ((state != S_PENDING) && (state != S_STARTING)) { crm_err("Controller startup failed " QB_XS " FSA state %s", crm_system_name, fsa_state2string(state)); crmd_fast_exit(CRM_EX_ERROR); // Does not return } // Run mainloop controld_globals.mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(controld_globals.mainloop); if (pcmk__is_set(controld_globals.fsa_input_register, R_STAYDOWN)) { crm_info("Inhibiting automated respawn"); exit_code = CRM_EX_FATAL; } crmd_fast_exit(exit_code); } diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c index ed1a134f36..a948995317 100644 --- a/daemons/execd/cts-exec-helper.c +++ b/daemons/execd/cts-exec-helper.c @@ -1,627 +1,627 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include // crm_meta_name() #include #include #include #include #include #define SUMMARY "cts-exec-helper - inject commands into the Pacemaker executor and watch for events" static int exec_call_id = 0; static gboolean start_test(gpointer user_data); static void try_connect(void); static char *key = NULL; static char *val = NULL; static struct { int verbose; int quiet; guint interval_ms; int timeout; int start_delay; int cancel_call_id; gboolean no_wait; gboolean is_running; gboolean no_connect; int exec_call_opts; const char *api_call; const char *rsc_id; const char *provider; const char *class; const char *type; const char *action; const char *listen; gboolean use_tls; lrmd_key_value_t *params; } options; static gboolean interval_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { return pcmk_parse_interval_spec(optarg, &options.interval_ms) == pcmk_rc_ok; } static gboolean notify_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "--notify-orig", "-n", NULL)) { options.exec_call_opts = lrmd_opt_notify_orig_only; } else if (pcmk__str_any_of(option_name, "--notify-changes", "-o", NULL)) { options.exec_call_opts = lrmd_opt_notify_changes_only; } return TRUE; } static gboolean param_key_val_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "--param-key", "-k", NULL)) { pcmk__str_update(&key, optarg); } else if (pcmk__str_any_of(option_name, "--param-val", "-v", NULL)) { pcmk__str_update(&val, optarg); } if (key != NULL && val != NULL) { options.params = lrmd_key_value_add(options.params, key, val); pcmk__str_update(&key, NULL); pcmk__str_update(&val, NULL); } return TRUE; } static GOptionEntry basic_entries[] = { { "api-call", 'c', 0, G_OPTION_ARG_STRING, &options.api_call, "Directly relates to executor API functions", NULL }, { "is-running", 'R', 0, G_OPTION_ARG_NONE, &options.is_running, "Determine if a resource is registered and running", NULL }, { "listen", 'l', 0, G_OPTION_ARG_STRING, &options.listen, "Listen for a specific event string", NULL }, { "no-wait", 'w', 0, G_OPTION_ARG_NONE, &options.no_wait, "Make api call and do not wait for result", NULL }, { "notify-changes", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb, "Only notify client changes to recurring operations", NULL }, { "notify-orig", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb, "Only notify this client of the results of an API action", NULL }, { "tls", 'S', 0, G_OPTION_ARG_NONE, &options.use_tls, "Use TLS backend for local connection", NULL }, { NULL } }; static GOptionEntry api_call_entries[] = { { "action", 'a', 0, G_OPTION_ARG_STRING, &options.action, NULL, NULL }, { "cancel-call-id", 'x', 0, G_OPTION_ARG_INT, &options.cancel_call_id, NULL, NULL }, { "class", 'C', 0, G_OPTION_ARG_STRING, &options.class, NULL, NULL }, { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, interval_cb, NULL, NULL }, { "param-key", 'k', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb, NULL, NULL }, { "param-val", 'v', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb, NULL, NULL }, { "provider", 'P', 0, G_OPTION_ARG_STRING, &options.provider, NULL, NULL }, { "rsc-id", 'r', 0, G_OPTION_ARG_STRING, &options.rsc_id, NULL, NULL }, { "start-delay", 's', 0, G_OPTION_ARG_INT, &options.start_delay, NULL, NULL }, { "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout, NULL, NULL }, { "type", 'T', 0, G_OPTION_ARG_STRING, &options.type, NULL, NULL }, { NULL } }; static GMainLoop *mainloop = NULL; static lrmd_t *lrmd_conn = NULL; static char event_buf_v0[1024]; static crm_exit_t test_exit(crm_exit_t exit_code) { lrmd_api_delete(lrmd_conn); return crm_exit(exit_code); } #define print_result(fmt, args...) \ if (!options.quiet) { \ printf(fmt "\n", ##args); \ } #define report_event(event) \ snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \ lrmd_event_type2str(event->type), \ event->rsc_id, \ event->op_type ? event->op_type : "none", \ crm_exit_str((crm_exit_t) event->rc), \ pcmk_exec_status_str(event->op_status)); \ crm_info("%s", event_buf_v0); static void test_shutdown(int nsig) { lrmd_api_delete(lrmd_conn); lrmd_conn = NULL; } static void read_events(lrmd_event_data_t * event) { report_event(event); if (options.listen) { if (pcmk__str_eq(options.listen, event_buf_v0, pcmk__str_casei)) { print_result("LISTEN EVENT SUCCESSFUL"); test_exit(CRM_EX_OK); } } if (exec_call_id && (event->call_id == exec_call_id)) { if (event->op_status == 0 && event->rc == 0) { print_result("API-CALL SUCCESSFUL for 'exec'"); } else { print_result("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s", event->rc, pcmk_exec_status_str(event->op_status)); test_exit(CRM_EX_ERROR); } if (!options.listen) { test_exit(CRM_EX_OK); } } } static gboolean timeout_err(gpointer data) { print_result("LISTEN EVENT FAILURE - timeout occurred, never found"); test_exit(CRM_EX_TIMEOUT); return FALSE; } static void connection_events(lrmd_event_data_t * event) { int rc = event->connection_rc; if (event->type != lrmd_event_connect) { /* ignore */ return; } if (!rc) { crm_info("Executor client connection established"); start_test(NULL); return; } else { sleep(1); try_connect(); crm_notice("Executor client connection failed"); } } static void try_connect(void) { int tries = 10; static int num_tries = 0; int rc = 0; lrmd_conn->cmds->set_callback(lrmd_conn, connection_events); for (; num_tries < tries; num_tries++) { rc = lrmd_conn->cmds->connect_async(lrmd_conn, crm_system_name, 3000); if (!rc) { return; /* we'll hear back in async callback */ } sleep(1); } print_result("API CONNECTION FAILURE"); test_exit(CRM_EX_ERROR); } static gboolean start_test(gpointer user_data) { int rc = 0; if (!options.no_connect) { if (!lrmd_conn->cmds->is_connected(lrmd_conn)) { try_connect(); /* async connect -- this function will get called back into */ return 0; } } lrmd_conn->cmds->set_callback(lrmd_conn, read_events); if (options.timeout) { pcmk__create_timer(options.timeout, timeout_err, NULL); } if (!options.api_call) { return 0; } if (pcmk__str_eq(options.api_call, "exec", pcmk__str_casei)) { rc = lrmd_conn->cmds->exec(lrmd_conn, options.rsc_id, options.action, NULL, options.interval_ms, options.timeout, options.start_delay, options.exec_call_opts, options.params); if (rc > 0) { exec_call_id = rc; print_result("API-CALL 'exec' action pending, waiting on response"); } } else if (pcmk__str_eq(options.api_call, "register_rsc", pcmk__str_casei)) { rc = lrmd_conn->cmds->register_rsc(lrmd_conn, options.rsc_id, options.class, options.provider, options.type, 0); } else if (pcmk__str_eq(options.api_call, "get_rsc_info", pcmk__str_casei)) { lrmd_rsc_info_t *rsc_info; rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0); if (rsc_info) { print_result("RSC_INFO: id:%s class:%s provider:%s type:%s", rsc_info->id, rsc_info->standard, (rsc_info->provider? rsc_info->provider : ""), rsc_info->type); lrmd_free_rsc_info(rsc_info); rc = pcmk_ok; } else { rc = -1; } } else if (pcmk__str_eq(options.api_call, "unregister_rsc", pcmk__str_casei)) { rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0); } else if (pcmk__str_eq(options.api_call, "cancel", pcmk__str_casei)) { rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action, options.interval_ms); } else if (pcmk__str_eq(options.api_call, "metadata", pcmk__str_casei)) { char *output = NULL; rc = lrmd_conn->cmds->get_metadata(lrmd_conn, options.class, options.provider, options.type, &output, 0); if (rc == pcmk_ok) { print_result("%s", output); free(output); } } else if (pcmk__str_eq(options.api_call, "list_agents", pcmk__str_casei)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider); if (rc > 0) { print_result("%d agents found", rc); for (iter = list; iter != NULL; iter = iter->next) { print_result("%s", iter->val); } lrmd_list_freeall(list); rc = 0; } else { print_result("API_CALL FAILURE - no agents found"); rc = -1; } } else if (pcmk__str_eq(options.api_call, "list_ocf_providers", pcmk__str_casei)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list); if (rc > 0) { print_result("%d providers found", rc); for (iter = list; iter != NULL; iter = iter->next) { print_result("%s", iter->val); } lrmd_list_freeall(list); rc = 0; } else { print_result("API_CALL FAILURE - no providers found"); rc = -1; } } else if (pcmk__str_eq(options.api_call, "list_standards", pcmk__str_casei)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); if (rc > 0) { print_result("%d standards found", rc); for (iter = list; iter != NULL; iter = iter->next) { print_result("%s", iter->val); } lrmd_list_freeall(list); rc = 0; } else { print_result("API_CALL FAILURE - no providers found"); rc = -1; } } else if (pcmk__str_eq(options.api_call, "get_recurring_ops", pcmk__str_casei)) { GList *op_list = NULL; GList *op_item = NULL; rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0, &op_list); for (op_item = op_list; op_item != NULL; op_item = op_item->next) { lrmd_op_info_t *op_info = op_item->data; print_result("RECURRING_OP: %s_%s_%s timeout=%sms", op_info->rsc_id, op_info->action, op_info->interval_ms_s, op_info->timeout_ms_s); lrmd_free_op_info(op_info); } g_list_free(op_list); } else if (options.api_call) { print_result("API-CALL FAILURE unknown action '%s'", options.action); test_exit(CRM_EX_ERROR); } if (rc < 0) { print_result("API-CALL FAILURE for '%s' api_rc:%d", options.api_call, rc); test_exit(CRM_EX_ERROR); } if (options.api_call && rc == pcmk_ok) { print_result("API-CALL SUCCESSFUL for '%s'", options.api_call); if (!options.listen) { test_exit(CRM_EX_OK); } } if (options.no_wait) { /* just make the call and exit regardless of anything else. */ test_exit(CRM_EX_OK); } return 0; } /*! * \internal * \brief Generate resource parameters from CIB if none explicitly given * * \return Standard Pacemaker return code */ static int generate_params(void) { int rc = pcmk_rc_ok; pcmk_scheduler_t *scheduler = NULL; xmlNode *cib_xml_copy = NULL; pcmk_resource_t *rsc = NULL; GHashTable *params = NULL; GHashTable *meta = NULL; GHashTableIter iter; char *key = NULL; char *value = NULL; if (options.params != NULL) { return pcmk_rc_ok; // User specified parameters explicitly } // Retrieve and update CIB rc = cib__signon_query(NULL, NULL, &cib_xml_copy); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk__update_configured_schema(&cib_xml_copy, false); if (rc != pcmk_rc_ok) { return rc; } // Calculate cluster status scheduler = pcmk_new_scheduler(); if (scheduler == NULL) { - crm_crit("Could not allocate scheduler data"); + pcmk__crit("Could not allocate scheduler data"); return ENOMEM; } pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_counts); scheduler->input = cib_xml_copy; scheduler->priv->now = crm_time_new(NULL); cluster_status(scheduler); // Find resource in CIB rsc = pe_find_resource_with_flags(scheduler->priv->resources, options.rsc_id, pcmk_rsc_match_history |pcmk_rsc_match_basename); if (rsc == NULL) { crm_err("Resource does not exist in config"); pcmk_free_scheduler(scheduler); return EINVAL; } // Add resource instance parameters to options.params params = pe_rsc_params(rsc, NULL, scheduler); if (params != NULL) { g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { options.params = lrmd_key_value_add(options.params, key, value); } } // Add resource meta-attributes to options.params meta = pcmk__strkey_table(free, free); get_meta_attributes(meta, rsc, NULL, scheduler); g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { char *crm_name = crm_meta_name(key); options.params = lrmd_key_value_add(options.params, crm_name, value); free(crm_name); } g_hash_table_destroy(meta); pcmk_free_scheduler(scheduler); return rc; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, NULL, group, NULL); pcmk__add_main_args(context, basic_entries); pcmk__add_arg_group(context, "api-call", "API Call Options:", "Parameters for api-call option", api_call_entries); return context; } int main(int argc, char **argv) { GError *error = NULL; crm_exit_t exit_code = CRM_EX_OK; crm_trigger_t *trig = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); /* Typically we'd pass all the single character options that take an argument * as the second parameter here (and there's a bunch of those in this tool). * However, we control how this program is called so we can just not call it * in a way where the preprocessing ever matters. */ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, NULL); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } /* We have to use crm_log_init here to set up the logging because there's * different handling for daemons vs. command line programs, and * pcmk__cli_init_logging is set up to only handle the latter. */ crm_log_init(NULL, LOG_INFO, TRUE, (args->verbosity? TRUE : FALSE), argc, argv, FALSE); for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!options.listen && pcmk__strcase_any_of(options.api_call, "metadata", "list_agents", "list_standards", "list_ocf_providers", NULL)) { options.no_connect = TRUE; } if (options.is_running) { int rc = pcmk_rc_ok; if (options.rsc_id == NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "--is-running requires --rsc-id"); goto done; } options.interval_ms = 0; if (options.timeout == 0) { options.timeout = 30000; } rc = generate_params(); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Can not determine resource status: " "unable to get parameters from CIB"); goto done; } options.api_call = "exec"; options.action = PCMK_ACTION_MONITOR; options.exec_call_opts = lrmd_opt_notify_orig_only; } if (!options.api_call && !options.listen) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must specify at least one of --api-call, --listen, " "or --is-running"); goto done; } if (options.use_tls) { lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0); } else { lrmd_conn = lrmd_api_new(); } trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL); mainloop_set_trigger(trig); mainloop_add_signal(SIGTERM, test_shutdown); crm_info("Starting"); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); free(key); free(val); pcmk__output_and_clear_error(&error, NULL); return test_exit(exit_code); } diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index b355cd805d..090f2785b3 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -1,575 +1,575 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" #ifdef PCMK__COMPILE_REMOTE # define EXECD_TYPE "remote" # define EXECD_NAME PCMK__SERVER_REMOTED # define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes" #else # define EXECD_TYPE "local" # define EXECD_NAME PCMK__SERVER_EXECD # define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes" #endif static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static stonith_t *stonith_api = NULL; int lrmd_call_id = 0; time_t start_time; static struct { gchar **log_files; #ifdef PCMK__COMPILE_REMOTE gchar *port; #endif // PCMK__COMPILE_REMOTE } options; #ifdef PCMK__COMPILE_REMOTE /* whether shutdown request has been sent */ static gboolean shutting_down = FALSE; #endif static void exit_executor(void); static void stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) { stonith_api->state = stonith_disconnected; stonith_connection_failed(); } stonith_t * get_stonith_connection(void) { if (stonith_api && stonith_api->state == stonith_disconnected) { stonith_api_delete(stonith_api); stonith_api = NULL; } if (stonith_api == NULL) { int rc = pcmk_ok; stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return NULL; } rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 10 attempts: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); stonith_api_delete(stonith_api); stonith_api = NULL; } else { stonith_api_operations_t *cmds = stonith_api->cmds; cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_DISCONNECT, stonith_connection_destroy_cb); } } return stonith_api; } static int32_t lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } static void lrmd_ipc_created(qb_ipcs_connection_t * c) { pcmk__client_t *new_client = pcmk__find_client(c); crm_trace("Connection %p", c); pcmk__assert(new_client != NULL); /* Now that the connection is offically established, alert * the other clients a new connection exists. */ notify_of_new_client(new_client); } static int32_t lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *request = pcmk__client_data2xml(client, data, &id, &flags); CRM_CHECK(client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); return FALSE); CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); return FALSE); if (!request) { return 0; } /* @TODO functionize some of this to reduce duplication with * lrmd_remote_client_msg() */ if (!client->name) { const char *value = pcmk__xe_get(request, PCMK__XA_LRMD_CLIENTNAME); if (value == NULL) { client->name = pcmk__itoa(pcmk__client_pid(c)); } else { client->name = pcmk__str_copy(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } pcmk__xe_set(request, PCMK__XA_LRMD_CLIENTID, client->id); pcmk__xe_set(request, PCMK__XA_LRMD_CLIENTNAME, client->name); pcmk__xe_set_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); pcmk__xml_free(request); return 0; } /*! * \internal * \brief Free a client connection, and exit if appropriate * * \param[in,out] client Client connection to free */ void lrmd_client_destroy(pcmk__client_t *client) { pcmk__free_client(client); #ifdef PCMK__COMPILE_REMOTE /* If we were waiting to shut down, we can now safely do so * if there are no more proxied IPC providers */ if (shutting_down && (ipc_proxy_get_provider() == NULL)) { exit_executor(); } #endif } static int32_t lrmd_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); client_disconnect_cleanup(client->id); #ifdef PCMK__COMPILE_REMOTE ipc_proxy_remove_provider(client); #endif lrmd_client_destroy(client); return 0; } static void lrmd_ipc_destroy(qb_ipcs_connection_t * c) { lrmd_ipc_closed(c); crm_trace("Connection %p", c); } static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { .connection_accept = lrmd_ipc_accept, .connection_created = lrmd_ipc_created, .msg_process = lrmd_ipc_dispatch, .connection_closed = lrmd_ipc_closed, .connection_destroyed = lrmd_ipc_destroy }; // \return Standard Pacemaker return code int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply) { crm_trace("Sending reply (%d) to client (%s)", id, client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: return pcmk__ipc_send_xml(client, id, reply, FALSE); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: return lrmd__remote_send_xml(client->remote, reply, id, "reply"); #endif default: crm_err("Could not send reply: unknown type for client %s " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } // \return Standard Pacemaker return code int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg) { crm_trace("Sending notification to client (%s)", client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: if (client->ipcs == NULL) { crm_trace("Could not notify local client: disconnected"); return ENOTCONN; } return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: if (client->remote == NULL) { crm_trace("Could not notify remote client: disconnected"); return ENOTCONN; } else { return lrmd__remote_send_xml(client->remote, msg, 0, "notify"); } #endif default: crm_err("Could not notify client %s with unknown transport " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } /*! * \internal * \brief Clean up and exit immediately */ static void exit_executor(void) { const guint nclients = pcmk__ipc_client_count(); crm_info("Terminating with %d client%s", nclients, pcmk__plural_s(nclients)); stonith_api_delete(stonith_api); if (ipcs) { mainloop_del_ipc_server(ipcs); } #ifdef PCMK__COMPILE_REMOTE execd_stop_tls_server(); ipc_proxy_cleanup(); #endif pcmk__client_cleanup(); if (mainloop) { lrmd_drain_alerts(mainloop); } g_hash_table_destroy(rsc_list); // @TODO End mainloop instead so all cleanup is done crm_exit(CRM_EX_OK); } /*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef PCMK__COMPILE_REMOTE pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { - crm_crit("Shutdown request failed, exiting immediately"); + pcmk__crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host, then wait for all proxy * hosts to disconnect (which ensures that all resources have been * stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ execd_stop_tls_server(); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif exit_executor(); } /*! * \internal * \brief Log a shutdown acknowledgment */ void handle_shutdown_ack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("IPC proxy provider acknowledged shutdown request"); return; } #endif crm_debug("Ignoring unexpected shutdown acknowledgment " "from IPC proxy provider"); } /*! * \internal * \brief Handle rejection of shutdown request */ void handle_shutdown_nack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("Exiting immediately after IPC proxy provider " "indicated no resources will be stopped"); exit_executor(); return; } #endif crm_debug("Ignoring unexpected shutdown rejection from IPC proxy provider"); } static GOptionEntry entries[] = { { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, "Send logs to the additional named logfile", NULL }, #ifdef PCMK__COMPILE_REMOTE { "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port, "Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL }, #endif // PCMK__COMPILE_REMOTE { NULL } }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv, char **envp) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; const char *option = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = NULL; gchar **processed_args = NULL; GOptionContext *context = NULL; #ifdef PCMK__COMPILE_REMOTE // If necessary, create PID 1 now before any file descriptors are opened remoted_spawn_pidone(argc, argv, envp); #endif args = pcmk__new_common_args(SUMMARY); #ifdef PCMK__COMPILE_REMOTE processed_args = pcmk__cmdline_preproc(argv, "lp"); #else processed_args = pcmk__cmdline_preproc(argv, "l"); #endif // PCMK__COMPILE_REMOTE context = build_arg_context(args, &output_group); crm_log_preinit(EXECD_NAME, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } // Open additional log files if (options.log_files != NULL) { for (gchar **fname = options.log_files; *fname != NULL; fname++) { rc = pcmk__add_logfile(*fname); if (rc != pcmk_rc_ok) { out->err(out, "Logging to %s is disabled: %s", *fname, pcmk_rc_str(rc)); } } } pcmk__cli_init_logging(EXECD_NAME, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); // ocf_log() (in resource-agents) uses the capitalized env options below option = pcmk__env_option(PCMK__ENV_LOGFACILITY); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches) && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) { pcmk__set_env_option("LOGFACILITY", option, true); } option = pcmk__env_option(PCMK__ENV_LOGFILE); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__set_env_option("LOGFILE", option, true); if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) { pcmk__set_env_option("DEBUGLOG", option, true); } } #ifdef PCMK__COMPILE_REMOTE if (options.port != NULL) { pcmk__set_env_option(PCMK__ENV_REMOTE_PORT, options.port, false); } #endif // PCMK__COMPILE_REMOTE start_time = time(NULL); crm_notice("Starting Pacemaker " EXECD_TYPE " executor"); /* The presence of this variable allegedly controls whether child * processes like httpd will try and use Systemd's sd_notify * API */ unsetenv("NOTIFY_SOCKET"); { // Temporary directory for resource agent use (leave owned by root) int rc = pcmk__build_path(PCMK__OCF_TMP_DIR, 0755); if (rc != pcmk_rc_ok) { crm_warn("Could not create resource agent temporary directory " PCMK__OCF_TMP_DIR ": %s", pcmk_rc_str(rc)); } } rsc_list = pcmk__strkey_table(NULL, free_rsc); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); exit_code = CRM_EX_FATAL; goto done; } #ifdef PCMK__COMPILE_REMOTE if (lrmd_init_remote_tls_server() < 0) { crm_err("Failed to create TLS listener: shutting down and staying down"); exit_code = CRM_EX_FATAL; goto done; } ipc_proxy_init(); #endif mainloop_add_signal(SIGTERM, lrmd_shutdown); mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections"); crm_notice("OCF resource agent search path is %s", PCMK__OCF_RA_PATH); g_main_loop_run(mainloop); /* should never get here */ exit_executor(); done: g_strfreev(options.log_files); #ifdef PCMK__COMPILE_REMOTE g_free(options.port); #endif // PCMK__COMPILE_REMOTE g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/fenced/fenced_cib.c b/daemons/fenced/fenced_cib.c index 20ea4a5f4d..147c4f9268 100644 --- a/daemons/fenced/fenced_cib.c +++ b/daemons/fenced/fenced_cib.c @@ -1,618 +1,618 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include // xmlNode #include // xmlXPathObject, etc. #include #include #include #include #include #include static xmlNode *local_cib = NULL; static cib_t *cib_api = NULL; static bool have_cib_devices = FALSE; /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { GString *xpath = NULL; xmlNode *match; CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) && (value != NULL), return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, "//" PCMK_XE_NODES "/" PCMK_XE_NODE "[@" PCMK_XA_UNAME "='", node, "']" "/" PCMK_XE_INSTANCE_ATTRIBUTES "/" PCMK_XE_NVPAIR "[@" PCMK_XA_NAME "='", name, "' " "and @" PCMK_XA_VALUE "='", value, "']", NULL); match = pcmk__xpath_find_one(local_cib->doc, xpath->str, LOG_NEVER); g_string_free(xpath, TRUE); return (match != NULL); } static void add_topology_level(xmlNode *match) { char *desc = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(match != NULL, return); fenced_register_level(match, &desc, &result); fenced_send_config_notification(STONITH_OP_LEVEL_ADD, &result, desc); pcmk__reset_result(&result); free(desc); } static void topology_remove_helper(const char *node, int level) { char *desc = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; xmlNode *data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL); pcmk__xe_set(data, PCMK__XA_ST_ORIGIN, __func__); pcmk__xe_set_int(data, PCMK_XA_INDEX, level); pcmk__xe_set(data, PCMK_XA_TARGET, node); fenced_unregister_level(data, &desc, &result); fenced_send_config_notification(STONITH_OP_LEVEL_DEL, &result, desc); pcmk__reset_result(&result); pcmk__xml_free(data); free(desc); } static void remove_topology_level(xmlNode *match) { int index = 0; char *key = NULL; CRM_CHECK(match != NULL, return); key = stonith_level_key(match, fenced_target_by_unknown); pcmk__xe_get_int(match, PCMK_XA_INDEX, &index); topology_remove_helper(key, index); free(key); } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = pcmk__xpath_num_results(xpathObj); for (int lpc = 0; lpc < max; lpc++) { xmlNode *match = pcmk__xpath_result(xpathObj, lpc); if (match == NULL) { continue; } remove_topology_level(match); add_topology_level(match); } } /* Fencing */ void fencing_topology_init(void) { xmlXPathObject *xpathObj = NULL; const char *xpath = "//" PCMK_XE_FENCING_LEVEL; crm_trace("Full topology refresh"); free_topology_list(); init_topology_list(); /* Grab everything */ xpathObj = pcmk__xpath_search(local_cib->doc, xpath); register_fencing_topology(xpathObj); xmlXPathFreeObject(xpathObj); } #define XPATH_WATCHDOG_TIMEOUT "//" PCMK_XE_NVPAIR \ "[@" PCMK_XA_NAME "='" \ PCMK_OPT_STONITH_WATCHDOG_TIMEOUT "']" static void update_stonith_watchdog_timeout_ms(xmlNode *cib) { xmlNode *stonith_watchdog_xml = NULL; const char *value = NULL; // @TODO An XPath search can't handle multiple instances or rules stonith_watchdog_xml = pcmk__xpath_find_one(cib->doc, XPATH_WATCHDOG_TIMEOUT, LOG_NEVER); if (stonith_watchdog_xml) { value = pcmk__xe_get(stonith_watchdog_xml, PCMK_XA_VALUE); } if ((value != NULL) && ((pcmk__parse_ms(value, &stonith_watchdog_timeout_ms) != pcmk_rc_ok) || (stonith_watchdog_timeout_ms < 0))) { stonith_watchdog_timeout_ms = pcmk__auto_stonith_watchdog_timeout(); } } /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GHashTableIter iter; stonith_device_t *device = NULL; crm_info("Updating devices to version %s.%s.%s", pcmk__xe_get(local_cib, PCMK_XA_ADMIN_EPOCH), pcmk__xe_get(local_cib, PCMK_XA_EPOCH), pcmk__xe_get(local_cib, PCMK_XA_NUM_UPDATES)); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->cib_registered) { device->dirty = TRUE; } } /* have list repopulated if cib has a watchdog-fencing-resource TODO: keep a cached list for queries happening while we are refreshing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = NULL; fenced_scheduler_run(local_cib); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->dirty) { g_hash_table_iter_remove(&iter); } } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); xmlNode *patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); char *reason = NULL; CRM_CHECK(patchset != NULL, return); pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format); if (format != 2) { crm_warn("Unknown patch format: %d", format); return; } for (xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL, NULL); change != NULL; change = pcmk__xe_next(change, NULL)) { const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION); const char *xpath = pcmk__xe_get(change, PCMK_XA_PATH); const char *shortpath = NULL; if (pcmk__str_eq(op, PCMK_VALUE_MOVE, pcmk__str_null_matches) || (strstr(xpath, "/" PCMK_XE_STATUS) != NULL)) { continue; } if (pcmk__str_eq(op, PCMK_VALUE_DELETE, pcmk__str_none) && (strstr(xpath, "/" PCMK_XE_PRIMITIVE) != NULL)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if ((strstr(xpath, PCMK_XE_INSTANCE_ATTRIBUTES) != NULL) || (strstr(xpath, PCMK_XE_META_ATTRIBUTES) != NULL)) { reason = pcmk__str_copy("(meta) attribute deleted from " "resource"); break; } mutable = pcmk__str_copy(xpath); rsc_id = strstr(mutable, PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "=\'"); if (rsc_id != NULL) { rsc_id += strlen(PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, true); /* watchdog_device_update called afterwards to fall back to implicit definition if needed */ } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if (strstr(xpath, "/" PCMK_XE_RESOURCES) || strstr(xpath, "/" PCMK_XE_CONSTRAINTS) || strstr(xpath, "/" PCMK_XE_RSC_DEFAULTS)) { shortpath = strrchr(xpath, '/'); pcmk__assert(shortpath != NULL); reason = pcmk__assert_asprintf("%s %s", op, shortpath + 1); break; } } if (reason != NULL) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); free(reason); } else { crm_trace("No updates for device list found in CIB"); } } static void watchdog_device_update(void) { if (stonith_watchdog_timeout_ms > 0) { if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && !stonith_watchdog_targets) { /* getting here watchdog-fencing enabled, no device there yet and reason isn't stonith_watchdog_targets preventing that */ int rc; xmlNode *xml; xml = create_device_registration_xml( STONITH_WATCHDOG_ID, st_namespace_internal, STONITH_WATCHDOG_AGENT, NULL, /* stonith_device_register will add our own name as PCMK_STONITH_HOST_LIST param so we can skip that here */ NULL); rc = stonith_device_register(xml, TRUE); pcmk__xml_free(xml); if (rc != pcmk_ok) { rc = pcmk_legacy2rc(rc); exit_code = CRM_EX_FATAL; - crm_crit("Cannot register watchdog pseudo fence agent: %s", - pcmk_rc_str(rc)); + pcmk__crit("Cannot register watchdog pseudo fence agent: %s", + pcmk_rc_str(rc)); stonith_shutdown(0); } } } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) { /* be silent if no device - todo parameter to stonith_device_remove */ stonith_device_remove(STONITH_WATCHDOG_ID, true); } } /*! * \internal * \brief Query the full CIB * * \return Standard Pacemaker return code */ static int fenced_query_cib(void) { int rc = pcmk_ok; crm_trace("Re-requesting full CIB"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { pcmk__assert(local_cib != NULL); } else { crm_err("Couldn't retrieve the CIB: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); } return rc; } static void update_fencing_topology(const char *event, xmlNode *msg) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); xmlNode *patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); int format = 1; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; CRM_CHECK(patchset != NULL, return); pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format); if (format != 2) { crm_warn("Unknown patch format: %d", format); return; } pcmk__xml_patchset_versions(patchset, del, add); for (xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL, NULL); change != NULL; change = pcmk__xe_next(change, NULL)) { const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION); const char *xpath = pcmk__xe_get(change, PCMK_XA_PATH); if (op == NULL) { continue; } if (strstr(xpath, "/" PCMK_XE_FENCING_LEVEL) != NULL) { // Change to a specific entry crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if (strcmp(op, PCMK_VALUE_DELETE) == 0) { /* We have only path and ID, which is not enough info to remove * a specific entry. Re-initialize the whole topology. */ crm_info("Re-initializing fencing topology after %s operation " "%d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } if (strcmp(op, PCMK_VALUE_CREATE) == 0) { add_topology_level(change->children); } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) { xmlNode *match = pcmk__xe_first_child(change, PCMK_XE_CHANGE_RESULT, NULL, NULL); if (match != NULL) { remove_topology_level(match->children); add_topology_level(match->children); } } continue; } if (strstr(xpath, "/" PCMK_XE_FENCING_TOPOLOGY) != NULL) { // Change to the topology in general crm_info("Re-initializing fencing topology after top-level " "%s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } if ((strstr(xpath, "/" PCMK_XE_CONFIGURATION) != NULL) && (pcmk__xe_first_child(change, PCMK_XE_FENCING_TOPOLOGY, NULL, NULL) != NULL) && pcmk__str_any_of(op, PCMK_VALUE_CREATE, PCMK_VALUE_DELETE, NULL)) { // Topology was created or entire configuration section was deleted crm_info("Re-initializing fencing topology after top-level " "%s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } static void update_cib_cache_cb(const char *event, xmlNode * msg) { long long timeout_ms_saved = stonith_watchdog_timeout_ms; bool need_full_refresh = false; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *wrapper = NULL; xmlNode *patchset = NULL; pcmk__xe_get_int(msg, PCMK__XA_CIB_RC, &rc); if (rc != pcmk_ok) { return; } wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(local_cib); local_cib = NULL; } } if (local_cib == NULL) { if (fenced_query_cib() != pcmk_rc_ok) { return; } need_full_refresh = true; } pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); if (timeout_ms_saved != stonith_watchdog_timeout_ms) { need_full_refresh = true; } if (need_full_refresh) { fencing_topology_init(); cib_devices_update(); } else { // Partial refresh update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } watchdog_device_update(); } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from CIB"); have_cib_devices = TRUE; local_cib = pcmk__xml_copy(NULL, output); pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB manager closed"); return; } else { - crm_crit("Lost connection to the CIB manager, shutting down"); + pcmk__crit("Lost connection to the CIB manager, shutting down"); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } /*! * \internal * \brief Disconnect from CIB manager */ void fenced_cib_cleanup(void) { if (cib_api != NULL) { cib_api->cmds->del_notify_callback(cib_api, PCMK__VALUE_CIB_DIFF_NOTIFY, update_cib_cache_cb); cib__clean_up_connection(&cib_api); } pcmk__xml_free(local_cib); local_cib = NULL; } void setup_cib(void) { int rc, retries = 0; cib_api = cib_new(); if (cib_api == NULL) { crm_err("No connection to the CIB manager"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, crm_system_name, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); return; } rc = cib_api->cmds->add_notify_callback(cib_api, PCMK__VALUE_CIB_DIFF_NOTIFY, update_cib_cache_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); return; } rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_none); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for fencing topology changes"); } diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index e0d2078711..6bd492942b 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,667 +1,667 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster" // @TODO This should be guint long long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static struct { gboolean stand_alone; gchar **log_files; } options; crm_exit_t exit_code = CRM_EX_OK; static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; uint32_t call_options = st_opt_none; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; int rc = pcmk_rc_ok; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL); return 0; } op = pcmk__xe_get(request, PCMK__XA_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { pcmk__xe_set(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG); pcmk__xe_set(request, PCMK__XA_ST_OP, op); pcmk__xe_set(request, PCMK__XA_ST_CLIENTID, c->id); pcmk__xe_set(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); pcmk__xe_set(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, request); pcmk__xml_free(request); return 0; } if (c->name == NULL) { const char *value = pcmk__xe_get(request, PCMK__XA_ST_CLIENTNAME); c->name = pcmk__assert_asprintf("%s.%u", pcmk__s(value, "unknown"), c->pid); } rc = pcmk__xe_get_flags(request, PCMK__XA_ST_CALLOPT, &call_options, st_opt_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from IPC request: %s", pcmk_rc_str(rc)); } crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk__is_set(call_options, st_opt_sync_call)) { pcmk__assert(pcmk__is_set(flags, crm_ipc_client_response)); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } pcmk__xe_set(request, PCMK__XA_ST_CLIENTID, c->id); pcmk__xe_set(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); pcmk__xe_set(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); crm_log_xml_trace(request, "ipc-received"); stonith_command(c, id, flags, request, NULL); pcmk__xml_free(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = pcmk__xe_get(msg, PCMK__XA_SRC); const char *op = pcmk__xe_get(msg, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void handle_cpg_message(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } pcmk__xe_set(xml, PCMK__XA_SRC, from); stonith_peer_callback(xml, NULL); pcmk__xml_free(xml); free(data); } static void stonith_peer_cs_destroy(gpointer user_data) { - crm_crit("Lost connection to cluster layer, shutting down"); + pcmk__crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; int rid = 0; uint32_t ipc_flags = crm_ipc_server_event; if (pcmk__is_set(call_options, st_opt_sync_call)) { CRM_LOG_ASSERT(client->request_id); rid = client->request_id; client->request_id = 0; ipc_flags = crm_ipc_flags_none; } local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); if (local_rc == pcmk_rc_ok) { crm_trace("Sent response %d to client %s", rid, pcmk__client_name(client)); } else { crm_warn("%synchronous reply to client %s failed: %s", (pcmk__is_set(call_options, st_opt_sync_call)? "S" : "As"), pcmk__client_name(client), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, pcmk__str_none)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = pcmk__xe_get(update_msg, PCMK__XA_SUBT); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk__is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " QB_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE); pcmk__xe_set(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE); pcmk__xe_set(notify_data, PCMK__XA_ST_CALLID, call_id); pcmk__xe_set_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } pcmk__xml_free(notify_data); } /*! * \internal * \brief Notify relevant IPC clients of a fencing operation result * * \param[in] type Notification type * \param[in] result Result of fencing operation (assume success if NULL) * \param[in] data If not NULL, add to notification as call data */ void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); CRM_LOG_ASSERT(type != NULL); pcmk__xe_set(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); pcmk__xe_set(update_msg, PCMK__XA_SUBT, type); pcmk__xe_set(update_msg, PCMK__XA_ST_OP, type); stonith__xe_set_result(update_msg, result); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); pcmk__xml_free(update_msg); crm_trace("Notify complete"); } /*! * \internal * \brief Send notifications for a configuration change to subscribed clients * * \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD, * \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or * \c STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc Description of what changed (either device ID or string * representation of level * ([])) */ void fenced_send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { xmlNode *notify_data = pcmk__xe_create(NULL, op); pcmk__xe_set(notify_data, PCMK__XA_ST_DEVICE_ID, desc); fenced_send_notification(op, result, notify_data); pcmk__xml_free(notify_data); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } } static void stonith_cleanup(void) { fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } pcmk__cluster_destroy_node_caches(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); fenced_unregister_handlers(); } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, const void *data) { if ((type != pcmk__node_update_processes) && !pcmk__is_set(node->flags, pcmk__node_status_remote)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); pcmk__xe_set(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG); pcmk__xe_set(query, PCMK__XA_ST_OP, STONITH_OP_POKE); crm_debug("Broadcasting our uname because of node %" PRIu32, node->cluster_layer_id); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, query); pcmk__xml_free(query); } } /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or * crm_resource --list-options=fencing instead of querying daemon metadata. * * NOTE: pcs (as of at least 0.11.8) uses this */ static int fencer_metadata(void) { const char *name = PCMK__SERVER_FENCED; const char *desc_short = N_("Instance attributes available for all " "\"stonith\"-class resources"); const char *desc_long = N_("Instance attributes available for all " "\"stonith\"-class resources and used by " "Pacemaker's fence daemon"); return pcmk__daemon_metadata(out, name, desc_short, desc_long, pcmk__opt_fencing); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.stand_alone, N_("Intended for use in regression testing only"), NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; pcmk_cluster_t *cluster = NULL; crm_ipc_t *old_instance = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = fencer_metadata(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } // Open additional log files pcmk__add_logfiles(options.log_files, out); crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE, (args->verbosity > 0), argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); - crm_crit("Aborting start-up because another fencer instance is " - "already active"); + pcmk__crit("Aborting start-up because another fencer instance is " + "already active"); goto done; } else { // Not up or not authentic, we'll proceed either way crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); pcmk__cluster_init_node_caches(); rc = fenced_scheduler_init(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error initializing scheduler data: %s", pcmk_rc_str(rc)); goto done; } cluster = pcmk_cluster_new(); #if SUPPORT_COROSYNC if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy); pcmk_cpg_set_deliver_fn(cluster, handle_cpg_message); pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb); } #endif // SUPPORT_COROSYNC pcmk__cluster_set_status_callback(&st_peer_update_callback); if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; - crm_crit("Cannot sign in to the cluster... terminating"); + pcmk__crit("Cannot sign in to the cluster... terminating"); goto done; } fenced_set_local_node(cluster->priv->node_name); if (!options.stand_alone) { setup_cib(); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); // Create the mainloop and run it... mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); g_strfreev(options.log_files); stonith_cleanup(); pcmk_cluster_free(cluster); fenced_scheduler_cleanup(); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c index 74bc5299bd..4cf3e2df27 100644 --- a/daemons/pacemakerd/pcmkd_corosync.c +++ b/daemons/pacemakerd/pcmkd_corosync.c @@ -1,375 +1,376 @@ /* * Copyright 2010-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #include "pcmkd_corosync.h" #include #include /* for calls to stat() */ #include /* For basename() and dirname() */ #include #include /* For getpwname() */ #include #include #include #include #include #include /* for crm_ipc_is_authentic_process */ #include #include /* PCMK__SPECIAL_PID* */ static corosync_cfg_handle_t cfg_handle = 0; static mainloop_timer_t *reconnect_timer = NULL; /* =::=::=::= CFG - Shutdown stuff =::=::=::= */ static void cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags) { crm_info("Corosync wants to shut down: %s", (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" : (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional"); /* Never allow corosync to shut down while we're running */ corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO); } static corosync_cfg_callbacks_t cfg_callbacks = { .corosync_cfg_shutdown_callback = cfg_shutdown_callback, }; static int pcmk_cfg_dispatch(gpointer user_data) { corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data; cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL); if (rc != CS_OK) { return -1; } return 0; } static void close_cfg(void) { if (cfg_handle != 0) { #ifdef HAVE_COROSYNC_CFG_TRACKSTART /* Ideally, we would call corosync_cfg_trackstop(cfg_handle) here, but a * bug in corosync 3.1.1 and 3.1.2 makes it hang forever. Thankfully, * it's not necessary since we exit immediately after this. */ #endif corosync_cfg_finalize(cfg_handle); cfg_handle = 0; } } static gboolean cluster_reconnect_cb(gpointer data) { if (cluster_connect_cfg()) { mainloop_timer_del(reconnect_timer); reconnect_timer = NULL; crm_notice("Cluster reconnect succeeded"); pacemakerd_read_config(); restart_cluster_subdaemons(); return G_SOURCE_REMOVE; } else { crm_info("Cluster reconnect failed " "(connection will be reattempted once per second)"); } /* * In theory this will continue forever. In practice the CIB connection from * attrd will timeout and shut down Pacemaker when it gets bored. */ return G_SOURCE_CONTINUE; } static void cfg_connection_destroy(gpointer user_data) { crm_warn("Lost connection to cluster layer " "(connection will be reattempted once per second)"); corosync_cfg_finalize(cfg_handle); cfg_handle = 0; reconnect_timer = mainloop_timer_add("corosync reconnect", 1000, TRUE, cluster_reconnect_cb, NULL); mainloop_timer_start(reconnect_timer); } void cluster_disconnect_cfg(void) { close_cfg(); if (reconnect_timer != NULL) { /* The mainloop should be gone by this point, so this isn't necessary, * but cleaning up memory should make valgrind happier. */ mainloop_timer_del(reconnect_timer); reconnect_timer = NULL; } } #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying Corosync operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) gboolean cluster_connect_cfg(void) { cs_error_t rc; int fd = -1, retries = 0, rv; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; uint32_t nodeid; static struct mainloop_fd_callbacks cfg_fd_callbacks = { .dispatch = pcmk_cfg_dispatch, .destroy = cfg_connection_destroy, }; cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); if (rc != CS_OK) { - crm_crit("Could not connect to Corosync CFG: %s " QB_XS " rc=%d", - cs_strerror(rc), rc); + pcmk__crit("Could not connect to Corosync CFG: %s " QB_XS " rc=%d", + cs_strerror(rc), rc); return FALSE; } rc = corosync_cfg_fd_get(cfg_handle, &fd); if (rc != CS_OK) { - crm_crit("Could not get Corosync CFG descriptor: %s " QB_XS " rc=%d", - cs_strerror(rc), rc); + pcmk__crit("Could not get Corosync CFG descriptor: %s " QB_XS " rc=%d", + cs_strerror(rc), rc); goto bail; } /* CFG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { - crm_crit("Rejecting Corosync CFG provider because process %lld " - "is running as uid %lld gid %lld, not root", - (long long) PCMK__SPECIAL_PID_AS_0(found_pid), - (long long) found_uid, (long long) found_gid); + pcmk__crit("Rejecting Corosync CFG provider because process %lld " + "is running as uid %lld gid %lld, not root", + (long long) PCMK__SPECIAL_PID_AS_0(found_pid), + (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { - crm_crit("Could not authenticate Corosync CFG provider: %s " - QB_XS " rc=%d", strerror(-rv), -rv); + pcmk__crit("Could not authenticate Corosync CFG provider: %s " + QB_XS " rc=%d", strerror(-rv), -rv); goto bail; } retries = 0; cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid)); if (rc != CS_OK) { - crm_crit("Could not get local node ID from Corosync: %s " - QB_XS " rc=%d", cs_strerror(rc), rc); + pcmk__crit("Could not get local node ID from Corosync: %s " + QB_XS " rc=%d", cs_strerror(rc), rc); goto bail; } crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid); #ifdef HAVE_COROSYNC_CFG_TRACKSTART retries = 0; cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0)); if (rc != CS_OK) { - crm_crit("Could not enable Corosync CFG shutdown tracker: %s " QB_XS " rc=%d", - cs_strerror(rc), rc); + pcmk__crit("Could not enable Corosync CFG shutdown tracker: %s " + QB_XS " rc=%d", + cs_strerror(rc), rc); goto bail; } #endif mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks); return TRUE; bail: corosync_cfg_finalize(cfg_handle); return FALSE; } void pcmkd_shutdown_corosync(void) { cs_error_t rc; if (cfg_handle == 0) { crm_warn("Unable to shut down Corosync: No connection"); return; } crm_info("Asking Corosync to shut down"); rc = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); if (rc == CS_OK) { close_cfg(); } else { crm_warn("Corosync shutdown failed: %s " QB_XS " rc=%d", cs_strerror(rc), rc); } } bool pcmkd_corosync_connected(void) { cpg_handle_t local_handle = 0; cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0}; int fd = -1; if (cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL) != CS_OK) { return false; } if (cpg_fd_get(local_handle, &fd) != CS_OK) { return false; } cpg_finalize(local_handle); return true; } /* =::=::=::= Configuration =::=::=::= */ static int get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value, const char *fallback) { int rc = 0, retries = 0; cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value)); if (rc != CS_OK) { crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback); pcmk__str_update(value, fallback); } crm_trace("%s: %s", key, *value); return rc; } gboolean pacemakerd_read_config(void) { cs_error_t rc = CS_OK; int retries = 0; cmap_handle_t local_handle; uint64_t config = 0; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown; const char *cluster_layer_s = NULL; // There can be only one possibility do { rc = pcmk__init_cmap(&local_handle); if (rc != CS_OK) { retries++; crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) " QB_XS " rc=%d", cs_strerror(rc), retries, rc); sleep(retries); } else { break; } } while (retries < 5); if (rc != CS_OK) { - crm_crit("Could not connect to Corosync CMAP: %s " - QB_XS " rc=%d", cs_strerror(rc), rc); + pcmk__crit("Could not connect to Corosync CMAP: %s " + QB_XS " rc=%d", cs_strerror(rc), rc); return FALSE; } rc = cmap_fd_get(local_handle, &fd); if (rc != CS_OK) { - crm_crit("Could not get Corosync CMAP descriptor: %s " QB_XS " rc=%d", - cs_strerror(rc), rc); + pcmk__crit("Could not get Corosync CMAP descriptor: %s " QB_XS " rc=%d", + cs_strerror(rc), rc); cmap_finalize(local_handle); return FALSE; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { - crm_crit("Rejecting Corosync CMAP provider because process %lld " - "is running as uid %lld gid %lld, not root", - (long long) PCMK__SPECIAL_PID_AS_0(found_pid), - (long long) found_uid, (long long) found_gid); + pcmk__crit("Rejecting Corosync CMAP provider because process %lld " + "is running as uid %lld gid %lld, not root", + (long long) PCMK__SPECIAL_PID_AS_0(found_pid), + (long long) found_uid, (long long) found_gid); cmap_finalize(local_handle); return FALSE; } else if (rv < 0) { - crm_crit("Could not authenticate Corosync CMAP provider: %s " - QB_XS " rc=%d", strerror(-rv), -rv); + pcmk__crit("Could not authenticate Corosync CMAP provider: %s " + QB_XS " rc=%d", strerror(-rv), -rv); cmap_finalize(local_handle); return FALSE; } cluster_layer = pcmk_get_cluster_layer(); cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); if (cluster_layer != pcmk_cluster_layer_corosync) { - crm_crit("Expected Corosync cluster layer but detected %s " - QB_XS " cluster_layer=%d", - cluster_layer_s, cluster_layer); + pcmk__crit("Expected Corosync cluster layer but detected %s " + QB_XS " cluster_layer=%d", + cluster_layer_s, cluster_layer); return FALSE; } crm_info("Reading configuration for %s cluster layer", cluster_layer_s); pcmk__set_env_option(PCMK__ENV_CLUSTER_TYPE, PCMK_VALUE_COROSYNC, true); // If debug logging is not configured, check whether corosync has it if (pcmk__env_option(PCMK__ENV_DEBUG) == NULL) { char *debug_enabled = NULL; get_config_opt(config, local_handle, "logging.debug", &debug_enabled, PCMK_VALUE_OFF); if (pcmk__is_true(debug_enabled)) { pcmk__set_env_option(PCMK__ENV_DEBUG, "1", true); if (get_crm_log_level() < LOG_DEBUG) { set_crm_log_level(LOG_DEBUG); } } else { pcmk__set_env_option(PCMK__ENV_DEBUG, "0", true); } free(debug_enabled); } if(local_handle){ gid_t gid = 0; if (pcmk__daemon_user(NULL, &gid) != pcmk_rc_ok) { crm_warn("Could not authorize group with Corosync " QB_XS " No group found for user %s", CRM_DAEMON_USER); } else { char key[PATH_MAX]; snprintf(key, PATH_MAX, "uidgid.gid.%u", gid); rc = cmap_set_uint8(local_handle, key, 1); if (rc != CS_OK) { crm_warn("Could not authorize group with Corosync: %s " QB_XS " group=%u rc=%d", pcmk__cs_err_str(rc), gid, rc); } } } cmap_finalize(local_handle); return TRUE; } diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c index f7bdab17c2..45361ed5d9 100644 --- a/daemons/pacemakerd/pcmkd_subdaemons.c +++ b/daemons/pacemakerd/pcmkd_subdaemons.c @@ -1,902 +1,904 @@ /* * Copyright 2010-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #if SUPPORT_COROSYNC #include "pcmkd_corosync.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include enum child_daemon_flags { child_none = 0, child_respawn = 1 << 0, child_needs_cluster = 1 << 1, child_needs_retry = 1 << 2, child_active_before_startup = 1 << 3, }; typedef struct { enum pcmk_ipc_server server; pid_t pid; int respawn_count; //! Child runs as \c root if \c true, or as \c CRM_DAEMON_USER if \c false bool as_root; int check_count; uint32_t flags; } pcmkd_child_t; #define PCMK_PROCESS_CHECK_INTERVAL 1000 /* 1s */ #define PCMK_PROCESS_CHECK_RETRIES 5 #define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ /* Index into the array below */ #define PCMK_CHILD_CONTROLD 5 static pcmkd_child_t pcmk_children[] = { { pcmk_ipc_based, 0, 0, false, 0, child_respawn|child_needs_cluster }, { pcmk_ipc_fenced, 0, 0, true, 0, child_respawn|child_needs_cluster }, { pcmk_ipc_execd, 0, 0, true, 0, child_respawn }, { pcmk_ipc_attrd, 0, 0, false, 0, child_respawn|child_needs_cluster }, { pcmk_ipc_schedulerd, 0, 0, false, 0, child_respawn }, { pcmk_ipc_controld, 0, 0, false, 0, child_respawn|child_needs_cluster }, }; static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; crm_trigger_t *shutdown_trigger = NULL; crm_trigger_t *startup_trigger = NULL; time_t subdaemon_check_progress = 0; // Whether we need root group access to talk to cluster layer static bool need_root_group = true; /* When contacted via pacemakerd-api by a client having sbd in * the name we assume it is sbd-daemon which wants to know * if pacemakerd shutdown gracefully. * Thus when everything is shutdown properly pacemakerd * waits till it has reported the graceful completion of * shutdown to sbd and just when sbd-client closes the * connection we can assume that the report has arrived * properly so that pacemakerd can finally exit. * Following two variables are used to track that handshake. */ unsigned int shutdown_complete_state_reported_to = 0; gboolean shutdown_complete_state_reported_client_closed = FALSE; /* state we report when asked via pacemakerd-api status-ping */ const char *pacemakerd_state = PCMK__VALUE_INIT; gboolean running_with_sbd = FALSE; /* local copy */ GMainLoop *mainloop = NULL; static gboolean fatal_error = FALSE; static int child_liveness(pcmkd_child_t *child); static gboolean escalate_shutdown(gpointer data); static int start_child(pcmkd_child_t *child); static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); static void pcmk_process_exit(pcmkd_child_t *child); static gboolean pcmk_shutdown_worker(gpointer user_data); static gboolean stop_child(pcmkd_child_t *child, int signal); /*! * \internal * \brief Get path to subdaemon executable * * \param[in] subdaemon Subdaemon to get path for * * \return Newly allocated string with path to subdaemon executable * \note It is the caller's responsibility to free() the return value */ static inline char * subdaemon_path(pcmkd_child_t *subdaemon) { return pcmk__assert_asprintf(CRM_DAEMON_DIR "/%s", pcmk__server_name(subdaemon->server)); } static bool pcmkd_cluster_connected(void) { #if SUPPORT_COROSYNC return pcmkd_corosync_connected(); #else return true; #endif } static gboolean check_next_subdaemon(gpointer user_data) { static int next_child = 0; pcmkd_child_t *child = &(pcmk_children[next_child]); const char *name = pcmk__server_name(child->server); const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid); int rc = child_liveness(child); crm_trace("Checked subdaemon %s[%lld]: %s (%d)", name, pid, pcmk_rc_str(rc), rc); switch (rc) { case pcmk_rc_ok: child->check_count = 0; subdaemon_check_progress = time(NULL); break; case pcmk_rc_ipc_pid_only: // Child was previously OK if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) { // cts-lab looks for this message - crm_crit("Subdaemon %s[%lld] is unresponsive to IPC " - "after %d attempt%s and will now be killed", - name, pid, child->check_count, - pcmk__plural_s(child->check_count)); + pcmk__crit("Subdaemon %s[%lld] is unresponsive to IPC " + "after %d attempt%s and will now be killed", + name, pid, child->check_count, + pcmk__plural_s(child->check_count)); stop_child(child, SIGKILL); if (pcmk__is_set(child->flags, child_respawn)) { // Respawn limit hasn't been reached, so retry another round child->check_count = 0; } } else { crm_notice("Subdaemon %s[%lld] is unresponsive to IPC " "after %d attempt%s (will recheck later)", name, pid, child->check_count, pcmk__plural_s(child->check_count)); if (pcmk__is_set(child->flags, child_respawn)) { /* as long as the respawn-limit isn't reached and we haven't run out of connect retries we account this as progress we are willing to tell to sbd */ subdaemon_check_progress = time(NULL); } } /* go to the next child and see if we can make progress there */ break; case pcmk_rc_ipc_unresponsive: if (!pcmk__is_set(child->flags, child_respawn)) { /* if a subdaemon is down and we don't want it to be restarted this is a success during shutdown. if it isn't restarted anymore due to MAX_RESPAWN it is rather no success. */ if (child->respawn_count <= MAX_RESPAWN) { subdaemon_check_progress = time(NULL); } } if (!pcmk__is_set(child->flags, child_active_before_startup)) { crm_trace("Subdaemon %s[%lld] terminated", name, pid); break; } if (pcmk__is_set(child->flags, child_respawn)) { // cts-lab looks for this message crm_err("Subdaemon %s[%lld] terminated", name, pid); } else { /* orderly shutdown */ crm_notice("Subdaemon %s[%lld] terminated", name, pid); } pcmk_process_exit(child); break; default: crm_exit(CRM_EX_FATAL); break; /* static analysis/noreturn */ } if (++next_child >= PCMK__NELEM(pcmk_children)) { next_child = 0; } return G_SOURCE_CONTINUE; } static gboolean escalate_shutdown(gpointer data) { pcmkd_child_t *child = data; if (child->pid == PCMK__SPECIAL_PID) { pcmk_process_exit(child); } else if (child->pid != 0) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ crm_err("Subdaemon %s not terminating in a timely manner, forcing", pcmk__server_name(child->server)); stop_child(child, SIGSEGV); } return FALSE; } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmkd_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo) { // cts-lab looks for this message do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR), "%s[%d] terminated with signal %d (%s)%s", name, pid, signo, strsignal(signo), (core? " and dumped core" : "")); } else { switch(exitcode) { case CRM_EX_OK: crm_info("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; case CRM_EX_FATAL: crm_warn("Shutting cluster down because %s[%d] had fatal failure", name, pid); child->flags &= ~child_respawn; fatal_error = TRUE; pcmk_shutdown(SIGTERM); break; case CRM_EX_PANIC: { char *msg = NULL; child->flags &= ~child_respawn; fatal_error = TRUE; msg = pcmk__assert_asprintf("Subdaemon %s[%d] requested " "panic", name, pid); pcmk__panic(msg); // Should never get here free(msg); pcmk_shutdown(SIGTERM); } break; default: // cts-lab looks for this message crm_err("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; } } pcmk_process_exit(child); } static void pcmk_process_exit(pcmkd_child_t * child) { const char *name = pcmk__server_name(child->server); child->pid = 0; child->flags &= ~child_active_before_startup; child->check_count = 0; child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Subdaemon %s exceeded maximum respawn count", name); child->flags &= ~child_respawn; } if (shutdown_trigger) { /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ mainloop_set_trigger(shutdown_trigger); } else if (!pcmk__is_set(child->flags, child_respawn)) { /* nothing to do */ } else if (pcmk__is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { pcmk__panic("Subdaemon failed"); } else if (child_liveness(child) == pcmk_rc_ok) { crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK", name, pcmk__server_ipc_name(child->server)); } else if (pcmk__is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) { crm_notice("Not respawning subdaemon %s until cluster returns", name); child->flags |= child_needs_retry; } else { // cts-lab looks for this message crm_notice("Respawning subdaemon %s after unexpected exit", name); start_child(child); } } static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = PCMK__NELEM(pcmk_children) - 1; static time_t next_log = 0; if (phase == PCMK__NELEM(pcmk_children) - 1) { crm_notice("Shutting down Pacemaker"); pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN; } for (; phase >= 0; phase--) { pcmkd_child_t *child = &(pcmk_children[phase]); const char *name = pcmk__server_name(child->server); if (child->pid != 0) { time_t now = time(NULL); if (pcmk__is_set(child->flags, child_respawn)) { if (child->pid == PCMK__SPECIAL_PID) { crm_warn("Subdaemon %s cannot be terminated (shutdown " "will be escalated after %ld seconds if it does " "not terminate on its own; set PCMK_" PCMK__ENV_FAIL_FAST "=1 to exit immediately " "instead)", name, (long) SHUTDOWN_ESCALATION_PERIOD); } next_log = now + 30; child->flags &= ~child_respawn; stop_child(child, SIGTERM); if (phase < PCMK_CHILD_CONTROLD) { pcmk__create_timer(SHUTDOWN_ESCALATION_PERIOD, escalate_shutdown, child); } } else if (now >= next_log) { next_log = now + 30; crm_notice("Still waiting for subdaemon %s to terminate " QB_XS " pid=%lld", name, (long long) child->pid); } return TRUE; } /* cleanup */ crm_debug("Subdaemon %s confirmed stopped", name); child->pid = 0; } crm_notice("Shutdown complete"); pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE; if (!fatal_error && running_with_sbd && pcmk__get_sbd_sync_resource_startup() && !shutdown_complete_state_reported_client_closed) { crm_notice("Waiting for SBD to pick up shutdown-complete-state."); return TRUE; } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Shutting down and staying down after fatal error"); #if SUPPORT_COROSYNC pcmkd_shutdown_corosync(); #endif crm_exit(CRM_EX_FATAL); } return TRUE; } /* TODO once libqb is taught to juggle with IPC end-points carried over as bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) it shall hand over these descriptors here if/once they are successfully pre-opened in (presumably) child_liveness(), to avoid any remaining room for races */ // \return Standard Pacemaker return code static int start_child(pcmkd_child_t * child) { const char *user = (child->as_root? "root" : CRM_DAEMON_USER); uid_t uid = 0; gid_t gid = 0; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *name = pcmk__server_name(child->server); const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED); child->flags &= ~child_active_before_startup; child->check_count = 0; if (pcmk__is_true(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if ((env_callgrind != NULL) && (strstr(env_callgrind, name) != NULL)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (pcmk__is_true(env_valgrind)) { use_valgrind = TRUE; } else if ((env_valgrind != NULL) && (strstr(env_valgrind, name) != NULL)) { use_valgrind = TRUE; } if (use_valgrind && strlen(PCMK__VALGRIND_EXEC) == 0) { crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found", name); use_valgrind = FALSE; } if (!child->as_root) { int rc = pcmk__daemon_user(&uid, &gid); if (rc != pcmk_rc_ok) { crm_err("User " CRM_DAEMON_USER " not found for subdaemon %s: %s", name, pcmk_rc_str(rc)); return rc; } } child->pid = fork(); pcmk__assert(child->pid != -1); if (child->pid > 0) { // Parent const char *valgrind_s = ""; if (use_valgrind) { valgrind_s = " (valgrind enabled: " PCMK__VALGRIND_EXEC ")"; } mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit); crm_info("Forked process %lld using user %lld (%s) and group %lld " "for subdaemon %s%s", (long long) child->pid, (long long) uid, user, (long long) gid, name, valgrind_s); return pcmk_rc_ok; } // Child // Start a new session (void) setsid(); // Set up the two alternate argument arrays opts_vgrind[0] = pcmk__str_copy(PCMK__VALGRIND_EXEC); if (use_callgrind) { opts_vgrind[1] = pcmk__str_copy("--tool=callgrind"); opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); opts_vgrind[3] = subdaemon_path(child); opts_vgrind[4] = NULL; } else { opts_vgrind[1] = subdaemon_path(child); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } opts_default[0] = subdaemon_path(child); if (gid != 0) { // Drop root group access if not needed if (!need_root_group && (setgid(gid) < 0)) { crm_warn("Could not set subdaemon %s group to %lld: %s", name, (long long) gid, strerror(errno)); } /* Initialize supplementary groups to those where the user is a member, * plus haclient (so we can access IPC). * * @TODO initgroups() is not portable (not part of any standard). */ if (initgroups(user, gid) < 0) { crm_err("Cannot initialize system groups for subdaemon %s: %s " QB_XS " errno=%d", name, strerror(errno), errno); } } if ((uid != 0) && (setuid(uid) < 0)) { crm_warn("Could not set subdaemon %s user to %s: %s " QB_XS " uid=%lld errno=%d", name, strerror(errno), user, (long long) uid, errno); } pcmk__close_fds_in_child(true); pcmk__open_devnull(O_RDONLY); // stdin (fd 0) pcmk__open_devnull(O_WRONLY); // stdout (fd 1) pcmk__open_devnull(O_WRONLY); // stderr (fd 2) if (use_valgrind) { (void) execvp(PCMK__VALGRIND_EXEC, opts_vgrind); } else { char *path = subdaemon_path(child); (void) execvp(path, opts_default); free(path); } - crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno)); + pcmk__crit("Could not execute subdaemon %s: %s", name, strerror(errno)); crm_exit(CRM_EX_FATAL); return pcmk_rc_ok; // Never reached } /*! * \internal * \brief Check the liveness of the child based on IPC name and PID if tracked * * \param[in,out] child Child tracked data * * \return Standard Pacemaker return code * * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive * indicating that no trace of IPC liveness was detected, * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by * an unauthorized process, and pcmk_rc_ipc_pid_only indicating that * the child is up by PID but not IPC end-point (possibly starting). * \note This function doesn't modify any of \p child members but \c pid, * and is not actively toying with processes as such but invoking * \c stop_child in one particular case (there's for some reason * a different authentic holder of the IPC end-point). */ static int child_liveness(pcmkd_child_t *child) { uid_t cl_uid = 0; gid_t cl_gid = 0; const uid_t root_uid = 0; const gid_t root_gid = 0; const uid_t *ref_uid; const gid_t *ref_gid; const char *name = pcmk__server_name(child->server); const char *ipc_name = pcmk__server_ipc_name(child->server); int rc = pcmk_rc_ok; pid_t ipc_pid = 0; if (child->as_root) { ref_uid = &root_uid; ref_gid = &root_gid; } else { ref_uid = &cl_uid; ref_gid = &cl_gid; rc = pcmk__daemon_user(&cl_uid, &cl_gid); if (rc != pcmk_rc_ok) { crm_err("Could not find user and group IDs for user " CRM_DAEMON_USER ": %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); return rc; } } rc = pcmk__ipc_is_authentic_process_active(ipc_name, *ref_uid, *ref_gid, &ipc_pid); if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) { if (child->pid <= 0) { /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this initializes a * new child. If rc is pcmk_rc_ipc_unresponsive, ipc_pid is zero, * and we will investigate further. */ child->pid = ipc_pid; } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) { /* An unexpected (but authorized) process is responding to IPC. * Investigate further. */ rc = pcmk_rc_ipc_unresponsive; } } if (rc != pcmk_rc_ipc_unresponsive) { return rc; } /* If we get here, a child without IPC is being tracked, no IPC liveness has * been detected, or IPC liveness has been detected with an unexpected (but * authorized) process. This is safe on FreeBSD since the only change * possible from a proper child's PID into "special" PID of 1 behind more * loosely related process. */ rc = pcmk__pid_active(child->pid, name); if ((ipc_pid != 0) && ((rc != pcmk_rc_ok) || (ipc_pid == PCMK__SPECIAL_PID) || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) { /* An unexpected (but authorized) process was detected at the IPC * endpoint, and either it is active, or the child we're tracking is * not. */ if (rc == pcmk_rc_ok) { /* The child we're tracking is active. Kill it, and adopt the * detected process. This assumes that our children don't fork (thus * getting a different PID owning the IPC), but rather the tracking * got out of sync because of some means external to Pacemaker, and * adopting the detected process is better than killing it and * possibly having to spawn a new child. */ // Not possessing IPC, after all (what about corosync CPG?) stop_child(child, SIGKILL); } child->pid = ipc_pid; return pcmk_rc_ok; } if (rc == pcmk_rc_ok) { // Our tracked child's PID was found active, but not its IPC return pcmk_rc_ipc_pid_only; } if ((child->pid == 0) && (rc == EINVAL)) { // FreeBSD can return EINVAL return pcmk_rc_ipc_unresponsive; } switch (rc) { case EACCES: return pcmk_rc_ipc_unauthorized; case ESRCH: return pcmk_rc_ipc_unresponsive; default: return rc; } } /*! * \internal * \brief Initial one-off check of the pre-existing "child" processes * * With "child" process, we mean the subdaemon that defines an API end-point * (all of them do as of the comment) -- the possible complement is skipped * as it is deemed it has no such shared resources to cause conflicts about, * hence it can presumably be started anew without hesitation. * If that won't hold true in the future, the concept of a shared resource * will have to be generalized beyond the API end-point. * * For boundary cases that the "child" is still starting (IPC end-point is yet * to be witnessed), or more rarely (practically FreeBSD only), when there's * a pre-existing "untrackable" authentic process, we give the situation some * time to possibly unfold in the right direction, meaning that said socket * will appear or the unattainable process will disappear per the observable * IPC, respectively. * * \return Standard Pacemaker return code * * \note Since this gets run at the very start, \c respawn_count fields * for particular children get temporarily overloaded with "rounds * of waiting" tracking, restored once we are about to finish with * success (i.e. returning value >=0) and will remain unrestored * otherwise. One way to suppress liveness detection logic for * particular child is to set the said value to a negative number. */ #define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ int find_and_track_existing_processes(void) { bool wait_in_progress; int rc; size_t i, rounds; for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { wait_in_progress = false; for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { const char *name = pcmk__server_name(pcmk_children[i].server); const char *ipc_name = NULL; if (pcmk_children[i].respawn_count < 0) { continue; } rc = child_liveness(&pcmk_children[i]); if (rc == pcmk_rc_ipc_unresponsive) { /* As a speculation, don't give up if there are more rounds to * come for other reasons, but don't artificially wait just * because of this, since we would preferably start ASAP. */ continue; } // @TODO Functionize more of this to reduce nesting ipc_name = pcmk__server_ipc_name(pcmk_children[i].server); pcmk_children[i].respawn_count = rounds; switch (rc) { case pcmk_rc_ok: if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { const char *fail_fast = pcmk__env_option(PCMK__ENV_FAIL_FAST); if (pcmk__is_true(fail_fast)) { - crm_crit("Cannot reliably track pre-existing" - " authentic process behind %s IPC on this" - " platform and PCMK_" PCMK__ENV_FAIL_FAST - " requested", ipc_name); + pcmk__crit("Cannot reliably track pre-existing " + "authentic process behind %s IPC on " + "this platform and " + "PCMK_" PCMK__ENV_FAIL_FAST " requested", + ipc_name); return EOPNOTSUPP; } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { crm_notice("Assuming pre-existing authentic, though" " on this platform untrackable, process" " behind %s IPC is stable (was in %d" " previous samples) so rather than" " bailing out (PCMK_" PCMK__ENV_FAIL_FAST " not requested), we just switch to a" " less optimal IPC liveness monitoring" " (not very suitable for heavy load)", name, WAIT_TRIES - 1); crm_warn("The process behind %s IPC cannot be" " terminated, so the overall shutdown" " will get delayed implicitly (%ld s)," " which serves as a graceful period for" " its native termination if it vitally" " depends on some other daemons going" " down in a controlled way already", name, (long) SHUTDOWN_ESCALATION_PERIOD); } else { wait_in_progress = true; crm_warn("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" " platform, can still disappear in %d" " attempt(s)", ipc_name, WAIT_TRIES - pcmk_children[i].respawn_count); continue; } } crm_notice("Tracking existing %s process (pid=%lld)", name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid)); pcmk_children[i].respawn_count = -1; /* 0~keep watching */ pcmk_children[i].flags |= child_active_before_startup; break; case pcmk_rc_ipc_pid_only: if (pcmk_children[i].respawn_count == WAIT_TRIES) { - crm_crit("%s IPC endpoint for existing authentic" - " process %lld did not (re)appear", - ipc_name, - (long long) PCMK__SPECIAL_PID_AS_0( - pcmk_children[i].pid)); + pid_t child_pid = + PCMK__SPECIAL_PID_AS_0(pcmk_children[i].pid); + + pcmk__crit("%s IPC endpoint for existing authentic" + " process %lld did not (re)appear", + ipc_name, (long long) child_pid); return rc; } wait_in_progress = true; crm_warn("Cannot find %s IPC endpoint for existing" " authentic process %lld, can still (re)appear" " in %d attempts (?)", ipc_name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid), WAIT_TRIES - pcmk_children[i].respawn_count); continue; default: - crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d", - name, pcmk_rc_str(rc), rc); + pcmk__crit("Checked liveness of %s: %s " QB_XS " rc=%d", + name, pcmk_rc_str(rc), rc); return rc; } } if (!wait_in_progress) { break; } pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen } for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { pcmk_children[i].respawn_count = 0; /* restore pristine state */ } pcmk__create_timer(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon, NULL); return pcmk_rc_ok; } gboolean init_children_processes(void *user_data) { if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { /* Corosync clusters can drop root group access, because we set * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect * to corosync. */ need_root_group = false; } /* start any children that have not been detected */ for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { if (pcmk_children[i].pid != 0) { /* we are already tracking it */ continue; } start_child(&(pcmk_children[i])); } /* From this point on, any daemons being started will be due to * respawning rather than node start. * * This may be useful for the daemons to know */ pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false); pacemakerd_state = PCMK__VALUE_RUNNING; return TRUE; } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } void restart_cluster_subdaemons(void) { for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { if (!pcmk__is_set(pcmk_children[i].flags, child_needs_retry) || (pcmk_children[i].pid != 0)) { continue; } crm_notice("Respawning cluster-based subdaemon %s", pcmk__server_name(pcmk_children[i].server)); if (start_child(&pcmk_children[i])) { pcmk_children[i].flags &= ~child_needs_retry; } } } static gboolean stop_child(pcmkd_child_t *child, int signal) { const char *name = pcmk__server_name(child->server); if (signal == 0) { signal = SIGTERM; } /* why to skip PID of 1? - FreeBSD ~ how untrackable process behind IPC is masqueraded as - elsewhere: how "init" task is designated; in particular, in systemd arrangement of socket-based activation, this is pretty real */ if (child->pid == PCMK__SPECIAL_PID) { crm_debug("Nothing to do to stop subdaemon %s[%lld]", name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); return TRUE; } if (child->pid <= 0) { crm_trace("Nothing to do to stop subdaemon %s: Not running", name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { crm_notice("Stopping subdaemon %s " QB_XS " via signal %d to process %lld", name, signal, (long long) child->pid); } else { crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s", name, (long long) child->pid, signal, strerror(errno)); } return TRUE; } diff --git a/include/crm/common/logging_internal.h b/include/crm/common/logging_internal.h index 390892f08b..68a7da4eda 100644 --- a/include/crm/common/logging_internal.h +++ b/include/crm/common/logging_internal.h @@ -1,248 +1,257 @@ /* * Copyright 2015-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_LOGGING_INTERNAL__H #define PCMK__CRM_COMMON_LOGGING_INTERNAL__H #include #include // pcmk__is_set() #include #include #ifdef __cplusplus extern "C" { #endif /*! * \internal * \brief Log a message at \c LOG_EMERG level * * \param[in] fmt \c printf() format string for log message * \param[in] args Format string arguments */ #define pcmk__emerg(fmt, args...) qb_log(LOG_EMERG, fmt, ##args) +/*! + * \internal + * \brief Log a message at \c LOG_CRIT level + * + * \param[in] fmt \c printf() format string for log message + * \param[in] args Format string arguments + */ +#define pcmk__crit(fmt, args...) qb_log(LOG_CRIT, fmt, ##args) + /* Some warnings are too noisy when logged every time a given function is called * (for example, using a deprecated feature). As an alternative, we allow * warnings to be logged once per invocation of the calling program. Each of * those warnings needs a flag defined here. */ enum pcmk__warnings { pcmk__wo_blind = (1 << 0), pcmk__wo_record_pending = (1 << 1), pcmk__wo_require_all = (1 << 4), pcmk__wo_order_score = (1 << 5), pcmk__wo_group_order = (1 << 11), pcmk__wo_group_coloc = (1 << 12), pcmk__wo_set_ordering = (1 << 15), pcmk__wo_rdisc_enabled = (1 << 16), pcmk__wo_op_attr_expr = (1 << 19), pcmk__wo_clone_master_max = (1 << 23), pcmk__wo_clone_master_node_max = (1 << 24), pcmk__wo_master_role = (1 << 26), pcmk__wo_slave_role = (1 << 27), }; /*! * \internal * \brief Log a warning once per invocation of calling program * * \param[in] wo_flag enum pcmk__warnings value for this warning * \param[in] fmt... printf(3)-style format and arguments */ #define pcmk__warn_once(wo_flag, fmt...) do { \ if (!pcmk__is_set(pcmk__warnings, wo_flag)) { \ if (wo_flag == pcmk__wo_blind) { \ crm_warn(fmt); \ } else { \ pcmk__config_warn(fmt); \ } \ pcmk__warnings = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Warn-once", "logging", \ pcmk__warnings, \ (wo_flag), #wo_flag); \ } \ } while (0) typedef void (*pcmk__config_error_func) (void *ctx, const char *msg, ...) G_GNUC_PRINTF(2, 3); typedef void (*pcmk__config_warning_func) (void *ctx, const char *msg, ...) G_GNUC_PRINTF(2, 3); extern pcmk__config_error_func pcmk__config_error_handler; extern pcmk__config_warning_func pcmk__config_warning_handler; extern void *pcmk__config_error_context; extern void *pcmk__config_warning_context; void pcmk__set_config_error_handler(pcmk__config_error_func error_handler, void *error_context); void pcmk__set_config_warning_handler(pcmk__config_warning_func warning_handler, void *warning_context); /* Pacemaker library functions set this when a configuration error is found, * which turns on extra messages at the end of processing. */ extern bool pcmk__config_has_error; /* Pacemaker library functions set this when a configuration warning is found, * which turns on extra messages at the end of processing. */ extern bool pcmk__config_has_warning; /*! * \internal * \brief Log an error and make crm_verify return failure status * * \param[in] fmt... printf(3)-style format string and arguments */ #define pcmk__config_err(fmt...) do { \ pcmk__config_has_error = true; \ if (pcmk__config_error_handler == NULL) { \ crm_err(fmt); \ } else { \ pcmk__config_error_handler(pcmk__config_error_context, fmt); \ } \ } while (0) /*! * \internal * \brief Log a warning and make crm_verify return failure status * * \param[in] fmt... printf(3)-style format string and arguments */ #define pcmk__config_warn(fmt...) do { \ pcmk__config_has_warning = true; \ if (pcmk__config_warning_handler == NULL) { \ crm_warn(fmt); \ } else { \ pcmk__config_warning_handler(pcmk__config_warning_context, fmt);\ } \ } while (0) /*! * \internal * \brief Execute code depending on whether trace logging is enabled * * This is similar to \p do_crm_log_unlikely() except instead of logging, it * selects one of two code blocks to execute. * * \param[in] if_action Code block to execute if trace logging is enabled * \param[in] else_action Code block to execute if trace logging is not enabled * * \note Neither \p if_action nor \p else_action can contain a \p break or * \p continue statement. */ #define pcmk__if_tracing(if_action, else_action) do { \ static struct qb_log_callsite *trace_cs = NULL; \ \ if (trace_cs == NULL) { \ trace_cs = qb_log_callsite_get(__func__, __FILE__, \ "if_tracing", LOG_TRACE, \ __LINE__, crm_trace_nonlog); \ } \ if (crm_is_callsite_active(trace_cs, LOG_TRACE, \ crm_trace_nonlog)) { \ if_action; \ } else { \ else_action; \ } \ } while (0) /*! * \internal * \brief Log XML changes line-by-line in a formatted fashion * * \param[in] level Priority at which to log the messages * \param[in] xml XML to log * * \note This does nothing when \p level is \c LOG_STDOUT. */ #define pcmk__log_xml_changes(level, xml) do { \ uint8_t _level = pcmk__clip_log_level(level); \ static struct qb_log_callsite *xml_cs = NULL; \ \ switch (_level) { \ case LOG_STDOUT: \ case LOG_NEVER: \ break; \ default: \ if (xml_cs == NULL) { \ xml_cs = qb_log_callsite_get(__func__, __FILE__, \ "xml-changes", _level, \ __LINE__, 0); \ } \ if (crm_is_callsite_active(xml_cs, _level, 0)) { \ pcmk__log_xml_changes_as(__FILE__, __func__, __LINE__, \ 0, _level, xml); \ } \ break; \ } \ } while(0) /*! * \internal * \brief Log an XML patchset line-by-line in a formatted fashion * * \param[in] level Priority at which to log the messages * \param[in] patchset XML patchset to log * * \note This does nothing when \p level is \c LOG_STDOUT. */ #define pcmk__log_xml_patchset(level, patchset) do { \ uint8_t _level = pcmk__clip_log_level(level); \ static struct qb_log_callsite *xml_cs = NULL; \ \ switch (_level) { \ case LOG_STDOUT: \ case LOG_NEVER: \ break; \ default: \ if (xml_cs == NULL) { \ xml_cs = qb_log_callsite_get(__func__, __FILE__, \ "xml-patchset", _level, \ __LINE__, 0); \ } \ if (crm_is_callsite_active(xml_cs, _level, 0)) { \ pcmk__log_xml_patchset_as(__FILE__, __func__, __LINE__, \ 0, _level, patchset); \ } \ break; \ } \ } while(0) void pcmk__log_xml_changes_as(const char *file, const char *function, uint32_t line, uint32_t tags, uint8_t level, const xmlNode *xml); void pcmk__log_xml_patchset_as(const char *file, const char *function, uint32_t line, uint32_t tags, uint8_t level, const xmlNode *patchset); /*! * \internal * \brief Initialize logging for command line tools * * \param[in] name The name of the program * \param[in] verbosity How verbose to be in logging * * \note \p verbosity is not the same as the logging level (LOG_ERR, etc.). */ void pcmk__cli_init_logging(const char *name, unsigned int verbosity); int pcmk__add_logfile(const char *filename); void pcmk__add_logfiles(gchar **log_files, pcmk__output_t *out); void pcmk__free_common_logger(void); #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_LOGGING_INTERNAL__H diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index bf33613651..20e6b4e23b 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,821 +1,821 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include // PRIu64, etc. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // PCMK__SPECIAL_PID #include #include #include "crmcluster_private.h" static quorum_handle_t pcmk_quorum_handle = 0; static gboolean (*quorum_app_callback)(unsigned long long seq, gboolean quorate) = NULL; /*! * \internal * \brief Get the Corosync UUID associated with a Pacemaker node * * \param[in] node Pacemaker node * * \return Newly allocated string with node's Corosync UUID, or NULL if unknown * \note It is the caller's responsibility to free the result with free(). */ char * pcmk__corosync_uuid(const pcmk__node_status_t *node) { pcmk__assert(pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync); if (node != NULL) { if (node->cluster_layer_id > 0) { return pcmk__assert_asprintf("%" PRIu32, node->cluster_layer_id); } else { crm_info("Node %s is not yet known by Corosync", node->name); } } return NULL; } static bool node_name_is_valid(const char *key, const char *name) { int octet; if (name == NULL) { crm_trace("%s is empty", key); return false; } else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) { crm_trace("%s contains an IPv4 address (%s), ignoring", key, name); return false; } else if (strstr(name, ":") != NULL) { crm_trace("%s contains an IPv6 address (%s), ignoring", key, name); return false; } crm_trace("'%s: %s' is valid", key, name); return true; } /* * \internal * \brief Get Corosync node name corresponding to a node ID * * \param[in] cmap_handle Connection to Corosync CMAP * \param[in] nodeid Node ID to check * * \return Newly allocated string with name or (if no name) IP address * associated with first address assigned to a Corosync node ID (or NULL * if unknown) * \note It is the caller's responsibility to free the result with free(). */ char * pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) { // Originally based on corosync-quorumtool.c:node_name() int lpc = 0; cs_error_t rc = CS_OK; int retries = 0; char *name = NULL; cmap_handle_t local_handle = 0; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; if (nodeid == 0) { nodeid = pcmk__cpg_local_nodeid(0); } if (cmap_handle == 0 && local_handle == 0) { retries = 0; crm_trace("Initializing CMAP connection"); do { rc = pcmk__init_cmap(&local_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %s", cs_strerror(rc)); local_handle = 0; } } if (cmap_handle == 0) { cmap_handle = local_handle; rc = cmap_fd_get(cmap_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } } while (name == NULL && cmap_handle != 0) { uint32_t id = 0; char *key = NULL; key = pcmk__assert_asprintf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &id); crm_trace("Checking %u vs %u from %s", nodeid, id, key); free(key); if (rc != CS_OK) { break; } if (nodeid == id) { crm_trace("Searching for node name for %u in nodelist.node.%d %s", nodeid, lpc, pcmk__s(name, "")); if (name == NULL) { key = pcmk__assert_asprintf("nodelist.node.%d.name", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, pcmk__s(name, "")); free(key); } if (name == NULL) { key = pcmk__assert_asprintf("nodelist.node.%d.ring0_addr", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, pcmk__s(name, "")); if (!node_name_is_valid(key, name)) { free(name); name = NULL; } free(key); } break; } lpc++; } bail: if(local_handle) { cmap_finalize(local_handle); } if (name == NULL) { crm_info("Unable to get node name for nodeid %u", nodeid); } return name; } /*! * \internal * \brief Disconnect from Corosync cluster * * \param[in,out] cluster Cluster object to disconnect */ void pcmk__corosync_disconnect(pcmk_cluster_t *cluster) { pcmk__cpg_disconnect(cluster); if (pcmk_quorum_handle != 0) { quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; } crm_notice("Disconnected from Corosync"); } /*! * \internal * \brief Dispatch function for quorum connection file descriptor * * \param[in] user_data Ignored * * \return 0 on success, -1 on error (per mainloop_io_t interface) */ static int quorum_dispatch_cb(gpointer user_data) { int rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; return -1; } return 0; } /*! * \internal * \brief Notification callback for Corosync quorum connection * * \param[in] handle Corosync quorum connection * \param[in] quorate Whether cluster is quorate * \param[in] ring_id Corosync ring ID * \param[in] view_list_entries Number of entries in \p view_list * \param[in] view_list Corosync node IDs in membership */ static void quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t *view_list) { int i; GHashTableIter iter; pcmk__node_status_t *node = NULL; static gboolean init_phase = TRUE; bool is_quorate = (quorate != 0); bool was_quorate = pcmk__cluster_has_quorum(); if (is_quorate && !was_quorate) { crm_notice("Quorum acquired " QB_XS " membership=%" PRIu64 " members=%" PRIu32, ring_id, view_list_entries); pcmk__cluster_set_quorum(true); } else if (!is_quorate && was_quorate) { crm_warn("Quorum lost " QB_XS " membership=%" PRIu64 " members=" PRIu32, ring_id, view_list_entries); pcmk__cluster_set_quorum(false); } else { crm_info("Quorum %s " QB_XS " membership=%" PRIu64 " members=%" PRIu32, (is_quorate? "retained" : "still lost"), ring_id, view_list_entries); } if (view_list_entries == 0 && init_phase) { crm_info("Corosync membership is still forming, ignoring"); return; } init_phase = FALSE; /* Reset membership_id for all cached nodes so we can tell which ones aren't * in the view list */ g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { node->membership_id = 0; } /* Update the peer cache for each node in view list */ for (i = 0; i < view_list_entries; i++) { uint32_t id = view_list[i]; crm_debug("Member[%d] %u ", i, id); /* Get this node's peer cache entry (adding one if not already there) */ node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member); if (node->name == NULL) { char *name = pcmk__corosync_name(0, id); crm_info("Obtaining name for new node %u", id); node = pcmk__get_node(id, name, NULL, pcmk__node_search_cluster_member); free(name); } // Update the node state (including updating membership_id to ring_id) pcmk__update_peer_state(__func__, node, PCMK_VALUE_MEMBER, ring_id); } /* Remove any peer cache entries we didn't update */ pcmk__reap_unseen_nodes(ring_id); if (quorum_app_callback) { quorum_app_callback(ring_id, is_quorate); } } /*! * \internal * \brief Connect to Corosync quorum service * * \param[in] dispatch Connection dispatch callback * \param[in] destroy Connection destroy callback */ void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy)(gpointer)) { cs_error_t rc; int fd = 0; int quorate = 0; uint32_t quorum_type = 0; struct mainloop_fd_callbacks quorum_fd_callbacks; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; quorum_fd_callbacks.dispatch = quorum_dispatch_cb; quorum_fd_callbacks.destroy = destroy; crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); { #if 0 // New way but not supported by all Corosync 2 versions quorum_model_v0_data_t quorum_model_data = { .model = QUORUM_MODEL_V0, .quorum_notify_fn = quorum_notification_cb, }; rc = quorum_model_initialize(&pcmk_quorum_handle, QUORUM_MODEL_V0, (quorum_model_data_t *) &quorum_model_data, &quorum_type, NULL); #else quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = quorum_notification_cb, }; rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); #endif } if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %s (%d)", cs_strerror(rc), rc); goto bail; } else if (quorum_type != QUORUM_SET) { crm_err("Corosync quorum is not configured"); goto bail; } rc = quorum_fd_get(pcmk_quorum_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the Quorum API connection: %s (%d)", strerror(rc), rc); goto bail; } /* Quorum provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("Quorum provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = CS_ERR_ACCESS; goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of Quorum provider: %s (%d)", strerror(-rv), -rv); rc = CS_ERR_ACCESS; goto bail; } rc = quorum_getquorate(pcmk_quorum_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d", rc); goto bail; } if (quorate) { crm_notice("Quorum acquired"); } else { crm_warn("No quorum"); } quorum_app_callback = dispatch; pcmk__cluster_set_quorum(quorate != 0); rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); if (rc != CS_OK) { crm_err("Could not setup Quorum API notifications: %d", rc); goto bail; } mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks); pcmk__corosync_add_nodes(NULL); bail: if (rc != CS_OK) { quorum_finalize(pcmk_quorum_handle); } } /*! * \internal * \brief Connect to Corosync cluster layer * * \param[in,out] cluster Initialized cluster object to connect * * \return Standard Pacemaker return code */ int pcmk__corosync_connect(pcmk_cluster_t *cluster) { const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); pcmk__node_status_t *local_node = NULL; int rc = pcmk_rc_ok; pcmk__cluster_init_node_caches(); if (cluster_layer != pcmk_cluster_layer_corosync) { crm_err("Invalid cluster layer: %s " QB_XS " cluster_layer=%d", cluster_layer_s, cluster_layer); return EINVAL; } rc = pcmk__cpg_connect(cluster); if (rc != pcmk_rc_ok) { // Error message was logged by pcmk__cpg_connect() return rc; } crm_info("Connection to %s established", cluster_layer_s); cluster->priv->node_id = pcmk__cpg_local_nodeid(0); if (cluster->priv->node_id == 0) { crm_err("Could not determine local node ID"); return ENXIO; } cluster->priv->node_name = pcmk__cluster_node_name(0); if (cluster->priv->node_name == NULL) { crm_err("Could not determine local node name"); return ENXIO; } // Ensure local node always exists in peer cache local_node = pcmk__get_node(cluster->priv->node_id, cluster->priv->node_name, NULL, pcmk__node_search_cluster_member); cluster->priv->node_xml_id = pcmk__corosync_uuid(local_node); CRM_LOG_ASSERT(cluster->priv->node_xml_id != NULL); return pcmk_rc_ok; } /*! * \internal * \brief Check whether a Corosync cluster is active * * \return \c true if Corosync is found active, or \c false otherwise */ bool pcmk__corosync_is_active(void) { cmap_handle_t handle; int rc = pcmk__init_cmap(&handle); if (rc == CS_OK) { cmap_finalize(handle); return true; } crm_info("Failed to initialize the cmap API: %s (%d)", pcmk__cs_err_str(rc), rc); return false; } /*! * \internal * \brief Check whether a Corosync cluster peer is active * * \param[in] node Node to check * * \return \c true if \p node is an active Corosync peer, or \c false otherwise */ bool pcmk__corosync_is_peer_active(const pcmk__node_status_t *node) { if (node == NULL) { crm_trace("Corosync peer inactive: NULL"); return false; } if (!pcmk__str_eq(node->state, PCMK_VALUE_MEMBER, pcmk__str_none)) { crm_trace("Corosync peer %s inactive: state=%s", node->name, node->state); return false; } if (!pcmk__is_set(node->processes, crm_proc_cpg)) { crm_trace("Corosync peer %s inactive " QB_XS " processes=%.16" PRIx32, node->name, node->processes); return false; } return true; } /*! * \internal * \brief Load Corosync node list (via CMAP) into peer cache and optionally XML * * \param[in,out] xml_parent If not NULL, add entry here for each node * * \return true if any nodes were found, false otherwise */ bool pcmk__corosync_add_nodes(xmlNode *xml_parent) { int lpc = 0; cs_error_t rc = CS_OK; int retries = 0; bool any = false; cmap_handle_t cmap_handle; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; do { rc = pcmk__init_cmap(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return false; } rc = cmap_fd_get(cmap_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } pcmk__cluster_init_node_caches(); crm_trace("Initializing Corosync node list"); for (lpc = 0; TRUE; lpc++) { uint32_t nodeid = 0; char *name = NULL; char *key = NULL; key = pcmk__assert_asprintf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &nodeid); free(key); if (rc != CS_OK) { break; } name = pcmk__corosync_name(cmap_handle, nodeid); if (name != NULL) { GHashTableIter iter; pcmk__node_status_t *node = NULL; g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if ((node != NULL) && (node->cluster_layer_id > 0) && (node->cluster_layer_id != nodeid) && pcmk__str_eq(node->name, name, pcmk__str_casei)) { - crm_crit("Nodes %" PRIu32 " and %" PRIu32 " share the " - "same name '%s': shutting down", - node->cluster_layer_id, nodeid, name); + pcmk__crit("Nodes %" PRIu32 " and %" PRIu32 " share the " + "same name '%s': shutting down", + node->cluster_layer_id, nodeid, name); crm_exit(CRM_EX_FATAL); } } } if (nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster_member); } if (nodeid > 0 && name != NULL) { any = true; if (xml_parent) { xmlNode *node = pcmk__xe_create(xml_parent, PCMK_XE_NODE); pcmk__xe_set_ll(node, PCMK_XA_ID, (long long) nodeid); pcmk__xe_set(node, PCMK_XA_UNAME, name); } } free(name); } bail: cmap_finalize(cmap_handle); return any; } /*! * \internal * \brief Get cluster name from Corosync configuration (via CMAP) * * \return Newly allocated string with cluster name if configured, or NULL */ char * pcmk__corosync_cluster_name(void) { cmap_handle_t handle; char *cluster_name = NULL; cs_error_t rc = CS_OK; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; rc = pcmk__init_cmap(&handle); if (rc != CS_OK) { crm_info("Failed to initialize the cmap API: %s (%d)", cs_strerror(rc), rc); return NULL; } rc = cmap_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name); if (rc != CS_OK) { crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc); } else { crm_debug("cmap totem.cluster_name = '%s'", cluster_name); } bail: cmap_finalize(handle); return cluster_name; } /*! * \internal * \brief Check (via CMAP) whether Corosync configuration has a node list * * \return true if Corosync has node list, otherwise false */ bool pcmk__corosync_has_nodelist(void) { cs_error_t cs_rc = CS_OK; int retries = 0; cmap_handle_t cmap_handle; cmap_iter_handle_t iter_handle; char key_name[CMAP_KEYNAME_MAXLEN + 1]; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rc = pcmk_ok; static bool got_result = false; static bool result = false; if (got_result) { return result; } // Connect to CMAP do { cs_rc = pcmk__init_cmap(&cmap_handle); if (cs_rc != CS_OK) { retries++; crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)", cs_strerror(cs_rc), cs_rc, retries); sleep(retries); } } while ((retries < 5) && (cs_rc != CS_OK)); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP connection failed (%s) " QB_XS " rc=%d", cs_strerror(cs_rc), cs_rc); return false; } // Get CMAP connection file descriptor cs_rc = cmap_fd_get(cmap_handle, &fd); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP unusable (%s) " QB_XS " rc=%d", cs_strerror(cs_rc), cs_rc); goto bail; } // Check whether CMAP connection is authentic (i.e. provided by root) rc = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid, &found_uid, &found_gid); if (rc == 0) { crm_warn("Assuming Corosync does not have node list: " "CMAP provider is inauthentic " QB_XS " pid=%lld uid=%lld gid=%lld", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rc < 0) { crm_warn("Assuming Corosync does not have node list: " "Could not verify CMAP authenticity (%s) " QB_XS " rc=%d", pcmk_strerror(rc), rc); goto bail; } // Check whether nodelist section is presetn cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP not readable (%s) " QB_XS " rc=%d", cs_strerror(cs_rc), cs_rc); goto bail; } cs_rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL); if (cs_rc == CS_OK) { result = true; } cmap_iter_finalize(cmap_handle, iter_handle); got_result = true; crm_debug("Corosync %s node list", (result? "has" : "does not have")); bail: cmap_finalize(cmap_handle); return result; } diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index c134ef60a9..269c11d032 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,1519 +1,1519 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // PRIu32 #include // bool #include #include #include #include #include #include #include #include #include #include #include #include "crmcluster_private.h" /* The peer cache remembers cluster nodes that have been seen. This is managed * mostly automatically by libcrmcluster, based on cluster membership events. * * Because cluster nodes can have conflicting names or UUIDs, the hash table key * is a uniquely generated ID. * * @TODO Move caches to pcmk_cluster_t */ GHashTable *pcmk__peer_cache = NULL; /* The remote peer cache tracks pacemaker_remote nodes. While the * value has the same type as the peer cache's, it is tracked separately for * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs, * so the name (which is also the UUID) is used as the hash table key; there * is no equivalent of membership events, so management is not automatic; and * most users of the peer cache need to exclude pacemaker_remote nodes. * * @TODO That said, using a single cache would be more logical and less * error-prone, so it would be a good idea to merge them one day. * * libcrmcluster provides two avenues for populating the cache: * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node() * directly manage it, while refresh_remote_nodes() populates it via the CIB. * * @TODO Move caches to pcmk_cluster_t */ GHashTable *pcmk__remote_peer_cache = NULL; /* * The CIB cluster node cache tracks cluster nodes that have been seen in * the CIB. It is useful mainly when a caller needs to know about a node that * may no longer be in the membership, but doesn't want to add the node to the * main peer cache tables. */ static GHashTable *cluster_node_cib_cache = NULL; static bool autoreap = true; static bool has_quorum = false; // Flag setting and clearing for pcmk__node_status_t:flags #define set_peer_flags(peer, flags_to_set) do { \ (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Peer", (peer)->name, \ (peer)->flags, (flags_to_set), \ #flags_to_set); \ } while (0) #define clear_peer_flags(peer, flags_to_clear) do { \ (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Peer", (peer)->name, \ (peer)->flags, (flags_to_clear), \ #flags_to_clear); \ } while (0) static void update_peer_uname(pcmk__node_status_t *node, const char *uname); static pcmk__node_status_t *find_cib_cluster_node(const char *id, const char *uname); /*! * \internal * \brief Check whether the cluster currently has quorum * * \return \c true if the cluster has quorum, or \c false otherwise */ bool pcmk__cluster_has_quorum(void) { return has_quorum; } /*! * \internal * \brief Set whether the cluster currently has quorum * * \param[in] quorate \c true if the cluster has quorum, or \c false otherwise */ void pcmk__cluster_set_quorum(bool quorate) { has_quorum = quorate; } /*! * \internal * \brief Get the number of Pacemaker Remote nodes that have been seen * * \return Number of cached Pacemaker Remote nodes */ unsigned int pcmk__cluster_num_remote_nodes(void) { if (pcmk__remote_peer_cache == NULL) { return 0U; } return g_hash_table_size(pcmk__remote_peer_cache); } /*! * \internal * \brief Get a remote node cache entry, creating it if necessary * * \param[in] node_name Name of remote node * * \return Cache entry for node on success, or \c NULL (and set \c errno) * otherwise * * \note When creating a new entry, this will leave the node state undetermined. * The caller should also call \c pcmk__update_peer_state() if the state * is known. * \note Because this can add and remove cache entries, callers should not * assume any previously obtained cache entry pointers remain valid. */ pcmk__node_status_t * pcmk__cluster_lookup_remote_node(const char *node_name) { pcmk__node_status_t *node = NULL; char *node_name_copy = NULL; if (node_name == NULL) { errno = EINVAL; return NULL; } /* It's theoretically possible that the node was added to the cluster peer * cache before it was known to be a Pacemaker Remote node. Remove that * entry unless it has an XML ID, which means the name actually is * associated with a cluster node. (@TODO return an error in that case?) */ node = pcmk__search_node_caches(0, node_name, NULL, pcmk__node_search_cluster_member); if ((node != NULL) && ((node->xml_id == NULL) /* This assumes only Pacemaker Remote nodes have their XML ID the * same as their node name */ || pcmk__str_eq(node->name, node->xml_id, pcmk__str_none))) { /* node_name could be a pointer into the cache entry being removed, so * reassign it to a copy before the original gets freed */ node_name_copy = strdup(node_name); if (node_name_copy == NULL) { errno = ENOMEM; return NULL; } node_name = node_name_copy; pcmk__cluster_forget_cluster_node(0, node_name); } /* Return existing cache entry if one exists */ node = g_hash_table_lookup(pcmk__remote_peer_cache, node_name); if (node) { free(node_name_copy); return node; } /* Allocate a new entry */ node = calloc(1, sizeof(pcmk__node_status_t)); if (node == NULL) { free(node_name_copy); return NULL; } /* Populate the essential information */ set_peer_flags(node, pcmk__node_status_remote); node->xml_id = strdup(node_name); if (node->xml_id == NULL) { free(node); errno = ENOMEM; free(node_name_copy); return NULL; } /* Add the new entry to the cache */ g_hash_table_replace(pcmk__remote_peer_cache, node->xml_id, node); crm_trace("added %s to remote cache", node_name); /* Update the entry's uname, ensuring peer status callbacks are called */ update_peer_uname(node, node_name); free(node_name_copy); return node; } /*! * \internal * \brief Remove a node from the Pacemaker Remote node cache * * \param[in] node_name Name of node to remove from cache * * \note The caller must be careful not to use \p node_name after calling this * function if it might be a pointer into the cache entry being removed. */ void pcmk__cluster_forget_remote_node(const char *node_name) { /* Do a lookup first, because node_name could be a pointer within the entry * being removed -- we can't log it *after* removing it. */ if (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL) { crm_trace("Removing %s from Pacemaker Remote node cache", node_name); g_hash_table_remove(pcmk__remote_peer_cache, node_name); } } /*! * \internal * \brief Return node status based on a CIB status entry * * \param[in] node_state XML of node state * * \return \c PCMK_VALUE_MEMBER if \c PCMK__XA_IN_CCM is true in * \c PCMK__XE_NODE_STATE, or \c PCMK__VALUE_LOST otherwise */ static const char * remote_state_from_cib(const xmlNode *node_state) { bool in_ccm = false; if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM, &in_ccm) == pcmk_rc_ok) && in_ccm) { return PCMK_VALUE_MEMBER; } return PCMK__VALUE_LOST; } /* user data for looping through remote node xpath searches */ struct refresh_data { const char *field; /* XML attribute to check for node name */ gboolean has_state; /* whether to update node state based on XML */ }; /*! * \internal * \brief Process one pacemaker_remote node xpath search result * * \param[in] result XML search result * \param[in] user_data what to look for in the XML */ static void remote_cache_refresh_helper(xmlNode *result, void *user_data) { const struct refresh_data *data = user_data; const char *remote = pcmk__xe_get(result, data->field); const char *state = NULL; pcmk__node_status_t *node; CRM_CHECK(remote != NULL, return); /* Determine node's state, if the result has it */ if (data->has_state) { state = remote_state_from_cib(result); } /* Check whether cache already has entry for node */ node = g_hash_table_lookup(pcmk__remote_peer_cache, remote); if (node == NULL) { /* Node is not in cache, so add a new entry for it */ node = pcmk__cluster_lookup_remote_node(remote); pcmk__assert(node != NULL); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } else if (pcmk__is_set(node->flags, pcmk__node_status_dirty)) { /* Node is in cache and hasn't been updated already, so mark it clean */ clear_peer_flags(node, pcmk__node_status_dirty); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } } static void mark_dirty(gpointer key, gpointer value, gpointer user_data) { set_peer_flags((pcmk__node_status_t *) value, pcmk__node_status_dirty); } static gboolean is_dirty(gpointer key, gpointer value, gpointer user_data) { const pcmk__node_status_t *node = value; return pcmk__is_set(node->flags, pcmk__node_status_dirty); } /*! * \internal * \brief Repopulate the remote node cache based on CIB XML * * \param[in] cib CIB XML to parse */ static void refresh_remote_nodes(xmlNode *cib) { struct refresh_data data; pcmk__cluster_init_node_caches(); /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. * We don't empty the cache, because we need to detect changes in state. */ g_hash_table_foreach(pcmk__remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ data.field = PCMK_XA_ID; data.has_state = TRUE; pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); /* Look for guest nodes and remote nodes in the configuration section, * because they may have just been added and not have a status entry yet. * In that case, the cached node state will be left NULL, so that the * peer status callback isn't called until we're sure the node started * successfully. */ data.field = PCMK_XA_VALUE; data.has_state = FALSE; pcmk__xpath_foreach_result(cib->doc, PCMK__XP_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); data.field = PCMK_XA_ID; data.has_state = FALSE; pcmk__xpath_foreach_result(cib->doc, PCMK__XP_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(pcmk__remote_peer_cache, is_dirty, NULL); } /*! * \internal * \brief Check whether a node is an active cluster node * * Remote nodes are never considered active. This guarantees that they can never * become DC. * * \param[in] node Node to check * * \return \c true if the node is an active cluster node, or \c false otherwise */ bool pcmk__cluster_is_node_active(const pcmk__node_status_t *node) { const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); if ((node == NULL) || pcmk__is_set(node->flags, pcmk__node_status_remote)) { return false; } switch (cluster_layer) { case pcmk_cluster_layer_corosync: #if SUPPORT_COROSYNC return pcmk__corosync_is_peer_active(node); #else break; #endif // SUPPORT_COROSYNC default: break; } crm_err("Unhandled cluster layer: %s", pcmk_cluster_layer_text(cluster_layer)); return false; } /*! * \internal * \brief Check if a node's entry should be removed from the cluster node cache * * A node should be removed from the cache if it's inactive and matches another * \c pcmk__node_status_t (the search object). The node is considered a * mismatch if any of the following are true: * * The search object is \c NULL. * * The search object has an ID set and the cached node's ID does not match it. * * The search object does not have an ID set, and the cached node's name does * not match the search node's name. (If both names are \c NULL, it's a * match.) * * Otherwise, the node is considered a match. * * Note that if the search object has both an ID and a name set, the name is * ignored for matching purposes. * * \param[in] key Ignored * \param[in] value \c pcmk__node_status_t object from cluster node cache * \param[in] user_data \c pcmk__node_status_t object to match against (search * object) * * \return \c TRUE if the node entry should be removed from \c pcmk__peer_cache, * or \c FALSE otherwise */ static gboolean should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data) { pcmk__node_status_t *node = value; pcmk__node_status_t *search = user_data; if (search == NULL) { return FALSE; } if ((search->cluster_layer_id != 0) && (node->cluster_layer_id != search->cluster_layer_id)) { return FALSE; } if ((search->cluster_layer_id == 0) && !pcmk__str_eq(node->name, search->name, pcmk__str_casei)) { // @TODO Consider name even if ID is set? return FALSE; } if (pcmk__cluster_is_node_active(value)) { return FALSE; } crm_info("Removing node with name %s and cluster layer ID %" PRIu32 " from membership cache", pcmk__s(node->name, "(unknown)"), node->cluster_layer_id); return TRUE; } /*! * \internal * \brief Remove one or more inactive nodes from the cluster node cache * * All inactive nodes matching \p id and \p node_name as described in * \c should_forget_cluster_node documentation are removed from the cache. * * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed * from the cache regardless of ID and name. This differs from clearing the * cache, in that entries for active nodes are preserved. * * \param[in] id ID of node to remove from cache (0 to ignore) * \param[in] node_name Name of node to remove from cache (ignored if \p id is * nonzero) * * \note \p node_name is not modified directly, but it will be freed if it's a * pointer into a cache entry that is removed. */ void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name) { pcmk__node_status_t search = { 0, }; char *criterion = NULL; // For logging guint matches = 0; if (pcmk__peer_cache == NULL) { crm_trace("Membership cache not initialized, ignoring removal request"); return; } search.cluster_layer_id = id; search.name = pcmk__str_copy(node_name); // May log after original freed if (id > 0) { criterion = pcmk__assert_asprintf("cluster layer ID %" PRIu32, id); } else if (node_name != NULL) { criterion = pcmk__assert_asprintf("name %s", node_name); } matches = g_hash_table_foreach_remove(pcmk__peer_cache, should_forget_cluster_node, &search); if (matches > 0) { if (criterion != NULL) { crm_notice("Removed %u inactive node%s with %s from the membership " "cache", matches, pcmk__plural_s(matches), criterion); } else { crm_notice("Removed all (%u) inactive cluster nodes from the " "membership cache", matches); } } else { crm_info("No inactive cluster nodes%s%s to remove from the membership " "cache", ((criterion != NULL)? " with " : ""), pcmk__s(criterion, "")); } free(search.name); free(criterion); } static void count_peer(gpointer key, gpointer value, gpointer user_data) { unsigned int *count = user_data; pcmk__node_status_t *node = value; if (pcmk__cluster_is_node_active(node)) { *count = *count + 1; } } /*! * \internal * \brief Get the number of active cluster nodes that have been seen * * Remote nodes are never considered active. This guarantees that they can never * become DC. * * \return Number of active nodes in the cluster node cache */ unsigned int pcmk__cluster_num_active_nodes(void) { unsigned int count = 0; if (pcmk__peer_cache != NULL) { g_hash_table_foreach(pcmk__peer_cache, count_peer, &count); } return count; } static void destroy_crm_node(gpointer data) { pcmk__node_status_t *node = data; crm_trace("Destroying entry for node %" PRIu32 ": %s", node->cluster_layer_id, node->name); free(node->name); free(node->state); free(node->xml_id); free(node->user_data); free(node->expected); free(node->conn_host); free(node); } /*! * \internal * \brief Initialize node caches */ void pcmk__cluster_init_node_caches(void) { if (pcmk__peer_cache == NULL) { pcmk__peer_cache = pcmk__strikey_table(free, destroy_crm_node); } if (pcmk__remote_peer_cache == NULL) { pcmk__remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node); } if (cluster_node_cib_cache == NULL) { cluster_node_cib_cache = pcmk__strikey_table(free, destroy_crm_node); } } /*! * \internal * \brief Initialize node caches */ void pcmk__cluster_destroy_node_caches(void) { if (pcmk__peer_cache != NULL) { crm_trace("Destroying peer cache with %d members", g_hash_table_size(pcmk__peer_cache)); g_hash_table_destroy(pcmk__peer_cache); pcmk__peer_cache = NULL; } if (pcmk__remote_peer_cache != NULL) { crm_trace("Destroying remote peer cache with %d members", pcmk__cluster_num_remote_nodes()); g_hash_table_destroy(pcmk__remote_peer_cache); pcmk__remote_peer_cache = NULL; } if (cluster_node_cib_cache != NULL) { crm_trace("Destroying configured cluster node cache with %d members", g_hash_table_size(cluster_node_cib_cache)); g_hash_table_destroy(cluster_node_cib_cache); cluster_node_cib_cache = NULL; } } static void (*peer_status_callback)(enum pcmk__node_update, pcmk__node_status_t *, const void *) = NULL; /*! * \internal * \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * * \note Client callbacks should do only client-specific handling. Callbacks * must not add or remove entries in the peer caches. */ void pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update, pcmk__node_status_t *, const void *)) { // @TODO Improve documentation of peer_status_callback peer_status_callback = dispatch; } /*! * \internal * \brief Tell the library whether to automatically reap lost nodes * * If \c true (the default), calling \c crm_update_peer_proc() will also update * the peer state to \c PCMK_VALUE_MEMBER or \c PCMK__VALUE_LOST, and updating * the peer state will reap peers whose state changes to anything other than * \c PCMK_VALUE_MEMBER. * * Callers should leave this enabled unless they plan to manage the cache * separately on their own. * * \param[in] enable \c true to enable automatic reaping, \c false to disable */ void pcmk__cluster_set_autoreap(bool enable) { autoreap = enable; } static void dump_peer_hash(int level, const char *caller) { GHashTableIter iter; const char *id = NULL; pcmk__node_status_t *node = NULL; g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { do_crm_log(level, "%s: Node %" PRIu32 "/%s = %p - %s", caller, node->cluster_layer_id, node->name, node, id); } } static gboolean hash_find_by_data(gpointer key, gpointer value, gpointer user_data) { return value == user_data; } /*! * \internal * \brief Search cluster member node cache * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster * node ID to search for * * \return Cluster node cache entry if found, otherwise NULL */ static pcmk__node_status_t * search_cluster_member_cache(unsigned int id, const char *uname, const char *uuid) { GHashTableIter iter; pcmk__node_status_t *node = NULL; pcmk__node_status_t *by_id = NULL; pcmk__node_status_t *by_name = NULL; pcmk__assert((id > 0) || (uname != NULL)); pcmk__cluster_init_node_caches(); if (uname != NULL) { g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) { crm_trace("Name match: %s", node->name); by_name = node; break; } } } if (id > 0) { g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (node->cluster_layer_id == id) { crm_trace("ID match: %" PRIu32, node->cluster_layer_id); by_id = node; break; } } } else if (uuid != NULL) { g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { const char *this_xml_id = pcmk__cluster_get_xml_id(node); if (pcmk__str_eq(uuid, this_xml_id, pcmk__str_none)) { crm_trace("Found cluster node cache entry by XML ID %s", this_xml_id); by_id = node; break; } } } node = by_id; /* Good default */ if(by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); } else if(by_id == NULL && by_name) { crm_trace("Only one: %p for %u/%s", by_name, id, uname); if (id && by_name->cluster_layer_id) { dump_peer_hash(LOG_WARNING, __func__); - crm_crit("Nodes %u and %" PRIu32 " share the same name '%s'", - id, by_name->cluster_layer_id, uname); + pcmk__crit("Nodes %u and %" PRIu32 " share the same name '%s'", + id, by_name->cluster_layer_id, uname); node = NULL; /* Create a new one */ } else { node = by_name; } } else if(by_name == NULL && by_id) { crm_trace("Only one: %p for %u/%s", by_id, id, uname); if ((uname != NULL) && (by_id->name != NULL)) { dump_peer_hash(LOG_WARNING, __func__); - crm_crit("Nodes '%s' and '%s' share the same cluster nodeid %u: " - "assuming '%s' is correct", - uname, by_id->name, id, uname); + pcmk__crit("Nodes '%s' and '%s' share the same cluster nodeid %u: " + "assuming '%s' is correct", + uname, by_id->name, id, uname); } } else if ((uname != NULL) && (by_id->name != NULL)) { if (pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) { crm_notice("Node '%s' has changed its cluster layer ID " "from %" PRIu32 " to %" PRIu32, by_id->name, by_name->cluster_layer_id, by_id->cluster_layer_id); g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data, by_name); } else { crm_warn("Nodes '%s' and '%s' share the same cluster nodeid: %u %s", by_id->name, by_name->name, id, uname); dump_peer_hash(LOG_INFO, __func__); crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE, TRUE); } } else if ((id > 0) && (by_name->cluster_layer_id > 0)) { crm_warn("Nodes %" PRIu32 " and %" PRIu32 " share the same name: '%s'", by_id->cluster_layer_id, by_name->cluster_layer_id, uname); } else { /* Simple merge */ /* Only corosync-based clusters use node IDs. The functions that call * pcmk__update_peer_state() and crm_update_peer_proc() only know * nodeid, so 'by_id' is authoritative when merging. */ dump_peer_hash(LOG_DEBUG, __func__); crm_info("Merging %p into %p", by_name, by_id); g_hash_table_foreach_remove(pcmk__peer_cache, hash_find_by_data, by_name); } return node; } /*! * \internal * \brief Search caches for a node (cluster or Pacemaker Remote) * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] xml_id If not NULL, CIB XML ID of node to search for * \param[in] flags Group of enum pcmk__node_search_flags * * \return Node cache entry if found, otherwise NULL */ pcmk__node_status_t * pcmk__search_node_caches(unsigned int id, const char *uname, const char *xml_id, uint32_t flags) { pcmk__node_status_t *node = NULL; pcmk__assert((id > 0) || (uname != NULL) || (xml_id != NULL)); pcmk__cluster_init_node_caches(); if (pcmk__is_set(flags, pcmk__node_search_remote)) { if (uname != NULL) { node = g_hash_table_lookup(pcmk__remote_peer_cache, uname); } else if (xml_id != NULL) { node = g_hash_table_lookup(pcmk__remote_peer_cache, xml_id); } } if ((node == NULL) && pcmk__is_set(flags, pcmk__node_search_cluster_member)) { node = search_cluster_member_cache(id, uname, xml_id); } if ((node == NULL) && pcmk__is_set(flags, pcmk__node_search_cluster_cib)) { if (xml_id != NULL) { node = find_cib_cluster_node(xml_id, uname); } else { // Assumes XML ID is node ID as string (as with Corosync) char *id_str = (id == 0)? NULL : pcmk__assert_asprintf("%u", id); node = find_cib_cluster_node(id_str, uname); free(id_str); } } return node; } /*! * \internal * \brief Purge a node from cache (both cluster and Pacemaker Remote) * * \param[in] node_name If not NULL, purge only nodes with this name * \param[in] node_id If not 0, purge cluster nodes only if they have this ID * * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged. * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote * nodes that match \p node_name will be purged, and cluster nodes that * match both \p node_name and \p node_id will be purged. * \note The caller must be careful not to use \p node_name after calling this * function if it might be a pointer into a cache entry being removed. */ void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) { char *node_name_copy = NULL; if ((node_name == NULL) && (node_id == 0U)) { return; } // Purge from Pacemaker Remote node cache if ((node_name != NULL) && (g_hash_table_lookup(pcmk__remote_peer_cache, node_name) != NULL)) { /* node_name could be a pointer into the cache entry being purged, * so reassign it to a copy before the original gets freed */ node_name_copy = pcmk__str_copy(node_name); node_name = node_name_copy; crm_trace("Purging %s from Pacemaker Remote node cache", node_name); g_hash_table_remove(pcmk__remote_peer_cache, node_name); } pcmk__cluster_forget_cluster_node(node_id, node_name); free(node_name_copy); } #if SUPPORT_COROSYNC static guint remove_conflicting_peer(pcmk__node_status_t *node) { int matches = 0; GHashTableIter iter; pcmk__node_status_t *existing_node = NULL; if ((node->cluster_layer_id == 0) || (node->name == NULL)) { return 0; } if (!pcmk__corosync_has_nodelist()) { return 0; } g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) { if ((existing_node->cluster_layer_id > 0) && (existing_node->cluster_layer_id != node->cluster_layer_id) && pcmk__str_eq(existing_node->name, node->name, pcmk__str_casei)) { if (pcmk__cluster_is_node_active(existing_node)) { continue; } crm_warn("Removing cached offline node %" PRIu32 "/%s which has " "conflicting name with %" PRIu32, existing_node->cluster_layer_id, existing_node->name, node->cluster_layer_id); g_hash_table_iter_remove(&iter); matches++; } } return matches; } #endif /*! * \internal * \brief Get a cluster node cache entry, possibly creating one if not found * * If \c pcmk__node_search_cluster_member is set in \p flags, the return value * is guaranteed not to be \c NULL. A new cache entry is created if one does not * already exist. * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] xml_id If not NULL while \p id is 0, search for this CIB XML ID * instead of a cluster ID * \param[in] flags Group of enum pcmk__node_search_flags * * \return (Possibly newly created) cluster node cache entry */ /* coverity[-alloc] Memory is referenced in one or both hashtables */ pcmk__node_status_t * pcmk__get_node(unsigned int id, const char *uname, const char *xml_id, uint32_t flags) { pcmk__node_status_t *node = NULL; char *uname_lookup = NULL; pcmk__assert((id > 0) || (uname != NULL)); pcmk__cluster_init_node_caches(); // Check the Pacemaker Remote node cache first if (pcmk__is_set(flags, pcmk__node_search_remote)) { node = g_hash_table_lookup(pcmk__remote_peer_cache, uname); if (node != NULL) { return node; } } if (!pcmk__is_set(flags, pcmk__node_search_cluster_member)) { return NULL; } node = search_cluster_member_cache(id, uname, xml_id); /* if uname wasn't provided, and find_peer did not turn up a uname based on id. * we need to do a lookup of the node name using the id in the cluster membership. */ if ((uname == NULL) && ((node == NULL) || (node->name == NULL))) { uname_lookup = pcmk__cluster_node_name(id); } if (uname_lookup) { uname = uname_lookup; crm_trace("Inferred a name of '%s' for node %u", uname, id); /* try to turn up the node one more time now that we know the uname. */ if (node == NULL) { node = search_cluster_member_cache(id, uname, xml_id); } } if (node == NULL) { char *uniqueid = pcmk__generate_uuid(); node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t)); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(pcmk__peer_cache)); g_hash_table_replace(pcmk__peer_cache, uniqueid, node); } if ((id > 0) && (uname != NULL) && ((node->cluster_layer_id == 0) || (node->name == NULL))) { crm_info("Node %u is now known as %s", id, uname); } if ((id > 0) && (node->cluster_layer_id == 0)) { node->cluster_layer_id = id; } if ((uname != NULL) && (node->name == NULL)) { update_peer_uname(node, uname); } if ((xml_id == NULL) && (node->xml_id == NULL)) { xml_id = pcmk__cluster_get_xml_id(node); if (xml_id == NULL) { crm_debug("Cannot obtain an XML ID for node %s[%u] at this time", node->name, id); } else { crm_info("Node %s[%u] has XML ID %s", node->name, id, xml_id); } } free(uname_lookup); return node; } /*! * \internal * \brief Update a node's uname * * \param[in,out] node Node object to update * \param[in] uname New name to set * * \note This function should not be called within a peer cache iteration, * because in some cases it can remove conflicting cache entries, * which would invalidate the iterator. */ static void update_peer_uname(pcmk__node_status_t *node, const char *uname) { CRM_CHECK(uname != NULL, crm_err("Bug: can't update node name without name"); return); CRM_CHECK(node != NULL, crm_err("Bug: can't update node name to %s without node", uname); return); if (pcmk__str_eq(uname, node->name, pcmk__str_casei)) { crm_debug("Node name '%s' did not change", uname); return; } for (const char *c = uname; *c; ++c) { if ((*c >= 'A') && (*c <= 'Z')) { crm_warn("Node names with capitals are discouraged, consider changing '%s'", uname); break; } } pcmk__str_update(&node->name, uname); if (peer_status_callback != NULL) { peer_status_callback(pcmk__node_update_name, node, NULL); } #if SUPPORT_COROSYNC if ((pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) && !pcmk__is_set(node->flags, pcmk__node_status_remote)) { remove_conflicting_peer(node); } #endif } /*! * \internal * \brief Get log-friendly string equivalent of a process flag * * \param[in] proc Process flag * * \return Log-friendly string equivalent of \p proc */ static inline const char * proc2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch (proc) { case crm_proc_none: text = "none"; break; case crm_proc_cpg: text = "corosync-cpg"; break; } return text; } /*! * \internal * \brief Update a node's process information (and potentially state) * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] flag Bitmask of new process information * \param[in] status node status (online, offline, etc.) * * \return NULL if any node was reaped from peer caches, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, otherwise * reaping could invalidate the iterator. */ pcmk__node_status_t * crm_update_peer_proc(const char *source, pcmk__node_status_t *node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, proc2text(flag), status); return NULL); /* Pacemaker doesn't spawn processes on remote nodes */ if (pcmk__is_set(node->flags, pcmk__node_status_remote)) { return node; } last = node->processes; if (status == NULL) { node->processes = flag; if (node->processes != last) { changed = TRUE; } } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) { if ((node->processes & flag) != flag) { node->processes = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", node->name, node->processes, flag, "processes"); changed = TRUE; } } else if (node->processes & flag) { node->processes = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", node->name, node->processes, flag, "processes"); changed = TRUE; } if (changed) { if (status == NULL && flag <= crm_proc_none) { crm_info("%s: Node %s[%" PRIu32 "] - all processes are now offline", source, node->name, node->cluster_layer_id); } else { crm_info("%s: Node %s[%" PRIu32 "] - %s is now %s", source, node->name, node->cluster_layer_id, proc2text(flag), status); } if (pcmk__is_set(node->processes, crm_get_cluster_proc())) { node->when_online = time(NULL); } else { node->when_online = 0; } /* Call the client callback first, then update the peer state, * in case the node will be reaped */ if (peer_status_callback != NULL) { peer_status_callback(pcmk__node_update_processes, node, &last); } /* The client callback shouldn't touch the peer caches, * but as a safety net, bail if the peer cache was destroyed. */ if (pcmk__peer_cache == NULL) { return NULL; } if (autoreap) { const char *peer_state = NULL; if (pcmk__is_set(node->processes, crm_get_cluster_proc())) { peer_state = PCMK_VALUE_MEMBER; } else { peer_state = PCMK__VALUE_LOST; } node = pcmk__update_peer_state(__func__, node, peer_state, 0); } } else { crm_trace("%s: Node %s[%" PRIu32 "] - %s is unchanged (%s)", source, node->name, node->cluster_layer_id, proc2text(flag), status); } return node; } /*! * \internal * \brief Update a cluster node cache entry's expected join state * * \param[in] source Caller's function name (for logging) * \param[in,out] node Node to update * \param[in] expected Node's new join state */ void pcmk__update_peer_expected(const char *source, pcmk__node_status_t *node, const char *expected) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); /* Remote nodes don't participate in joins */ if (pcmk__is_set(node->flags, pcmk__node_status_remote)) { return; } last = node->expected; if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) { node->expected = strdup(expected); changed = TRUE; } if (changed) { crm_info("%s: Node %s[%" PRIu32 "] - expected state is now %s (was %s)", source, node->name, node->cluster_layer_id, expected, last); free(last); } else { crm_trace("%s: Node %s[%" PRIu32 "] - expected state is unchanged (%s)", source, node->name, node->cluster_layer_id, expected); } } /*! * \internal * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * \param[in,out] iter If not NULL, pointer to node's peer cache iterator * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function may be called from * within a peer cache iteration if the iterator is supplied. */ static pcmk__node_status_t * update_peer_state_iter(const char *source, pcmk__node_status_t *node, const char *state, uint64_t membership, GHashTableIter *iter) { gboolean is_member; CRM_CHECK(node != NULL, crm_err("Could not set state for unknown host to %s " QB_XS " source=%s", state, source); return NULL); is_member = pcmk__str_eq(state, PCMK_VALUE_MEMBER, pcmk__str_none); if (is_member) { node->when_lost = 0; if (membership) { node->membership_id = membership; } } if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) { char *last = node->state; if (is_member) { node->when_member = time(NULL); } else { node->when_member = 0; } node->state = strdup(state); crm_notice("Node %s state is now %s " QB_XS " nodeid=%" PRIu32 " previous=%s source=%s", node->name, state, node->cluster_layer_id, pcmk__s(last, "unknown"), source); if (peer_status_callback != NULL) { peer_status_callback(pcmk__node_update_state, node, last); } free(last); if (autoreap && !is_member && !pcmk__is_set(node->flags, pcmk__node_status_remote)) { /* We only autoreap from the peer cache, not the remote peer cache, * because the latter should be managed only by * refresh_remote_nodes(). */ if(iter) { crm_notice("Purged 1 peer with cluster layer ID %" PRIu32 "and/or name=%s from the membership cache", node->cluster_layer_id, node->name); g_hash_table_iter_remove(iter); } else { pcmk__cluster_forget_cluster_node(node->cluster_layer_id, node->name); } node = NULL; } } else { crm_trace("Node %s state is unchanged (%s) " QB_XS " nodeid=%" PRIu32 " source=%s", node->name, state, node->cluster_layer_id, source); } return node; } /*! * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, * otherwise reaping could invalidate the iterator. */ pcmk__node_status_t * pcmk__update_peer_state(const char *source, pcmk__node_status_t *node, const char *state, uint64_t membership) { return update_peer_state_iter(source, node, state, membership, NULL); } /*! * \internal * \brief Reap all nodes from cache whose membership information does not match * * \param[in] membership Membership ID of nodes to keep */ void pcmk__reap_unseen_nodes(uint64_t membership) { GHashTableIter iter; pcmk__node_status_t *node = NULL; crm_trace("Reaping unseen nodes..."); g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) { if (node->membership_id != membership) { if (node->state) { /* Calling update_peer_state_iter() allows us to remove the node * from pcmk__peer_cache without invalidating our iterator */ update_peer_state_iter(__func__, node, PCMK__VALUE_LOST, membership, &iter); } else { crm_info("State of node %s[%" PRIu32 "] is still unknown", node->name, node->cluster_layer_id); } } } } static pcmk__node_status_t * find_cib_cluster_node(const char *id, const char *uname) { GHashTableIter iter; pcmk__node_status_t *node = NULL; pcmk__node_status_t *by_id = NULL; pcmk__node_status_t *by_name = NULL; if (uname) { g_hash_table_iter_init(&iter, cluster_node_cib_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__str_eq(node->name, uname, pcmk__str_casei)) { crm_trace("Name match: %s = %p", node->name, node); by_name = node; break; } } } if (id) { g_hash_table_iter_init(&iter, cluster_node_cib_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__str_eq(id, pcmk__cluster_get_xml_id(node), pcmk__str_none)) { crm_trace("ID match: %s= %p", id, node); by_id = node; break; } } } node = by_id; /* Good default */ if (by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %s/%s", by_id, id, uname); } else if (by_id == NULL && by_name) { crm_trace("Only one: %p for %s/%s", by_name, id, uname); if (id) { node = NULL; } else { node = by_name; } } else if (by_name == NULL && by_id) { crm_trace("Only one: %p for %s/%s", by_id, id, uname); if (uname) { node = NULL; } } else if ((uname != NULL) && (by_id->name != NULL) && pcmk__str_eq(uname, by_id->name, pcmk__str_casei)) { /* Multiple nodes have the same uname in the CIB. * Return by_id. */ } else if ((id != NULL) && (by_name->xml_id != NULL) && pcmk__str_eq(id, by_name->xml_id, pcmk__str_none)) { /* Multiple nodes have the same id in the CIB. * Return by_name. */ node = by_name; } else { node = NULL; } if (node == NULL) { crm_debug("Couldn't find node%s%s%s%s", id? " " : "", id? id : "", uname? " with name " : "", uname? uname : ""); } return node; } static void cluster_node_cib_cache_refresh_helper(xmlNode *xml_node, void *user_data) { const char *id = pcmk__xe_get(xml_node, PCMK_XA_ID); const char *uname = pcmk__xe_get(xml_node, PCMK_XA_UNAME); pcmk__node_status_t * node = NULL; CRM_CHECK(id != NULL && uname !=NULL, return); node = find_cib_cluster_node(id, uname); if (node == NULL) { char *uniqueid = pcmk__generate_uuid(); node = pcmk__assert_alloc(1, sizeof(pcmk__node_status_t)); node->name = pcmk__str_copy(uname); node->xml_id = pcmk__str_copy(id); g_hash_table_replace(cluster_node_cib_cache, uniqueid, node); } else if (pcmk__is_set(node->flags, pcmk__node_status_dirty)) { pcmk__str_update(&node->name, uname); /* Node is in cache and hasn't been updated already, so mark it clean */ clear_peer_flags(node, pcmk__node_status_dirty); } } static void refresh_cluster_node_cib_cache(xmlNode *cib) { pcmk__cluster_init_node_caches(); g_hash_table_foreach(cluster_node_cib_cache, mark_dirty, NULL); pcmk__xpath_foreach_result(cib->doc, PCMK__XP_MEMBER_NODE_CONFIG, cluster_node_cib_cache_refresh_helper, NULL); // Remove all old cache entries that weren't seen in the CIB g_hash_table_foreach_remove(cluster_node_cib_cache, is_dirty, NULL); } void pcmk__refresh_node_caches_from_cib(xmlNode *cib) { refresh_remote_nodes(cib); refresh_cluster_node_cib_cache(cib); } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include void crm_peer_init(void) { pcmk__cluster_init_node_caches(); } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c index d29c049c92..8146703453 100644 --- a/lib/common/ipc_server.c +++ b/lib/common/ipc_server.c @@ -1,1009 +1,1009 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "crmcommon_private.h" /* Evict clients whose event queue grows this large (by default) */ #define PCMK_IPC_DEFAULT_QUEUE_MAX 500 static GHashTable *client_connections = NULL; /*! * \internal * \brief Count IPC clients * * \return Number of active IPC client connections */ guint pcmk__ipc_client_count(void) { return client_connections? g_hash_table_size(client_connections) : 0; } /*! * \internal * \brief Execute a function for each active IPC client connection * * \param[in] func Function to call * \param[in,out] user_data Pointer to pass to function * * \note The parameters are the same as for g_hash_table_foreach(). */ void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data) { if ((func != NULL) && (client_connections != NULL)) { g_hash_table_foreach(client_connections, func, user_data); } } pcmk__client_t * pcmk__find_client(const qb_ipcs_connection_t *c) { if (client_connections) { return g_hash_table_lookup(client_connections, c); } crm_trace("No client found for %p", c); return NULL; } pcmk__client_t * pcmk__find_client_by_id(const char *id) { if ((client_connections != NULL) && (id != NULL)) { gpointer key; pcmk__client_t *client = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { if (strcmp(client->id, id) == 0) { return client; } } } crm_trace("No client found with id='%s'", pcmk__s(id, "")); return NULL; } /*! * \internal * \brief Get a client identifier for use in log messages * * \param[in] c Client * * \return Client's name, client's ID, or a string literal, as available * \note This is intended to be used in format strings like "client %s". */ const char * pcmk__client_name(const pcmk__client_t *c) { if (c == NULL) { return "(unspecified)"; } else if (c->name != NULL) { return c->name; } else if (c->id != NULL) { return c->id; } else { return "(unidentified)"; } } void pcmk__client_cleanup(void) { if (client_connections != NULL) { int active = g_hash_table_size(client_connections); if (active > 0) { crm_warn("Exiting with %d active IPC client%s", active, pcmk__plural_s(active)); } g_hash_table_destroy(client_connections); client_connections = NULL; } } void pcmk__drop_all_clients(qb_ipcs_service_t *service) { qb_ipcs_connection_t *c = NULL; if (service == NULL) { return; } c = qb_ipcs_connection_first_get(service); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(service, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, pcmk__client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } } /*! * \internal * \brief Allocate a new pcmk__client_t object based on an IPC connection * * \param[in] c IPC connection (NULL to allocate generic client) * \param[in] key Connection table key (NULL to use sane default) * \param[in] uid_client UID corresponding to c (ignored if c is NULL) * * \return Pointer to new pcmk__client_t (guaranteed not to be \c NULL) */ static pcmk__client_t * client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client) { pcmk__client_t *client = pcmk__assert_alloc(1, sizeof(pcmk__client_t)); if (c) { client->user = pcmk__uid2username(uid_client); if (client->user == NULL) { client->user = pcmk__str_copy("#unprivileged"); crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged", uid_client); } client->ipcs = c; pcmk__set_client_flags(client, pcmk__client_ipc); client->pid = pcmk__client_pid(c); if (key == NULL) { key = c; } } client->id = pcmk__generate_uuid(); if (key == NULL) { key = client->id; } if (client_connections == NULL) { crm_trace("Creating IPC client table"); client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); } g_hash_table_insert(client_connections, key, client); return client; } /*! * \brief Allocate a new pcmk__client_t object and generate its ID * * \param[in] key What to use as connections hash table key (NULL to use ID) * * \return Pointer to new pcmk__client_t (asserts on failure) */ pcmk__client_t * pcmk__new_unauth_client(void *key) { return client_from_connection(NULL, key, 0); } pcmk__client_t * pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) { gid_t uid_cluster = 0; gid_t gid_cluster = 0; pcmk__client_t *client = NULL; CRM_CHECK(c != NULL, return NULL); if (pcmk__daemon_user(&uid_cluster, &gid_cluster) != pcmk_rc_ok) { static bool need_log = true; if (need_log) { crm_warn("Could not find user and group IDs for user " CRM_DAEMON_USER); need_log = false; } } if (uid_client != 0) { crm_trace("Giving group %u access to new IPC connection", gid_cluster); /* Passing -1 to chown(2) means don't change */ qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } /* TODO: Do our own auth checking, return NULL if unauthorized */ client = client_from_connection(c, NULL, uid_client); if ((uid_client == 0) || (uid_client == uid_cluster)) { /* Remember when a connection came from root or hacluster */ pcmk__set_client_flags(client, pcmk__client_privileged); } crm_debug("New IPC client %s for PID %u with uid %d and gid %d", client->id, client->pid, uid_client, gid_client); return client; } static struct iovec * pcmk__new_ipc_event(void) { return (struct iovec *) pcmk__assert_alloc(2, sizeof(struct iovec)); } /*! * \brief Free an I/O vector created by pcmk__ipc_prepare_iov() * * \param[in,out] event I/O vector to free */ void pcmk_free_ipc_event(struct iovec *event) { if (event != NULL) { free(event[0].iov_base); free(event[1].iov_base); free(event); } } static void free_event(gpointer data) { pcmk_free_ipc_event((struct iovec *) data); } static void add_event(pcmk__client_t *c, struct iovec *iov) { if (c->event_queue == NULL) { c->event_queue = g_queue_new(); } g_queue_push_tail(c->event_queue, iov); } void pcmk__free_client(pcmk__client_t *c) { if (c == NULL) { return; } if (client_connections) { if (c->ipcs) { crm_trace("Destroying %p/%p (%d remaining)", c, c->ipcs, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->ipcs); } else { crm_trace("Destroying remote connection %p (%d remaining)", c, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->id); } } if (c->event_timer) { g_source_remove(c->event_timer); } if (c->event_queue) { crm_debug("Destroying %d events", g_queue_get_length(c->event_queue)); g_queue_free_full(c->event_queue, free_event); } free(c->id); free(c->name); free(c->user); if (c->remote) { if (c->remote->auth_timeout) { g_source_remove(c->remote->auth_timeout); } if (c->remote->tls_session != NULL) { /* @TODO Reduce duplication at callers. Put here everything * necessary to tear down and free tls_session. */ gnutls_deinit(c->remote->tls_session); } free(c->remote->buffer); free(c->remote); } free(c); } /*! * \internal * \brief Raise IPC eviction threshold for a client, if allowed * * \param[in,out] client Client to modify * \param[in] qmax New threshold */ void pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax) { int rc = pcmk_rc_ok; long long qmax_ll = 0LL; unsigned int orig_value = 0U; CRM_CHECK(client != NULL, return); orig_value = client->queue_max; if (pcmk__is_set(client->flags, pcmk__client_privileged)) { rc = pcmk__scan_ll(qmax, &qmax_ll, 0LL); if (rc == pcmk_rc_ok) { if ((qmax_ll <= 0LL) || (qmax_ll > UINT_MAX)) { rc = ERANGE; } else { client->queue_max = (unsigned int) qmax_ll; } } } else { rc = EACCES; } if (rc != pcmk_rc_ok) { crm_info("Could not set IPC threshold for client %s[%u] to %s: %s", pcmk__client_name(client), client->pid, pcmk__s(qmax, "default"), pcmk_rc_str(rc)); } else if (client->queue_max != orig_value) { crm_debug("IPC threshold for client %s[%u] is now %u (was %u)", pcmk__client_name(client), client->pid, client->queue_max, orig_value); } } int pcmk__client_pid(qb_ipcs_connection_t *c) { struct qb_ipcs_connection_stats stats; stats.client_pid = 0; qb_ipcs_connection_stats_get(c, &stats, 0); return stats.client_pid; } /*! * \internal * \brief Retrieve message XML from data read from client IPC * * \param[in,out] c IPC client connection * \param[in] data Data read from client connection * \param[out] id Where to store message ID from libqb header * \param[out] flags Where to store flags from libqb header * * \return Message XML on success, NULL otherwise */ xmlNode * pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags) { xmlNode *xml = NULL; char *uncompressed = NULL; char *text = ((char *)data) + sizeof(pcmk__ipc_header_t); pcmk__ipc_header_t *header = data; if (!pcmk__valid_ipc_header(header)) { return NULL; } if (id) { *id = ((struct qb_ipc_response_header *)data)->id; } if (flags) { *flags = header->flags; } if (pcmk__is_set(header->flags, crm_ipc_proxied)) { /* Mark this client as being the endpoint of a proxy connection. * Proxy connections responses are sent on the event channel, to avoid * blocking the controller serving as proxy. */ pcmk__set_client_flags(c, pcmk__client_proxied); } if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; uncompressed = pcmk__assert_alloc(1, size_u); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); text = uncompressed; rc = pcmk__bzlib2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Decompression failed: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); free(uncompressed); return NULL; } } pcmk__assert(text[header->size_uncompressed - 1] == 0); xml = pcmk__xml_parse(text); crm_log_xml_trace(xml, "[IPC received]"); free(uncompressed); return xml; } static int crm_ipcs_flush_events(pcmk__client_t *c); static gboolean crm_ipcs_flush_events_cb(gpointer data) { pcmk__client_t *c = data; c->event_timer = 0; crm_ipcs_flush_events(c); return FALSE; } /*! * \internal * \brief Add progressive delay before next event queue flush * * \param[in,out] c Client connection to add delay to * \param[in] queue_len Current event queue length */ static inline void delay_next_flush(pcmk__client_t *c, unsigned int queue_len) { /* Delay a maximum of 1.5 seconds */ guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500; c->event_timer = pcmk__create_timer(delay, crm_ipcs_flush_events_cb, c); } /*! * \internal * \brief Send client any messages in its queue * * \param[in,out] c Client to flush * * \return Standard Pacemaker return value */ static int crm_ipcs_flush_events(pcmk__client_t *c) { int rc = pcmk_rc_ok; ssize_t qb_rc = 0; unsigned int sent = 0; unsigned int queue_len = 0; if (c == NULL) { return rc; } else if (c->event_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); return rc; } if (c->event_queue) { queue_len = g_queue_get_length(c->event_queue); } while (sent < 100) { pcmk__ipc_header_t *header = NULL; struct iovec *event = NULL; if (c->event_queue) { // We don't pop unless send is successful event = g_queue_peek_head(c->event_queue); } if (event == NULL) { // Queue is empty break; } qb_rc = qb_ipcs_event_sendv(c->ipcs, event, 2); if (qb_rc < 0) { rc = (int) -qb_rc; break; } event = g_queue_pop_head(c->event_queue); sent++; header = event[0].iov_base; if (header->size_compressed) { crm_trace("Event %" PRId32 " to %p[%u] (%zd compressed bytes) sent", header->qb.id, c->ipcs, c->pid, qb_rc); } else { crm_trace("Event %" PRId32 " to %p[%u] (%zd bytes) sent: %.120s", header->qb.id, c->ipcs, c->pid, qb_rc, (char *) (event[1].iov_base)); } pcmk_free_ipc_event(event); } queue_len -= sent; if (sent > 0 || queue_len) { crm_trace("Sent %u events (%u remaining) for %p[%d]: %s (%zd)", sent, queue_len, c->ipcs, c->pid, pcmk_rc_str(rc), qb_rc); } if (queue_len) { /* Allow clients to briefly fall behind on processing incoming messages, * but drop completely unresponsive clients so the connection doesn't * consume resources indefinitely. */ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { /* Don't evict for a new or shrinking backlog */ crm_warn("Client with process ID %u has a backlog of %u messages " QB_XS " %p", c->pid, queue_len, c->ipcs); } else { crm_err("Evicting client with process ID %u due to backlog of %u messages " QB_XS " %p", c->pid, queue_len, c->ipcs); c->queue_backlog = 0; qb_ipcs_disconnect(c->ipcs); return rc; } } c->queue_backlog = queue_len; delay_next_flush(c, queue_len); } else { /* Event queue is empty, there is no backlog */ c->queue_backlog = 0; } return rc; } /*! * \internal * \brief Create an I/O vector for sending an IPC XML message * * \param[in] request Identifier for libqb response header * \param[in] message XML message to send * \param[in] max_send_size If 0, default IPC buffer size is used * \param[out] result Where to store prepared I/O vector * \param[out] bytes Size of prepared data in bytes * * \return Standard Pacemaker return code */ int pcmk__ipc_prepare_iov(uint32_t request, const xmlNode *message, uint32_t max_send_size, struct iovec **result, ssize_t *bytes) { struct iovec *iov; unsigned int total = 0; GString *buffer = NULL; pcmk__ipc_header_t *header = NULL; int rc = pcmk_rc_ok; if ((message == NULL) || (result == NULL)) { rc = EINVAL; goto done; } header = calloc(1, sizeof(pcmk__ipc_header_t)); if (header == NULL) { rc = ENOMEM; goto done; } buffer = g_string_sized_new(1024); pcmk__xml_string(message, 0, buffer, 0); if (max_send_size == 0) { max_send_size = crm_ipc_default_buffer_size(); } CRM_LOG_ASSERT(max_send_size != 0); *result = NULL; iov = pcmk__new_ipc_event(); iov[0].iov_len = sizeof(pcmk__ipc_header_t); iov[0].iov_base = header; header->version = PCMK__IPC_VERSION; header->size_uncompressed = 1 + buffer->len; total = iov[0].iov_len + header->size_uncompressed; if (total < max_send_size) { iov[1].iov_base = pcmk__str_copy(buffer->str); iov[1].iov_len = header->size_uncompressed; } else { static unsigned int biggest = 0; char *compressed = NULL; unsigned int new_size = 0; if (pcmk__compress(buffer->str, (unsigned int) header->size_uncompressed, (unsigned int) max_send_size, &compressed, &new_size) == pcmk_rc_ok) { pcmk__set_ipc_flags(header->flags, "send data", crm_ipc_compressed); header->size_compressed = new_size; iov[1].iov_len = header->size_compressed; iov[1].iov_base = compressed; biggest = QB_MAX(header->size_compressed, biggest); } else { crm_log_xml_trace(message, "EMSGSIZE"); biggest = QB_MAX(header->size_uncompressed, biggest); crm_err("Could not compress %u-byte message into less than IPC " "limit of %u bytes; set PCMK_ipc_buffer to higher value " "(%u bytes suggested)", header->size_uncompressed, max_send_size, 4 * biggest); free(compressed); pcmk_free_ipc_event(iov); rc = EMSGSIZE; goto done; } } header->qb.size = iov[0].iov_len + iov[1].iov_len; header->qb.id = (int32_t)request; /* Replying to a specific request */ *result = iov; pcmk__assert(header->qb.size > 0); if (bytes != NULL) { *bytes = header->qb.size; } done: if (buffer != NULL) { g_string_free(buffer, TRUE); } return rc; } int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags) { int rc = pcmk_rc_ok; static uint32_t id = 1; pcmk__ipc_header_t *header = iov[0].iov_base; if (c->flags & pcmk__client_proxied) { /* _ALL_ replies to proxied connections need to be sent as events */ if (!pcmk__is_set(flags, crm_ipc_server_event)) { /* The proxied flag lets us know this was originally meant to be a * response, even though we're sending it over the event channel. */ pcmk__set_ipc_flags(flags, "server event", crm_ipc_server_event |crm_ipc_proxied_relay_response); } } pcmk__set_ipc_flags(header->flags, "server event", flags); if (flags & crm_ipc_server_event) { header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ if (flags & crm_ipc_server_free) { crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); add_event(c, iov); } else { struct iovec *iov_copy = pcmk__new_ipc_event(); crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); iov_copy[0].iov_len = iov[0].iov_len; iov_copy[0].iov_base = malloc(iov[0].iov_len); memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); iov_copy[1].iov_len = iov[1].iov_len; iov_copy[1].iov_base = malloc(iov[1].iov_len); memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); add_event(c, iov_copy); } } else { ssize_t qb_rc; CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ qb_rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); if (qb_rc < header->qb.size) { if (qb_rc < 0) { rc = (int) -qb_rc; } crm_notice("Response %" PRId32 " to pid %u failed: %s " QB_XS " bytes=%" PRId32 " rc=%zd ipcs=%p", header->qb.id, c->pid, pcmk_rc_str(rc), header->qb.size, qb_rc, c->ipcs); } else { crm_trace("Response %" PRId32 " sent, %zd bytes to %p[%u]", header->qb.id, qb_rc, c->ipcs, c->pid); } if (flags & crm_ipc_server_free) { pcmk_free_ipc_event(iov); } } if (flags & crm_ipc_server_event) { rc = crm_ipcs_flush_events(c); } else { crm_ipcs_flush_events(c); } if ((rc == EPIPE) || (rc == ENOTCONN)) { crm_trace("Client %p disconnected", c->ipcs); } return rc; } int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, const xmlNode *message, uint32_t flags) { struct iovec *iov = NULL; int rc = pcmk_rc_ok; if (c == NULL) { return EINVAL; } rc = pcmk__ipc_prepare_iov(request, message, crm_ipc_default_buffer_size(), &iov, NULL); if (rc == pcmk_rc_ok) { pcmk__set_ipc_flags(flags, "send data", crm_ipc_server_free); rc = pcmk__ipc_send_iov(c, iov, flags); } else { pcmk_free_ipc_event(iov); crm_notice("IPC message to pid %d failed: %s " QB_XS " rc=%d", c->pid, pcmk_rc_str(rc), rc); } return rc; } /*! * \internal * \brief Create an acknowledgement with a status code to send to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Exit status code to add to ack * * \return Newly created XML for ack * * \note The caller is responsible for freeing the return value with * \c pcmk__xml_free(). */ xmlNode * pcmk__ipc_create_ack_as(const char *function, int line, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { xmlNode *ack = NULL; if (pcmk__is_set(flags, crm_ipc_client_response)) { ack = pcmk__xe_create(NULL, tag); pcmk__xe_set(ack, PCMK_XA_FUNCTION, function); pcmk__xe_set_int(ack, PCMK__XA_LINE, line); pcmk__xe_set_int(ack, PCMK_XA_STATUS, (int) status); pcmk__xe_set(ack, PCMK__XA_IPC_PROTO_VERSION, ver); } return ack; } /*! * \internal * \brief Send an acknowledgement with a status code to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] c Client to send ack to * \param[in] request Request ID being replied to * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Status code to send with acknowledgement * * \return Standard Pacemaker return code */ int pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, uint32_t request, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { int rc = pcmk_rc_ok; xmlNode *ack = pcmk__ipc_create_ack_as(function, line, flags, tag, ver, status); if (ack != NULL) { crm_trace("Ack'ing IPC message from client %s as <%s status=%d>", pcmk__client_name(c), tag, status); crm_log_xml_trace(ack, "sent-ack"); c->request_id = 0; rc = pcmk__ipc_send_xml(c, request, ack, flags); pcmk__xml_free(ack); } return rc; } /*! * \internal * \brief Add an IPC server to the main loop for the CIB manager API * * \param[out] ipcs_ro New IPC server for read-only CIB manager API * \param[out] ipcs_rw New IPC server for read/write CIB manager API * \param[out] ipcs_shm New IPC server for shared-memory CIB manager API * \param[in] ro_cb IPC callbacks for read-only API * \param[in] rw_cb IPC callbacks for read/write and shared-memory APIs * * \note This function exits fatally if unable to create the servers. * \note There is no actual difference between the three IPC endpoints other * than their names. */ void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(PCMK__SERVER_BASED_RO, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(PCMK__SERVER_BASED_RW, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(PCMK__SERVER_BASED_SHM, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create the CIB manager: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Destroy IPC servers for the CIB manager API * * \param[out] ipcs_ro IPC server for read-only the CIB manager API * \param[out] ipcs_rw IPC server for read/write the CIB manager API * \param[out] ipcs_shm IPC server for shared-memory the CIB manager API * * \note This is a convenience function for calling qb_ipcs_destroy() for each * argument. */ void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } /*! * \internal * \brief Add an IPC server to the main loop for the controller API * * \param[in] cb IPC callbacks * * \return Newly created IPC server */ qb_ipcs_service_t * pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } /*! * \internal * \brief Add an IPC server to the main loop for the attribute manager API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(PCMK__VALUE_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { - crm_crit("Exiting fatally because unable to serve " PCMK__SERVER_ATTRD - " IPC (verify pacemaker and pacemaker_remote are not both " - "enabled)"); + pcmk__crit("Exiting fatally because unable to serve " PCMK__SERVER_ATTRD + " IPC (verify pacemaker and pacemaker_remote are not both" + " enabled)"); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the fencer API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb, QB_LOOP_HIGH); if (*ipcs == NULL) { crm_err("Failed to create fencer: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemakerd API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits with CRM_EX_OSERR if unable to create the servers. */ void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Couldn't start pacemakerd IPC server"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); /* sub-daemons are observed by pacemakerd. Thus we exit CRM_EX_FATAL * if we want to prevent pacemakerd from restarting them. * With pacemakerd we leave the exit-code shown to e.g. systemd * to what it was prior to moving the code here from pacemakerd.c */ crm_exit(CRM_EX_OSERR); } } /*! * \internal * \brief Add an IPC server to the main loop for the scheduler API * * \param[in] cb IPC callbacks * * \return Newly created IPC server * \note This function exits fatally if unable to create the servers. */ qb_ipcs_service_t * pcmk__serve_schedulerd_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_NATIVE, cb); } diff --git a/lib/pacemaker/pcmk_acl.c b/lib/pacemaker/pcmk_acl.c index 0036d6fd10..37c031d207 100644 --- a/lib/pacemaker/pcmk_acl.c +++ b/lib/pacemaker/pcmk_acl.c @@ -1,399 +1,399 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // pcmk_acl_required() #include #include #include #include #define ACL_NS_PREFIX "http://clusterlabs.org/ns/pacemaker/access/" #define ACL_NS_Q_PREFIX "pcmk-access-" #define ACL_NS_Q_WRITABLE (const xmlChar *) ACL_NS_Q_PREFIX "writable" #define ACL_NS_Q_READABLE (const xmlChar *) ACL_NS_Q_PREFIX "readable" #define ACL_NS_Q_DENIED (const xmlChar *) ACL_NS_Q_PREFIX "denied" static const xmlChar *NS_WRITABLE = (const xmlChar *) ACL_NS_PREFIX "writable"; static const xmlChar *NS_READABLE = (const xmlChar *) ACL_NS_PREFIX "readable"; static const xmlChar *NS_DENIED = (const xmlChar *) ACL_NS_PREFIX "denied"; /*! * \brief This function takes a node and marks it with the namespace * given in the ns parameter. * * \param[in,out] i_node * \param[in] ns * \param[in,out] ret * \param[in,out] ns_recycle_writable * \param[in,out] ns_recycle_readable * \param[in,out] ns_recycle_denied */ static void pcmk__acl_mark_node_with_namespace(xmlNode *i_node, const xmlChar *ns, int *ret, xmlNs **ns_recycle_writable, xmlNs **ns_recycle_readable, xmlNs **ns_recycle_denied) { if (ns == NS_WRITABLE) { if (*ns_recycle_writable == NULL) { *ns_recycle_writable = xmlNewNs(xmlDocGetRootElement(i_node->doc), NS_WRITABLE, ACL_NS_Q_WRITABLE); } xmlSetNs(i_node, *ns_recycle_writable); *ret = pcmk_rc_ok; } else if (ns == NS_READABLE) { if (*ns_recycle_readable == NULL) { *ns_recycle_readable = xmlNewNs(xmlDocGetRootElement(i_node->doc), NS_READABLE, ACL_NS_Q_READABLE); } xmlSetNs(i_node, *ns_recycle_readable); *ret = pcmk_rc_ok; } else if (ns == NS_DENIED) { if (*ns_recycle_denied == NULL) { *ns_recycle_denied = xmlNewNs(xmlDocGetRootElement(i_node->doc), NS_DENIED, ACL_NS_Q_DENIED); }; xmlSetNs(i_node, *ns_recycle_denied); *ret = pcmk_rc_ok; } } /*! * \brief Annotate a given XML element or property and its siblings with * XML namespaces to indicate ACL permissions * * \param[in,out] xml_modify XML to annotate * * \return A standard Pacemaker return code * Namely: * - pcmk_rc_ok upon success, * - pcmk_rc_already if ACLs were not applicable, * - pcmk_rc_schema_validation if the validation schema version * is unsupported (see note), or * - EINVAL or ENOMEM as appropriate; * * \note This function is recursive */ static int annotate_with_siblings(xmlNode *xml_modify) { static xmlNs *ns_recycle_writable = NULL, *ns_recycle_readable = NULL, *ns_recycle_denied = NULL; static const xmlDoc *prev_doc = NULL; xmlNode *i_node = NULL; const xmlChar *ns; int ret = EINVAL; // nodes have not been processed yet if (prev_doc == NULL || prev_doc != xml_modify->doc) { prev_doc = xml_modify->doc; ns_recycle_writable = ns_recycle_readable = ns_recycle_denied = NULL; } for (i_node = xml_modify; i_node != NULL; i_node = i_node->next) { switch (i_node->type) { case XML_ELEMENT_NODE: pcmk__xml_doc_set_flags(i_node->doc, pcmk__xf_tracking); if (!pcmk__check_acl(i_node, NULL, pcmk__xf_acl_read)) { ns = NS_DENIED; } else if (!pcmk__check_acl(i_node, NULL, pcmk__xf_acl_write)) { ns = NS_READABLE; } else { ns = NS_WRITABLE; } pcmk__acl_mark_node_with_namespace(i_node, ns, &ret, &ns_recycle_writable, &ns_recycle_readable, &ns_recycle_denied); // @TODO Could replace recursion with iteration to save stack if (i_node->properties != NULL) { /* This is not entirely clear, but relies on the very same * class-hierarchy emulation that libxml2 has firmly baked * in its API/ABI */ ret |= annotate_with_siblings((xmlNodePtr) i_node->properties); } if (i_node->children != NULL) { ret |= annotate_with_siblings(i_node->children); } break; case XML_ATTRIBUTE_NODE: // We can utilize that parent has already been assigned the ns if (!pcmk__check_acl(i_node->parent, (const char *) i_node->name, pcmk__xf_acl_read)) { ns = NS_DENIED; } else if (!pcmk__check_acl(i_node, (const char *) i_node->name, pcmk__xf_acl_write)) { ns = NS_READABLE; } else { ns = NS_WRITABLE; } pcmk__acl_mark_node_with_namespace(i_node, ns, &ret, &ns_recycle_writable, &ns_recycle_readable, &ns_recycle_denied); break; case XML_COMMENT_NODE: // We can utilize that parent has already been assigned the ns if (!pcmk__check_acl(i_node->parent, (const char *) i_node->name, pcmk__xf_acl_read)) { ns = NS_DENIED; } else if (!pcmk__check_acl(i_node->parent, (const char *) i_node->name, pcmk__xf_acl_write)) { ns = NS_READABLE; } else { ns = NS_WRITABLE; } pcmk__acl_mark_node_with_namespace(i_node, ns, &ret, &ns_recycle_writable, &ns_recycle_readable, &ns_recycle_denied); break; default: break; } } return ret; } int pcmk__acl_annotate_permissions(const char *cred, const xmlDoc *cib_doc, xmlDoc **acl_evaled_doc) { int ret; xmlNode *target, *comment; const char *validation; CRM_CHECK(cred != NULL, return EINVAL); CRM_CHECK(cib_doc != NULL, return EINVAL); CRM_CHECK(acl_evaled_doc != NULL, return EINVAL); /* avoid trivial accidental XML injection */ if (strpbrk(cred, "<>&") != NULL) { return EINVAL; } if (!pcmk_acl_required(cred)) { /* nothing to evaluate */ return pcmk_rc_already; } validation = pcmk__xe_get(xmlDocGetRootElement(cib_doc), PCMK_XA_VALIDATE_WITH); if (pcmk__cmp_schemas_by_name(PCMK__COMPAT_ACL_2_MIN_INCL, validation) > 0) { return pcmk_rc_schema_validation; } target = pcmk__xml_copy(NULL, xmlDocGetRootElement((xmlDoc *) cib_doc)); if (target == NULL) { return EINVAL; } pcmk__enable_acl(target, target, cred); ret = annotate_with_siblings(target); if (ret == pcmk_rc_ok) { char *content = pcmk__assert_asprintf("ACLs as evaluated for user %s", cred); comment = pcmk__xc_create(target->doc, content); xmlAddPrevSibling(xmlDocGetRootElement(target->doc), comment); *acl_evaled_doc = target->doc; free(content); } else { pcmk__xml_free(target); } return ret; } int pcmk__acl_evaled_render(xmlDoc *annotated_doc, enum pcmk__acl_render_how how, xmlChar **doc_txt_ptr) { xmlDoc *xslt_doc; xsltStylesheet *xslt; xsltTransformContext *xslt_ctxt; xmlDoc *res; char *sfile; static const char *params_namespace[] = { "accessrendercfg:c-writable", ACL_NS_Q_PREFIX "writable:", "accessrendercfg:c-readable", ACL_NS_Q_PREFIX "readable:", "accessrendercfg:c-denied", ACL_NS_Q_PREFIX "denied:", "accessrendercfg:c-reset", "", "accessrender:extra-spacing", "no", "accessrender:self-reproducing-prefix", ACL_NS_Q_PREFIX, NULL }, *params_useansi[] = { /* start with hard-coded defaults, then adapt per the template ones */ "accessrendercfg:c-writable", "\x1b[32m", "accessrendercfg:c-readable", "\x1b[34m", "accessrendercfg:c-denied", "\x1b[31m", "accessrendercfg:c-reset", "\x1b[0m", "accessrender:extra-spacing", "no", "accessrender:self-reproducing-prefix", ACL_NS_Q_PREFIX, NULL }, *params_noansi[] = { "accessrendercfg:c-writable", "vvv---[ WRITABLE ]---vvv", "accessrendercfg:c-readable", "vvv---[ READABLE ]---vvv", "accessrendercfg:c-denied", "vvv---[ ~DENIED~ ]---vvv", "accessrendercfg:c-reset", "", "accessrender:extra-spacing", "yes", "accessrender:self-reproducing-prefix", "", NULL }; const char **params; int rc = pcmk_rc_ok; xmlParserCtxtPtr parser_ctxt; /* unfortunately, the input (coming from CIB originally) was parsed with blanks ignored, and since the output is a conversion of XML to text format (we would be covered otherwise thanks to implicit pretty-printing), we need to dump the tree to string output first, only to subsequently reparse it -- this time with blanks honoured */ xmlChar *annotated_dump; int dump_size; pcmk__assert(how != pcmk__acl_render_none); // Color is the default render mode for terminals; text is default otherwise if (how == pcmk__acl_render_default) { if (isatty(STDOUT_FILENO)) { how = pcmk__acl_render_color; } else { how = pcmk__acl_render_text; } } xmlDocDumpFormatMemory(annotated_doc, &annotated_dump, &dump_size, 1); /* res does not need private data: it's temporary and used only with libxslt * functions */ res = xmlReadDoc(annotated_dump, "on-the-fly-access-render", NULL, XML_PARSE_NONET); pcmk__assert(res != NULL); xmlFree(annotated_dump); pcmk__xml_free_doc(annotated_doc); annotated_doc = res; sfile = pcmk__xml_artefact_path(pcmk__xml_artefact_ns_base_xslt, "access-render-2"); parser_ctxt = xmlNewParserCtxt(); pcmk__assert(sfile != NULL); pcmk__mem_assert(parser_ctxt); xslt_doc = xmlCtxtReadFile(parser_ctxt, sfile, NULL, XML_PARSE_NONET); xslt = xsltParseStylesheetDoc(xslt_doc); /* acquires xslt_doc! */ if (xslt == NULL) { - crm_crit("Problem in parsing %s", sfile); + pcmk__crit("Problem in parsing %s", sfile); rc = EINVAL; goto done; } xmlFreeParserCtxt(parser_ctxt); xslt_ctxt = xsltNewTransformContext(xslt, annotated_doc); pcmk__mem_assert(xslt_ctxt); switch (how) { case pcmk__acl_render_namespace: params = params_namespace; break; case pcmk__acl_render_text: params = params_noansi; break; default: /* pcmk__acl_render_color is the only remaining option. * The compiler complains about params possibly uninitialized if we * don't use default here. */ params = params_useansi; break; } xsltQuoteUserParams(xslt_ctxt, params); res = xsltApplyStylesheetUser(xslt, annotated_doc, NULL, NULL, NULL, xslt_ctxt); pcmk__xml_free_doc(annotated_doc); annotated_doc = NULL; xsltFreeTransformContext(xslt_ctxt); xslt_ctxt = NULL; if (how == pcmk__acl_render_color && params != params_useansi) { char **param_i = (char **) params; do { free(*param_i); } while (*param_i++ != NULL); free(params); } if (res == NULL) { rc = EINVAL; } else { int doc_txt_len; int temp = xsltSaveResultToString(doc_txt_ptr, &doc_txt_len, res, xslt); pcmk__xml_free_doc(res); if (temp != 0) { rc = EINVAL; } } done: if (xslt != NULL) { xsltFreeStylesheet(xslt); } free(sfile); return rc; } diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index 92135549bc..b8168e29b2 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -1,1105 +1,1107 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // hash2field(), etc. #include #include #include #include "libpacemaker_private.h" // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk__is_set((flags), pcmk__action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk__is_set((flags), pcmk__action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk__is_set((flags), pcmk__action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->priv->name) /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = pcmk__xe_create(xml, PCMK_XE_NODE); pcmk__xe_set(node_xml, PCMK_XA_ID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pcmk_node_t *node, void *xml) { add_node_to_xml_by_id(node->priv->id, (xmlNode *) xml); } /*! * \internal * \brief Count (optionally add to XML) nodes needing maintenance state update * * \param[in,out] xml Parent XML tag to add to, if any * \param[in] scheduler Scheduler data * * \return Count of nodes added * \note Only Pacemaker Remote nodes are considered currently */ static int add_maintenance_nodes(xmlNode *xml, const pcmk_scheduler_t *scheduler) { xmlNode *maintenance = NULL; int count = 0; if (xml != NULL) { maintenance = pcmk__xe_create(xml, PCMK__XE_MAINTENANCE); } for (const GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { const pcmk_node_t *node = iter->data; if (!pcmk__is_pacemaker_remote_node(node)) { continue; } if ((node->details->maintenance && !pcmk__is_set(node->priv->flags, pcmk__node_remote_maint)) || (!node->details->maintenance && pcmk__is_set(node->priv->flags, pcmk__node_remote_maint))) { if (maintenance != NULL) { pcmk__xe_set(add_node_to_xml_by_id(node->priv->id, maintenance), PCMK__XA_NODE_IN_MAINTENANCE, (node->details->maintenance? "1" : "0")); } count++; } } crm_trace("%s %d nodes in need of maintenance mode update in state", ((maintenance == NULL)? "Counted" : "Added"), count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] scheduler Scheduler data */ static void add_maintenance_update(pcmk_scheduler_t *scheduler) { pcmk_action_t *action = NULL; if (add_maintenance_nodes(NULL, scheduler) != 0) { action = get_pseudo_op(PCMK_ACTION_MAINTENANCE_NODES, scheduler); pcmk__set_action_flags(action, pcmk__action_always_in_graph); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. * * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes */ static void add_downed_nodes(xmlNode *xml, const pcmk_action_t *action) { CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL), return); if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { /* Shutdown makes the action's node down */ xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->node->priv->id, downed); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, PCMK__META_STONITH_ACTION); if (pcmk__is_fencing_action(fence)) { xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->node->priv->id, downed); pe_foreach_guest_node(action->node->priv->scheduler, action->node, add_node_to_xml, downed); } } else if ((action->rsc != NULL) && pcmk__is_set(action->rsc->flags, pcmk__rsc_is_remote_connection) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GList *iter; pcmk_action_t *input; bool migrating = false; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pcmk__related_action_t *) iter->data)->action; if ((input->rsc != NULL) && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none) && pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { migrating = true; break; } } if (!migrating) { xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } /*! * \internal * \brief Create a transition graph operation key for a clone action * * \param[in] action Clone action * \param[in] interval_ms Action interval in milliseconds * * \return Newly allocated string with transition graph operation key */ static char * clone_op_key(const pcmk_action_t *action, guint interval_ms) { if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); return pcmk__notify_key(action->rsc->priv->history_id, n_type, n_task); } return pcmk__op_key(action->rsc->priv->history_id, pcmk__s(action->cancel_task, action->task), interval_ms); } /*! * \internal * \brief Add node details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] xml Transition graph action XML for \p action */ static void add_node_details(const pcmk_action_t *action, xmlNode *xml) { pcmk_node_t *router_node = pcmk__connection_host_for_action(action); pcmk__xe_set(xml, PCMK__META_ON_NODE, action->node->priv->name); pcmk__xe_set(xml, PCMK__META_ON_NODE_UUID, action->node->priv->id); if (router_node != NULL) { pcmk__xe_set(xml, PCMK__XA_ROUTER_NODE, router_node->priv->name); } } /*! * \internal * \brief Add resource details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_resource_details(const pcmk_action_t *action, xmlNode *action_xml) { xmlNode *rsc_xml = NULL; const char *attr_list[] = { PCMK_XA_CLASS, PCMK_XA_PROVIDER, PCMK_XA_TYPE, }; /* If a resource is locked to a node via PCMK_OPT_SHUTDOWN_LOCK, mark its * actions so the controller can preserve the lock when the action * completes. */ if (pcmk__action_locks_rsc_to_node(action)) { pcmk__xe_set_time(action_xml, PCMK_OPT_SHUTDOWN_LOCK, action->rsc->priv->lock_time); } // List affected resource rsc_xml = pcmk__xe_create(action_xml, (const char *) action->rsc->priv->xml->name); if (pcmk__is_set(action->rsc->flags, pcmk__rsc_removed) && (action->rsc->priv->history_id != NULL)) { /* Use the numbered instance name here, because if there is more * than one instance on a node, we need to make sure the command * goes to the right one. * * This is important even for anonymous clones, because the clone's * unique meta-attribute might have just been toggled from on to * off. */ crm_debug("Using orphan clone name %s instead of history ID %s", action->rsc->id, action->rsc->priv->history_id); pcmk__xe_set(rsc_xml, PCMK_XA_ID, action->rsc->priv->history_id); pcmk__xe_set(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id); } else if (!pcmk__is_set(action->rsc->flags, pcmk__rsc_unique)) { const char *xml_id = pcmk__xe_id(action->rsc->priv->xml); crm_debug("Using anonymous clone name %s for %s (aka %s)", xml_id, action->rsc->id, action->rsc->priv->history_id); /* ID is what we'd like client to use * LONG_ID is what they might know it as instead * * LONG_ID is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). (@TODO The effect of removing this on * regression tests suggests that it is still needed for unique clones) * * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan * and fall into the clause above instead */ pcmk__xe_set(rsc_xml, PCMK_XA_ID, xml_id); if ((action->rsc->priv->history_id != NULL) && !pcmk__str_eq(xml_id, action->rsc->priv->history_id, pcmk__str_none)) { pcmk__xe_set(rsc_xml, PCMK__XA_LONG_ID, action->rsc->priv->history_id); } else { pcmk__xe_set(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id); } } else { pcmk__assert(action->rsc->priv->history_id == NULL); pcmk__xe_set(rsc_xml, PCMK_XA_ID, action->rsc->id); } for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { pcmk__xe_set(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->priv->meta, attr_list[lpc])); } } /*! * \internal * \brief Add action attributes to transition graph action XML * * \param[in,out] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_action_attributes(pcmk_action_t *action, xmlNode *action_xml) { xmlNode *args_xml = NULL; pcmk_resource_t *rsc = action->rsc; /* We create free-standing XML to start, so we can sort the attributes * before adding it to action_xml, which keeps the scheduler regression * test graphs comparable. */ args_xml = pcmk__xe_create(action_xml, PCMK__XE_ATTRIBUTES); pcmk__xe_set(args_xml, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if ((rsc != NULL) && (action->node != NULL)) { // Get the resource instance attributes, evaluated properly for node GHashTable *params = pe_rsc_params(rsc, action->node, rsc->priv->scheduler); pcmk__substitute_remote_addr(rsc, params); g_hash_table_foreach(params, hash2smartfield, args_xml); } else if ((rsc != NULL) && (rsc->priv->variant <= pcmk__rsc_variant_primitive)) { GHashTable *params = pe_rsc_params(rsc, NULL, rsc->priv->scheduler); g_hash_table_foreach(params, hash2smartfield, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (rsc != NULL) { pcmk_resource_t *parent = rsc; while (parent != NULL) { parent->priv->cmds->add_graph_meta(parent, args_xml); parent = parent->priv->parent; } pcmk__add_guest_meta_to_xml(args_xml, action); } pcmk__xe_sort_attrs(args_xml); } /*! * \internal * \brief Create the transition graph XML for a scheduled action * * \param[in,out] parent Parent XML element to add action to * \param[in,out] action Scheduled action * \param[in] skip_details If false, add action details as sub-elements * \param[in] scheduler Scheduler data */ static void create_graph_action(xmlNode *parent, pcmk_action_t *action, bool skip_details, const pcmk_scheduler_t *scheduler) { bool needs_node_info = true; bool needs_maintenance_info = false; xmlNode *action_xml = NULL; if ((action == NULL) || (scheduler == NULL)) { return; } // Create the top-level element based on task if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* All fences need node info; guest node fences are pseudo-events */ if (pcmk__is_set(action->flags, pcmk__action_pseudo)) { action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT); } else { action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); } } else if (pcmk__str_any_of(action->task, PCMK_ACTION_DO_SHUTDOWN, PCMK_ACTION_CLEAR_FAILCOUNT, NULL)) { action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); } else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { // CIB-only clean-up for shutdown locks action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); pcmk__xe_set(action_xml, PCMK__XA_MODE, PCMK__VALUE_CIB); } else if (pcmk__is_set(action->flags, pcmk__action_pseudo)) { if (pcmk__str_eq(action->task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_none)) { needs_maintenance_info = true; } action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT); needs_node_info = false; } else { action_xml = pcmk__xe_create(parent, PCMK__XE_RSC_OP); } pcmk__xe_set_int(action_xml, PCMK_XA_ID, action->id); pcmk__xe_set(action_xml, PCMK_XA_OPERATION, action->task); if ((action->rsc != NULL) && (action->rsc->priv->history_id != NULL)) { char *clone_key = NULL; guint interval_ms; if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } clone_key = clone_op_key(action, interval_ms); pcmk__xe_set(action_xml, PCMK__XA_OPERATION_KEY, clone_key); pcmk__xe_set(action_xml, "internal_" PCMK__XA_OPERATION_KEY, action->uuid); free(clone_key); } else { pcmk__xe_set(action_xml, PCMK__XA_OPERATION_KEY, action->uuid); } if (needs_node_info && (action->node != NULL)) { add_node_details(action, action_xml); pcmk__insert_dup(action->meta, PCMK__META_ON_NODE, action->node->priv->name); pcmk__insert_dup(action->meta, PCMK__META_ON_NODE_UUID, action->node->priv->id); } if (skip_details) { return; } if ((action->rsc != NULL) && !pcmk__is_set(action->flags, pcmk__action_pseudo)) { // This is a real resource action, so add resource details add_resource_details(action, action_xml); } /* List any attributes in effect */ add_action_attributes(action, action_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, scheduler); } } /*! * \internal * \brief Check whether an action should be added to the transition graph * * \param[in,out] action Action to check * * \return true if action should be added to graph, otherwise false */ static bool should_add_action_to_graph(pcmk_action_t *action) { if (!pcmk__is_set(action->flags, pcmk__action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; } if (pcmk__is_set(action->flags, pcmk__action_optional) && !pcmk__is_set(action->flags, pcmk__action_always_in_graph)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; } /* Actions for unmanaged resources should be excluded from the graph, * with the exception of monitors and cancellation of recurring monitors. */ if ((action->rsc != NULL) && !pcmk__is_set(action->rsc->flags, pcmk__rsc_managed) && !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { const char *interval_ms_s; /* A cancellation of a recurring monitor will get here because the task * is cancel rather than monitor, but the interval can still be used to * recognize it. The interval has been normalized to milliseconds by * this point, so a string comparison is sufficient. */ interval_ms_s = g_hash_table_lookup(action->meta, PCMK_META_INTERVAL); if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } /* Always add pseudo-actions, fence actions, and shutdown actions (already * determined to be required and runnable by this point) */ if (pcmk__is_set(action->flags, pcmk__action_pseudo) || pcmk__strcase_any_of(action->task, PCMK_ACTION_STONITH, PCMK_ACTION_DO_SHUTDOWN, NULL)) { return true; } if (action->node == NULL) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was not assigned to a node (bug?)", action->uuid, action->id); pcmk__log_action("Unassigned", action, false); return false; } if (pcmk__is_set(action->flags, pcmk__action_on_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, pcmk__node_name(action->node)); } else if (pcmk__is_guest_or_bundle_node(action->node) && !pcmk__is_set(action->node->priv->flags, pcmk__node_remote_reset)) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest %s", action->uuid, action->id, pcmk__node_name(action->node)); } else if (!action->node->details->online) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); pcmk__log_action("Offline node", action, false); return false; } else if (action->node->details->unclean) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); pcmk__log_action("Unclean node", action, false); return false; } return true; } /*! * \internal * \brief Check whether an ordering's flags can change an action * * \param[in] ordering Ordering to check * * \return true if ordering has flags that can change an action, false otherwise */ static bool ordering_can_change_actions(const pcmk__related_action_t *ordering) { return pcmk__any_flags_set(ordering->flags, ~(pcmk__ar_then_implies_first_graphed |pcmk__ar_first_implies_then_graphed |pcmk__ar_ordered)); } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool should_add_input_to_graph(const pcmk_action_t *action, pcmk__related_action_t *input) { if (input->graphed) { return true; } if (input->flags == pcmk__ar_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk__is_set(input->action->flags, pcmk__action_runnable) && !ordering_can_change_actions(input)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk__is_set(input->action->flags, pcmk__action_runnable) && pcmk__is_set(input->flags, pcmk__ar_min_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "minimum number of instances required but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk__is_set(input->flags, pcmk__ar_unmigratable_then_blocks) && !pcmk__is_set(input->action->flags, pcmk__action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input blocked if 'then' unmigratable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk__is_set(input->flags, pcmk__ar_if_first_unmigratable) && pcmk__is_set(input->action->flags, pcmk__action_migratable)) { crm_trace("Ignoring %s (%d) input %s (%d): ordering applies " "only if input is unmigratable, but it is migratable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->flags == pcmk__ar_ordered) && pcmk__is_set(input->action->flags, pcmk__action_migratable) && pcmk__ends_with(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->flags == pcmk__ar_if_on_same_node_or_target) { pcmk_node_t *input_node = input->action->node; if ((action->rsc != NULL) && pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { pcmk_node_t *assigned = action->rsc->priv->assigned_node; /* For load_stopped -> migrate_to orderings, we care about where * the resource has been assigned, not where migrate_to will be * executed. */ if (!pcmk__same_node(input_node, assigned)) { crm_trace("Ignoring %s (%d) input %s (%d): " "migration target %s is not same as input node %s", action->uuid, action->id, input->action->uuid, input->action->id, (assigned? assigned->priv->name : ""), (input_node? input_node->priv->name : "")); input->flags = pcmk__ar_none; return false; } } else if (!pcmk__same_node(input_node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "not on same node (%s vs %s)", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->priv->name : ""), (input_node? input_node->priv->name : "")); input->flags = pcmk__ar_none; return false; } else if (pcmk__is_set(input->action->flags, pcmk__action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering optional", action->uuid, action->id, input->action->uuid, input->action->id); input->flags = pcmk__ar_none; return false; } } else if (input->flags == pcmk__ar_if_required_on_same_node) { if (input->action->node && action->node && !pcmk__same_node(input->action->node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "not on same node (%s vs %s)", action->uuid, action->id, input->action->uuid, input->action->id, pcmk__node_name(action->node), pcmk__node_name(input->action->node)); input->flags = pcmk__ar_none; return false; } else if (pcmk__is_set(input->action->flags, pcmk__action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): optional", action->uuid, action->id, input->action->uuid, input->action->id); input->flags = pcmk__ar_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && pcmk__is_set(input->action->rsc->flags, pcmk__rsc_failed) && !pcmk__is_set(input->action->rsc->flags, pcmk__rsc_managed) && pcmk__ends_with(input->action->uuid, "_stop_0") && pcmk__is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (pcmk__is_set(input->action->flags, pcmk__action_optional) && !pcmk__any_flags_set(input->action->flags, pcmk__action_always_in_graph |pcmk__action_added_to_graph) && !should_add_action_to_graph(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x", action->uuid, action->id, action_type_str(input->action->flags), input->action->uuid, input->action->id, action_node_str(input->action), action_runnable_str(input->action->flags), action_optional_str(input->action->flags), input->flags); return true; } /*! * \internal * \brief Check whether an ordering creates an ordering loop * * \param[in] init_action "First" action in ordering * \param[in] action Callers should always set this the same as * \p init_action (this function may use a different * value for recursive calls) * \param[in,out] input Action wrapper for "then" action in ordering * * \return true if the ordering creates a loop, otherwise false */ bool pcmk__graph_has_loop(const pcmk_action_t *init_action, const pcmk_action_t *action, pcmk__related_action_t *input) { bool has_loop = false; if (pcmk__is_set(input->action->flags, pcmk__action_detect_loop)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->priv->name : "", action->uuid, action->node? action->node->priv->name : "", input->flags); return false; } // Don't need to check inputs that won't be used if (!should_add_input_to_graph(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->priv->name : "", init_action->uuid, init_action->node? init_action->node->priv->name : ""); return true; } pcmk__set_action_flags(input->action, pcmk__action_detect_loop); crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->priv->name : "", input->action->uuid, input->action->node? input->action->node->priv->name : "", input->flags, init_action->uuid, init_action->node? init_action->node->priv->name : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (pcmk__graph_has_loop(init_action, input->action, (pcmk__related_action_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; break; } } pcmk__clear_action_flags(input->action, pcmk__action_detect_loop); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->priv->name : "", action->uuid, action->node? action->node->priv->name : "", input->flags); } return has_loop; } /*! * \internal * \brief Create a synapse XML element for a transition graph * * \param[in] action Action that synapse is for * \param[in,out] scheduler Scheduler data containing graph * * \return Newly added XML element for new graph synapse */ static xmlNode * create_graph_synapse(const pcmk_action_t *action, pcmk_scheduler_t *scheduler) { int synapse_priority = 0; xmlNode *syn = pcmk__xe_create(scheduler->priv->graph, PCMK__XE_SYNAPSE); pcmk__xe_set_int(syn, PCMK_XA_ID, scheduler->priv->synapse_count++); if (action->rsc != NULL) { synapse_priority = action->rsc->priv->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { pcmk__xe_set_int(syn, PCMK__XA_PRIORITY, synapse_priority); } return syn; } /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in,out] data Action to possibly add * \param[in,out] user_data Scheduler data * * \note This will de-duplicate the action inputs, meaning that the * pcmk__related_action_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after * pcmk__apply_orderings() is complete, and nothing after this should rely * on those type flags. (For example, some code looks for type equal to * some flag rather than whether the flag is set, and some code looks for * particular combinations of flags -- such code must be done before * pcmk__create_graph().) */ static void add_action_to_graph(gpointer data, gpointer user_data) { pcmk_action_t *action = (pcmk_action_t *) data; pcmk_scheduler_t *scheduler = (pcmk_scheduler_t *) user_data; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; /* If we haven't already, de-duplicate inputs (even if we won't be adding * the action to the graph, so that crm_simulate's dot graphs don't have * duplicates). */ if (!pcmk__is_set(action->flags, pcmk__action_inputs_deduplicated)) { pcmk__deduplicate_action_inputs(action); pcmk__set_action_flags(action, pcmk__action_inputs_deduplicated); } if (pcmk__is_set(action->flags, pcmk__action_added_to_graph) || !should_add_action_to_graph(action)) { return; // Already added, or shouldn't be } pcmk__set_action_flags(action, pcmk__action_added_to_graph); crm_trace("Adding action %d (%s%s%s) to graph", action->id, action->uuid, ((action->node == NULL)? "" : " on "), ((action->node == NULL)? "" : action->node->priv->name)); syn = create_graph_synapse(action, scheduler); set = pcmk__xe_create(syn, PCMK__XE_ACTION_SET); in = pcmk__xe_create(syn, PCMK__XE_INPUTS); create_graph_action(set, action, false, scheduler); for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *input = lpc->data; if (should_add_input_to_graph(action, input)) { xmlNode *input_xml = pcmk__xe_create(in, PCMK__XE_TRIGGER); input->graphed = true; create_graph_action(input_xml, input->action, true, scheduler); } } } static int transition_id = 0; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] scheduler Scheduler data * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const pcmk_scheduler_t *scheduler, const char *filename) { if (pcmk__is_set(scheduler->flags, pcmk__sched_processing_error) || pcmk__config_has_error) { crm_err("Calculated transition %d (with errors)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else if (pcmk__is_set(scheduler->flags, pcmk__sched_processing_warning) || pcmk__config_has_warning) { crm_warn("Calculated transition %d (with warnings)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else { crm_notice("Calculated transition %d%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } if (pcmk__config_has_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /*! * \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__add_rsc_actions_to_graph(pcmk_resource_t *rsc) { GList *iter = NULL; pcmk__assert(rsc != NULL); pcmk__rsc_trace(rsc, "Adding actions for %s to graph", rsc->id); // First add the resource's own actions g_list_foreach(rsc->priv->actions, add_action_to_graph, rsc->priv->scheduler); // Then recursively add its children's actions (appropriate to variant) for (iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data; child_rsc->priv->cmds->add_actions_to_graph(child_rsc); } } /*! * \internal * \brief Create a transition graph with all cluster actions needed * * \param[in,out] scheduler Scheduler data */ void pcmk__create_graph(pcmk_scheduler_t *scheduler) { GList *iter = NULL; const char *value = NULL; long long limit = 0LL; GHashTable *config_hash = scheduler->priv->options; int rc = pcmk_rc_ok; transition_id++; crm_trace("Creating transition graph %d", transition_id); scheduler->priv->graph = pcmk__xe_create(NULL, PCMK__XE_TRANSITION_GRAPH); value = pcmk__cluster_option(config_hash, PCMK_OPT_CLUSTER_DELAY); pcmk__xe_set(scheduler->priv->graph, PCMK_OPT_CLUSTER_DELAY, value); value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT); pcmk__xe_set(scheduler->priv->graph, PCMK_OPT_STONITH_TIMEOUT, value); pcmk__xe_set(scheduler->priv->graph, PCMK__XA_FAILED_STOP_OFFSET, PCMK_VALUE_INFINITY); if (pcmk__is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) { pcmk__xe_set(scheduler->priv->graph, PCMK__XA_FAILED_START_OFFSET, PCMK_VALUE_INFINITY); } else { pcmk__xe_set(scheduler->priv->graph, PCMK__XA_FAILED_START_OFFSET, "1"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_BATCH_LIMIT); pcmk__xe_set(scheduler->priv->graph, PCMK_OPT_BATCH_LIMIT, value); pcmk__xe_set_int(scheduler->priv->graph, "transition_id", transition_id); value = pcmk__cluster_option(config_hash, PCMK_OPT_MIGRATION_LIMIT); rc = pcmk__scan_ll(value, &limit, 0LL); if (rc != pcmk_rc_ok) { crm_warn("Ignoring invalid value '%s' for " PCMK_OPT_MIGRATION_LIMIT ": %s", value, pcmk_rc_str(rc)); } else if (limit > 0) { pcmk__xe_set(scheduler->priv->graph, PCMK_OPT_MIGRATION_LIMIT, value); } if (scheduler->priv->recheck_by > 0) { pcmk__xe_set_time(scheduler->priv->graph, "recheck-by", scheduler->priv->recheck_by); } /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ // Add resource actions to graph for (iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; pcmk__rsc_trace(rsc, "Processing actions for %s", rsc->id); rsc->priv->cmds->add_actions_to_graph(rsc); } // Add pseudo-action for list of nodes with maintenance state update add_maintenance_update(scheduler); // Add non-resource (node) actions for (iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; if ((action->rsc != NULL) && (action->node != NULL) && action->node->details->shutdown && !pcmk__is_set(action->rsc->flags, pcmk__rsc_maintenance) && !pcmk__any_flags_set(action->flags, pcmk__action_optional|pcmk__action_runnable) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Eventually we should just ignore the 'fence' case, but for now * it's the best way to detect (in CTS) when CIB resource updates * are being lost. */ if (pcmk__is_set(scheduler->flags, pcmk__sched_quorate) || (scheduler->no_quorum_policy == pcmk_no_quorum_ignore)) { + + const bool unclean = action->node->details->unclean; const bool managed = pcmk__is_set(action->rsc->flags, pcmk__rsc_managed); const bool failed = pcmk__is_set(action->rsc->flags, pcmk__rsc_failed); - crm_crit("Cannot %s %s because of %s:%s%s (%s)", - action->node->details->unclean? "fence" : "shut down", - pcmk__node_name(action->node), action->rsc->id, - (managed? " blocked" : " unmanaged"), - (failed? " failed" : ""), action->uuid); + pcmk__crit("Cannot %s %s because of %s:%s%s (%s)", + (unclean? "fence" : "shut down"), + pcmk__node_name(action->node), action->rsc->id, + (managed? " blocked" : " unmanaged"), + (failed? " failed" : ""), action->uuid); } } add_action_to_graph((gpointer) action, (gpointer) scheduler); } crm_log_xml_trace(scheduler->priv->graph, "graph"); } diff --git a/lib/services/dbus.c b/lib/services/dbus.c index 9033d3cea6..3b6994bc11 100644 --- a/lib/services/dbus.c +++ b/lib/services/dbus.c @@ -1,778 +1,779 @@ /* - * Copyright 2014-2024 the Pacemaker project contributors + * Copyright 2014-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include /* * DBus message dispatch */ // List of DBus connections (DBusConnection*) with messages available static GList *conn_dispatches = NULL; /*! * \internal * \brief Save an indication that DBus messages need dispatching * * \param[in] connection DBus connection with messages to dispatch * \param[in] new_status Dispatch status as reported by DBus library * \param[in] data Ignored * * \note This is suitable to be used as a DBus status dispatch function. * As mentioned in the DBus documentation, dbus_connection_dispatch() must * not be called from within this function, and any re-entrancy is a bad * idea. Instead, this should just flag the main loop that messages need * to be dispatched. */ static void update_dispatch_status(DBusConnection *connection, DBusDispatchStatus new_status, void *data) { if (new_status == DBUS_DISPATCH_DATA_REMAINS) { crm_trace("DBus connection has messages available for dispatch"); conn_dispatches = g_list_prepend(conn_dispatches, connection); } else { crm_trace("DBus connection has no messages available for dispatch " "(status %d)", new_status); } } /*! * \internal * \brief Dispatch available messages on all DBus connections */ static void dispatch_messages(void) { for (GList *gIter = conn_dispatches; gIter != NULL; gIter = gIter->next) { DBusConnection *connection = gIter->data; while (dbus_connection_get_dispatch_status(connection) == DBUS_DISPATCH_DATA_REMAINS) { crm_trace("Dispatching available messages on DBus connection"); dbus_connection_dispatch(connection); } } g_list_free(conn_dispatches); conn_dispatches = NULL; } /* * DBus file descriptor watches * * The DBus library allows the caller to register functions for the library to * use for file descriptor notifications via a main loop. */ /* Copied from dbus-watch.c */ static const char* dbus_watch_flags_to_string(int flags) { const char *watch_type; if ((flags & DBUS_WATCH_READABLE) && (flags & DBUS_WATCH_WRITABLE)) { watch_type = "read/write"; } else if (flags & DBUS_WATCH_READABLE) { watch_type = "read"; } else if (flags & DBUS_WATCH_WRITABLE) { watch_type = "write"; } else { watch_type = "neither read nor write"; } return watch_type; } /*! * \internal * \brief Dispatch data available on a DBus file descriptor watch * * \param[in,out] userdata Pointer to the DBus watch * * \return Always 0 * \note This is suitable for use as a dispatch function in * struct mainloop_fd_callbacks (which means that a negative return value * would indicate the file descriptor is no longer required). */ static int dispatch_fd_data(gpointer userdata) { bool oom = FALSE; DBusWatch *watch = userdata; int flags = dbus_watch_get_flags(watch); bool enabled = dbus_watch_get_enabled (watch); crm_trace("Dispatching DBus watch for file descriptor %d " "with flags %#x (%s)", dbus_watch_get_unix_fd(watch), flags, dbus_watch_flags_to_string(flags)); if (enabled && (flags & (DBUS_WATCH_READABLE|DBUS_WATCH_WRITABLE))) { oom = !dbus_watch_handle(watch, flags); } else if (enabled) { oom = !dbus_watch_handle(watch, DBUS_WATCH_ERROR); } if (flags != dbus_watch_get_flags(watch)) { flags = dbus_watch_get_flags(watch); crm_trace("Dispatched DBus file descriptor watch: now %#x (%s)", flags, dbus_watch_flags_to_string(flags)); } if (oom) { - crm_crit("Could not dispatch DBus file descriptor data: Out of memory"); + pcmk__crit("Could not dispatch DBus file descriptor data: Out of " + "memory"); } else { dispatch_messages(); } return 0; } static void watch_fd_closed(gpointer userdata) { crm_trace("DBus watch for file descriptor %d is now closed", dbus_watch_get_unix_fd((DBusWatch *) userdata)); } static struct mainloop_fd_callbacks pcmk_dbus_cb = { .dispatch = dispatch_fd_data, .destroy = watch_fd_closed, }; static dbus_bool_t add_dbus_watch(DBusWatch *watch, void *data) { int fd = dbus_watch_get_unix_fd(watch); mainloop_io_t *client = mainloop_add_fd("dbus", G_PRIORITY_DEFAULT, fd, watch, &pcmk_dbus_cb); crm_trace("Added DBus watch for file descriptor %d", fd); dbus_watch_set_data(watch, client, NULL); return TRUE; } static void toggle_dbus_watch(DBusWatch *watch, void *data) { // @TODO Should this do something more? crm_debug("DBus watch for file descriptor %d is now %s", dbus_watch_get_unix_fd(watch), (dbus_watch_get_enabled(watch)? "enabled" : "disabled")); } static void remove_dbus_watch(DBusWatch *watch, void *data) { crm_trace("Removed DBus watch for file descriptor %d", dbus_watch_get_unix_fd(watch)); mainloop_del_fd((mainloop_io_t *) dbus_watch_get_data(watch)); } static void register_watch_functions(DBusConnection *connection) { dbus_connection_set_watch_functions(connection, add_dbus_watch, remove_dbus_watch, toggle_dbus_watch, NULL, NULL); } /* * DBus main loop timeouts * * The DBus library allows the caller to register functions for the library to * use for managing timers via a main loop. */ static gboolean timer_popped(gpointer data) { crm_debug("%dms DBus timer expired", dbus_timeout_get_interval((DBusTimeout *) data)); dbus_timeout_handle(data); return FALSE; } static dbus_bool_t add_dbus_timer(DBusTimeout *timeout, void *data) { int interval_ms = dbus_timeout_get_interval(timeout); guint id = pcmk__create_timer(interval_ms, timer_popped, timeout); if (id) { dbus_timeout_set_data(timeout, GUINT_TO_POINTER(id), NULL); } crm_trace("Added %dms DBus timer", interval_ms); return TRUE; } static void remove_dbus_timer(DBusTimeout *timeout, void *data) { void *vid = dbus_timeout_get_data(timeout); guint id = GPOINTER_TO_UINT(vid); crm_trace("Removing %dms DBus timer", dbus_timeout_get_interval(timeout)); if (id) { g_source_remove(id); dbus_timeout_set_data(timeout, 0, NULL); } } static void toggle_dbus_timer(DBusTimeout *timeout, void *data) { bool enabled = dbus_timeout_get_enabled(timeout); crm_trace("Toggling %dms DBus timer %s", dbus_timeout_get_interval(timeout), (enabled? "off": "on")); if (enabled) { add_dbus_timer(timeout, data); } else { remove_dbus_timer(timeout, data); } } static void register_timer_functions(DBusConnection *connection) { dbus_connection_set_timeout_functions(connection, add_dbus_timer, remove_dbus_timer, toggle_dbus_timer, NULL, NULL); } /* * General DBus utilities */ DBusConnection * pcmk_dbus_connect(void) { DBusError err; DBusConnection *connection; dbus_error_init(&err); connection = dbus_bus_get(DBUS_BUS_SYSTEM, &err); if (dbus_error_is_set(&err)) { crm_err("Could not connect to DBus: %s", err.message); dbus_error_free(&err); return NULL; } if (connection == NULL) { return NULL; } /* Tell libdbus not to exit the process when a disconnect happens. This * defaults to FALSE but is toggled on by the dbus_bus_get() call above. */ dbus_connection_set_exit_on_disconnect(connection, FALSE); // Set custom handlers for various situations register_timer_functions(connection); register_watch_functions(connection); dbus_connection_set_dispatch_status_function(connection, update_dispatch_status, NULL, NULL); // Call the dispatch function to check for any messages waiting already update_dispatch_status(connection, dbus_connection_get_dispatch_status(connection), NULL); return connection; } void pcmk_dbus_disconnect(DBusConnection *connection) { /* We acquire our dbus connection with dbus_bus_get(), which makes it a * shared connection. Therefore, we can't close or free it here. The * best we can do is decrement the reference count so dbus knows when * there are no more clients connected to it. */ dbus_connection_unref(connection); } // Custom DBus error names to use #define ERR_NO_REQUEST "org.clusterlabs.pacemaker.NoRequest" #define ERR_NO_REPLY "org.clusterlabs.pacemaker.NoReply" #define ERR_INVALID_REPLY "org.clusterlabs.pacemaker.InvalidReply" #define ERR_INVALID_REPLY_METHOD "org.clusterlabs.pacemaker.InvalidReply.Method" #define ERR_INVALID_REPLY_SIGNAL "org.clusterlabs.pacemaker.InvalidReply.Signal" #define ERR_INVALID_REPLY_TYPE "org.clusterlabs.pacemaker.InvalidReply.Type" #define ERR_SEND_FAILED "org.clusterlabs.pacemaker.SendFailed" /*! * \internal * \brief Check whether a DBus reply indicates an error occurred * * \param[in] pending If non-NULL, indicates that a DBus request was sent * \param[in] reply Reply received from DBus * \param[out] ret If non-NULL, will be set to DBus error, if any * * \return TRUE if an error was found, FALSE otherwise * * \note Following the DBus API convention, a TRUE return is exactly equivalent * to ret being set. If ret is provided and this function returns TRUE, * the caller is responsible for calling dbus_error_free() on ret when * done using it. */ bool pcmk_dbus_find_error(const DBusPendingCall *pending, DBusMessage *reply, DBusError *ret) { DBusError error; dbus_error_init(&error); if (pending == NULL) { dbus_set_error_const(&error, ERR_NO_REQUEST, "No request sent"); } else if (reply == NULL) { dbus_set_error_const(&error, ERR_NO_REPLY, "No reply"); } else { DBusMessageIter args; int dtype = dbus_message_get_type(reply); switch (dtype) { case DBUS_MESSAGE_TYPE_METHOD_RETURN: { char *sig = NULL; dbus_message_iter_init(reply, &args); crm_trace("Received DBus reply with argument type '%s'", (sig = dbus_message_iter_get_signature(&args))); if (sig != NULL) { dbus_free(sig); } } break; case DBUS_MESSAGE_TYPE_INVALID: dbus_set_error_const(&error, ERR_INVALID_REPLY, "Invalid reply"); break; case DBUS_MESSAGE_TYPE_METHOD_CALL: dbus_set_error_const(&error, ERR_INVALID_REPLY_METHOD, "Invalid reply (method call)"); break; case DBUS_MESSAGE_TYPE_SIGNAL: dbus_set_error_const(&error, ERR_INVALID_REPLY_SIGNAL, "Invalid reply (signal)"); break; case DBUS_MESSAGE_TYPE_ERROR: dbus_set_error_from_message(&error, reply); break; default: dbus_set_error(&error, ERR_INVALID_REPLY_TYPE, "Unknown reply type %d", dtype); } } if (dbus_error_is_set(&error)) { crm_trace("DBus reply indicated error '%s' (%s)", error.name, error.message); if (ret) { dbus_error_init(ret); dbus_move_error(&error, ret); } else { dbus_error_free(&error); } return TRUE; } return FALSE; } /*! * \internal * \brief Send a DBus request and wait for the reply * * \param[in,out] msg DBus request to send * \param[in,out] connection DBus connection to use * \param[out] error If non-NULL, will be set to error, if any * \param[in] timeout Timeout to use for request * * \return DBus reply * * \note If error is non-NULL, it is initialized, so the caller may always use * dbus_error_is_set() to determine whether an error occurred; the caller * is responsible for calling dbus_error_free() in this case. */ DBusMessage * pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, DBusError *error, int timeout) { const char *method = NULL; DBusMessage *reply = NULL; DBusPendingCall* pending = NULL; pcmk__assert(dbus_message_get_type(msg) == DBUS_MESSAGE_TYPE_METHOD_CALL); method = dbus_message_get_member (msg); /* Ensure caller can reliably check whether error is set */ if (error) { dbus_error_init(error); } if (timeout <= 0) { /* DBUS_TIMEOUT_USE_DEFAULT (-1) tells DBus to use a sane default */ timeout = DBUS_TIMEOUT_USE_DEFAULT; } // send message and get a handle for a reply if (!dbus_connection_send_with_reply(connection, msg, &pending, timeout)) { if (error) { dbus_set_error(error, ERR_SEND_FAILED, "Could not queue DBus '%s' request", method); } return NULL; } dbus_connection_flush(connection); if (pending) { /* block until we receive a reply */ dbus_pending_call_block(pending); /* get the reply message */ reply = dbus_pending_call_steal_reply(pending); } (void) pcmk_dbus_find_error(pending, reply, error); if (pending) { /* free the pending message handle */ dbus_pending_call_unref(pending); } return reply; } /*! * \internal * \brief Send a DBus message with a callback for the reply * * \param[in,out] msg DBus message to send * \param[in,out] connection DBus connection to send on * \param[in] done Function to call when pending call completes * \param[in] user_data Data to pass to done callback * * \return Handle for reply on success, NULL on error * \note The caller can assume that the done callback is called always and * only when the return value is non-NULL. (This allows the caller to * know where it should free dynamically allocated user_data.) */ DBusPendingCall * pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection, void (*done)(DBusPendingCall *pending, void *user_data), void *user_data, int timeout) { const char *method = NULL; DBusPendingCall* pending = NULL; pcmk__assert(done != NULL); pcmk__assert(dbus_message_get_type(msg) == DBUS_MESSAGE_TYPE_METHOD_CALL); method = dbus_message_get_member(msg); if (timeout <= 0) { /* DBUS_TIMEOUT_USE_DEFAULT (-1) tells DBus to use a sane default */ timeout = DBUS_TIMEOUT_USE_DEFAULT; } // send message and get a handle for a reply if (!dbus_connection_send_with_reply(connection, msg, &pending, timeout)) { crm_err("Could not send DBus %s message: failed", method); return NULL; } else if (pending == NULL) { crm_err("Could not send DBus %s message: connection may be closed", method); return NULL; } if (dbus_pending_call_get_completed(pending)) { crm_info("DBus %s message completed too soon", method); /* Calling done() directly in this case instead of setting notify below * breaks things */ } if (!dbus_pending_call_set_notify(pending, done, user_data, NULL)) { return NULL; } return pending; } bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line) { int dtype = 0; DBusMessageIter lfield; if (field == NULL) { if (dbus_message_iter_init(msg, &lfield)) { field = &lfield; } } if (field == NULL) { do_crm_log_alias(LOG_INFO, __FILE__, function, line, "DBus reply has empty parameter list (expected '%c')", expected); return FALSE; } dtype = dbus_message_iter_get_arg_type(field); if (dtype != expected) { DBusMessageIter args; char *sig; dbus_message_iter_init(msg, &args); sig = dbus_message_iter_get_signature(&args); do_crm_log_alias(LOG_INFO, __FILE__, function, line, "DBus reply has unexpected type " "(expected '%c' not '%c' in '%s')", expected, dtype, sig); dbus_free(sig); return FALSE; } return TRUE; } /* * Property queries */ /* DBus APIs often provide queryable properties that use this standard * interface. See: * https://dbus.freedesktop.org/doc/dbus-specification.html#standard-interfaces-properties */ #define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties" // Callback prototype for when a DBus property query result is received typedef void (*property_callback_func)(const char *name, // Property name const char *value, // Property value void *userdata); // Caller-provided data // Data needed by DBus property queries struct property_query { char *name; // Property name being queried char *target; // Name of DBus bus that query should be sent to char *object; // DBus object path for object with the property void *userdata; // Caller-provided data to supply to callback property_callback_func callback; // Function to call when result is received }; static void free_property_query(struct property_query *data) { free(data->target); free(data->object); free(data->name); free(data); } static char * handle_query_result(DBusMessage *reply, struct property_query *data) { DBusError error; char *output = NULL; DBusMessageIter args; DBusMessageIter variant_iter; DBusBasicValue value; dbus_error_init(&error); // First, check if the reply contains an error if (pcmk_dbus_find_error((void*)&error, reply, &error)) { crm_err("DBus query for %s property '%s' failed: %s", data->object, data->name, error.message); dbus_error_free(&error); goto cleanup; } // The lone output argument should be a DBus variant type dbus_message_iter_init(reply, &args); if (!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_VARIANT, __func__, __LINE__)) { crm_err("DBus query for %s property '%s' failed: Unexpected reply type", data->object, data->name); goto cleanup; } // The variant should be a string dbus_message_iter_recurse(&args, &variant_iter); if (!pcmk_dbus_type_check(reply, &variant_iter, DBUS_TYPE_STRING, __func__, __LINE__)) { crm_err("DBus query for %s property '%s' failed: " "Unexpected variant type", data->object, data->name); goto cleanup; } dbus_message_iter_get_basic(&variant_iter, &value); // There should be no more arguments (in variant or reply) dbus_message_iter_next(&variant_iter); if (dbus_message_iter_get_arg_type(&variant_iter) != DBUS_TYPE_INVALID) { crm_err("DBus query for %s property '%s' failed: " "Too many arguments in reply", data->object, data->name); goto cleanup; } dbus_message_iter_next(&args); if (dbus_message_iter_get_arg_type(&args) != DBUS_TYPE_INVALID) { crm_err("DBus query for %s property '%s' failed: " "Too many arguments in reply", data->object, data->name); goto cleanup; } crm_trace("DBus query result for %s: %s='%s'", data->object, data->name, (value.str? value.str : "")); if (data->callback) { // Query was asynchronous data->callback(data->name, (value.str? value.str : ""), data->userdata); } else { // Query was synchronous output = strdup(value.str? value.str : ""); } cleanup: free_property_query(data); return output; } static void async_query_result_cb(DBusPendingCall *pending, void *user_data) { DBusMessage *reply = NULL; char *value = NULL; if (pending) { reply = dbus_pending_call_steal_reply(pending); } value = handle_query_result(reply, user_data); free(value); if (reply) { dbus_message_unref(reply); } } /*! * \internal * \brief Query a property on a DBus object * * \param[in,out] connection An active connection to DBus * \param[in] target DBus name that the query should be sent to * \param[in] obj DBus object path for object with the property * \param[in] iface DBus interface for property to query * \param[in] name Name of property to query * \param[in] callback If not NULL, perform query asynchronously and call * this function when query completes * \param[in,out] userdata Caller-provided data to provide to \p callback * \param[out] pending If \p callback is not NULL, this will be set to * handle for the reply (or NULL on error) * \param[in] timeout Abort query if it takes longer than this (ms) * * \return NULL if \p callback is non-NULL (i.e. asynchronous), otherwise a * newly allocated string with property value * \note It is the caller's responsibility to free the result with free(). */ char * pcmk_dbus_get_property(DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name, property_callback_func callback, void *userdata, DBusPendingCall **pending, int timeout) { DBusMessage *msg; char *output = NULL; struct property_query *query_data = NULL; CRM_CHECK((connection != NULL) && (target != NULL) && (obj != NULL) && (iface != NULL) && (name != NULL), return NULL); crm_trace("Querying DBus %s for %s property '%s'", target, obj, name); // Create a new message to use to invoke method msg = dbus_message_new_method_call(target, obj, BUS_PROPERTY_IFACE, "Get"); if (msg == NULL) { crm_err("DBus query for %s property '%s' failed: " "Unable to create message", obj, name); return NULL; } // Add the interface name and property name as message arguments if (!dbus_message_append_args(msg, DBUS_TYPE_STRING, &iface, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)) { crm_err("DBus query for %s property '%s' failed: " "Could not append arguments", obj, name); dbus_message_unref(msg); return NULL; } query_data = malloc(sizeof(struct property_query)); if (query_data == NULL) { - crm_crit("DBus query for %s property '%s' failed: Out of memory", - obj, name); + pcmk__crit("DBus query for %s property '%s' failed: Out of memory", obj, + name); dbus_message_unref(msg); return NULL; } query_data->target = strdup(target); query_data->object = strdup(obj); query_data->callback = callback; query_data->userdata = userdata; query_data->name = strdup(name); CRM_CHECK((query_data->target != NULL) && (query_data->object != NULL) && (query_data->name != NULL), free_property_query(query_data); dbus_message_unref(msg); return NULL); if (query_data->callback) { // Asynchronous DBusPendingCall *local_pending; local_pending = pcmk_dbus_send(msg, connection, async_query_result_cb, query_data, timeout); if (local_pending == NULL) { // async_query_result_cb() was not called in this case free_property_query(query_data); query_data = NULL; } if (pending) { *pending = local_pending; } } else { // Synchronous DBusMessage *reply = pcmk_dbus_send_recv(msg, connection, NULL, timeout); output = handle_query_result(reply, query_data); if (reply) { dbus_message_unref(reply); } } dbus_message_unref(msg); return output; } diff --git a/lib/services/services.c b/lib/services/services.c index 566fcc226b..2b55e4115c 100644 --- a/lib/services/services.c +++ b/lib/services/services.c @@ -1,1383 +1,1384 @@ /* * Copyright 2010-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "services_private.h" #include "services_ocf.h" #if PCMK__ENABLE_LSB #include "services_lsb.h" #endif #if SUPPORT_SYSTEMD # include #endif /* TODO: Develop a rollover strategy */ static int operations = 0; static GHashTable *recurring_actions = NULL; /* ops waiting to run async because of conflicting active * pending ops */ static GList *blocked_ops = NULL; /* ops currently active (in-flight) */ static GList *inflight_ops = NULL; static void handle_blocked_ops(void); /*! * \brief Find first service class that can provide a specified agent * * \param[in] agent Name of agent to search for * * \return Service class if found, NULL otherwise * * \note The priority is LSB then systemd. It would be preferable to put systemd * first, but LSB merely requires a file existence check, while systemd * requires contacting DBus. */ const char * resources_find_service_class(const char *agent) { #if PCMK__ENABLE_LSB if (services__lsb_agent_exists(agent)) { return PCMK_RESOURCE_CLASS_LSB; } #endif #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { return PCMK_RESOURCE_CLASS_SYSTEMD; } #endif return NULL; } static inline void init_recurring_actions(void) { if (recurring_actions == NULL) { recurring_actions = pcmk__strkey_table(NULL, NULL); } } /*! * \internal * \brief Check whether op is in-flight systemd op * * \param[in] op Operation to check * * \return TRUE if op is in-flight systemd op */ static inline gboolean inflight_systemd(const svc_action_t *op) { return pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei) && (g_list_find(inflight_ops, op) != NULL); } /*! * \internal * \brief Expand "service" alias to an actual resource class * * \param[in] rsc Resource name (for logging only) * \param[in] standard Resource class as configured * \param[in] agent Agent name to look for * * \return Newly allocated string with actual resource class * * \note The caller is responsible for calling free() on the result. */ static char * expand_resource_class(const char *rsc, const char *standard, const char *agent) { char *expanded_class = NULL; #if PCMK__ENABLE_SERVICE if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) { const char *found_class = resources_find_service_class(agent); if (found_class != NULL) { crm_debug("Found %s agent %s for %s", found_class, agent, rsc); expanded_class = pcmk__str_copy(found_class); } else { const char *default_standard = NULL; #if PCMK__ENABLE_LSB default_standard = PCMK_RESOURCE_CLASS_LSB; #elif SUPPORT_SYSTEMD default_standard = PCMK_RESOURCE_CLASS_SYSTEMD; #else #error No standards supported for service alias (configure script bug) #endif crm_info("Assuming resource class %s for agent %s for %s", default_standard, agent, rsc); expanded_class = pcmk__str_copy(default_standard); } } #endif if (expanded_class == NULL) { expanded_class = pcmk__str_copy(standard); } return expanded_class; } /*! * \internal * \brief Create a simple svc_action_t instance * * \return Newly allocated instance (or NULL if not enough memory) */ static svc_action_t * new_action(void) { svc_action_t *op = calloc(1, sizeof(svc_action_t)); if (op == NULL) { return NULL; } op->opaque = calloc(1, sizeof(svc_action_private_t)); if (op->opaque == NULL) { free(op); return NULL; } // Initialize result services__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, NULL); return op; } static bool required_argument_missing(uint32_t ra_caps, const char *name, const char *standard, const char *provider, const char *agent, const char *action) { if (pcmk__str_empty(name)) { crm_info("Cannot create operation without resource name (bug?)"); return true; } if (pcmk__str_empty(standard)) { crm_info("Cannot create operation for %s without resource class (bug?)", name); return true; } if (pcmk__is_set(ra_caps, pcmk_ra_cap_provider) && pcmk__str_empty(provider)) { crm_info("Cannot create operation for %s resource %s " "without provider (bug?)", standard, name); return true; } if (pcmk__str_empty(agent)) { crm_info("Cannot create operation for %s without agent name (bug?)", name); return true; } if (pcmk__str_empty(action)) { crm_info("Cannot create operation for %s without action name (bug?)", name); return true; } return false; } // \return Standard Pacemaker return code (pcmk_rc_ok or ENOMEM) static int copy_action_arguments(svc_action_t *op, uint32_t ra_caps, const char *name, const char *standard, const char *provider, const char *agent, const char *action) { op->rsc = strdup(name); if (op->rsc == NULL) { return ENOMEM; } op->agent = strdup(agent); if (op->agent == NULL) { return ENOMEM; } op->standard = expand_resource_class(name, standard, agent); if (op->standard == NULL) { return ENOMEM; } if (pcmk__is_set(ra_caps, pcmk_ra_cap_status) && pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { action = PCMK_ACTION_STATUS; } op->action = strdup(action); if (op->action == NULL) { return ENOMEM; } if (pcmk__is_set(ra_caps, pcmk_ra_cap_provider)) { op->provider = strdup(provider); if (op->provider == NULL) { return ENOMEM; } } return pcmk_rc_ok; } // Takes ownership of params svc_action_t * services__create_resource_action(const char *name, const char *standard, const char *provider, const char *agent, const char *action, guint interval_ms, int timeout, GHashTable *params, enum svc_action_flags flags) { svc_action_t *op = NULL; uint32_t ra_caps = pcmk_get_ra_caps(standard); int rc = pcmk_rc_ok; op = new_action(); if (op == NULL) { - crm_crit("Cannot prepare action: %s", strerror(ENOMEM)); + pcmk__crit("Cannot prepare action: %s", strerror(ENOMEM)); if (params != NULL) { g_hash_table_destroy(params); } return NULL; } op->interval_ms = interval_ms; op->timeout = timeout; op->flags = flags; op->sequence = ++operations; // Take ownership of params if (pcmk__is_set(ra_caps, pcmk_ra_cap_params)) { op->params = params; } else if (params != NULL) { g_hash_table_destroy(params); params = NULL; } if (required_argument_missing(ra_caps, name, standard, provider, agent, action)) { services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR_FATAL, "Required agent or action information missing"); return op; } op->id = pcmk__op_key(name, action, interval_ms); if (copy_action_arguments(op, ra_caps, name, standard, provider, agent, action) != pcmk_rc_ok) { - crm_crit("Cannot prepare %s action for %s: %s", - action, name, strerror(ENOMEM)); + pcmk__crit("Cannot prepare %s action for %s: %s", action, name, + strerror(ENOMEM)); services__handle_exec_error(op, ENOMEM); return op; } if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_OCF) == 0) { rc = services__ocf_prepare(op); #if PCMK__ENABLE_LSB } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_LSB) == 0) { rc = services__lsb_prepare(op); #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { rc = services__systemd_prepare(op); #endif } else { crm_info("Unknown resource standard: %s", op->standard); rc = ENOENT; } if (rc != pcmk_rc_ok) { crm_info("Cannot prepare %s operation for %s: %s", action, name, strerror(rc)); services__handle_exec_error(op, rc); } return op; } svc_action_t * resources_action_create(const char *name, const char *standard, const char *provider, const char *agent, const char *action, guint interval_ms, int timeout, GHashTable *params, enum svc_action_flags flags) { svc_action_t *op = services__create_resource_action(name, standard, provider, agent, action, interval_ms, timeout, params, flags); if (op == NULL || op->rc != 0) { services_action_free(op); return NULL; } else { // Preserve public API backward compatibility op->rc = PCMK_OCF_OK; op->status = PCMK_EXEC_DONE; return op; } } svc_action_t * services_action_create_generic(const char *exec, const char *args[]) { svc_action_t *op = new_action(); pcmk__mem_assert(op); op->opaque->exec = strdup(exec); op->opaque->args[0] = strdup(exec); if ((op->opaque->exec == NULL) || (op->opaque->args[0] == NULL)) { - crm_crit("Cannot prepare action for '%s': %s", exec, strerror(ENOMEM)); + pcmk__crit("Cannot prepare action for '%s': %s", exec, + strerror(ENOMEM)); services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, strerror(ENOMEM)); return op; } if (args == NULL) { return op; } for (int cur_arg = 1; args[cur_arg - 1] != NULL; cur_arg++) { if (cur_arg == PCMK__NELEM(op->opaque->args)) { crm_info("Cannot prepare action for '%s': Too many arguments", exec); services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR_HARD, "Too many arguments"); break; } op->opaque->args[cur_arg] = strdup(args[cur_arg - 1]); if (op->opaque->args[cur_arg] == NULL) { - crm_crit("Cannot prepare action for '%s': %s", - exec, strerror(ENOMEM)); + pcmk__crit("Cannot prepare action for '%s': %s", exec, + strerror(ENOMEM)); services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, strerror(ENOMEM)); break; } } return op; } /*! * \brief Create an alert agent action * * \param[in] id Alert ID * \param[in] exec Path to alert agent executable * \param[in] timeout Action timeout * \param[in] params Parameters to use with action * \param[in] sequence Action sequence number * \param[in] cb_data Data to pass to callback function * * \return New action on success, NULL on error * \note It is the caller's responsibility to free cb_data. * The caller should not free params explicitly. */ svc_action_t * services_alert_create(const char *id, const char *exec, int timeout, GHashTable *params, int sequence, void *cb_data) { svc_action_t *action = services_action_create_generic(exec, NULL); action->id = pcmk__str_copy(id); action->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_ALERT); action->timeout = timeout; action->params = params; action->sequence = sequence; action->cb_data = cb_data; return action; } /*! * \brief Set the user and group that an action will execute as * * \param[in,out] op Action to modify * \param[in] user Name of user to execute action as * \param[in] group Name of group to execute action as * * \return pcmk_ok on success, -errno otherwise * * \note This will have no effect unless the process executing the action runs * as root and the action is not a systemd action. We could implement this * for systemd by adding User= and Group= to [Service] in the override * file, but that seems more likely to cause problems than be useful. */ int services_action_user(svc_action_t *op, const char *user) { int rc = pcmk_ok; CRM_CHECK((op != NULL) && (user != NULL), return -EINVAL); rc = pcmk__lookup_user(user, &(op->opaque->uid), &(op->opaque->gid)); return pcmk_rc2legacy(rc); } /*! * \brief Execute an alert agent action * * \param[in,out] action Action to execute * \param[in] cb Function to call when action completes * * \return TRUE if the library will free action, FALSE otherwise * * \note If this function returns FALSE, it is the caller's responsibility to * free the action with services_action_free(). However, unless someone * intentionally creates a recurring alert action, this will never return * FALSE. */ gboolean services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op)) { action->synchronous = false; action->opaque->callback = cb; return services__execute_file(action) == pcmk_rc_ok; } #if HAVE_DBUS /*! * \internal * \brief Update operation's pending DBus call, unreferencing old one if needed * * \param[in,out] op Operation to modify * \param[in] pending Pending call to set */ void services_set_op_pending(svc_action_t *op, DBusPendingCall *pending) { if (op->opaque->pending && (op->opaque->pending != pending)) { if (pending) { crm_info("Lost pending %s DBus call (%p)", op->id, op->opaque->pending); } else { crm_trace("Done with pending %s DBus call (%p)", op->id, op->opaque->pending); } dbus_pending_call_unref(op->opaque->pending); } op->opaque->pending = pending; if (pending) { crm_trace("Updated pending %s DBus call (%p)", op->id, pending); } else { crm_trace("Cleared pending %s DBus call", op->id); } } #endif void services_action_cleanup(svc_action_t * op) { if ((op == NULL) || (op->opaque == NULL)) { return; } #if HAVE_DBUS if(op->opaque->timerid != 0) { crm_trace("Removing timer for call %s to %s", op->action, op->rsc); g_source_remove(op->opaque->timerid); op->opaque->timerid = 0; } if(op->opaque->pending) { if (dbus_pending_call_get_completed(op->opaque->pending)) { // This should never be the case crm_warn("Result of %s op %s was unhandled", op->standard, op->id); } else { crm_debug("Will ignore any result of canceled %s op %s", op->standard, op->id); } dbus_pending_call_cancel(op->opaque->pending); services_set_op_pending(op, NULL); } #endif if (op->opaque->stderr_gsource) { mainloop_del_fd(op->opaque->stderr_gsource); op->opaque->stderr_gsource = NULL; } if (op->opaque->stdout_gsource) { mainloop_del_fd(op->opaque->stdout_gsource); op->opaque->stdout_gsource = NULL; } } /*! * \internal * \brief Map an actual resource action result to a standard OCF result * * \param[in] standard Agent standard (must not be "service") * \param[in] action Action that result is for * \param[in] exit_status Actual agent exit status * * \return Standard OCF result */ enum ocf_exitcode services_result2ocf(const char *standard, const char *action, int exit_status) { if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) { return services__ocf2ocf(exit_status); #if SUPPORT_SYSTEMD } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { return services__systemd2ocf(exit_status); #endif #if PCMK__ENABLE_LSB } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)) { return services__lsb2ocf(action, exit_status); #endif } else { crm_warn("Treating result from unknown standard '%s' as OCF", ((standard == NULL)? "unspecified" : standard)); return services__ocf2ocf(exit_status); } } void services_action_free(svc_action_t * op) { unsigned int i; if (op == NULL) { return; } /* The operation should be removed from all tracking lists by this point. * If it's not, we have a bug somewhere, so bail. That may lead to a * memory leak, but it's better than a use-after-free segmentation fault. */ CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return); CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return); CRM_CHECK((recurring_actions == NULL) || (g_hash_table_lookup(recurring_actions, op->id) == NULL), return); services_action_cleanup(op); if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } free(op->id); free(op->opaque->exec); for (i = 0; i < PCMK__NELEM(op->opaque->args); i++) { free(op->opaque->args[i]); } free(op->opaque->exit_reason); #if SUPPORT_SYSTEMD free(op->opaque->job_path); #endif // SUPPORT_SYSTEMD free(op->opaque); free(op->rsc); free(op->action); free(op->standard); free(op->agent); free(op->provider); free(op->stdout_data); free(op->stderr_data); if (op->params) { g_hash_table_destroy(op->params); op->params = NULL; } free(op); } gboolean cancel_recurring_action(svc_action_t * op) { crm_info("Cancelling %s operation %s", op->standard, op->id); if (recurring_actions) { g_hash_table_remove(recurring_actions, op->id); } if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } return TRUE; } /*! * \brief Cancel a recurring action * * \param[in] name Name of resource that operation is for * \param[in] action Name of operation to cancel * \param[in] interval_ms Interval of operation to cancel * * \return TRUE if action was successfully cancelled, FALSE otherwise */ gboolean services_action_cancel(const char *name, const char *action, guint interval_ms) { gboolean cancelled = FALSE; char *id = pcmk__op_key(name, action, interval_ms); svc_action_t *op = NULL; /* We can only cancel a recurring action */ init_recurring_actions(); op = g_hash_table_lookup(recurring_actions, id); if (op == NULL) { goto done; } // Tell services__finalize_async_op() not to reschedule the operation op->cancel = TRUE; /* Stop tracking it as a recurring operation, and stop its repeat timer */ cancel_recurring_action(op); /* If the op has a PID, it's an in-flight child process, so kill it. * * Whether the kill succeeds or fails, the main loop will send the op to * async_action_complete() (and thus services__finalize_async_op()) when the * process goes away. */ if (op->pid != 0) { crm_info("Terminating in-flight op %s[%d] early because it was cancelled", id, op->pid); cancelled = mainloop_child_kill(op->pid); if (cancelled == FALSE) { crm_err("Termination of %s[%d] failed", id, op->pid); } goto done; } #if HAVE_DBUS // In-flight systemd ops don't have a pid if (inflight_systemd(op)) { inflight_ops = g_list_remove(inflight_ops, op); /* This will cause any result that comes in later to be discarded, so we * don't call the callback and free the operation twice. */ services_action_cleanup(op); } #endif /* The rest of this is essentially equivalent to * services__finalize_async_op(), minus the handle_blocked_ops() call. */ // Report operation as cancelled services__set_cancelled(op); if (op->opaque->callback) { op->opaque->callback(op); } blocked_ops = g_list_remove(blocked_ops, op); services_action_free(op); cancelled = TRUE; // @TODO Initiate handle_blocked_ops() asynchronously done: free(id); return cancelled; } gboolean services_action_kick(const char *name, const char *action, guint interval_ms) { svc_action_t * op = NULL; char *id = pcmk__op_key(name, action, interval_ms); init_recurring_actions(); op = g_hash_table_lookup(recurring_actions, id); free(id); if (op == NULL) { return FALSE; } if (op->pid || inflight_systemd(op)) { return TRUE; } else { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(op); return TRUE; } } /*! * \internal * \brief Add a new recurring operation, checking for duplicates * * \param[in,out] op Operation to add * * \return TRUE if duplicate found (and reschedule), FALSE otherwise */ static gboolean handle_duplicate_recurring(svc_action_t *op) { svc_action_t * dup = NULL; /* check for duplicates */ dup = g_hash_table_lookup(recurring_actions, op->id); if (dup && (dup != op)) { /* update user data */ if (op->opaque->callback) { dup->opaque->callback = op->opaque->callback; dup->cb_data = op->cb_data; op->cb_data = NULL; } /* immediately execute the next interval */ if (dup->pid != 0) { if (op->opaque->repeat_timer) { g_source_remove(op->opaque->repeat_timer); op->opaque->repeat_timer = 0; } recurring_action_timer(dup); } /* free the duplicate */ services_action_free(op); return TRUE; } return FALSE; } /*! * \internal * \brief Execute an action appropriately according to its standard * * \param[in,out] op Action to execute * * \return Standard Pacemaker return code * \retval EBUSY Recurring operation could not be initiated * \retval pcmk_rc_error Synchronous action failed * \retval pcmk_rc_ok Synchronous action succeeded, or asynchronous action * should not be freed (because it's pending or because * it failed to execute and was already freed) * * \note If the return value for an asynchronous action is not pcmk_rc_ok, the * caller is responsible for freeing the action. */ static int execute_action(svc_action_t *op) { #if SUPPORT_SYSTEMD if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { return services__execute_systemd(op); } #endif return services__execute_file(op); } void services_add_inflight_op(svc_action_t * op) { if (op == NULL) { return; } pcmk__assert(op->synchronous == FALSE); /* keep track of ops that are in-flight to avoid collisions in the same namespace */ if (op->rsc) { inflight_ops = g_list_append(inflight_ops, op); } } /*! * \internal * \brief Stop tracking an operation that completed * * \param[in] op Operation to stop tracking */ void services_untrack_op(const svc_action_t *op) { /* Op is no longer in-flight or blocked */ inflight_ops = g_list_remove(inflight_ops, op); blocked_ops = g_list_remove(blocked_ops, op); /* Op is no longer blocking other ops, so check if any need to run */ handle_blocked_ops(); } gboolean services_action_async_fork_notify(svc_action_t * op, void (*action_callback) (svc_action_t *), void (*action_fork_callback) (svc_action_t *)) { CRM_CHECK(op != NULL, return TRUE); op->synchronous = false; if (action_callback != NULL) { op->opaque->callback = action_callback; } if (action_fork_callback != NULL) { op->opaque->fork_callback = action_fork_callback; } if (op->interval_ms > 0) { init_recurring_actions(); if (handle_duplicate_recurring(op)) { /* entry rescheduled, dup freed */ /* exit early */ return TRUE; } g_hash_table_replace(recurring_actions, op->id, op); } if (!pcmk__is_set(op->flags, SVC_ACTION_NON_BLOCKED) && (op->rsc != NULL) && is_op_blocked(op->rsc)) { blocked_ops = g_list_append(blocked_ops, op); return TRUE; } return execute_action(op) == pcmk_rc_ok; } gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)) { return services_action_async_fork_notify(op, action_callback, NULL); } static gboolean processing_blocked_ops = FALSE; gboolean is_op_blocked(const char *rsc) { GList *gIter = NULL; svc_action_t *op = NULL; for (gIter = inflight_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (pcmk__str_eq(op->rsc, rsc, pcmk__str_none)) { return TRUE; } } return FALSE; } static void handle_blocked_ops(void) { GList *executed_ops = NULL; GList *gIter = NULL; svc_action_t *op = NULL; if (processing_blocked_ops) { /* avoid nested calling of this function */ return; } processing_blocked_ops = TRUE; /* n^2 operation here, but blocked ops are incredibly rare. this list * will be empty 99% of the time. */ for (gIter = blocked_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; if (is_op_blocked(op->rsc)) { continue; } executed_ops = g_list_append(executed_ops, op); if (execute_action(op) != pcmk_rc_ok) { /* this can cause this function to be called recursively * which is why we have processing_blocked_ops static variable */ services__finalize_async_op(op); } } for (gIter = executed_ops; gIter != NULL; gIter = gIter->next) { op = gIter->data; blocked_ops = g_list_remove(blocked_ops, op); } g_list_free(executed_ops); processing_blocked_ops = FALSE; } /*! * \internal * \brief Execute a meta-data action appropriately to standard * * \param[in,out] op Meta-data action to execute * * \return Standard Pacemaker return code */ static int execute_metadata_action(svc_action_t *op) { const char *class = op->standard; if (op->agent == NULL) { crm_info("Meta-data requested without specifying agent"); services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR_FATAL, "Agent not specified"); return EINVAL; } if (class == NULL) { crm_info("Meta-data requested for agent %s without specifying class", op->agent); services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR_FATAL, "Agent standard not specified"); return EINVAL; } #if PCMK__ENABLE_SERVICE if (!strcmp(class, PCMK_RESOURCE_CLASS_SERVICE)) { class = resources_find_service_class(op->agent); } if (class == NULL) { crm_info("Meta-data requested for %s, but could not determine class", op->agent); services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR_HARD, "Agent standard could not be determined"); return EINVAL; } #endif #if PCMK__ENABLE_LSB if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)) { return pcmk_legacy2rc(services__get_lsb_metadata(op->agent, &op->stdout_data)); } #endif return execute_action(op); } gboolean services_action_sync(svc_action_t * op) { gboolean rc = TRUE; if (op == NULL) { crm_trace("No operation to execute"); return FALSE; } op->synchronous = true; if (pcmk__str_eq(op->action, PCMK_ACTION_META_DATA, pcmk__str_casei)) { /* Synchronous meta-data operations are handled specially. Since most * resource classes don't provide any meta-data, it has to be * synthesized from available information about the agent. * * services_action_async() doesn't treat meta-data actions specially, so * it will result in an error for classes that don't support the action. */ rc = (execute_metadata_action(op) == pcmk_rc_ok); } else { rc = (execute_action(op) == pcmk_rc_ok); } crm_trace(" > " PCMK__OP_FMT ": %s = %d", op->rsc, op->action, op->interval_ms, op->opaque->exec, op->rc); if (op->stdout_data) { crm_trace(" > stdout: %s", op->stdout_data); } if (op->stderr_data) { crm_trace(" > stderr: %s", op->stderr_data); } return rc; } GList * get_directory_list(const char *root, gboolean files, gboolean executable) { return services_os_get_directory_list(root, files, executable); } GList * resources_list_standards(void) { GList *standards = NULL; standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_OCF)); #if PCMK__ENABLE_SERVICE standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SERVICE)); #endif #if PCMK__ENABLE_LSB standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_LSB)); #endif #if SUPPORT_SYSTEMD { GList *agents = systemd_unit_listall(); if (agents != NULL) { standards = g_list_append(standards, strdup(PCMK_RESOURCE_CLASS_SYSTEMD)); g_list_free_full(agents, free); } } #endif return standards; } GList * resources_list_providers(const char *standard) { if (pcmk__is_set(pcmk_get_ra_caps(standard), pcmk_ra_cap_provider)) { return resources_os_list_ocf_providers(); } return NULL; } GList * resources_list_agents(const char *standard, const char *provider) { if ((standard == NULL) #if PCMK__ENABLE_SERVICE || (strcasecmp(standard, PCMK_RESOURCE_CLASS_SERVICE) == 0) #endif ) { GList *tmp1; GList *tmp2; GList *result = NULL; if (standard == NULL) { tmp1 = result; tmp2 = resources_os_list_ocf_agents(NULL); if (tmp2) { result = g_list_concat(tmp1, tmp2); } } #if PCMK__ENABLE_LSB result = g_list_concat(result, services__list_lsb_agents()); #endif #if SUPPORT_SYSTEMD tmp1 = result; tmp2 = systemd_unit_listall(); if (tmp2) { result = g_list_concat(tmp1, tmp2); } #endif return result; } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_OCF) == 0) { return resources_os_list_ocf_agents(provider); #if PCMK__ENABLE_LSB } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_LSB) == 0) { return services__list_lsb_agents(); #endif #if SUPPORT_SYSTEMD } else if (strcasecmp(standard, PCMK_RESOURCE_CLASS_SYSTEMD) == 0) { return systemd_unit_listall(); #endif } return NULL; } gboolean resources_agent_exists(const char *standard, const char *provider, const char *agent) { GList *standards = NULL; GList *providers = NULL; GList *iter = NULL; gboolean rc = FALSE; gboolean has_providers = FALSE; standards = resources_list_standards(); for (iter = standards; iter != NULL; iter = iter->next) { if (pcmk__str_eq(iter->data, standard, pcmk__str_none)) { rc = TRUE; break; } } if (rc == FALSE) { goto done; } rc = FALSE; has_providers = pcmk__is_set(pcmk_get_ra_caps(standard), pcmk_ra_cap_provider); if (has_providers == TRUE && provider != NULL) { providers = resources_list_providers(standard); for (iter = providers; iter != NULL; iter = iter->next) { if (pcmk__str_eq(iter->data, provider, pcmk__str_none)) { rc = TRUE; break; } } } else if (has_providers == FALSE && provider == NULL) { rc = TRUE; } if (rc == FALSE) { goto done; } #if PCMK__ENABLE_SERVICE if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { #if PCMK__ENABLE_LSB if (services__lsb_agent_exists(agent)) { rc = TRUE; goto done; } #endif #if SUPPORT_SYSTEMD if (systemd_unit_exists(agent)) { rc = TRUE; goto done; } #endif rc = FALSE; goto done; } #endif if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) { rc = services__ocf_agent_exists(provider, agent); #if PCMK__ENABLE_LSB } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)) { rc = services__lsb_agent_exists(agent); #endif #if SUPPORT_SYSTEMD } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { rc = systemd_unit_exists(agent); #endif } else { rc = FALSE; } done: g_list_free(standards); g_list_free(providers); return rc; } /*! * \internal * \brief Set the result of an action * * \param[out] action Where to set action result * \param[in] agent_status Exit status to set * \param[in] exec_status Execution status to set * \param[in] reason Human-friendly description of event to set */ void services__set_result(svc_action_t *action, int agent_status, enum pcmk_exec_status exec_status, const char *reason) { if (action == NULL) { return; } action->rc = agent_status; action->status = exec_status; if (!pcmk__str_eq(action->opaque->exit_reason, reason, pcmk__str_none)) { free(action->opaque->exit_reason); action->opaque->exit_reason = (reason == NULL)? NULL : strdup(reason); } } /*! * \internal * \brief Set a \c pcmk__action_result_t based on a \c svc_action_t * * \param[in] action Service action whose result to copy * \param[in,out] result Action result object to set */ void services__copy_result(const svc_action_t *action, pcmk__action_result_t *result) { pcmk__set_result(result, action->rc, action->status, action->opaque->exit_reason); } /*! * \internal * \brief Set the result of an action, with a formatted exit reason * * \param[out] action Where to set action result * \param[in] agent_status Exit status to set * \param[in] exec_status Execution status to set * \param[in] format printf-style format for a human-friendly * description of reason for result * \param[in] ... arguments for \p format */ void services__format_result(svc_action_t *action, int agent_status, enum pcmk_exec_status exec_status, const char *format, ...) { va_list ap; int len = 0; char *reason = NULL; if (action == NULL) { return; } action->rc = agent_status; action->status = exec_status; if (format != NULL) { va_start(ap, format); len = vasprintf(&reason, format, ap); pcmk__assert(len > 0); va_end(ap); } free(action->opaque->exit_reason); action->opaque->exit_reason = reason; } /*! * \internal * \brief Set the result of an action to cancelled * * \param[out] action Where to set action result * * \note This sets execution status but leaves the exit status unchanged */ void services__set_cancelled(svc_action_t *action) { if (action != NULL) { action->status = PCMK_EXEC_CANCELLED; free(action->opaque->exit_reason); action->opaque->exit_reason = NULL; } } /*! * \internal * \brief Get a readable description of what an action is for * * \param[in] action Action to check * * \return Readable name for the kind of \p action */ const char * services__action_kind(const svc_action_t *action) { if ((action == NULL) || (action->standard == NULL)) { return "Process"; } else if (pcmk__str_eq(action->standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) { return "Fence agent"; } else if (pcmk__str_eq(action->standard, PCMK_RESOURCE_CLASS_ALERT, pcmk__str_none)) { return "Alert agent"; } else { return "Resource agent"; } } /*! * \internal * \brief Get the exit reason of an action * * \param[in] action Action to check * * \return Action's exit reason (or NULL if none) */ const char * services__exit_reason(const svc_action_t *action) { return action->opaque->exit_reason; } /*! * \internal * \brief Steal stdout from an action * * \param[in,out] action Action whose stdout is desired * * \return Action's stdout (which may be NULL) * \note Upon return, \p action will no longer track the output, so it is the * caller's responsibility to free the return value. */ char * services__grab_stdout(svc_action_t *action) { char *output = action->stdout_data; action->stdout_data = NULL; return output; } /*! * \internal * \brief Steal stderr from an action * * \param[in,out] action Action whose stderr is desired * * \return Action's stderr (which may be NULL) * \note Upon return, \p action will no longer track the output, so it is the * caller's responsibility to free the return value. */ char * services__grab_stderr(svc_action_t *action) { char *output = action->stderr_data; action->stderr_data = NULL; return output; }