diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index c0a8028009..f0d8446a47 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -1,604 +1,608 @@ /* * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static xmlNode * attrd_confirmation(int callid) { xmlNode *node = pcmk__xe_create(NULL, __func__); crm_xml_add(node, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(node, PCMK__XA_SRC, get_local_node_name()); crm_xml_add(node, PCMK_XA_TASK, PCMK__ATTRD_CMD_CONFIRM); crm_xml_add_int(node, PCMK__XA_CALL_ID, callid); return node; } static void attrd_peer_message(crm_node_t *peer, xmlNode *xml) { const char *election_op = crm_element_value(xml, PCMK__XA_CRM_TASK); if (election_op) { attrd_handle_election_op(peer, xml); return; } if (attrd_shutting_down(false)) { /* If we're shutting down, we want to continue responding to election * ops as long as we're a cluster member (because our vote may be * needed). Ignore all other messages. */ return; } else { pcmk__request_t request = { .ipc_client = NULL, .ipc_id = 0, .ipc_flags = 0, .peer = peer->uname, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK); CRM_CHECK(request.op != NULL, return); attrd_handle_request(&request); /* Having finished handling the request, check to see if the originating * peer requested confirmation. If so, send that confirmation back now. */ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { int callid = 0; xmlNode *reply = NULL; /* Add the confirmation ID for the message we are confirming to the * response so the originating peer knows what they're a confirmation * for. */ crm_element_value_int(xml, PCMK__XA_CALL_ID, &callid); reply = attrd_confirmation(callid); /* And then send the confirmation back to the originating peer. This * ends up right back in this same function (attrd_peer_message) on the * peer where it will have to do something with a PCMK__XA_CONFIRM type * message. */ crm_debug("Sending %s a confirmation", peer->uname); attrd_send_message(peer, reply, false); free_xml(reply); } pcmk__reset_request(&request); } } static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = pcmk__xml_parse(data); } if (xml == NULL) { crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); } else { attrd_peer_message(pcmk__get_node(nodeid, from, NULL, pcmk__node_search_cluster), xml); } free_xml(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down(false)) { crm_info("Disconnected from Corosync process group"); } else { crm_crit("Lost connection to Corosync process group, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } /*! * \internal * \brief Broadcast an update for a single attribute value * * \param[in] a Attribute to broadcast * \param[in] v Attribute value to broadcast */ void attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v) { xmlNode *op = pcmk__xe_create(NULL, PCMK_XE_OP); crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); attrd_add_value_xml(op, a, v, false); attrd_send_message(NULL, op, false); free_xml(op); } #define state_text(state) pcmk__s((state), "in unknown state") static void attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) { bool gone = false; bool is_remote = pcmk_is_set(peer->flags, crm_remote_node); switch (kind) { case crm_status_uname: crm_debug("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state)); break; case crm_status_processes: if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { gone = true; } crm_debug("Node %s is %s a peer", peer->uname, (gone? "no longer" : "now")); break; case crm_status_nstate: crm_debug("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state), state_text(data)); if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if (attrd_election_won() && !pcmk_is_set(peer->flags, crm_remote_node)) { attrd_peer_sync(peer); } } else { // Remove all attribute values associated with lost nodes attrd_peer_remove(peer->uname, false, "loss"); gone = true; } break; } // Remove votes from cluster nodes that leave, in case election in progress if (gone && !is_remote) { attrd_remove_voter(peer); attrd_remove_peer_protocol_ver(peer->uname); attrd_do_not_expect_from_peer(peer->uname); } } static void record_peer_nodeid(attribute_value_t *v, const char *host) { crm_node_t *known_peer = pcmk__get_node(v->nodeid, host, NULL, pcmk__node_search_cluster); crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); if (attrd_election_won()) { attrd_write_attributes(attrd_write_changed); } } #define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)") #define readable_peer(p) \ (((p) == NULL)? "all peers" : pcmk__s((p)->uname, "unknown peer")) static void update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, const char *attr, const char *value, const char *host, bool filter) { int is_remote = 0; bool changed = false; attribute_value_t *v = NULL; // Create entry for value if not already existing v = g_hash_table_lookup(a->values, host); if (v == NULL) { v = pcmk__assert_alloc(1, sizeof(attribute_value_t)); v->nodename = pcmk__str_copy(host); g_hash_table_replace(a->values, v->nodename, v); } // If value is for a Pacemaker Remote node, remember that crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); if (is_remote) { attrd_set_value_flags(v, attrd_value_remote); - CRM_ASSERT(crm_remote_peer_get(host) != NULL); + CRM_ASSERT(pcmk__cluster_lookup_remote_node(host) != NULL); } // Check whether the value changed changed = !pcmk__str_eq(v->current, value, pcmk__str_casei); if (changed && filter && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { /* Broadcast the local value for an attribute that differs from the * value provided in a peer's attribute synchronization response. This * ensures a node's values for itself take precedence and all peers are * kept in sync. */ v = g_hash_table_lookup(a->values, attrd_cluster->uname); crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", attr, host, readable_value(v), value, peer->uname); attrd_broadcast_value(a, v); } else if (changed) { crm_notice("Setting %s[%s]%s%s: %s -> %s " CRM_XS " from %s with %s write delay", attr, host, a->set_type ? " in " : "", pcmk__s(a->set_type, ""), readable_value(v), pcmk__s(value, "(unset)"), peer->uname, (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); pcmk__str_update(&v->current, value); attrd_set_attr_flags(a, attrd_attr_changed); if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei) && pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) { if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { attrd_set_requesting_shutdown(); } else { attrd_clear_requesting_shutdown(); } } // Write out new value or start dampening timer if (a->timeout_ms && a->timer) { crm_trace("Delaying write of %s %s for dampening", attr, pcmk__readable_interval(a->timeout_ms)); mainloop_timer_start(a->timer); } else { attrd_write_or_elect_attribute(a); } } else { int is_force_write = 0; crm_element_value_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, &is_force_write); if (is_force_write == 1 && a->timeout_ms && a->timer) { /* Save forced writing and set change flag. */ /* The actual attribute is written by Writer after election. */ crm_trace("%s[%s] from %s is unchanged (%s), forcing write", attr, host, peer->uname, pcmk__s(value, "unset")); attrd_set_attr_flags(a, attrd_attr_force_write); } else { crm_trace("%s[%s] from %s is unchanged (%s)", attr, host, peer->uname, pcmk__s(value, "unset")); } } // This allows us to later detect local values that peer doesn't know about attrd_set_value_flags(v, attrd_value_from_peer); /* If this is a cluster node whose node ID we are learning, remember it */ if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote) && (crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID, (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { record_peer_nodeid(v, host); } } static void attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) { attribute_t *a = NULL; const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST); if (attr == NULL) { crm_warn("Could not update attribute: peer did not specify name"); return; } a = attrd_populate_attribute(xml, attr); if (a == NULL) { return; } if (host == NULL) { // If no host was specified, update all hosts GHashTableIter vIter; crm_debug("Setting %s for all hosts to %s", attr, value); pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_HOST_ID); g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { update_attr_on_host(a, peer, xml, attr, value, host, filter); } } else { // Update attribute value for the given host update_attr_on_host(a, peer, xml, attr, value, host, filter); } /* If this is a message from some attrd instance broadcasting its protocol * version, check to see if it's a new minimum version. */ if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { attrd_update_minimum_protocol_ver(peer->uname, value); } } static void broadcast_unseen_local_values(void) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { if (!pcmk_is_set(v->flags, attrd_value_from_peer) && pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei)) { crm_trace("* %s[%s]='%s' is local-only", a->id, v->nodename, readable_value(v)); if (sync == NULL) { sync = pcmk__xe_create(NULL, __func__); crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); } attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer); } } } if (sync != NULL) { crm_debug("Broadcasting local-only values"); attrd_send_message(NULL, sync, false); free_xml(sync); } } int attrd_cluster_connect(void) { + int rc = pcmk_rc_ok; + attrd_cluster = pcmk_cluster_new(); attrd_cluster->destroy = attrd_cpg_destroy; attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; crm_set_status_callback(&attrd_peer_change_cb); - if (crm_cluster_connect(attrd_cluster) == FALSE) { + rc = pcmk_cluster_connect(attrd_cluster); + rc = pcmk_rc2legacy(rc); + if (rc != pcmk_ok) { crm_err("Cluster connection failed"); - return -ENOTCONN; + return rc; } return pcmk_ok; } void attrd_peer_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST); const char *op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION); const char *interval_spec = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_INTERVAL); guint interval_ms = 0U; char *attr = NULL; GHashTableIter iter; regex_t regex; crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, pcmk__node_search_cluster); pcmk_parse_interval_spec(interval_spec, &interval_ms); if (attrd_failure_regex(®ex, rsc, op, interval_ms) != pcmk_ok) { crm_info("Ignoring invalid request to clear failures for %s", pcmk__s(rsc, "all resources")); return; } crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Make sure value is not set, so we delete */ pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { if (regexec(®ex, attr, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", attr, pcmk__s(rsc, "all resources")); crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); attrd_peer_update(peer, xml, host, false); } } regfree(®ex); } /*! * \internal * \brief Load attributes from a peer sync response * * \param[in] peer Peer that sent sync response * \param[in] peer_won Whether peer is the attribute writer * \param[in,out] xml Request XML */ void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml) { crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", peer->uname); if (peer_won) { /* Initialize the "seen" flag for all attributes to cleared, so we can * detect attributes that local node has but the writer doesn't. */ attrd_clear_value_seen(); } // Process each attribute update in the sync response for (xmlNode *child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL; child = pcmk__xe_next(child)) { attrd_peer_update(peer, child, crm_element_value(child, PCMK__XA_ATTR_HOST), true); } if (peer_won) { /* If any attributes are still not marked as seen, the writer doesn't * know about them, so send all peers an update with them. */ broadcast_unseen_local_values(); } } /*! * \internal * \brief Remove all attributes and optionally peer cache entries for a node * * \param[in] host Name of node to purge * \param[in] uncache If true, remove node from peer caches * \param[in] source Who requested removal (only used for logging) */ void attrd_peer_remove(const char *host, bool uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; CRM_CHECK(host != NULL, return); crm_notice("Removing all %s attributes for node %s " CRM_XS " %s reaping node from cache", host, source, (uncache? "and" : "without")); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { if(g_hash_table_remove(a->values, host)) { crm_debug("Removed %s[%s] for peer %s", a->id, host, source); } } if (uncache) { pcmk__purge_node_from_cache(host, 0); } } /*! * \internal * \brief Send all known attributes and values to a peer * * \param[in] peer Peer to send sync to (if NULL, broadcast to all peers) */ void attrd_peer_sync(crm_node_t *peer) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = pcmk__xe_create(NULL, __func__); crm_xml_add(sync, PCMK_XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s]='%s' to %s", a->id, v->nodename, readable_value(v), readable_peer(peer)); attrd_add_value_xml(sync, a, v, false); } } crm_debug("Syncing values to %s", readable_peer(peer)); attrd_send_message(peer, sync, false); free_xml(sync); } void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, bool filter) { bool handle_sync_point = false; CRM_CHECK((peer != NULL) && (xml != NULL), return); if (xml->children != NULL) { for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL); child != NULL; child = pcmk__xe_next_same(child)) { pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite); attrd_peer_update_one(peer, child, filter); if (attrd_request_has_sync_point(child)) { handle_sync_point = true; } } } else { attrd_peer_update_one(peer, xml, filter); if (attrd_request_has_sync_point(xml)) { handle_sync_point = true; } } /* If the update XML specified that the client wanted to wait for a sync * point, process that now. */ if (handle_sync_point) { crm_trace("Hit local sync point for attribute update"); attrd_ack_waitlist_clients(attrd_sync_point_local, xml); } } diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c index 4f76d308bc..c4fb9344f9 100644 --- a/daemons/based/based_callbacks.c +++ b/daemons/based/based_callbacks.c @@ -1,1761 +1,1761 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // uint32_t, uint64_t, UINT64_C() #include #include #include // PRIu64 #include #include #include #include #include #include #include #include #define EXIT_ESCALATION_MS 10000 static unsigned long cib_local_bcast_num = 0; typedef struct cib_local_notify_s { xmlNode *notify_src; char *client_id; gboolean from_peer; gboolean sync_reply; } cib_local_notify_t; int next_client_id = 0; gboolean legacy_mode = FALSE; qb_ipcs_service_t *ipcs_ro = NULL; qb_ipcs_service_t *ipcs_rw = NULL; qb_ipcs_service_t *ipcs_shm = NULL; static int cib_process_command(xmlNode *request, const cib__operation_t *operation, cib__op_fn_t op_function, xmlNode **reply, xmlNode **cib_diff, bool privileged); static gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged); gboolean cib_legacy_mode(void) { return legacy_mode; } static int32_t cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (cib_shutdown_flag) { crm_info("Ignoring new IPC client [%d] during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } static int32_t cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size) { pcmk__client_t *client = pcmk__find_client(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, TRUE); } static int32_t cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size) { pcmk__client_t *client = pcmk__find_client(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, FALSE); } /* Error code means? */ static int32_t cib_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); pcmk__free_client(client); return 0; } static void cib_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); cib_ipc_closed(c); if (cib_shutdown_flag) { cib_shutdown(0); } } struct qb_ipcs_service_handlers ipc_ro_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = NULL, .msg_process = cib_ipc_dispatch_ro, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = NULL, .msg_process = cib_ipc_dispatch_rw, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; /*! * \internal * \brief Create reply XML for a CIB request * * \param[in] op CIB operation type * \param[in] call_id CIB call ID * \param[in] client_id CIB client ID * \param[in] call_options Group of enum cib_call_options flags * \param[in] rc Request return code * \param[in] call_data Request output data * * \return Reply XML * * \note The caller is responsible for freeing the return value using * \p free_xml(). */ static xmlNode * create_cib_reply(const char *op, const char *call_id, const char *client_id, int call_options, int rc, xmlNode *call_data) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_CIB_REPLY); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(reply, PCMK__XA_CIB_OP, op); crm_xml_add(reply, PCMK__XA_CIB_CALLID, call_id); crm_xml_add(reply, PCMK__XA_CIB_CLIENTID, client_id); crm_xml_add_int(reply, PCMK__XA_CIB_CALLOPT, call_options); crm_xml_add_int(reply, PCMK__XA_CIB_RC, rc); if (call_data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_CIB_CALLDATA); crm_trace("Attaching reply output"); pcmk__xml_copy(wrapper, call_data); } crm_log_xml_explicit(reply, "cib:reply"); return reply; } static void do_local_notify(const xmlNode *notify_src, const char *client_id, bool sync_reply, bool from_peer) { int rid = 0; int call_id = 0; pcmk__client_t *client_obj = NULL; CRM_ASSERT(notify_src && client_id); crm_element_value_int(notify_src, PCMK__XA_CIB_CALLID, &call_id); client_obj = pcmk__find_client_by_id(client_id); if (client_obj == NULL) { crm_debug("Could not send response %d: client %s not found", call_id, client_id); return; } if (sync_reply) { if (client_obj->ipcs) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to client %s%s", rid, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } else { crm_trace("Sending response (call %d) to client %s%s", call_id, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } } else { crm_trace("Sending event %d to client %s%s", call_id, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } switch (PCMK__CLIENT_TYPE(client_obj)) { case pcmk__client_ipc: { int rc = pcmk__ipc_send_xml(client_obj, rid, notify_src, (sync_reply? crm_ipc_flags_none : crm_ipc_server_event)); if (rc != pcmk_rc_ok) { crm_warn("%s reply to client %s failed: %s " CRM_XS " rc=%d", (sync_reply? "Synchronous" : "Asynchronous"), pcmk__client_name(client_obj), pcmk_rc_str(rc), rc); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: #endif case pcmk__client_tcp: pcmk__remote_send_xml(client_obj->remote, notify_src); break; default: crm_err("Unknown transport for client %s " CRM_XS " flags=%#016" PRIx64, pcmk__client_name(client_obj), client_obj->flags); } } void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, pcmk__client_t *cib_client, gboolean privileged) { const char *op = crm_element_value(op_request, PCMK__XA_CIB_OP); int call_options = cib_none; crm_element_value_int(op_request, PCMK__XA_CIB_CALLOPT, &call_options); /* Requests with cib_transaction set should not be sent to based directly * (outside of a commit-transaction request) */ if (pcmk_is_set(call_options, cib_transaction)) { return; } if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { if (flags & crm_ipc_client_response) { xmlNode *ack = pcmk__xe_create(NULL, __func__); crm_xml_add(ack, PCMK__XA_CIB_OP, CRM_OP_REGISTER); crm_xml_add(ack, PCMK__XA_CIB_CLIENTID, cib_client->id); pcmk__ipc_send_xml(cib_client, id, ack, flags); cib_client->request_id = 0; free_xml(ack); } return; } else if (pcmk__str_eq(op, PCMK__VALUE_CIB_NOTIFY, pcmk__str_none)) { /* Update the notify filters for this client */ int on_off = 0; crm_exit_t status = CRM_EX_OK; uint64_t bit = UINT64_C(0); const char *type = crm_element_value(op_request, PCMK__XA_CIB_NOTIFY_TYPE); crm_element_value_int(op_request, PCMK__XA_CIB_NOTIFY_ACTIVATE, &on_off); crm_debug("Setting %s callbacks %s for client %s", type, (on_off? "on" : "off"), pcmk__client_name(cib_client)); if (pcmk__str_eq(type, PCMK__VALUE_CIB_POST_NOTIFY, pcmk__str_none)) { bit = cib_notify_post; } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_PRE_NOTIFY, pcmk__str_none)) { bit = cib_notify_pre; } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_UPDATE_CONFIRMATION, pcmk__str_none)) { bit = cib_notify_confirm; } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_DIFF_NOTIFY, pcmk__str_none)) { bit = cib_notify_diff; } else { status = CRM_EX_INVALID_PARAM; } if (bit != 0) { if (on_off) { pcmk__set_client_flags(cib_client, bit); } else { pcmk__clear_client_flags(cib_client, bit); } } pcmk__ipc_send_ack(cib_client, id, flags, PCMK__XE_ACK, NULL, status); return; } cib_process_request(op_request, privileged, cib_client); } int32_t cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; pcmk__client_t *cib_client = pcmk__find_client(c); xmlNode *op_request = pcmk__client_data2xml(cib_client, data, &id, &flags); if (op_request) { crm_element_value_int(op_request, PCMK__XA_CIB_CALLOPT, &call_options); } if (op_request == NULL) { crm_trace("Invalid message from %p", c); pcmk__ipc_send_ack(cib_client, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL); return 0; } else if(cib_client == NULL) { crm_trace("Invalid client %p", c); return 0; } if (pcmk_is_set(call_options, cib_sync_call)) { CRM_LOG_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */ cib_client->request_id = id; /* Reply only to the last one */ } if (cib_client->name == NULL) { const char *value = crm_element_value(op_request, PCMK__XA_CIB_CLIENTNAME); if (value == NULL) { cib_client->name = pcmk__itoa(cib_client->pid); } else { cib_client->name = pcmk__str_copy(value); if (crm_is_daemon_name(value)) { pcmk__set_client_flags(cib_client, cib_is_daemon); } } } /* Allow cluster daemons more leeway before being evicted */ if (pcmk_is_set(cib_client->flags, cib_is_daemon)) { const char *qmax = cib_config_lookup(PCMK_OPT_CLUSTER_IPC_LIMIT); if (pcmk__set_client_queue_max(cib_client, qmax)) { crm_trace("IPC threshold for client %s[%u] is now %u", pcmk__client_name(cib_client), cib_client->pid, cib_client->queue_max); } } crm_xml_add(op_request, PCMK__XA_CIB_CLIENTID, cib_client->id); crm_xml_add(op_request, PCMK__XA_CIB_CLIENTNAME, cib_client->name); CRM_LOG_ASSERT(cib_client->user != NULL); pcmk__update_acl_user(op_request, PCMK__XA_CIB_USER, cib_client->user); cib_common_callback_worker(id, flags, op_request, cib_client, privileged); free_xml(op_request); return 0; } static uint64_t ping_seq = 0; static char *ping_digest = NULL; static bool ping_modified_since = FALSE; static gboolean cib_digester_cb(gpointer data) { if (based_is_primary) { char buffer[32]; xmlNode *ping = pcmk__xe_create(NULL, PCMK__XE_PING); ping_seq++; free(ping_digest); ping_digest = NULL; ping_modified_since = FALSE; snprintf(buffer, 32, "%" PRIu64, ping_seq); crm_trace("Requesting peer digests (%s)", buffer); crm_xml_add(ping, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(ping, PCMK__XA_CIB_OP, CRM_OP_PING); crm_xml_add(ping, PCMK__XA_CIB_PING_ID, buffer); crm_xml_add(ping, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); send_cluster_message(NULL, crm_msg_cib, ping, TRUE); free_xml(ping); } return FALSE; } static void process_ping_reply(xmlNode *reply) { uint64_t seq = 0; const char *host = crm_element_value(reply, PCMK__XA_SRC); xmlNode *wrapper = pcmk__xe_first_child(reply, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *pong = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); const char *seq_s = crm_element_value(pong, PCMK__XA_CIB_PING_ID); const char *digest = crm_element_value(pong, PCMK__XA_DIGEST); if (seq_s == NULL) { crm_debug("Ignoring ping reply with no " PCMK__XA_CIB_PING_ID); return; } else { long long seq_ll; if (pcmk__scan_ll(seq_s, &seq_ll, 0LL) != pcmk_rc_ok) { return; } seq = (uint64_t) seq_ll; } if(digest == NULL) { crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host); } else if(seq != ping_seq) { crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host); } else if(ping_modified_since) { crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host); } else { const char *version = crm_element_value(pong, PCMK_XA_CRM_FEATURE_SET); if(ping_digest == NULL) { crm_trace("Calculating new digest"); ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version); } crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest); if (!pcmk__str_eq(ping_digest, digest, pcmk__str_casei)) { xmlNode *wrapper = pcmk__xe_first_child(pong, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *remote_cib = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); const char *admin_epoch_s = NULL; const char *epoch_s = NULL; const char *num_updates_s = NULL; if (remote_cib != NULL) { admin_epoch_s = crm_element_value(remote_cib, PCMK_XA_ADMIN_EPOCH); epoch_s = crm_element_value(remote_cib, PCMK_XA_EPOCH); num_updates_s = crm_element_value(remote_cib, PCMK_XA_NUM_UPDATES); } crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p", crm_element_value(the_cib, PCMK_XA_ADMIN_EPOCH), crm_element_value(the_cib, PCMK_XA_EPOCH), crm_element_value(the_cib, PCMK_XA_NUM_UPDATES), ping_digest, host, pcmk__s(admin_epoch_s, "_"), pcmk__s(epoch_s, "_"), pcmk__s(num_updates_s, "_"), digest, remote_cib); if(remote_cib && remote_cib->children) { // Additional debug xml_calculate_changes(the_cib, remote_cib); pcmk__log_xml_changes(LOG_INFO, remote_cib); crm_trace("End of differences"); } free_xml(remote_cib); sync_our_cib(reply, FALSE); } } } static void local_notify_destroy_callback(gpointer data) { cib_local_notify_t *notify = data; free_xml(notify->notify_src); free(notify->client_id); free(notify); } static void check_local_notify(int bcast_id) { const cib_local_notify_t *notify = NULL; if (!local_notify_queue) { return; } notify = pcmk__intkey_table_lookup(local_notify_queue, bcast_id); if (notify) { do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply, notify->from_peer); pcmk__intkey_table_remove(local_notify_queue, bcast_id); } } static void queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { cib_local_notify_t *notify = pcmk__assert_alloc(1, sizeof(cib_local_notify_t)); notify->notify_src = notify_src; notify->client_id = pcmk__str_copy(client_id); notify->sync_reply = sync_reply; notify->from_peer = from_peer; if (!local_notify_queue) { local_notify_queue = pcmk__intkey_table(local_notify_destroy_callback); } pcmk__intkey_table_insert(local_notify_queue, cib_local_bcast_num, notify); // cppcheck doesn't know notify will get freed when hash table is destroyed // cppcheck-suppress memleak } static void parse_local_options_v1(const pcmk__client_t *cib_client, const cib__operation_t *operation, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { if (pcmk_is_set(operation->flags, cib__op_attr_modifies) && !pcmk_is_set(call_options, cib_inhibit_bcast)) { /* we need to send an update anyway */ *needs_reply = TRUE; } else { *needs_reply = FALSE; } if (host == NULL && (call_options & cib_scope_local)) { crm_trace("Processing locally scoped %s op from client %s", op, pcmk__client_name(cib_client)); *local_notify = TRUE; } else if ((host == NULL) && based_is_primary) { crm_trace("Processing %s op locally from client %s as primary", op, pcmk__client_name(cib_client)); *local_notify = TRUE; } else if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { crm_trace("Processing locally addressed %s op from client %s", op, pcmk__client_name(cib_client)); *local_notify = TRUE; } else if (stand_alone) { *needs_forward = FALSE; *local_notify = TRUE; *process = TRUE; } else { crm_trace("%s op from %s needs to be forwarded to client %s", op, pcmk__client_name(cib_client), pcmk__s(host, "the primary instance")); *needs_forward = TRUE; *process = FALSE; } } static void parse_local_options_v2(const pcmk__client_t *cib_client, const cib__operation_t *operation, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { // Process locally and notify local client *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; if (pcmk_is_set(operation->flags, cib__op_attr_local)) { /* Always process locally if cib__op_attr_local is set. * * @COMPAT: Currently host is ignored. At a compatibility break, throw * an error (from cib_process_request() or earlier) if host is not NULL or * OUR_NODENAME. */ crm_trace("Processing always-local %s op from client %s", op, pcmk__client_name(cib_client)); if (!pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei|pcmk__str_null_matches)) { crm_warn("Operation '%s' is always local but its target host is " "set to '%s'", op, host); } return; } if (pcmk_is_set(operation->flags, cib__op_attr_modifies) || !pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei|pcmk__str_null_matches)) { // Forward modifying and non-local requests via cluster *process = FALSE; *needs_reply = FALSE; *local_notify = FALSE; *needs_forward = TRUE; crm_trace("%s op from %s needs to be forwarded to %s", op, pcmk__client_name(cib_client), pcmk__s(host, "all nodes")); return; } if (stand_alone) { crm_trace("Processing %s op from client %s (stand-alone)", op, pcmk__client_name(cib_client)); } else { crm_trace("Processing %saddressed %s op from client %s", ((host != NULL)? "locally " : "un"), op, pcmk__client_name(cib_client)); } } static void parse_local_options(const pcmk__client_t *cib_client, const cib__operation_t *operation, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { if(cib_legacy_mode()) { parse_local_options_v1(cib_client, operation, call_options, host, op, local_notify, needs_reply, process, needs_forward); } else { parse_local_options_v2(cib_client, operation, call_options, host, op, local_notify, needs_reply, process, needs_forward); } } static gboolean parse_peer_options_v1(const cib__operation_t *operation, xmlNode *request, gboolean *local_notify, gboolean *needs_reply, gboolean *process) { const char *op = NULL; const char *host = NULL; const char *delegated = NULL; const char *originator = crm_element_value(request, PCMK__XA_SRC); const char *reply_to = crm_element_value(request, PCMK__XA_CIB_ISREPLYTO); gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei); if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) { *needs_reply = FALSE; if (is_reply) { *local_notify = TRUE; crm_trace("Processing global/peer update from %s" " that originated from us", originator); } else { crm_trace("Processing global/peer update from %s", originator); } return TRUE; } op = crm_element_value(request, PCMK__XA_CIB_OP); crm_trace("Processing legacy %s request sent by %s", op, originator); if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { /* Always process these */ *local_notify = FALSE; if (reply_to == NULL || is_reply) { *process = TRUE; } if (is_reply) { *needs_reply = FALSE; } return *process; } if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { process_ping_reply(request); return FALSE; } if (is_reply) { crm_trace("Forward reply sent from %s to local clients", originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } host = crm_element_value(request, PCMK__XA_CIB_HOST); if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { crm_trace("Processing %s request sent to us from %s", op, originator); return TRUE; } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator); *needs_reply = TRUE; return TRUE; } else if ((host == NULL) && based_is_primary) { crm_trace("Processing %s request sent to primary instance from %s", op, originator); return TRUE; } delegated = crm_element_value(request, PCMK__XA_CIB_DELEGATED_FROM); if (delegated != NULL) { crm_trace("Ignoring message for primary instance"); } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring msg for instance on %s", host); } else if ((reply_to == NULL) && !based_is_primary) { // This is for the primary instance, and we're not it crm_trace("Ignoring reply for primary instance"); } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { if (reply_to != NULL) { crm_debug("Processing %s from %s", op, originator); *needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, originator); } return TRUE; } else { crm_err("Nothing for us to do?"); crm_log_xml_err(request, "Peer[inbound]"); } return FALSE; } static gboolean parse_peer_options_v2(const cib__operation_t *operation, xmlNode *request, gboolean *local_notify, gboolean *needs_reply, gboolean *process) { const char *host = NULL; const char *delegated = crm_element_value(request, PCMK__XA_CIB_DELEGATED_FROM); const char *op = crm_element_value(request, PCMK__XA_CIB_OP); const char *originator = crm_element_value(request, PCMK__XA_SRC); const char *reply_to = crm_element_value(request, PCMK__XA_CIB_ISREPLYTO); gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei); if (originator == NULL) { // Shouldn't be possible originator = "peer"; } if (pcmk__str_eq(op, PCMK__CIB_REQUEST_REPLACE, pcmk__str_none)) { // sync_our_cib() sets PCMK__XA_CIB_ISREPLYTO if (reply_to) { delegated = reply_to; } goto skip_is_reply; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SYNC_TO_ALL, pcmk__str_none)) { // Nothing to do } else if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { process_ping_reply(request); return FALSE; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_UPGRADE, pcmk__str_none)) { /* Only the DC (node with the oldest software) should process * this operation if PCMK__XA_CIB_SCHEMA_MAX is unset. * * If the DC is happy it will then send out another * PCMK__CIB_REQUEST_UPGRADE which will tell all nodes to do the actual * upgrade. * * Except this time PCMK__XA_CIB_SCHEMA_MAX will be set which puts a * limit on how far newer nodes will go */ const char *max = crm_element_value(request, PCMK__XA_CIB_SCHEMA_MAX); const char *upgrade_rc = crm_element_value(request, PCMK__XA_CIB_UPGRADE_RC); crm_trace("Parsing upgrade %s for %s with max=%s and upgrade_rc=%s", (is_reply? "reply" : "request"), (based_is_primary? "primary" : "secondary"), pcmk__s(max, "none"), pcmk__s(upgrade_rc, "none")); if (upgrade_rc != NULL) { // Our upgrade request was rejected by DC, notify clients of result crm_xml_add(request, PCMK__XA_CIB_RC, upgrade_rc); } else if ((max == NULL) && based_is_primary) { /* We are the DC, check if this upgrade is allowed */ goto skip_is_reply; } else if(max) { /* Ok, go ahead and upgrade to 'max' */ goto skip_is_reply; } else { // Ignore broadcast client requests when we're not primary return FALSE; } } else if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) { crm_info("Detected legacy %s global update from %s", op, originator); send_sync_request(NULL); legacy_mode = TRUE; return FALSE; } else if (is_reply && pcmk_is_set(operation->flags, cib__op_attr_modifies)) { crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator); return FALSE; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { *local_notify = FALSE; if (reply_to == NULL) { *process = TRUE; } else { // Not possible? crm_debug("Ignoring shutdown request from %s because reply_to=%s", originator, reply_to); } return *process; } if (is_reply) { crm_trace("Will notify local clients for %s reply from %s", op, originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } skip_is_reply: *process = TRUE; *needs_reply = FALSE; *local_notify = pcmk__str_eq(delegated, OUR_NODENAME, pcmk__str_casei); host = crm_element_value(request, PCMK__XA_CIB_HOST); if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { crm_trace("Processing %s request sent to us from %s", op, originator); *needs_reply = TRUE; return TRUE; } else if (host != NULL) { crm_trace("Ignoring %s request intended for CIB manager on %s", op, host); return FALSE; } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { *needs_reply = TRUE; } crm_trace("Processing %s request broadcast by %s call %s on %s " "(local clients will%s be notified)", op, pcmk__s(crm_element_value(request, PCMK__XA_CIB_CLIENTNAME), "client"), pcmk__s(crm_element_value(request, PCMK__XA_CIB_CALLID), "without ID"), originator, (*local_notify? "" : "not")); return TRUE; } static gboolean parse_peer_options(const cib__operation_t *operation, xmlNode *request, gboolean *local_notify, gboolean *needs_reply, gboolean *process) { /* TODO: What happens when an update comes in after node A * requests the CIB from node B, but before it gets the reply (and * sends out the replace operation) */ if(cib_legacy_mode()) { return parse_peer_options_v1(operation, request, local_notify, needs_reply, process); } else { return parse_peer_options_v2(operation, request, local_notify, needs_reply, process); } } /*! * \internal * \brief Forward a CIB request to the appropriate target host(s) * * \param[in] request CIB request to forward */ static void forward_request(xmlNode *request) { const char *op = crm_element_value(request, PCMK__XA_CIB_OP); const char *section = crm_element_value(request, PCMK__XA_CIB_SECTION); const char *host = crm_element_value(request, PCMK__XA_CIB_HOST); const char *originator = crm_element_value(request, PCMK__XA_SRC); const char *client_name = crm_element_value(request, PCMK__XA_CIB_CLIENTNAME); const char *call_id = crm_element_value(request, PCMK__XA_CIB_CALLID); crm_node_t *peer = NULL; int log_level = LOG_INFO; if (pcmk__str_eq(op, PCMK__CIB_REQUEST_NOOP, pcmk__str_none)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", pcmk__s(op, "invalid"), pcmk__s(section, "all"), pcmk__s(host, (cib_legacy_mode()? "primary" : "all")), pcmk__s(originator, "local"), pcmk__s(client_name, "unspecified"), pcmk__s(call_id, "unspecified")); crm_xml_add(request, PCMK__XA_CIB_DELEGATED_FROM, OUR_NODENAME); if (host != NULL) { peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster); } send_cluster_message(peer, crm_msg_cib, request, FALSE); // Return the request to its original state pcmk__xe_remove_attr(request, PCMK__XA_CIB_DELEGATED_FROM); } static gboolean send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast) { CRM_ASSERT(msg != NULL); if (broadcast) { /* @COMPAT: Legacy code * * This successful call modified the CIB, and the change needs to be * broadcast (sent via cluster to all nodes). */ int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; const char *digest = NULL; int format = 1; xmlNode *wrapper = NULL; CRM_LOG_ASSERT(result_diff != NULL); digest = crm_element_value(result_diff, PCMK__XA_DIGEST); crm_element_value_int(result_diff, PCMK_XA_FORMAT, &format); cib_diff_version_details(result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest); crm_xml_add(msg, PCMK__XA_CIB_ISREPLYTO, originator); pcmk__xe_set_bool_attr(msg, PCMK__XA_CIB_UPDATE, true); crm_xml_add(msg, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_APPLY_PATCH); crm_xml_add(msg, PCMK__XA_CIB_USER, CRM_DAEMON_USER); if (format == 1) { CRM_ASSERT(digest != NULL); } wrapper = pcmk__xe_create(msg, PCMK__XE_CIB_UPDATE_DIFF); pcmk__xml_copy(wrapper, result_diff); crm_log_xml_explicit(msg, "copy"); return send_cluster_message(NULL, crm_msg_cib, msg, TRUE); } else if (originator != NULL) { /* send reply via HA to originating node */ crm_trace("Sending request result to %s only", originator); crm_xml_add(msg, PCMK__XA_CIB_ISREPLYTO, originator); return send_cluster_message(pcmk__get_node(0, originator, NULL, pcmk__node_search_cluster), crm_msg_cib, msg, FALSE); } return FALSE; } /*! * \internal * \brief Handle an IPC or CPG message containing a request * * \param[in,out] request Request XML * \param[in] privileged Whether privileged commands may be run * (see cib_server_ops[] definition) * \param[in] cib_client IPC client that sent request (or NULL if CPG) * * \return Legacy Pacemaker return code */ int cib_process_request(xmlNode *request, gboolean privileged, const pcmk__client_t *cib_client) { // @TODO: Break into multiple smaller functions int call_options = 0; gboolean process = TRUE; // Whether to process request locally now gboolean is_update = TRUE; // Whether request would modify CIB gboolean needs_reply = TRUE; // Whether to build a reply gboolean local_notify = FALSE; // Whether to notify (local) requester gboolean needs_forward = FALSE; // Whether to forward request somewhere else xmlNode *op_reply = NULL; xmlNode *result_diff = NULL; int rc = pcmk_ok; const char *op = crm_element_value(request, PCMK__XA_CIB_OP); const char *originator = crm_element_value(request, PCMK__XA_SRC); const char *host = crm_element_value(request, PCMK__XA_CIB_HOST); const char *target = NULL; const char *call_id = crm_element_value(request, PCMK__XA_CIB_CALLID); const char *client_id = crm_element_value(request, PCMK__XA_CIB_CLIENTID); const char *client_name = crm_element_value(request, PCMK__XA_CIB_CLIENTNAME); const char *reply_to = crm_element_value(request, PCMK__XA_CIB_ISREPLYTO); const cib__operation_t *operation = NULL; cib__op_fn_t op_function = NULL; crm_element_value_int(request, PCMK__XA_CIB_CALLOPT, &call_options); if ((host != NULL) && (*host == '\0')) { host = NULL; } // @TODO: Improve trace messages. Target is accurate only for legacy mode. if (host) { target = host; } else if (call_options & cib_scope_local) { target = "local host"; } else { target = "primary"; } if (cib_client == NULL) { crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)", op, client_name, call_id, originator, target, reply_to); } else { crm_xml_add(request, PCMK__XA_SRC, OUR_NODENAME); crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target); } rc = cib__get_operation(op, &operation); rc = pcmk_rc2legacy(rc); if (rc != pcmk_ok) { /* TODO: construct error reply? */ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); return rc; } op_function = based_get_op_function(operation); if (op_function == NULL) { crm_err("Operation %s not supported by CIB manager", op); return -EOPNOTSUPP; } if (cib_client != NULL) { parse_local_options(cib_client, operation, call_options, host, op, &local_notify, &needs_reply, &process, &needs_forward); } else if (!parse_peer_options(operation, request, &local_notify, &needs_reply, &process)) { return rc; } if (pcmk_is_set(call_options, cib_transaction)) { /* All requests in a transaction are processed locally against a working * CIB copy, and we don't notify for individual requests because the * entire transaction is atomic. * * We still call the option parser functions above, for the sake of log * messages and checking whether we're the target for peer requests. */ process = TRUE; needs_reply = FALSE; local_notify = FALSE; needs_forward = FALSE; } is_update = pcmk_is_set(operation->flags, cib__op_attr_modifies); if (pcmk_is_set(call_options, cib_discard_reply)) { /* If the request will modify the CIB, and we are in legacy mode, we * need to build a reply so we can broadcast a diff, even if the * requester doesn't want one. */ needs_reply = is_update && cib_legacy_mode(); local_notify = FALSE; crm_trace("Client is not interested in the reply"); } if (needs_forward) { forward_request(request); return rc; } if (cib_status != pcmk_ok) { rc = cib_status; crm_err("Operation ignored, cluster configuration is invalid." " Please repair and restart: %s", pcmk_strerror(cib_status)); op_reply = create_cib_reply(op, call_id, client_id, call_options, rc, the_cib); } else if (process) { time_t finished = 0; time_t now = time(NULL); int level = LOG_INFO; const char *section = crm_element_value(request, PCMK__XA_CIB_SECTION); const char *admin_epoch_s = NULL; const char *epoch_s = NULL; const char *num_updates_s = NULL; rc = cib_process_command(request, operation, op_function, &op_reply, &result_diff, privileged); if (!is_update) { level = LOG_TRACE; } else if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) { switch (rc) { case pcmk_ok: level = LOG_INFO; break; case -pcmk_err_old_data: case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: level = LOG_TRACE; break; default: level = LOG_ERR; } } else if (rc != pcmk_ok) { level = LOG_WARNING; } if (the_cib != NULL) { admin_epoch_s = crm_element_value(the_cib, PCMK_XA_ADMIN_EPOCH); epoch_s = crm_element_value(the_cib, PCMK_XA_EPOCH); num_updates_s = crm_element_value(the_cib, PCMK_XA_NUM_UPDATES); } do_crm_log(level, "Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)", op, section ? section : "'all'", pcmk_strerror(rc), rc, originator ? originator : "local", client_name, call_id, pcmk__s(admin_epoch_s, "0"), pcmk__s(epoch_s, "0"), pcmk__s(num_updates_s, "0")); finished = time(NULL); if ((finished - now) > 3) { crm_trace("%s operation took %lds to complete", op, (long)(finished - now)); crm_write_blackbox(0, NULL); } if (op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_xml_err(request, "null reply"); needs_reply = FALSE; local_notify = FALSE; } } if (is_update && !cib_legacy_mode()) { crm_trace("Completed pre-sync update from %s/%s/%s%s", originator ? originator : "local", client_name, call_id, local_notify?" with local notification":""); } else if (!needs_reply || stand_alone) { // This was a non-originating secondary update crm_trace("Completed update as secondary"); } else if (cib_legacy_mode() && rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { gboolean broadcast = FALSE; cib_local_bcast_num++; crm_xml_add_int(request, PCMK__XA_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num); broadcast = send_peer_reply(request, result_diff, originator, TRUE); if (broadcast && client_id && local_notify && op_reply) { /* If we have been asked to sync the reply, * and a bcast msg has gone out, we queue the local notify * until we know the bcast message has been received */ local_notify = FALSE; crm_trace("Queuing local %ssync notification for %s", (call_options & cib_sync_call) ? "" : "a-", client_id); queue_local_notify(op_reply, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); op_reply = NULL; /* the reply is queued, so don't free here */ } } else if ((cib_client == NULL) && !pcmk_is_set(call_options, cib_discard_reply)) { if (is_update == FALSE || result_diff == NULL) { crm_trace("Request not broadcast: R/O call"); } else if (call_options & cib_inhibit_bcast) { crm_trace("Request not broadcast: inhibited"); } else if (rc != pcmk_ok) { crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc)); } else { crm_trace("Directing reply to %s", originator); } send_peer_reply(op_reply, result_diff, originator, FALSE); } if (local_notify && client_id) { crm_trace("Performing local %ssync notification for %s", (pcmk_is_set(call_options, cib_sync_call)? "" : "a"), client_id); if (process == FALSE) { do_local_notify(request, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); } else { do_local_notify(op_reply, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); } } free_xml(op_reply); free_xml(result_diff); return rc; } /*! * \internal * \brief Get a CIB operation's input from the request XML * * \param[in] request CIB request XML * \param[in] type CIB operation type * \param[out] section Where to store CIB section name * * \return Input XML for CIB operation * * \note If not \c NULL, the return value is a non-const pointer to part of * \p request. The caller should not free it directly. */ static xmlNode * prepare_input(const xmlNode *request, enum cib__op_type type, const char **section) { xmlNode *wrapper = NULL; xmlNode *input = NULL; *section = NULL; switch (type) { case cib__op_apply_patch: { const char *wrapper_name = PCMK__XE_CIB_CALLDATA; if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) { wrapper_name = PCMK__XE_CIB_UPDATE_DIFF; } wrapper = pcmk__xe_first_child(request, wrapper_name, NULL, NULL); input = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); } break; default: wrapper = pcmk__xe_first_child(request, PCMK__XE_CIB_CALLDATA, NULL, NULL); input = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); *section = crm_element_value(request, PCMK__XA_CIB_SECTION); break; } // Grab the specified section if ((*section != NULL) && pcmk__xe_is(input, PCMK_XE_CIB)) { input = pcmk_find_cib_element(input, *section); } return input; } // v1 and v2 patch formats #define XPATH_CONFIG_CHANGE \ "//" PCMK_XE_CRM_CONFIG " | " \ "//" PCMK_XE_CHANGE \ "[contains(@" PCMK_XA_PATH ",'/" PCMK_XE_CRM_CONFIG "/')]" static bool contains_config_change(xmlNode *diff) { bool changed = false; if (diff) { xmlXPathObject *xpathObj = xpath_search(diff, XPATH_CONFIG_CHANGE); if (numXpathResults(xpathObj) > 0) { changed = true; } freeXpathObject(xpathObj); } return changed; } static int cib_process_command(xmlNode *request, const cib__operation_t *operation, cib__op_fn_t op_function, xmlNode **reply, xmlNode **cib_diff, bool privileged) { xmlNode *input = NULL; xmlNode *output = NULL; xmlNode *result_cib = NULL; int call_options = 0; const char *op = NULL; const char *section = NULL; const char *call_id = crm_element_value(request, PCMK__XA_CIB_CALLID); const char *client_id = crm_element_value(request, PCMK__XA_CIB_CLIENTID); const char *client_name = crm_element_value(request, PCMK__XA_CIB_CLIENTNAME); const char *originator = crm_element_value(request, PCMK__XA_SRC); int rc = pcmk_ok; bool config_changed = false; bool manage_counters = true; static mainloop_timer_t *digest_timer = NULL; CRM_ASSERT(cib_status == pcmk_ok); if(digest_timer == NULL) { digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL); } *reply = NULL; *cib_diff = NULL; /* Start processing the request... */ op = crm_element_value(request, PCMK__XA_CIB_OP); crm_element_value_int(request, PCMK__XA_CIB_CALLOPT, &call_options); if (!privileged && pcmk_is_set(operation->flags, cib__op_attr_privileged)) { rc = -EACCES; crm_trace("Failed due to lack of privileges: %s", pcmk_strerror(rc)); goto done; } input = prepare_input(request, operation->type, §ion); if (!pcmk_is_set(operation->flags, cib__op_attr_modifies)) { rc = cib_perform_op(NULL, op, call_options, op_function, true, section, request, input, false, &config_changed, &the_cib, &result_cib, NULL, &output); CRM_CHECK(result_cib == NULL, free_xml(result_cib)); goto done; } /* @COMPAT: Handle a valid write action (legacy) * * @TODO: Re-evaluate whether this is all truly legacy. The cib_force_diff * portion is. However, PCMK__XA_CIB_UPDATE may be set by a sync operation * even in non-legacy mode, and manage_counters tells xml_create_patchset() * whether to update version/epoch info. */ if (pcmk__xe_attr_is_true(request, PCMK__XA_CIB_UPDATE)) { manage_counters = false; cib__set_call_options(call_options, "call", cib_force_diff); crm_trace("Global update detected"); CRM_LOG_ASSERT(pcmk__str_any_of(op, PCMK__CIB_REQUEST_APPLY_PATCH, PCMK__CIB_REQUEST_REPLACE, NULL)); } ping_modified_since = TRUE; if (pcmk_is_set(call_options, cib_inhibit_bcast)) { crm_trace("Skipping update: inhibit broadcast"); manage_counters = false; } // result_cib must not be modified after cib_perform_op() returns rc = cib_perform_op(NULL, op, call_options, op_function, false, section, request, input, manage_counters, &config_changed, &the_cib, &result_cib, cib_diff, &output); // @COMPAT: Legacy code if (!manage_counters) { int format = 1; // If the diff is NULL at this point, it's because nothing changed if (*cib_diff != NULL) { crm_element_value_int(*cib_diff, PCMK_XA_FORMAT, &format); } if (format == 1) { config_changed = cib__config_changed_v1(NULL, NULL, cib_diff); } } /* Always write to disk for successful ops with the flag set. This also * negates the need to detect ordering changes. */ if ((rc == pcmk_ok) && pcmk_is_set(operation->flags, cib__op_attr_writes_through)) { config_changed = true; } if ((rc == pcmk_ok) && !pcmk_any_flags_set(call_options, cib_dryrun|cib_transaction)) { if (result_cib != the_cib) { if (pcmk_is_set(operation->flags, cib__op_attr_writes_through)) { config_changed = true; } crm_trace("Activating %s->%s%s", crm_element_value(the_cib, PCMK_XA_NUM_UPDATES), crm_element_value(result_cib, PCMK_XA_NUM_UPDATES), (config_changed? " changed" : "")); rc = activateCibXml(result_cib, config_changed, op); if (rc != pcmk_ok) { crm_err("Failed to activate new CIB: %s", pcmk_strerror(rc)); } } if ((rc == pcmk_ok) && contains_config_change(*cib_diff)) { cib_read_config(config_hash, result_cib); } /* @COMPAT Nodes older than feature set 3.19.0 don't support * transactions. In a mixed-version cluster with nodes <3.19.0, we must * sync the updated CIB, so that the older nodes receive the changes. * Any node that has already applied the transaction will ignore the * synced CIB. * * To ensure the updated CIB is synced from only one node, we sync it * from the originator. */ if ((operation->type == cib__op_commit_transact) && pcmk__str_eq(originator, OUR_NODENAME, pcmk__str_casei) && compare_version(crm_element_value(the_cib, PCMK_XA_CRM_FEATURE_SET), "3.19.0") < 0) { sync_our_cib(request, TRUE); } mainloop_timer_stop(digest_timer); mainloop_timer_start(digest_timer); } else if (rc == -pcmk_err_schema_validation) { CRM_ASSERT(result_cib != the_cib); if (output != NULL) { crm_log_xml_info(output, "cib:output"); free_xml(output); } output = result_cib; } else { crm_trace("Not activating %d %d %s", rc, pcmk_is_set(call_options, cib_dryrun), crm_element_value(result_cib, PCMK_XA_NUM_UPDATES)); if (result_cib != the_cib) { free_xml(result_cib); } } if (!pcmk_any_flags_set(call_options, cib_dryrun|cib_inhibit_notify|cib_transaction)) { crm_trace("Sending notifications %d", pcmk_is_set(call_options, cib_dryrun)); cib_diff_notify(op, rc, call_id, client_id, client_name, originator, input, *cib_diff); } pcmk__log_xml_patchset(LOG_TRACE, *cib_diff); done: if (!pcmk_is_set(call_options, cib_discard_reply) || cib_legacy_mode()) { *reply = create_cib_reply(op, call_id, client_id, call_options, rc, output); } if (output != the_cib) { free_xml(output); } crm_trace("done"); return rc; } void cib_peer_callback(xmlNode * msg, void *private_data) { const char *reason = NULL; const char *originator = crm_element_value(msg, PCMK__XA_SRC); if (cib_legacy_mode() && pcmk__str_eq(originator, OUR_NODENAME, pcmk__str_casei|pcmk__str_null_matches)) { /* message is from ourselves */ int bcast_id = 0; if (crm_element_value_int(msg, PCMK__XA_CIB_LOCAL_NOTIFY_ID, &bcast_id) == 0) { check_local_notify(bcast_id); } return; } else if (crm_peer_cache == NULL) { reason = "membership not established"; goto bail; } if (crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME) == NULL) { crm_xml_add(msg, PCMK__XA_CIB_CLIENTNAME, originator); } /* crm_log_xml_trace(msg, "Peer[inbound]"); */ cib_process_request(msg, TRUE, NULL); return; bail: if (reason) { const char *op = crm_element_value(msg, PCMK__XA_CIB_OP); crm_warn("Discarding %s message from %s: %s", op, originator, reason); } } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_cib(__func__, CRM_EX_ERROR); return FALSE; } static void disconnect_remote_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *a_client = value; crm_err("Can't disconnect client %s: Not implemented", pcmk__client_name(a_client)); } static void initiate_exit(void) { int active = 0; xmlNode *leaving = NULL; - active = crm_active_peers(); + active = pcmk__cluster_num_active_nodes(); if (active < 2) { // This is the last active node terminate_cib(__func__, 0); return; } crm_info("Sending shutdown request to %d peers", active); leaving = pcmk__xe_create(NULL, PCMK__XE_EXIT_NOTIFICATION); crm_xml_add(leaving, PCMK__XA_T, PCMK__VALUE_CIB); crm_xml_add(leaving, PCMK__XA_CIB_OP, PCMK__CIB_REQUEST_SHUTDOWN); send_cluster_message(NULL, crm_msg_cib, leaving, TRUE); free_xml(leaving); g_timeout_add(EXIT_ESCALATION_MS, cib_force_exit, NULL); } void cib_shutdown(int nsig) { struct qb_ipcs_stats srv_stats; if (cib_shutdown_flag == FALSE) { int disconnects = 0; qb_ipcs_connection_t *c = NULL; cib_shutdown_flag = TRUE; c = qb_ipcs_connection_first_get(ipcs_rw); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_rw, last); crm_debug("Disconnecting r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_ro); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_ro, last); crm_debug("Disconnecting r/o client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_shm); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_shm, last); crm_debug("Disconnecting non-blocking r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } disconnects += pcmk__ipc_client_count(); crm_debug("Disconnecting %d remote clients", pcmk__ipc_client_count()); pcmk__foreach_ipc_client(disconnect_remote_client, NULL); crm_info("Disconnected %d clients", disconnects); } qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE); if (pcmk__ipc_client_count() == 0) { crm_info("All clients disconnected (%d)", srv_stats.active_connections); initiate_exit(); } else { crm_info("Waiting on %d clients to disconnect (%d)", pcmk__ipc_client_count(), srv_stats.active_connections); } } extern int remote_fd; extern int remote_tls_fd; /*! * \internal * \brief Close remote sockets, free the global CIB and quit * * \param[in] caller Name of calling function (for log message) * \param[in] fast If -1, skip disconnect; if positive, exit that */ void terminate_cib(const char *caller, int fast) { crm_info("%s: Exiting%s...", caller, (fast > 0)? " fast" : mainloop ? " from mainloop" : ""); if (remote_fd > 0) { close(remote_fd); remote_fd = 0; } if (remote_tls_fd > 0) { close(remote_tls_fd); remote_tls_fd = 0; } uninitializeCib(); if (fast > 0) { /* Quit fast on error */ pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(fast); } else if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) { /* Quit via returning from the main loop. If fast == -1, we skip the * disconnect here, and it will be done when the main loop returns * (this allows the peer status callback to avoid messing with the * peer caches). */ if (fast == 0) { - crm_cluster_disconnect(crm_cluster); + pcmk_cluster_disconnect(crm_cluster); } g_main_loop_quit(mainloop); } else { /* Quit via clean exit. Even the peer status callback can disconnect * here, because we're not returning control to the caller. */ - crm_cluster_disconnect(crm_cluster); + pcmk_cluster_disconnect(crm_cluster); pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(CRM_EX_OK); } } diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c index 35401ea276..b42dbc08fc 100644 --- a/daemons/based/pacemaker-based.c +++ b/daemons/based/pacemaker-based.c @@ -1,450 +1,450 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for managing the configuration of a Pacemaker cluster" extern int init_remote_listener(int port, gboolean encrypted); gboolean cib_shutdown_flag = FALSE; int cib_status = pcmk_ok; crm_cluster_t *crm_cluster = NULL; GMainLoop *mainloop = NULL; gchar *cib_root = NULL; static gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; gboolean stand_alone = FALSE; int remote_fd = 0; int remote_tls_fd = 0; GHashTable *config_hash = NULL; GHashTable *local_notify_queue = NULL; static void cib_init(void); void cib_shutdown(int nsig); static bool startCib(const char *filename); extern int write_cib_contents(gpointer p); static crm_exit_t exit_code = CRM_EX_OK; static void cib_enable_writes(int nsig) { crm_info("(Re)enabling disk writes"); cib_writes_enabled = TRUE; } /*! * \internal * \brief Set up options, users, and groups for stand-alone mode * * \param[out] error GLib error object * * \return Standard Pacemaker return code */ static int setup_stand_alone(GError **error) { int rc = 0; struct passwd *pwentry = NULL; preserve_status = TRUE; cib_writes_enabled = FALSE; errno = 0; pwentry = getpwnam(CRM_DAEMON_USER); if (pwentry == NULL) { exit_code = CRM_EX_FATAL; if (errno != 0) { g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Error getting password DB entry for %s: %s", CRM_DAEMON_USER, strerror(errno)); return errno; } g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Password DB entry for '%s' not found", CRM_DAEMON_USER); return ENXIO; } rc = setgid(pwentry->pw_gid); if (rc < 0) { exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Could not set group to %d: %s", pwentry->pw_gid, strerror(errno)); return errno; } rc = initgroups(CRM_DAEMON_USER, pwentry->pw_gid); if (rc < 0) { exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Could not setup groups for user %d: %s", pwentry->pw_uid, strerror(errno)); return errno; } rc = setuid(pwentry->pw_uid); if (rc < 0) { exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Could not set user to %d: %s", pwentry->pw_uid, strerror(errno)); return errno; } return pcmk_rc_ok; } /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_cluster_options() or * crm_attribute --list-options=cluster instead of querying daemon metadata. */ static int based_metadata(pcmk__output_t *out) { return pcmk__daemon_metadata(out, "pacemaker-based", "Cluster Information Base manager options", "Cluster options used by Pacemaker's Cluster " "Information Base manager", pcmk__opt_based); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, "(Advanced use only) Run in stand-alone mode", NULL }, { "disk-writes", 'w', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &cib_writes_enabled, "(Advanced use only) Enable disk writes (enabled by default unless in " "stand-alone mode)", NULL }, { "cib-root", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME, &cib_root, "(Advanced use only) Directory where the CIB XML file should be located " "(default: " CRM_CONFIG_DIR ")", NULL }, { NULL } }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_ipc_t *old_instance = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "r"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } mainloop_add_signal(SIGTERM, cib_shutdown); mainloop_add_signal(SIGPIPE, cib_enable_writes); cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL); if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = based_metadata(out); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } pcmk__cli_init_logging("pacemaker-based", args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker CIB manager"); old_instance = crm_ipc_new(PCMK__SERVER_BASED_RO, 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-based is already active, aborting startup"); goto done; } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } if (stand_alone) { rc = setup_stand_alone(&error); if (rc != pcmk_rc_ok) { goto done; } } if (cib_root == NULL) { cib_root = g_strdup(CRM_CONFIG_DIR); } else { crm_notice("Using custom config location: %s", cib_root); } if (!pcmk__daemon_can_write(cib_root, NULL)) { exit_code = CRM_EX_FATAL; crm_err("Terminating due to bad permissions on %s", cib_root); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Bad permissions on %s (see logs for details)", cib_root); goto done; } crm_peer_init(); // Read initial CIB, connect to cluster, and start IPC servers cib_init(); // Run the main loop mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker CIB manager successfully started and accepting connections"); g_main_loop_run(mainloop); /* If main loop returned, clean up and exit. We disconnect in case * terminate_cib() was called with fast=-1. */ - crm_cluster_disconnect(crm_cluster); + pcmk_cluster_disconnect(crm_cluster); pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); done: g_strfreev(processed_args); pcmk__free_arg_context(context); crm_peer_destroy(); if (local_notify_queue != NULL) { g_hash_table_destroy(local_notify_queue); } if (config_hash != NULL) { g_hash_table_destroy(config_hash); } pcmk__client_cleanup(); pcmk_cluster_free(crm_cluster); g_free(cib_root); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } #if SUPPORT_COROSYNC static void cib_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, PCMK__XA_SRC, from); cib_peer_callback(xml, NULL); } free_xml(xml); free(data); } static void cib_cs_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to cluster layer, shutting down"); terminate_cib(__func__, CRM_EX_DISCONNECT); } } #endif static void cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { switch (type) { case crm_status_processes: if (cib_legacy_mode() && !pcmk_is_set(node->processes, crm_get_cluster_proc())) { uint32_t old = data? *(const uint32_t *)data : 0; if ((node->processes ^ old) & crm_proc_cpg) { crm_info("Attempting to disable legacy mode after %s left the cluster", node->uname); legacy_mode = FALSE; } } break; case crm_status_uname: case crm_status_nstate: - if (cib_shutdown_flag && (crm_active_peers() < 2) + if (cib_shutdown_flag && (pcmk__cluster_num_active_nodes() < 2) && (pcmk__ipc_client_count() == 0)) { crm_info("No more peers"); terminate_cib(__func__, -1); } break; } } static void cib_init(void) { crm_cluster = pcmk_cluster_new(); #if SUPPORT_COROSYNC if (is_corosync_cluster()) { crm_cluster->destroy = cib_cs_destroy; crm_cluster->cpg.cpg_deliver_fn = cib_cs_dispatch; crm_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; } #endif // SUPPORT_COROSYNC config_hash = pcmk__strkey_table(free, free); if (startCib("cib.xml") == FALSE) { crm_crit("Cannot start CIB... terminating"); crm_exit(CRM_EX_NOINPUT); } if (!stand_alone) { crm_set_status_callback(&cib_peer_update_callback); - if (!crm_cluster_connect(crm_cluster)) { + if (pcmk_cluster_connect(crm_cluster) != pcmk_rc_ok) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(CRM_EX_FATAL); } } pcmk__serve_based_ipc(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks, &ipc_rw_callbacks); if (stand_alone) { based_is_primary = true; } } static bool startCib(const char *filename) { gboolean active = FALSE; xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status); if (activateCibXml(cib, TRUE, "start") == 0) { int port = 0; active = TRUE; cib_read_config(config_hash, cib); pcmk__scan_port(crm_element_value(cib, PCMK_XA_REMOTE_TLS_PORT), &port); if (port >= 0) { remote_tls_fd = init_remote_listener(port, TRUE); } pcmk__scan_port(crm_element_value(cib, PCMK_XA_REMOTE_CLEAR_PORT), &port); if (port >= 0) { remote_fd = init_remote_listener(port, FALSE); } } return active; } diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c index 81f5b13137..c8fecd5812 100644 --- a/daemons/controld/controld_callbacks.c +++ b/daemons/controld/controld_callbacks.c @@ -1,381 +1,381 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include /* From join_dc... */ extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); void crmd_ha_msg_filter(xmlNode * msg) { if (AM_I_DC) { const char *sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM); if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) { const char *from = crm_element_value(msg, PCMK__XA_SRC); if (!pcmk__str_eq(from, controld_globals.our_nodename, pcmk__str_casei)) { int level = LOG_INFO; const char *op = crm_element_value(msg, PCMK__XA_CRM_TASK); /* make sure the election happens NOW */ if (controld_globals.fsa_state != S_ELECTION) { ha_msg_input_t new_input; level = LOG_WARNING; new_input.msg = msg; register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input, __func__); } do_crm_log(level, "Another DC detected: %s (op=%s)", from, op); goto done; } } } else { const char *sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO); if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) { return; } } /* crm_log_xml_trace(msg, "HA[inbound]"); */ route_message(C_HA_MESSAGE, msg); done: controld_trigger_fsa(); } /*! * \internal * \brief Check whether a node is online * * \param[in] node Node to check * * \retval -1 if completely dead * \retval 0 if partially alive * \retval 1 if completely alive */ static int node_alive(const crm_node_t *node) { if (pcmk_is_set(node->flags, crm_remote_node)) { // Pacemaker Remote nodes can't be partially alive return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1: -1; - } else if (crm_is_peer_active(node)) { + } else if (pcmk__cluster_is_node_active(node)) { // Completely up cluster node: both cluster member and peer return 1; } else if (!pcmk_is_set(node->processes, crm_get_cluster_proc()) && !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) { // Completely down cluster node: neither cluster member nor peer return -1; } // Partially up cluster node: only cluster member or only peer return 0; } #define state_text(state) ((state)? (const char *)(state) : "in unknown state") void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { uint32_t old = 0; bool appeared = FALSE; bool is_remote = pcmk_is_set(node->flags, crm_remote_node); controld_node_pending_timer(node); /* The controller waits to receive some information from the membership * layer before declaring itself operational. If this is being called for a * cluster node, indicate that we have it. */ if (!is_remote) { controld_set_fsa_input_flags(R_PEER_DATA); } if (type == crm_status_processes && pcmk_is_set(node->processes, crm_get_cluster_proc()) && !AM_I_DC && !is_remote) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in crmd_ha_msg_filter() */ xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); crm_debug("Sending hello to node %u so that it learns our node name", node->id); send_cluster_message(node, crm_msg_crmd, query, FALSE); free_xml(query); } if (node->uname == NULL) { return; } switch (type) { case crm_status_uname: /* If we've never seen the node, then it also won't be in the status section */ crm_info("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), node->uname, state_text(node->state)); return; case crm_status_nstate: /* This callback should not be called unless the state actually * changed, but here's a failsafe just in case. */ CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei), return); crm_info("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), node->uname, state_text(node->state), state_text(data)); if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) { appeared = TRUE; if (!is_remote) { remove_stonith_cleanup(node->uname); } } else { controld_remove_failed_sync_node(node->uname); controld_remove_voter(node->uname); } crmd_alert_node_event(node); break; case crm_status_processes: CRM_CHECK(data != NULL, return); old = *(const uint32_t *)data; appeared = pcmk_is_set(node->processes, crm_get_cluster_proc()); { const char *dc_s = controld_globals.dc_name; if ((dc_s == NULL) && AM_I_DC) { dc_s = PCMK_VALUE_TRUE; } crm_info("Node %s is %s a peer " CRM_XS " DC=%s old=%#07x new=%#07x", node->uname, (appeared? "now" : "no longer"), pcmk__s(dc_s, ""), old, node->processes); } if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) { /* Peer status did not change. This should not be possible, * since we don't track process flags other than peer status. */ crm_trace("Process flag %#7x did not change from %#7x to %#7x", crm_get_cluster_proc(), old, node->processes); return; } if (!appeared) { node->peer_lost = time(NULL); controld_remove_failed_sync_node(node->uname); controld_remove_voter(node->uname); } if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { crm_trace("Ignoring peer status change because not connected to CIB"); return; } else if (controld_globals.fsa_state == S_STOPPING) { crm_trace("Ignoring peer status change because stopping"); return; } if (!appeared && pcmk__str_eq(node->uname, controld_globals.our_nodename, pcmk__str_casei)) { /* Did we get evicted? */ crm_notice("Our peer connection failed"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); } else if (pcmk__str_eq(node->uname, controld_globals.dc_name, pcmk__str_casei) - && !crm_is_peer_active(node)) { + && !pcmk__cluster_is_node_active(node)) { /* Did the DC leave us? */ crm_notice("Our peer on the DC (%s) is dead", controld_globals.dc_name); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't * want to fence it. Newer DCs will send their shutdown request * to all peers, who will update the DC's expected state to * down, thus avoiding fencing. We can safely erase the DC's * transient attributes when it leaves in that case. However, * the only way to avoid fencing older DCs is to leave the * transient attributes intact until it rejoins. */ if (compare_version(controld_globals.dc_version, "3.0.9") > 0) { controld_delete_node_state(node->uname, controld_section_attrs, cib_scope_local); } } else if (AM_I_DC || pcmk_is_set(controld_globals.flags, controld_dc_left) || (controld_globals.dc_name == NULL)) { /* This only needs to be done once, so normally the DC should do * it. However if there is no DC, every node must do it, since * there is no other way to ensure some one node does it. */ if (appeared) { te_trigger_stonith_history_sync(FALSE); } else { controld_delete_node_state(node->uname, controld_section_attrs, cib_scope_local); } } break; } if (AM_I_DC) { xmlNode *update = NULL; int flags = node_update_peer; int alive = node_alive(node); pcmk__graph_action_t *down = match_down_event(node->uuid); crm_trace("Alive=%d, appeared=%d, down=%d", alive, appeared, (down? down->id : -1)); if (appeared && (alive > 0) && !is_remote) { register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } if (down) { const char *task = crm_element_value(down->xml, PCMK_XA_OPERATION); if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { /* tengine_stonith_callback() confirms fence actions */ crm_trace("Updating CIB %s fencer reported fencing of %s complete", (pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname); } else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_casei)) { // Shutdown actions are immediately confirmed (i.e. no_wait) if (!is_remote) { flags |= node_update_join | node_update_expected; crmd_peer_down(node, FALSE); check_join_state(controld_globals.fsa_state, __func__); } if (alive >= 0) { crm_info("%s of peer %s is in progress " CRM_XS " action=%d", task, node->uname, down->id); } else { crm_notice("%s of peer %s is complete " CRM_XS " action=%d", task, node->uname, down->id); pcmk__update_graph(controld_globals.transition_graph, down); trigger_graph(); } } else { crm_trace("Node %s is %s, was expected to %s (op %d)", node->uname, ((alive > 0)? "alive" : ((alive < 0)? "dead" : "partially alive")), task, down->id); } } else if (appeared == FALSE) { if ((controld_globals.transition_graph == NULL) || (controld_globals.transition_graph->id == -1)) { crm_info("Stonith/shutdown of node %s is unknown to the " "current DC", node->uname); } else { crm_warn("Stonith/shutdown of node %s was not expected", node->uname); } if (!is_remote) { crm_update_peer_join(__func__, node, crm_join_none); check_join_state(controld_globals.fsa_state, __func__); } abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node failure", NULL); fail_incompletable_actions(controld_globals.transition_graph, node->uuid); } else { crm_trace("Node %s came up, was not expected to be down", node->uname); } if (is_remote) { /* A pacemaker_remote node won't have its cluster status updated * in the CIB by membership-layer callbacks, so do it here. */ flags |= node_update_cluster; /* Trigger resource placement on newly integrated nodes */ if (appeared) { abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Pacemaker Remote node integrated", NULL); } } if (!appeared && (type == crm_status_processes) && (node->when_member > 1)) { /* The node left CPG but is still a cluster member. Set its * membership time to 1 to record it in the cluster state as a * boolean, so we don't fence it due to * PCMK_OPT_NODE_PENDING_TIMEOUT. */ node->when_member = 1; flags |= node_update_cluster; controld_node_pending_timer(node); } /* Update the CIB node state */ update = create_node_state_update(node, flags, NULL, __func__); if (update == NULL) { crm_debug("Node state update not yet possible for %s", node->uname); } else { fsa_cib_anon_update(PCMK_XE_STATUS, update); } free_xml(update); } controld_trigger_fsa(); } gboolean crm_fsa_trigger(gpointer user_data) { crm_trace("Invoked (queue len: %d)", g_list_length(controld_globals.fsa_message_queue)); s_crmd_fsa(C_FSA_INTERNAL); crm_trace("Exited (queue len: %d)", g_list_length(controld_globals.fsa_message_queue)); return TRUE; } diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c index fd75a3794b..7acff3064f 100644 --- a/daemons/controld/controld_cib.c +++ b/daemons/controld/controld_cib.c @@ -1,1069 +1,1066 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* sleep */ #include #include #include #include #include // Call ID of the most recent in-progress CIB resource update (or 0 if none) static int pending_rsc_update = 0; /*! * \internal * \brief Respond to a dropped CIB connection * * \param[in] user_data CIB connection that dropped */ static void handle_cib_disconnect(gpointer user_data) { CRM_LOG_ASSERT(user_data == controld_globals.cib_conn); controld_trigger_fsa(); controld_globals.cib_conn->state = cib_disconnected; if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { // @TODO This should trigger a reconnect, not a shutdown crm_crit("Lost connection to the CIB manager, shutting down"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_CIB_CONNECTED); } else { // Expected crm_info("Disconnected from the CIB manager"); } } static void do_cib_updated(const char *event, xmlNode * msg) { const xmlNode *patchset = NULL; const char *client_name = NULL; crm_debug("Received CIB diff notification: DC=%s", pcmk__btoa(AM_I_DC)); if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { return; } if (cib__element_in_patchset(patchset, PCMK_XE_ALERTS) || cib__element_in_patchset(patchset, PCMK_XE_CRM_CONFIG)) { controld_trigger_config(); } if (!AM_I_DC) { // We're not in control of the join sequence return; } client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { // The CIB is still accurate return; } if (cib__element_in_patchset(patchset, PCMK_XE_NODES) || cib__element_in_patchset(patchset, PCMK_XE_STATUS)) { /* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS * section. Ensure the node list is up-to-date, and start the join * process again so we get everyone's current resource history. */ if (client_name == NULL) { client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTID); } crm_notice("Populating nodes and starting an election after %s event " "triggered by %s", event, pcmk__s(client_name, "(unidentified client)")); populate_cib_nodes(node_update_quick|node_update_all, __func__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } void controld_disconnect_cib_manager(void) { cib_t *cib_conn = controld_globals.cib_conn; CRM_ASSERT(cib_conn != NULL); crm_debug("Disconnecting from the CIB manager"); controld_clear_fsa_input_flags(R_CIB_CONNECTED); cib_conn->cmds->del_notify_callback(cib_conn, PCMK__VALUE_CIB_DIFF_NOTIFY, do_cib_updated); cib_free_callbacks(cib_conn); if (cib_conn->state != cib_disconnected) { cib_conn->cmds->set_secondary(cib_conn, cib_scope_local|cib_discard_reply); cib_conn->cmds->signoff(cib_conn); } } /* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static int cib_retries = 0; cib_t *cib_conn = controld_globals.cib_conn; void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; int rc = pcmk_ok; CRM_ASSERT(cib_conn != NULL); if (pcmk_is_set(action, A_CIB_STOP)) { if ((cib_conn->state != cib_disconnected) && (pending_rsc_update != 0)) { crm_info("Waiting for resource update %d to complete", pending_rsc_update); crmd_fsa_stall(FALSE); return; } controld_disconnect_cib_manager(); } if (!pcmk_is_set(action, A_CIB_START)) { return; } if (cur_state == S_STOPPING) { crm_err("Ignoring request to connect to the CIB manager after " "shutdown"); return; } rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); if (rc != pcmk_ok) { // A short wait that usually avoids stalling the FSA sleep(1); rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); } if (rc != pcmk_ok) { crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); } else if (cib_conn->cmds->set_connection_dnotify(cib_conn, dnotify_fn) != pcmk_ok) { crm_err("Could not set dnotify callback"); } else if (cib_conn->cmds->add_notify_callback(cib_conn, PCMK__VALUE_CIB_DIFF_NOTIFY, update_cb) != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); } else { controld_set_fsa_input_flags(R_CIB_CONNECTED); cib_retries = 0; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { cib_retries++; if (cib_retries < 30) { crm_warn("Couldn't complete CIB registration %d times... " "pause and retry", cib_retries); controld_start_wait_timer(); crmd_fsa_stall(FALSE); } else { crm_err("Could not complete CIB registration %d times... " "hard error", cib_retries); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } } #define MIN_CIB_OP_TIMEOUT (30) /*! * \internal * \brief Get the timeout (in seconds) that should be used with CIB operations * * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout * environment variable, or 10 seconds times one more than the number of * nodes in the cluster. */ unsigned int cib_op_timeout(void) { // @COMPAT: Drop env_timeout at 3.0.0 static int env_timeout = -1; unsigned int calculated_timeout = 0; if (env_timeout == -1) { const char *env = pcmk__env_option(PCMK__ENV_CIB_TIMEOUT); pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT); crm_trace("Minimum CIB op timeout: %ds (environment: %s)", env_timeout, (env? env : "none")); } - calculated_timeout = 1 + crm_active_peers(); - if (crm_remote_peer_cache) { - calculated_timeout += g_hash_table_size(crm_remote_peer_cache); - } - calculated_timeout *= 10; - + calculated_timeout = 10U * (1U + + pcmk__cluster_num_active_nodes() + + pcmk__cluster_num_remote_nodes()); calculated_timeout = QB_MAX(calculated_timeout, env_timeout); crm_trace("Calculated timeout: %us", calculated_timeout); if (controld_globals.cib_conn) { controld_globals.cib_conn->call_timeout = calculated_timeout; } return calculated_timeout; } /*! * \internal * \brief Get CIB call options to use local scope if primary is unavailable * * \return CIB call options */ int crmd_cib_smart_opt(void) { int call_opt = cib_none; if ((controld_globals.fsa_state == S_ELECTION) || (controld_globals.fsa_state == S_PENDING)) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(controld_globals.fsa_state)); cib__set_call_options(call_opt, "update", cib_scope_local); } return call_opt; } static void cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *desc = user_data; if (rc == 0) { crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); } else { crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", desc, call_id, pcmk_strerror(rc), rc); } } // Searches for various portions of PCMK__XE_NODE_STATE to delete // Match a particular node's PCMK__XE_NODE_STATE (takes node name 1x) #define XPATH_NODE_STATE "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" // Node's lrm section (name 1x) #define XPATH_NODE_LRM XPATH_NODE_STATE "/" PCMK__XE_LRM /* Node's PCMK__XE_LRM_RSC_OP entries and PCMK__XE_LRM_RESOURCE entries without * unexpired lock * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x) */ #define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" PCMK__XE_LRM_RSC_OP \ "|" XPATH_NODE_STATE \ "//" PCMK__XE_LRM_RESOURCE \ "[not(@" PCMK_OPT_SHUTDOWN_LOCK ") " \ "or " PCMK_OPT_SHUTDOWN_LOCK "<%lld]" // Node's PCMK__XE_TRANSIENT_ATTRIBUTES section (name 1x) #define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" PCMK__XE_TRANSIENT_ATTRIBUTES // Everything under PCMK__XE_NODE_STATE (name 1x) #define XPATH_NODE_ALL XPATH_NODE_STATE "/*" /* Unlocked history + transient attributes * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x, name 1x) */ #define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS /*! * \internal * \brief Get the XPath and description of a node state section to be deleted * * \param[in] uname Desired node * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to be deleted * \param[out] xpath Where to store XPath of \p section * \param[out] desc If not \c NULL, where to store description of \p section */ void controld_node_state_deletion_strings(const char *uname, enum controld_section_e section, char **xpath, char **desc) { const char *desc_pre = NULL; // Shutdown locks that started before this time are expired long long expire = (long long) time(NULL) - controld_globals.shutdown_lock_limit; switch (section) { case controld_section_lrm: *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); desc_pre = "resource history"; break; case controld_section_lrm_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, uname, uname, expire); desc_pre = "resource history (other than shutdown locks)"; break; case controld_section_attrs: *xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); desc_pre = "transient attributes"; break; case controld_section_all: *xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); desc_pre = "all state"; break; case controld_section_all_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, uname, uname, expire, uname); desc_pre = "all state (other than shutdown locks)"; break; default: // We called this function incorrectly CRM_ASSERT(false); break; } if (desc != NULL) { *desc = crm_strdup_printf("%s for node %s", desc_pre, uname); } } /*! * \internal * \brief Delete subsection of a node's CIB \c PCMK__XE_NODE_STATE * * \param[in] uname Desired node * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to delete * \param[in] options CIB call options to use */ void controld_delete_node_state(const char *uname, enum controld_section_e section, int options) { cib_t *cib = controld_globals.cib_conn; char *xpath = NULL; char *desc = NULL; int cib_rc = pcmk_ok; CRM_ASSERT((uname != NULL) && (cib != NULL)); controld_node_state_deletion_strings(uname, section, &xpath, &desc); cib__set_call_options(options, "node state deletion", cib_xpath|cib_multiple); cib_rc = cib->cmds->remove(cib, xpath, NULL, options); fsa_register_cib_callback(cib_rc, desc, cib_delete_callback); crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", desc, cib_rc, xpath); // CIB library handles freeing desc free(xpath); } // Takes node name and resource ID #define XPATH_RESOURCE_HISTORY "//" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_UNAME "='%s']/" \ PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \ "/" PCMK__XE_LRM_RESOURCE \ "[@" PCMK_XA_ID "='%s']" // @TODO could add "and @PCMK_OPT_SHUTDOWN_LOCK" to limit to locks /*! * \internal * \brief Clear resource history from CIB for a given resource and node * * \param[in] rsc_id ID of resource to be cleared * \param[in] node Node whose resource history should be cleared * \param[in] user_name ACL user name to use * \param[in] call_options CIB call options * * \return Standard Pacemaker return code */ int controld_delete_resource_history(const char *rsc_id, const char *node, const char *user_name, int call_options) { char *desc = NULL; char *xpath = NULL; int rc = pcmk_rc_ok; cib_t *cib = controld_globals.cib_conn; CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); if (cib == NULL) { crm_err("Unable to clear %s: no CIB connection", desc); free(desc); return ENOTCONN; } // Ask CIB to delete the entry xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); cib->cmds->set_user(cib, user_name); rc = cib->cmds->remove(cib, xpath, NULL, call_options|cib_xpath); cib->cmds->set_user(cib, NULL); if (rc < 0) { rc = pcmk_legacy2rc(rc); crm_err("Could not delete resource status of %s on %s%s%s: %s " CRM_XS " rc=%d", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : ""), pcmk_rc_str(rc), rc); free(desc); free(xpath); return rc; } if (pcmk_is_set(call_options, cib_sync_call)) { if (pcmk_is_set(call_options, cib_dryrun)) { crm_debug("Deletion of %s would succeed", desc); } else { crm_debug("Deletion of %s succeeded", desc); } free(desc); } else { crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s", desc, rc, xpath); fsa_register_cib_callback(rc, desc, cib_delete_callback); // CIB library handles freeing desc } free(xpath); return pcmk_rc_ok; } /*! * \internal * \brief Build XML and string of parameters meeting some criteria, for digest * * \param[in] op Executor event with parameter table to use * \param[in] metadata Parsed meta-data for executed resource agent * \param[in] param_type Flag used for selection criteria * \param[out] result Will be set to newly created XML with selected * parameters as attributes * * \return Newly allocated space-separated string of parameter names * \note Selection criteria varies by param_type: for the restart digest, we * want parameters that are *not* marked reloadable (OCF 1.1) or that * *are* marked unique (pre-1.1), for both string and XML results; for the * secure digest, we want parameters that *are* marked private for the * string, but parameters that are *not* marked private for the XML. * \note It is the caller's responsibility to free the string return value with * \p g_string_free() and the XML result with \p free_xml(). */ static GString * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, enum ra_param_flags_e param_type, xmlNode **result) { GString *list = NULL; *result = pcmk__xe_create(NULL, PCMK_XE_PARAMETERS); /* Consider all parameters only except private ones to be consistent with * what scheduler does with calculate_secure_digest(). */ if (param_type == ra_param_private && compare_version(controld_globals.dc_version, "3.16.0") >= 0) { g_hash_table_foreach(op->params, hash2field, *result); pcmk__filter_op_for_digest(*result); } for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept_for_list = false; bool accept_for_xml = false; switch (param_type) { case ra_param_reloadable: accept_for_list = !pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_unique: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_private: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = !accept_for_list; break; } if (accept_for_list) { crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); if (list == NULL) { // We will later search for " WORD ", so start list with a space pcmk__add_word(&list, 256, " "); } pcmk__add_word(&list, 0, param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (accept_for_xml) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(*result, param->rap_name, v); } } else { crm_trace("Removing attr %s from the xml result", param->rap_name); pcmk__xe_remove_attr(*result, param->rap_name); } } if (list != NULL) { // We will later search for " WORD ", so end list with a space pcmk__add_word(&list, 0, " "); } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) { /* Add parameters not marked reloadable to the PCMK__XA_OP_FORCE_RESTART * list */ list = build_parameter_list(op, metadata, ra_param_reloadable, &restart); } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) { /* @COMPAT pre-OCF-1.1 resource agents * * Before OCF 1.1, Pacemaker abused "unique=0" to indicate * reloadability. Add any parameters with unique="1" to the * PCMK__XA_OP_FORCE_RESTART list. */ list = build_parameter_list(op, metadata, ra_param_unique, &restart); } else { // Resource does not support agent reloads return; } digest = calculate_operation_digest(restart, version); /* Add PCMK__XA_OP_FORCE_RESTART and PCMK__XA_OP_RESTART_DIGEST to indicate * the resource supports reload, no matter if it actually supports any * reloadable parameters */ crm_xml_add(update, PCMK__XA_OP_FORCE_RESTART, (list == NULL)? "" : (const char *) list->str); crm_xml_add(update, PCMK__XA_OP_RESTART_DIGEST, digest); if ((list != NULL) && (list->len > 0)) { crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); } else { crm_trace("%s: %s", op->rsc_id, digest); } if (list != NULL) { g_string_free(list, TRUE); } free_xml(restart); free(digest); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* To keep PCMK__XA_OP_SECURE_PARAMS short, we want it to contain the secure * parameters but PCMK__XA_OP_SECURE_DIGEST to be based on the insecure ones */ list = build_parameter_list(op, metadata, ra_param_private, &secure); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, PCMK__XA_OP_SECURE_PARAMS, (const char *) list->str); crm_xml_add(update, PCMK__XA_OP_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); g_string_free(list, TRUE); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); } /*! * \internal * \brief Create XML for a resource history entry * * \param[in] func Function name of caller * \param[in,out] parent XML to add entry to * \param[in] rsc Affected resource * \param[in,out] op Action to add an entry for (or NULL to do nothing) * \param[in] node_name Node where action occurred */ void controld_add_resource_history_xml_as(const char *func, xmlNode *parent, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *node_name) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return; } target_rc = rsc_op_expected_rc(op); caller_version = g_hash_table_lookup(op->params, PCMK_XA_CRM_FEATURE_SET); CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, controld_globals.our_nodename, func); if (xml_op == NULL) { return; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return; } /* Ideally the metadata is cached, and the agent is just a fallback. * * @TODO Go through all callers and ensure they get metadata asynchronously * first. */ metadata = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_agent |controld_metadata_from_cache); if (metadata == NULL) { return; } crm_trace("Including additional digests for %s:%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return; } /*! * \internal * \brief Record an action as pending in the CIB, if appropriate * * \param[in] node_name Node where the action is pending * \param[in] rsc Resource that action is for * \param[in,out] op Pending action * * \return true if action was recorded in CIB, otherwise false */ bool controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL), return false); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return false; } // Check action's PCMK_META_RECORD_PENDING meta-attribute (defaults to true) record_pending = crm_meta_value(op->params, PCMK_META_RECORD_PENDING); if ((record_pending != NULL) && !crm_is_true(record_pending)) { return false; } op->call_id = -1; op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); crm_debug("Recording pending %s-interval %s for %s on %s in the CIB", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, node_name); controld_update_resource_history(node_name, rsc, op, 0); return true; } static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource history update completed (call=%d rc=%d)", call_id, rc); break; default: if (call_id > 0) { crm_warn("Resource history update %d failed: %s " CRM_XS " rc=%d", call_id, pcmk_strerror(rc), rc); } else { crm_warn("Resource history update failed: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } if (call_id == pending_rsc_update) { pending_rsc_update = 0; controld_trigger_fsa(); } } /* Only successful stops, and probes that found the resource inactive, get locks * recorded in the history. This ensures the resource stays locked to the node * until it is active there again after the node comes back up. */ static bool should_preserve_lock(lrmd_event_data_t *op) { if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { return false; } if (!strcmp(op->op_type, PCMK_ACTION_STOP) && (op->rc == PCMK_OCF_OK)) { return true; } if (!strcmp(op->op_type, PCMK_ACTION_MONITOR) && (op->rc == PCMK_OCF_NOT_RUNNING)) { return true; } return false; } /*! * \internal * \brief Request a CIB update * * \param[in] section Section of CIB to update * \param[in] data New XML of CIB section to update * \param[in] options CIB call options * \param[in] callback If not \c NULL, set this as the operation callback * * \return Standard Pacemaker return code * * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is * stored in \p pending_rsc_update on success. */ int controld_update_cib(const char *section, xmlNode *data, int options, void (*callback)(xmlNode *, int, int, xmlNode *, void *)) { cib_t *cib = controld_globals.cib_conn; int cib_rc = -ENOTCONN; CRM_ASSERT(data != NULL); if (cib != NULL) { cib_rc = cib->cmds->modify(cib, section, data, options); if (cib_rc >= 0) { crm_debug("Submitted CIB update %d for %s section", cib_rc, section); } } if (callback == NULL) { if (cib_rc < 0) { crm_err("Failed to update CIB %s section: %s", section, pcmk_rc_str(pcmk_legacy2rc(cib_rc))); } } else { if ((cib_rc >= 0) && (callback == cib_rsc_callback)) { /* Checking for a particular callback is a little hacky, but it * didn't seem worth adding an output argument for cib_rc for just * one use case. */ pending_rsc_update = cib_rc; } fsa_register_cib_callback(cib_rc, NULL, callback); } return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc); } /*! * \internal * \brief Update resource history entry in CIB * * \param[in] node_name Node where action occurred * \param[in] rsc Resource that action is for * \param[in,out] op Action to record * \param[in] lock_time If nonzero, when resource was locked to node * * \note On success, the CIB update's call ID will be stored in * pending_rsc_update. */ void controld_update_resource_history(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time) { xmlNode *update = NULL; xmlNode *xml = NULL; int call_opt = crmd_cib_smart_opt(); const char *node_id = NULL; const char *container = NULL; CRM_CHECK((node_name != NULL) && (op != NULL), return); if (rsc == NULL) { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); return; } // update = pcmk__xe_create(NULL, PCMK_XE_STATUS); // xml = pcmk__xe_create(update, PCMK__XE_NODE_STATE); if (pcmk__str_eq(node_name, controld_globals.our_nodename, pcmk__str_casei)) { node_id = controld_globals.our_uuid; } else { node_id = node_name; pcmk__xe_set_bool_attr(xml, PCMK_XA_REMOTE_NODE, true); } crm_xml_add(xml, PCMK_XA_ID, node_id); crm_xml_add(xml, PCMK_XA_UNAME, node_name); crm_xml_add(xml, PCMK_XA_CRM_DEBUG_ORIGIN, __func__); // xml = pcmk__xe_create(xml, PCMK__XE_LRM); crm_xml_add(xml, PCMK_XA_ID, node_id); // xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCES); // xml = pcmk__xe_create(xml, PCMK__XE_LRM_RESOURCE); crm_xml_add(xml, PCMK_XA_ID, op->rsc_id); crm_xml_add(xml, PCMK_XA_CLASS, rsc->standard); crm_xml_add(xml, PCMK_XA_PROVIDER, rsc->provider); crm_xml_add(xml, PCMK_XA_TYPE, rsc->type); if (lock_time != 0) { /* Actions on a locked resource should either preserve the lock by * recording it with the action result, or clear it. */ if (!should_preserve_lock(op)) { lock_time = 0; } crm_xml_add_ll(xml, PCMK_OPT_SHUTDOWN_LOCK, (long long) lock_time); } if (op->params != NULL) { container = g_hash_table_lookup(op->params, CRM_META "_" PCMK__META_CONTAINER); if (container != NULL) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(xml, PCMK__META_CONTAINER, container); } } // (possibly more than one) controld_add_resource_history_xml(xml, rsc, op, node_name); /* Update CIB asynchronously. Even if it fails, the resource state should be * discovered during the next election. Worst case, the node is wrongly * fenced for running a resource it isn't. */ crm_log_xml_trace(update, __func__); controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback); free_xml(update); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] op Operation whose history should be deleted */ void controld_delete_action_history(const lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = pcmk__xe_create(NULL, PCMK__XE_LRM_RSC_OP); crm_xml_add_int(xml_top, PCMK__XA_CALL_ID, op->call_id); crm_xml_add(xml_top, PCMK__XA_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, PCMK_XA_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, PCMK_XE_STATUS, xml_top, cib_none); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" \ "/" PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES \ "/" PCMK__XE_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']" \ "/" PCMK__XE_LRM_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY "[@" PCMK_XA_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" PCMK_XA_ID "='%s' and @" PCMK__XA_CALL_ID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" PCMK_XA_ID "='%s' and @" PCMK__XA_OPERATION_KEY "='%s']" /*! * \internal * \brief Delete a last_failure resource history entry from the CIB * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] action If specified, delete only if this was failed action * \param[in] interval_ms If \p action is specified, it has this interval */ void controld_cib_delete_last_failure(const char *rsc_id, const char *node, const char *action, guint interval_ms) { char *xpath = NULL; char *last_failure_key = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL), return); // Generate XPath to match desired entry last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0); if (action == NULL) { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, last_failure_key); } else { char *action_key = pcmk__op_key(rsc_id, action, interval_ms); xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id, last_failure_key, action_key); free(action_key); } free(last_failure_key); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } /*! * \internal * \brief Delete resource history entry from the CIB, given operation key * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] call_id If specified, delete entry only if it has this call ID */ void controld_delete_action_history_by_key(const char *rsc_id, const char *node, const char *key, int call_id) { char *xpath = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return); if (call_id > 0) { xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key, call_id); } else { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key); } controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 953e73b456..4e33e38da0 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -1,689 +1,689 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include static qb_ipcs_service_t *ipcs = NULL; static crm_trigger_t *config_read_trigger = NULL; #if SUPPORT_COROSYNC extern gboolean crm_connect_corosync(crm_cluster_t * cluster); #endif static void crm_shutdown(int nsig); static gboolean crm_read_options(gpointer user_data); /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = pcmk_cluster_new(); } if (action & A_HA_DISCONNECT) { - crm_cluster_disconnect(cluster); + pcmk_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); controld_set_fsa_input_flags(R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); crm_set_autoreap(FALSE); #if SUPPORT_COROSYNC if (is_corosync_cluster()) { registered = crm_connect_corosync(cluster); } #endif // SUPPORT_COROSYNC if (registered) { controld_election_init(cluster->uname); controld_globals.our_nodename = cluster->uname; controld_globals.our_uuid = cluster->uuid; if(cluster->uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } } if (!registered) { controld_set_fsa_input_flags(R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __func__); controld_clear_fsa_input_flags(R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ controld_set_fsa_input_flags(R_SHUTDOWN); controld_disconnect_fencer(FALSE); } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; controld_set_fsa_input_flags(R_SHUTDOWN); //controld_set_fsa_input_flags(R_STAYDOWN); crm_info("Sending shutdown request to all peers (DC is %s)", pcmk__s(controld_globals.dc_name, "not set")); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } void crmd_fast_exit(crm_exit_t exit_code) { if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", exit_code, CRM_EX_FATAL); exit_code = CRM_EX_FATAL; } else if ((exit_code == CRM_EX_OK) && pcmk_is_set(controld_globals.fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = CRM_EX_ERROR; } if (controld_globals.logger_out != NULL) { controld_globals.logger_out->finish(controld_globals.logger_out, exit_code, true, NULL); pcmk__output_free(controld_globals.logger_out); controld_globals.logger_out = NULL; } crm_exit(exit_code); } crm_exit_t crmd_exit(crm_exit_t exit_code) { GMainLoop *mloop = controld_globals.mainloop; static bool in_progress = FALSE; if (in_progress && (exit_code == CRM_EX_OK)) { crm_debug("Exit is already in progress"); return exit_code; } else if(in_progress) { crm_notice("Error during shutdown process, exiting now with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } in_progress = TRUE; crm_trace("Preparing to exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); /* Suppress secondary errors resulting from us disconnecting everything */ controld_set_fsa_input_flags(R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } controld_close_attrd_ipc(); controld_shutdown_schedulerd_ipc(); controld_disconnect_fencer(TRUE); if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) { crm_debug("No mainloop detected"); exit_code = CRM_EX_ERROR; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors stand out */ if (exit_code != CRM_EX_OK) { crm_notice("Forcing immediate exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } /* Clean up as much memory as possible for valgrind */ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; iter = iter->next) { fsa_data_t *fsa_data = (fsa_data_t *) iter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(controld_globals.fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } controld_clear_fsa_input_flags(R_MEMBERSHIP); g_list_free(controld_globals.fsa_message_queue); controld_globals.fsa_message_queue = NULL; controld_free_node_pending_timers(); controld_election_fini(); /* Tear down the CIB manager connection, but don't free it yet -- it could * be used when we drain the mainloop later. */ controld_disconnect_cib_manager(); verify_stopped(controld_globals.fsa_state, LOG_WARNING); controld_clear_fsa_input_flags(R_LRM_CONNECTED); lrm_state_destroy_all(); mainloop_destroy_trigger(config_read_trigger); config_read_trigger = NULL; controld_destroy_fsa_trigger(); controld_destroy_transition_trigger(); pcmk__client_cleanup(); crm_peer_destroy(); controld_free_fsa_timers(); te_cleanup_stonith_history_sync(NULL, TRUE); controld_free_sched_timer(); free(controld_globals.our_nodename); controld_globals.our_nodename = NULL; free(controld_globals.our_uuid); controld_globals.our_uuid = NULL; free(controld_globals.dc_name); controld_globals.dc_name = NULL; free(controld_globals.dc_version); controld_globals.dc_version = NULL; free(controld_globals.cluster_name); controld_globals.cluster_name = NULL; free(controld_globals.te_uuid); controld_globals.te_uuid = NULL; free_max_generation(); controld_destroy_failed_sync_table(); controld_destroy_outside_events_table(); mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); /* leave SIGCHLD engaged as we might still want to drain some service-actions */ if (mloop) { GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop); /* Don't re-enter this block */ controld_globals.mainloop = NULL; /* no signals on final draining anymore */ mainloop_destroy_signal(SIGCHLD); crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); { int lpc = 0; while((g_main_context_pending(ctx) && lpc < 10)) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); } else { mainloop_destroy_signal(SIGCHLD); } cib_delete(controld_globals.cib_conn); controld_globals.cib_conn = NULL; throttle_fini(); /* Graceful */ crm_trace("Done preparing for exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); return exit_code; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_exit_t exit_code = CRM_EX_OK; if (pcmk_is_set(action, A_EXIT_1)) { exit_code = CRM_EX_ERROR; crm_err("Exiting now due to errors"); } verify_stopped(cur_state, LOG_ERR); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); controld_init_fsa_trigger(); controld_init_transition_trigger(); crm_debug("Creating CIB manager and executor objects"); controld_globals.cib_conn = cib_new(); lrm_state_init_local(); if (controld_init_fsa_timers() == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } // \return libqb error code (0 on success, -errno on error) static int32_t accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("Accepting new IPC client connection"); if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } // \return libqb error code (0 on success, -errno on error) static int32_t dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags); if (msg == NULL) { pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL, CRM_EX_PROTOCOL); return 0; } pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(msg, PCMK__XA_CRM_USER, client->user); crm_xml_add(msg, PCMK__XA_CRM_SYS_FROM, client->id); if (controld_authorize_ipc_message(msg, client, NULL)) { crm_trace("Processing IPC message from client %s", pcmk__client_name(client)); route_message(C_IPC_MESSAGE, msg); } controld_trigger_fsa(); free_xml(msg); return 0; } static int32_t ipc_client_disconnected(qb_ipcs_connection_t *c) { pcmk__client_t *client = pcmk__find_client(c); if (client) { crm_trace("Disconnecting %sregistered client %s (%p/%p)", (client->userdata? "" : "un"), pcmk__client_name(client), c, client); free(client->userdata); pcmk__free_client(client); controld_trigger_fsa(); } return 0; } static void ipc_connection_destroyed(qb_ipcs_connection_t *c) { crm_trace("Connection %p", c); ipc_client_disconnected(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = accept_controller_client, .connection_created = NULL, .msg_process = dispatch_controller_ipc, .connection_closed = ipc_client_disconnected, .connection_destroyed = ipc_connection_destroyed }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_MEMBERSHIP)) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_READ_CONFIG)) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) { crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = pcmk__serve_controld_ipc(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else { crm_notice("Pacemaker controller successfully started and accepting connections"); } controld_set_fsa_input_flags(R_ST_REQUIRED); controld_timer_fencer_connect(GINT_TO_POINTER(TRUE)); controld_clear_fsa_input_flags(R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { controld_set_fsa_input_flags(R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); controld_set_fsa_input_flags(R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig != NULL) && !pcmk__xe_is(crmconfig, PCMK_XE_CRM_CONFIG)) { crmconfig = pcmk__xe_first_child(crmconfig, PCMK_XE_CRM_CONFIG, NULL, NULL); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " PCMK_XE_CRM_CONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = pcmk__strkey_table(free, free); pe_unpack_nvpairs(crmconfig, crmconfig, PCMK_XE_CLUSTER_PROPERTY_SET, NULL, config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, FALSE, now, NULL); // Validate all options, and use defaults if not already present in hash pcmk__validate_cluster_options(config_hash); /* Validate the watchdog timeout in the context of the local node * environment. If invalid, the controller will exit with a fatal error. * * We do this via a wrapper in the controller, so that we call * pcmk__valid_stonith_watchdog_timeout() only if watchdog fencing is * enabled for the local node. Otherwise, we may exit unnecessarily. * * A validator function in libcrmcommon can't act as such a wrapper, because * it doesn't have a stonith API connection or the local node name. */ value = g_hash_table_lookup(config_hash, PCMK_OPT_STONITH_WATCHDOG_TIMEOUT); controld_verify_stonith_watchdog_timeout(value); value = g_hash_table_lookup(config_hash, PCMK_OPT_NO_QUORUM_POLICY); if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei) && (pcmk__locate_sbd() != 0)) { controld_set_global_flags(controld_no_quorum_suicide); } value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK); if (crm_is_true(value)) { controld_set_global_flags(controld_shutdown_lock_enabled); } else { controld_clear_global_flags(controld_shutdown_lock_enabled); } value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT); pcmk_parse_interval_spec(value, &controld_globals.shutdown_lock_limit); controld_globals.shutdown_lock_limit /= 1000; value = g_hash_table_lookup(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT); pcmk_parse_interval_spec(value, &controld_globals.node_pending_timeout); controld_globals.node_pending_timeout /= 1000; value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME); pcmk__str_update(&(controld_globals.cluster_name), value); // Let subcomponents initialize their own static variables controld_configure_election(config_hash); controld_configure_fencing(config_hash); controld_configure_fsa_timers(config_hash); controld_configure_throttle(config_hash); alerts = pcmk__xe_first_child(output, PCMK_XE_ALERTS, NULL, NULL); crmd_unpack_alerts(alerts); controld_set_fsa_input_flags(R_READ_CONFIG); controld_trigger_fsa(); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } /*! * \internal * \brief Trigger read and processing of the configuration * * \param[in] fn Calling function name * \param[in] line Line number where call occurred */ void controld_trigger_config_as(const char *fn, int line) { if (config_read_trigger != NULL) { crm_trace("%s:%d - Triggered config processing", fn, line); mainloop_set_trigger(config_read_trigger); } } gboolean crm_read_options(gpointer user_data) { cib_t *cib_conn = controld_globals.cib_conn; int call_id = cib_conn->cmds->query(cib_conn, "//" PCMK_XE_CRM_CONFIG " | //" PCMK_XE_ALERTS, NULL, cib_xpath|cib_scope_local); fsa_register_cib_callback(call_id, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); controld_trigger_config(); } static void crm_shutdown(int nsig) { const char *value = NULL; guint default_period_ms = 0; if ((controld_globals.mainloop == NULL) || !g_main_loop_is_running(controld_globals.mainloop)) { crmd_exit(CRM_EX_OK); return; } if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); return; } controld_set_fsa_input_flags(R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); /* If shutdown timer doesn't have a period set, use the default * * @TODO: Evaluate whether this is still necessary. As long as * config_query_callback() has been run at least once, it doesn't look like * anything could have changed the timer period since then. */ value = pcmk__cluster_option(NULL, PCMK_OPT_SHUTDOWN_ESCALATION); pcmk_parse_interval_spec(value, &default_period_ms); controld_shutdown_start_countdown(default_period_ms); } diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c index 91170f494a..1f59048ede 100644 --- a/daemons/controld/controld_corosync.c +++ b/daemons/controld/controld_corosync.c @@ -1,161 +1,161 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #if SUPPORT_COROSYNC extern void post_cache_update(int seq); /* A_HA_CONNECT */ static void crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { crm_node_t *peer = NULL; xmlNode *xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Could not parse message content (%d): %.100s", kind, data); free(data); return; } crm_xml_add(xml, PCMK__XA_SRC, from); peer = pcmk__get_node(0, from, NULL, pcmk__node_search_cluster); if (!pcmk_is_set(peer->processes, crm_proc_cpg)) { /* If we can still talk to our peer process on that node, * then it must be part of the corosync membership */ crm_warn("Receiving messages from a node we think is dead: %s[%d]", peer->uname, peer->id); crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE); } crmd_ha_msg_filter(xml); free_xml(xml); } else { crm_err("Invalid message class (%d): %.100s", kind, data); } free(data); } static gboolean crmd_quorum_callback(unsigned long long seq, gboolean quorate) { crm_update_quorum(quorate, FALSE); post_cache_update(seq); return TRUE; } static void crmd_cs_destroy(gpointer user_data) { if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { crm_crit("Lost connection to cluster layer, shutting down"); crmd_exit(CRM_EX_DISCONNECT); } } /*! * \brief Handle a Corosync notification of a CPG configuration change * * \param[in] handle CPG connection * \param[in] cpg_name CPG group name * \param[in] member_list List of current CPG members * \param[in] member_list_entries Number of entries in \p member_list * \param[in] left_list List of CPG members that left * \param[in] left_list_entries Number of entries in \p left_list * \param[in] joined_list List of CPG members that joined * \param[in] joined_list_entries Number of entries in \p joined_list */ static void cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* When nodes leave CPG, the DC clears their transient node attributes. * * However if there is no DC, or the DC is among the nodes that left, each * remaining node needs to do the clearing, to ensure it gets done. * Otherwise, the attributes would persist when the nodes rejoin, which * could have serious consequences for unfencing, agents that use attributes * for internal logic, etc. * * Here, we set a global boolean if the DC is among the nodes that left, for * use by the peer callback. */ if (controld_globals.dc_name != NULL) { crm_node_t *peer = NULL; peer = pcmk__search_node_caches(0, controld_globals.dc_name, pcmk__node_search_cluster); if (peer != NULL) { for (int i = 0; i < left_list_entries; ++i) { if (left_list[i].nodeid == peer->id) { controld_set_global_flags(controld_dc_left); break; } } } } // Process the change normally, which will call the peer callback as needed pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries); controld_clear_global_flags(controld_dc_left); } extern gboolean crm_connect_corosync(crm_cluster_t * cluster); gboolean crm_connect_corosync(crm_cluster_t * cluster) { if (is_corosync_cluster()) { crm_set_status_callback(&peer_update_callback); cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch; cluster->cpg.cpg_confchg_fn = cpg_membership_callback; cluster->destroy = crmd_cs_destroy; - if (crm_cluster_connect(cluster)) { + if (pcmk_cluster_connect(cluster) == pcmk_rc_ok) { pcmk__corosync_quorum_connect(crmd_quorum_callback, crmd_cs_destroy); return TRUE; } } return FALSE; } #endif diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 4b5584dcd7..9f67ccce07 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -1,1119 +1,1119 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); /* * stonith failure counting * * We don't want to get stuck in a permanent fencing loop. Keep track of the * number of fencing failures for each target node, and the most we'll restart a * transition for. */ struct st_fail_rec { int count; }; static bool fence_reaction_panic = false; static unsigned long int stonith_max_attempts = 10; static GHashTable *stonith_failures = NULL; /*! * \internal * \brief Update max fencing attempts before giving up * * \param[in] value New max fencing attempts */ static void update_stonith_max_attempts(const char *value) { stonith_max_attempts = char2score(value); if (stonith_max_attempts < 1UL) { stonith_max_attempts = 10UL; } } /*! * \internal * \brief Configure reaction to notification of local node being fenced * * \param[in] reaction_s Reaction type */ static void set_fence_reaction(const char *reaction_s) { if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) { fence_reaction_panic = true; } else { if (!pcmk__str_eq(reaction_s, PCMK_VALUE_STOP, pcmk__str_casei)) { crm_warn("Invalid value '%s' for %s, using 'stop'", reaction_s, PCMK_OPT_FENCE_REACTION); } fence_reaction_panic = false; } } /*! * \internal * \brief Configure fencing options based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_fencing(GHashTable *options) { const char *value = NULL; value = g_hash_table_lookup(options, PCMK_OPT_FENCE_REACTION); set_fence_reaction(value); value = g_hash_table_lookup(options, PCMK_OPT_STONITH_MAX_ATTEMPTS); update_stonith_max_attempts(value); } static gboolean too_many_st_failures(const char *target) { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *value = NULL; if (stonith_failures == NULL) { return FALSE; } if (target == NULL) { g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (value->count >= stonith_max_attempts) { target = (const char*)key; goto too_many; } } } else { value = g_hash_table_lookup(stonith_failures, target); if ((value != NULL) && (value->count >= stonith_max_attempts)) { goto too_many; } } return FALSE; too_many: crm_warn("Too many failures (%d) to fence %s, giving up", value->count, target); return TRUE; } /*! * \internal * \brief Reset a stonith fail count * * \param[in] target Name of node to reset, or NULL for all */ void st_fail_count_reset(const char *target) { if (stonith_failures == NULL) { return; } if (target) { struct st_fail_rec *rec = NULL; rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count = 0; } } else { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *rec = NULL; g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rec)) { rec->count = 0; } } } static void st_fail_count_increment(const char *target) { struct st_fail_rec *rec = NULL; if (stonith_failures == NULL) { stonith_failures = pcmk__strkey_table(free, free); } rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count++; } else { rec = malloc(sizeof(struct st_fail_rec)); if(rec == NULL) { return; } rec->count = 1; g_hash_table_insert(stonith_failures, pcmk__str_copy(target), rec); } } /* end stonith fail count functions */ static void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } } static void send_stonith_update(pcmk__graph_action_t *action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node * might have already rejoined before we get the stonith result here. */ int flags = node_update_join | node_update_expected; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); /* Make sure the membership and join caches are accurate. * Try getting any existing node cache entry also by node uuid in case it * doesn't have an uname yet. */ peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. */ flags |= node_update_cluster; } if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = pcmk__str_copy(uuid); } crmd_peer_down(peer, TRUE); /* Generate a node state update for the CIB */ node_state = create_node_state_update(peer, flags, NULL, __func__); /* we have to mark whether or not remote nodes have already been fenced */ if (peer->flags & crm_remote_node) { char *now_s = pcmk__ttoa(time(NULL)); crm_xml_add(node_state, PCMK__XA_NODE_FENCED, now_s); free(now_s); } /* Force our known ID */ crm_xml_add(node_state, PCMK_XA_ID, uuid); rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, PCMK_XE_STATUS, node_state, cib_scope_local |cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated); // Make sure it sticks /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn, * cib_scope_local); */ controld_delete_node_state(peer->uname, controld_section_all, cib_scope_local); free_xml(node_state); return; } /*! * \internal * \brief Abort transition due to stonith failure * * \param[in] abort_action Whether to restart or stop transition * \param[in] target Don't restart if this (NULL for any) has too many failures * \param[in] reason Log this stonith action XML as abort reason (or NULL) */ static void abort_for_stonith_failure(enum pcmk__graph_next abort_action, const char *target, const xmlNode *reason) { /* If stonith repeatedly fails, we eventually give up on starting a new * transition for that reason. */ if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) { abort_action = pcmk__graph_wait; } abort_transition(PCMK_SCORE_INFINITY, abort_action, "Stonith failed", reason); } /* * stonith cleanup list * * If the DC is shot, proper notifications might not go out. * The stonith cleanup list allows the cluster to (re-)send * notifications once a new DC is elected. */ static GList *stonith_cleanup_list = NULL; /*! * \internal * \brief Add a node to the stonith cleanup list * * \param[in] target Name of node to add */ void add_stonith_cleanup(const char *target) { stonith_cleanup_list = g_list_append(stonith_cleanup_list, pcmk__str_copy(target)); } /*! * \internal * \brief Remove a node from the stonith cleanup list * * \param[in] Name of node to remove */ void remove_stonith_cleanup(const char *target) { GList *iter = stonith_cleanup_list; while (iter != NULL) { GList *tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) { crm_trace("Removing %s from the cleanup list", iter_name); stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the stonith cleanup list */ void purge_stonith_cleanup(void) { if (stonith_cleanup_list) { GList *iter = NULL; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } } /*! * \internal * \brief Send stonith updates for all entries in cleanup list, then purge it */ void execute_stonith_cleanup(void) { GList *iter; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_node_t *target_node = pcmk__get_node(0, target, NULL, pcmk__node_search_cluster); const char *uuid = crm_peer_uuid(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } /* end stonith cleanup list functions */ /* stonith API client * * Functions that need to interact directly with the fencer via its API */ static stonith_t *stonith_api = NULL; static mainloop_timer_t *controld_fencer_connect_timer = NULL; static char *te_client_id = NULL; static gboolean fail_incompletable_stonith(pcmk__graph_t *graph) { GList *lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GList *lpc2 = NULL; pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data; if ((action->type != pcmk__cluster_graph_action) || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { continue; } task = crm_element_value(action->xml, PCMK_XA_OPERATION); if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); last_action = action->xml; pcmk__update_graph(graph, action); crm_notice("Failing action %d (%s): fencer terminated", action->id, pcmk__xe_id(action->xml)); } } } if (last_action != NULL) { crm_warn("Fencer failure resulted in unrunnable actions"); abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e) { te_cleanup_stonith_history_sync(st, FALSE); if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_err("Lost fencer connection (will attempt to reconnect)"); if (!mainloop_timer_running(controld_fencer_connect_timer)) { mainloop_timer_start(controld_fencer_connect_timer); } } else { crm_info("Disconnected from fencer"); } if (stonith_api) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(st); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (AM_I_DC) { fail_incompletable_stonith(controld_globals.transition_graph); trigger_graph(); } } /*! * \internal * \brief Handle an event notification from the fencing API * * \param[in] st Fencing API connection (ignored) * \param[in] event Fencing API event notification */ static void handle_fence_notification(stonith_t *st, stonith_event_t *event) { bool succeeded = true; const char *executioner = "the cluster"; const char *client = "a client"; const char *reason = NULL; int exec_status; if (te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (event == NULL) { crm_err("Notify data not found"); return; } if (event->executioner != NULL) { executioner = event->executioner; } if (event->client_origin != NULL) { client = event->client_origin; } exec_status = stonith__event_execution_status(event); if ((stonith__event_exit_status(event) != CRM_EX_OK) || (exec_status != PCMK_EXEC_DONE)) { succeeded = false; if (exec_status == PCMK_EXEC_DONE) { exec_status = PCMK_EXEC_ERROR; } } reason = stonith__event_exit_reason(event); crmd_alert_fencing_op(event); if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) { // Unfencing doesn't need special handling, just a log message if (succeeded) { crm_notice("%s was unfenced by %s at the request of %s@%s", event->target, executioner, client, event->origin); } else { crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d", event->target, executioner, pcmk_exec_status_str(exec_status), ((reason == NULL)? "" : ": "), ((reason == NULL)? "" : reason), stonith__event_exit_status(event)); } return; } if (succeeded && pcmk__str_eq(event->target, controld_globals.our_nodename, pcmk__str_casei)) { /* We were notified of our own fencing. Most likely, either fencing was * misconfigured, or fabric fencing that doesn't cut cluster * communication is in use. * * Either way, shutting down the local host is a good idea, to require * administrator intervention. Also, other nodes would otherwise likely * set our status to lost because of the fencing callback and discard * our subsequent election votes as "not part of our cluster". */ crm_crit("We were allegedly just fenced by %s for %s!", executioner, event->origin); // Dumps blackbox if enabled if (fence_reaction_panic) { pcmk__panic(__func__); } else { crm_exit(CRM_EX_FATAL); } return; // Should never get here } /* Update the count of fencing failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC) { if (succeeded) { st_fail_count_reset(event->target); } else { st_fail_count_increment(event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: " "%s%s%s%s " CRM_XS " event=%s", event->target, (succeeded? "" : " not"), event->action, executioner, client, event->origin, (succeeded? "OK" : pcmk_exec_status_str(exec_status)), ((reason == NULL)? "" : " ("), ((reason == NULL)? "" : reason), ((reason == NULL)? "" : ")"), event->id); if (succeeded) { crm_node_t *peer = pcmk__search_node_caches(0, event->target, pcmk__node_search_any |pcmk__node_search_known); const char *uuid = NULL; if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); if (AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the - * peer cache via crm_remote_peer_cache_refresh(). For now, we rely - * on the scheduler creating fence pseudo-events for the guests. + * peer cache via refresh_remote_nodes(). For now, we rely on the + * scheduler creating fence pseudo-events for the guests. */ if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) { /* Abort the current transition if it wasn't the cluster that * initiated fencing. */ crm_info("External fencing operation from %s fenced %s", client, event->target); abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "External Fencing Operation", NULL); } } else if (pcmk__str_eq(controld_globals.dc_name, event->target, pcmk__str_null_matches|pcmk__str_casei) && !pcmk_is_set(peer->flags, crm_remote_node)) { // Assume the target was our DC if we don't currently have one if (controld_globals.dc_name != NULL) { crm_notice("Fencing target %s was our DC", event->target); } else { crm_notice("Fencing target %s may have been our DC", event->target); } /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (pcmk__str_eq(event->executioner, controld_globals.our_nodename, pcmk__str_casei)) { send_stonith_update(NULL, event->target, uuid); } add_stonith_cleanup(event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (pcmk_is_set(peer->flags, crm_remote_node)) { remote_ra_fail(event->target); } crmd_peer_down(peer, TRUE); } } /*! * \brief Connect to fencer * * \param[in] user_data If NULL, retry failures now, otherwise retry in mainloop timer * * \return G_SOURCE_REMOVE on success, G_SOURCE_CONTINUE to retry * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. */ gboolean controld_timer_fencer_connect(gpointer user_data) { int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return G_SOURCE_REMOVE; } } if (stonith_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); return G_SOURCE_REMOVE; } if (user_data == NULL) { // Blocking (retry failures now until successful) rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 30 attempts: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } else { // Non-blocking (retry failures later in main loop) rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (controld_fencer_connect_timer == NULL) { controld_fencer_connect_timer = mainloop_timer_add("controld_fencer_connect", 1000, TRUE, controld_timer_fencer_connect, GINT_TO_POINTER(TRUE)); } if (rc != pcmk_ok) { if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_notice("Fencer connection failed (will retry): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); if (!mainloop_timer_running(controld_fencer_connect_timer)) { mainloop_timer_start(controld_fencer_connect_timer); } return G_SOURCE_CONTINUE; } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } return G_SOURCE_REMOVE; } } if (rc == pcmk_ok) { stonith_api_operations_t *cmds = stonith_api->cmds; cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_FENCE, handle_fence_notification); cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, tengine_stonith_history_synced); te_trigger_stonith_history_sync(TRUE); crm_notice("Fencer successfully connected"); } return G_SOURCE_REMOVE; } void controld_disconnect_fencer(bool destroy) { if (stonith_api) { // Prevent fencer connection from coming up again controld_clear_fsa_input_flags(R_ST_REQUIRED); if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(stonith_api); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (destroy) { if (stonith_api) { stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (controld_fencer_connect_timer) { mainloop_timer_del(controld_fencer_connect_timer); controld_fencer_connect_timer = NULL; } if (te_client_id) { free(te_client_id); te_client_id = NULL; } } } static gboolean do_stonith_history_sync(gpointer user_data) { if (stonith_api && (stonith_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; te_cleanup_stonith_history_sync(stonith_api, FALSE); stonith_api->cmds->history(stonith_api, st_opt_sync_call | st_opt_broadcast, NULL, &history, 5); stonith_history_free(history); return TRUE; } else { crm_info("Skip triggering stonith history-sync as stonith is disconnected"); return FALSE; } } static void tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) { char *uuid = NULL; int stonith_id = -1; int transition_id = -1; pcmk__graph_action_t *action = NULL; const char *target = NULL; if ((data == NULL) || (data->userdata == NULL)) { crm_err("Ignoring fence operation %d result: " "No transition key given (bug?)", ((data == NULL)? -1 : data->call_id)); return; } if (!AM_I_DC) { const char *reason = stonith__exit_reason(data); if (reason == NULL) { reason = pcmk_exec_status_str(stonith__execution_status(data)); } crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s", data->call_id, stonith__exit_status(data), reason, (const char *) data->userdata); return; } CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id, &stonith_id, NULL), goto bail); if (controld_globals.transition_graph->complete || (stonith_id < 0) || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none) || (controld_globals.transition_graph->id != transition_id)) { crm_info("Ignoring fence operation %d result: " "Not from current transition " CRM_XS " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)", data->call_id, pcmk__btoa(controld_globals.transition_graph->complete), stonith_id, uuid, controld_globals.te_uuid, transition_id, controld_globals.transition_graph->id); goto bail; } action = controld_get_action(stonith_id); if (action == NULL) { crm_err("Ignoring fence operation %d result: " "Action %d not found in transition graph (bug?) " CRM_XS " uuid=%s transition=%d", data->call_id, stonith_id, uuid, transition_id); goto bail; } target = crm_element_value(action->xml, PCMK__META_ON_NODE); if (target == NULL) { crm_err("Ignoring fence operation %d result: No target given (bug?)", data->call_id); goto bail; } stop_te_timer(action); if (stonith__exit_status(data) == CRM_EX_OK) { const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID); const char *op = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); crm_info("Fence operation %d for %s succeeded", data->call_id, target); if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { te_action_confirmed(action, NULL); if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) { const char *value = NULL; char *now = pcmk__ttoa(time(NULL)); gboolean is_remote_node = FALSE; /* This check is not 100% reliable, since this node is not * guaranteed to have the remote node cached. However, it * doesn't have to be reliable, since the attribute manager can * learn a node's "remoteness" by other means sooner or later. * This allows it to learn more quickly if this node does have * the information. */ if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) { is_remote_node = TRUE; } update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, is_remote_node); free(now); value = crm_meta_value(action->params, PCMK__META_DIGESTS_ALL); update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, is_remote_node); value = crm_meta_value(action->params, PCMK__META_DIGESTS_SECURE); update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, is_remote_node); } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) { send_stonith_update(action, target, uuid); pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); } } st_fail_count_reset(target); } else { enum pcmk__graph_next abort_action = pcmk__graph_restart; int status = stonith__execution_status(data); const char *reason = stonith__exit_reason(data); if (reason == NULL) { if (status == PCMK_EXEC_DONE) { reason = "Agent returned error"; } else { reason = pcmk_exec_status_str(status); } } pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); /* If no fence devices were available, there's no use in immediately * checking again, so don't start a new transition in that case. */ if (status == PCMK_EXEC_NO_FENCE_DEVICE) { crm_warn("Fence operation %d for %s failed: %s " "(aborting transition and giving up for now)", data->call_id, target, reason); abort_action = pcmk__graph_wait; } else { crm_notice("Fence operation %d for %s failed: %s " "(aborting transition)", data->call_id, target, reason); } /* Increment the fail count now, so abort_for_stonith_failure() can * check it. Non-DC nodes will increment it in * handle_fence_notification(). */ st_fail_count_increment(target); abort_for_stonith_failure(abort_action, target, NULL); } pcmk__update_graph(controld_globals.transition_graph, action); trigger_graph(); bail: free(data->userdata); free(uuid); return; } static int fence_with_delay(const char *target, const char *type, int delay) { uint32_t options = st_opt_none; // Group of enum stonith_call_options int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout / 1000); if (crmd_join_phase_count(crm_join_confirmed) == 1) { stonith__set_call_options(options, target, st_opt_allow_suicide); } return stonith_api->cmds->fence_with_delay(stonith_api, options, target, type, timeout_sec, 0, delay); } /*! * \internal * \brief Execute a fencing action from a transition graph * * \param[in] graph Transition graph being executed (ignored) * \param[in] action Fencing action to execute * * \return Standard Pacemaker return code */ int controld_execute_fence_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { int rc = 0; const char *id = pcmk__xe_id(action->xml); const char *uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID); const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *type = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); char *transition_key = NULL; const char *priority_delay = NULL; int delay_i = 0; gboolean invalid_action = FALSE; int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout / 1000); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return EPROTO; } priority_delay = crm_meta_value(action->params, PCMK_OPT_PRIORITY_FENCING_DELAY); crm_notice("Requesting fencing (%s) targeting node %s " CRM_XS " action=%s timeout=%i%s%s", type, target, id, stonith_timeout, priority_delay ? " priority_delay=" : "", priority_delay ? priority_delay : ""); /* Passing NULL means block until we can connect... */ controld_timer_fencer_connect(NULL); pcmk__scan_min_int(priority_delay, &delay_i, 0); rc = fence_with_delay(target, type, delay_i); transition_key = pcmk__transition_key(controld_globals.transition_graph->id, action->id, 0, controld_globals.te_uuid), stonith_api->cmds->register_callback(stonith_api, rc, (stonith_timeout + (delay_i > 0 ? delay_i : 0)), st_opt_timeout_updates, transition_key, "tengine_stonith_callback", tengine_stonith_callback); return pcmk_rc_ok; } bool controld_verify_stonith_watchdog_timeout(const char *value) { long long st_timeout = (value != NULL)? crm_get_msec(value) : 0; const char *our_nodename = controld_globals.our_nodename; if (st_timeout == 0 || (stonith_api && (stonith_api->state != stonith_disconnected) && stonith__watchdog_fencing_enabled_for_node_api(stonith_api, our_nodename))) { return pcmk__valid_stonith_watchdog_timeout(value); } return true; } /* end stonith API client functions */ /* * stonith history synchronization * * Each node's fencer keeps track of a cluster-wide fencing history. When a node * joins or leaves, we need to synchronize the history across all nodes. */ static crm_trigger_t *stonith_history_sync_trigger = NULL; static mainloop_timer_t *stonith_history_sync_timer_short = NULL; static mainloop_timer_t *stonith_history_sync_timer_long = NULL; void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) { if (free_timers) { mainloop_timer_del(stonith_history_sync_timer_short); stonith_history_sync_timer_short = NULL; mainloop_timer_del(stonith_history_sync_timer_long); stonith_history_sync_timer_long = NULL; } else { mainloop_timer_stop(stonith_history_sync_timer_short); mainloop_timer_stop(stonith_history_sync_timer_long); } if (st) { st->cmds->remove_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED); } } static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) { te_cleanup_stonith_history_sync(st, FALSE); crm_debug("Fence-history synced - cancel all timers"); } static gboolean stonith_history_sync_set_trigger(gpointer user_data) { mainloop_set_trigger(stonith_history_sync_trigger); return FALSE; } void te_trigger_stonith_history_sync(bool long_timeout) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic * * the long timeout of 30s is there as a fallback * so that after a successful connection to fenced * we will wait for 30s for the DC to trigger a * history-sync * if this doesn't happen we trigger a sync locally * (e.g. fenced segfaults and is restarted by pacemakerd) */ /* as we are finally checking the stonith-connection * in do_stonith_history_sync we should be fine * leaving stonith_history_sync_time & stonith_history_sync_trigger * around */ if (stonith_history_sync_trigger == NULL) { stonith_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_stonith_history_sync, NULL); } if (long_timeout) { if(stonith_history_sync_timer_long == NULL) { stonith_history_sync_timer_long = mainloop_timer_add("history_sync_long", 30000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); mainloop_timer_start(stonith_history_sync_timer_long); } else { if(stonith_history_sync_timer_short == NULL) { stonith_history_sync_timer_short = mainloop_timer_add("history_sync_short", 5000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); mainloop_timer_start(stonith_history_sync_timer_short); } } /* end stonith history synchronization functions */ diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c index 2840349815..79b3507ce5 100644 --- a/daemons/controld/controld_fsa.c +++ b/daemons/controld/controld_fsa.c @@ -1,733 +1,733 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include // uint64_t #include #include #include #include #include #include #include #include #include //! Triggers an FSA invocation static crm_trigger_t *fsa_trigger = NULL; #define DOT_PREFIX "actions:trace: " #define do_dot_log(fmt, args...) crm_trace( fmt, ##args) static void do_state_transition(enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data); void s_crmd_fsa_actions(fsa_data_t * fsa_data); void log_fsa_input(fsa_data_t * stored_msg); void init_dotfile(void); void init_dotfile(void) { do_dot_log(DOT_PREFIX "digraph \"g\" {"); do_dot_log(DOT_PREFIX " size = \"30,30\""); do_dot_log(DOT_PREFIX " graph ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " node ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " shape = \"ellipse\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " edge ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// special nodes"); do_dot_log(DOT_PREFIX " \"S_PENDING\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"blue\""); do_dot_log(DOT_PREFIX " fontcolor = \"blue\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"red\""); do_dot_log(DOT_PREFIX " fontcolor = \"red\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// DC only nodes"); do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]"); } static void do_fsa_action(fsa_data_t * fsa_data, long long an_action, void (*function) (long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)) { controld_clear_fsa_action_flags(an_action); crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action)); function(an_action, fsa_data->fsa_cause, controld_globals.fsa_state, fsa_data->fsa_input, fsa_data); } static const uint64_t startup_actions = A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED | A_CL_JOIN_QUERY; // A_LOG, A_WARN, A_ERROR void do_log(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data) { unsigned log_type = LOG_TRACE; if (action & A_LOG) { log_type = LOG_INFO; } else if (action & A_WARN) { log_type = LOG_WARNING; } else if (action & A_ERROR) { log_type = LOG_ERR; } do_crm_log(log_type, "Input %s received in state %s from %s", fsa_input2string(msg_data->fsa_input), fsa_state2string(cur_state), msg_data->origin); if (msg_data->data_type == fsa_dt_ha_msg) { ha_msg_input_t *input = fsa_typed_data(msg_data->data_type); crm_log_xml_debug(input->msg, __func__); } else if (msg_data->data_type == fsa_dt_xml) { xmlNode *input = fsa_typed_data(msg_data->data_type); crm_log_xml_debug(input, __func__); } else if (msg_data->data_type == fsa_dt_lrm) { lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type); do_crm_log(log_type, "Resource %s: Call ID %d returned %d (%d)." " New status if rc=0: %s", input->rsc_id, input->call_id, input->rc, input->op_status, (char *)input->user_data); } } /*! * \internal * \brief Initialize the FSA trigger */ void controld_init_fsa_trigger(void) { fsa_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); } /*! * \internal * \brief Destroy the FSA trigger */ void controld_destroy_fsa_trigger(void) { // This basically will not work, since mainloop has a reference to it mainloop_destroy_trigger(fsa_trigger); fsa_trigger = NULL; } /*! * \internal * \brief Trigger an FSA invocation * * \param[in] fn Calling function name * \param[in] line Line number where call occurred */ void controld_trigger_fsa_as(const char *fn, int line) { if (fsa_trigger != NULL) { crm_trace("%s:%d - Triggered FSA invocation", fn, line); mainloop_set_trigger(fsa_trigger); } } enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { controld_globals_t *globals = &controld_globals; fsa_data_t *fsa_data = NULL; uint64_t register_copy = controld_globals.fsa_input_register; uint64_t new_actions = A_NOTHING; enum crmd_fsa_state last_state; crm_trace("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(globals->fsa_state)); fsa_dump_actions(controld_globals.fsa_actions, "Initial"); controld_clear_global_flags(controld_fsa_is_stalled); if ((controld_globals.fsa_message_queue == NULL) && (controld_globals.fsa_actions != A_NOTHING)) { /* fake the first message so we can get into the loop */ fsa_data = pcmk__assert_alloc(1, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __func__; fsa_data->data_type = fsa_dt_none; controld_globals.fsa_message_queue = g_list_append(controld_globals.fsa_message_queue, fsa_data); } while ((controld_globals.fsa_message_queue != NULL) && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { crm_trace("Checking messages (%d remaining)", g_list_length(controld_globals.fsa_message_queue)); fsa_data = get_message(); if(fsa_data == NULL) { continue; } log_fsa_input(fsa_data); /* add any actions back to the queue */ controld_set_fsa_action_flags(fsa_data->actions); fsa_dump_actions(fsa_data->actions, "Restored actions"); /* get the next batch of actions */ new_actions = controld_fsa_get_action(fsa_data->fsa_input); controld_set_fsa_action_flags(new_actions); fsa_dump_actions(new_actions, "New actions"); if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(globals->fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); } /* logging : *before* the state is changed */ if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); } /* update state variables */ last_state = globals->fsa_state; globals->fsa_state = controld_fsa_get_next_state(fsa_data->fsa_input); /* * Remove certain actions during shutdown */ if ((globals->fsa_state == S_STOPPING) || pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { controld_clear_fsa_action_flags(startup_actions); } /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if (last_state != globals->fsa_state) { do_state_transition(last_state, globals->fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(globals->fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); } if ((controld_globals.fsa_message_queue != NULL) || (controld_globals.fsa_actions != A_NOTHING) || pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=%#llx, stalled=%s", g_list_length(controld_globals.fsa_message_queue), (unsigned long long) controld_globals.fsa_actions, pcmk__flag_text(controld_globals.flags, controld_fsa_is_stalled)); } else { crm_trace("Exiting the FSA"); } /* cleanup inputs? */ if (register_copy != controld_globals.fsa_input_register) { uint64_t same = register_copy & controld_globals.fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added", controld_globals.fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same); } fsa_dump_actions(controld_globals.fsa_actions, "Remaining"); fsa_dump_queue(LOG_DEBUG); return globals->fsa_state; } void s_crmd_fsa_actions(fsa_data_t * fsa_data) { /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ CRM_CHECK(fsa_data != NULL, return); while ((controld_globals.fsa_actions != A_NOTHING) && !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) { /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } else if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } else if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); /* get out of here NOW! before anything worse happens */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_1)) { do_fsa_action(fsa_data, A_EXIT_1, do_exit); /* sub-system restart */ } else if (pcmk_all_flags_set(controld_globals.fsa_actions, O_LRM_RECONNECT)) { do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control); } else if (pcmk_all_flags_set(controld_globals.fsa_actions, O_CIB_RESTART)) { do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); } else if (pcmk_all_flags_set(controld_globals.fsa_actions, O_PE_RESTART)) { do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); } else if (pcmk_all_flags_set(controld_globals.fsa_actions, O_TE_RESTART)) { do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); /* essential start tasks */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTUP)) { do_fsa_action(fsa_data, A_STARTUP, do_startup); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_START)) { do_fsa_action(fsa_data, A_CIB_START, do_cib_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_CONNECT)) { do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_READCONFIG)) { do_fsa_action(fsa_data, A_READCONFIG, do_read_config); /* sub-system start/connect */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_CONNECT)) { do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_START)) { do_fsa_action(fsa_data, A_TE_START, do_te_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_START)) { do_fsa_action(fsa_data, A_PE_START, do_pe_control); /* Timers */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TIMER_STOP)) { do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_INTEGRATE_TIMER_STOP)) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_INTEGRATE_TIMER_START)) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_FINALIZE_TIMER_STOP)) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_FINALIZE_TIMER_START)) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); /* * Highest priority actions */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_MSG_ROUTE)) { do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); } else if (pcmk_is_set(controld_globals.fsa_actions, A_RECOVER)) { do_fsa_action(fsa_data, A_RECOVER, do_recover); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_RESULT)) { do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_REQUEST)) { do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond); } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN_REQ)) { do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_VOTE)) { do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_COUNT)) { do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_EVENT)) { do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); /* * High priority actions */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTED)) { do_fsa_action(fsa_data, A_STARTED, do_started); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_QUERY)) { do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TIMER_START)) { do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); /* * Medium priority actions * - Membership */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TAKEOVER)) { do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASE)) { do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_FINAL)) { do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final); } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_CHECK)) { do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); } else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_START)) { do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_OFFER_ALL)) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_OFFER_ONE)) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_PROCESS_REQ)) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_PROCESS_ACK)) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_FINALIZE)) { do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_ANNOUNCE)) { do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the * scheduler, and the scheduler before the transition engine. */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_HALT)) { do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_CANCEL)) { do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_INVOKE)) { do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_INVOKE)) { do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_INVOKE)) { do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); /* Shutdown actions */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASED)) { do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); } else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_STOP)) { do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_STOP)) { do_fsa_action(fsa_data, A_TE_STOP, do_te_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN)) { do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); } else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_DISCONNECT)) { do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_DISCONNECT)) { do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_STOP)) { do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); } else if (pcmk_is_set(controld_globals.fsa_actions, A_STOP)) { do_fsa_action(fsa_data, A_STOP, do_stop); /* exit gracefully */ } else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_0)) { do_fsa_action(fsa_data, A_EXIT_0, do_exit); /* Error checking and reporting */ } else { crm_err("Action %s not supported "CRM_XS" %#llx", fsa_action2string(controld_globals.fsa_actions), (unsigned long long) controld_globals.fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __func__); } } } void log_fsa_input(fsa_data_t * stored_msg) { CRM_ASSERT(stored_msg); crm_trace("Processing queued input %d", stored_msg->id); if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_trace("FSA processing LRM callback from %s", stored_msg->origin); } else if (stored_msg->data == NULL) { crm_trace("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, __func__); crm_trace("FSA processing XML message from %s", stored_msg->origin); crm_log_xml_trace(ha_input->xml, "FSA message data"); } } static void check_join_counts(fsa_data_t *msg_data) { int count; guint npeers; count = crmd_join_phase_count(crm_join_finalized); if (count > 0) { crm_err("%d cluster node%s failed to confirm join", count, pcmk__plural_s(count)); crmd_join_phase_log(LOG_NOTICE); return; } - npeers = crm_active_peers(); + npeers = pcmk__cluster_num_active_nodes(); count = crmd_join_phase_count(crm_join_confirmed); if (count == npeers) { if (npeers == 1) { crm_debug("Sole active cluster node is fully joined"); } else { crm_debug("All %d active cluster nodes are fully joined", count); } } else if (count > npeers) { crm_err("New election needed because more nodes confirmed join " "than are in membership (%d > %u)", count, npeers); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else if (controld_globals.membership_id != crm_peer_seq) { crm_info("New join needed because membership changed (%llu -> %llu)", controld_globals.membership_id, crm_peer_seq); register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } else { crm_warn("Only %d of %u active cluster nodes fully joined " "(%d did not respond to offer)", count, npeers, crmd_join_phase_count(crm_join_welcomed)); } } static void do_state_transition(enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t *msg_data) { int level = LOG_INFO; int count = 0; gboolean clear_recovery_bit = TRUE; #if 0 uint64_t original_fsa_actions = controld_globals.fsa_actions; #endif enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_LOG_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); if (cur_state == S_IDLE || next_state == S_IDLE) { level = LOG_NOTICE; } else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) { level = LOG_NOTICE; } else if (cur_state == S_ELECTION) { level = LOG_NOTICE; } else if (cur_state == S_STARTING) { level = LOG_NOTICE; } else if (next_state == S_RECOVERY) { level = LOG_WARNING; } do_crm_log(level, "State transition %s -> %s " CRM_XS " input=%s cause=%s origin=%s", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) { controld_stop_current_election_timeout(); } if (next_state == S_INTEGRATION) { controld_set_fsa_action_flags(A_INTEGRATE_TIMER_START); } else { controld_set_fsa_action_flags(A_INTEGRATE_TIMER_STOP); } if (next_state == S_FINALIZE_JOIN) { controld_set_fsa_action_flags(A_FINALIZE_TIMER_START); } else { controld_set_fsa_action_flags(A_FINALIZE_TIMER_STOP); } if (next_state != S_PENDING) { controld_set_fsa_action_flags(A_DC_TIMER_STOP); } if (next_state != S_IDLE) { controld_stop_recheck_timer(); } if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) { populate_cib_nodes(node_update_quick|node_update_all, __func__); } switch (next_state) { case S_PENDING: { cib_t *cib_conn = controld_globals.cib_conn; cib_conn->cmds->set_secondary(cib_conn, cib_scope_local); } update_dc(NULL); break; case S_ELECTION: update_dc(NULL); break; case S_NOT_DC: controld_reset_counter_election_timer(); purge_stonith_cleanup(); if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); controld_set_fsa_action_flags(A_SHUTDOWN_REQ); } CRM_LOG_ASSERT(controld_globals.dc_name != NULL); if (controld_globals.dc_name == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } count = crmd_join_phase_count(crm_join_welcomed); if (count > 0) { crm_warn("%d cluster node%s failed to respond to join offer", count, pcmk__plural_s(count)); crmd_join_phase_log(LOG_NOTICE); } else { crm_debug("All cluster nodes (%d) responded to join offer", crmd_join_phase_count(crm_join_integrated)); } break; case S_POLICY_ENGINE: controld_reset_counter_election_timer(); CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_info("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } check_join_counts(msg_data); break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ controld_set_fsa_input_flags(R_SHUTDOWN); break; case S_IDLE: CRM_LOG_ASSERT(AM_I_DC); if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we are the DC"); controld_set_fsa_action_flags(A_SHUTDOWN_REQ); } controld_start_recheck_timer(); break; default: break; } if (clear_recovery_bit && next_state != S_PENDING) { controld_clear_fsa_action_flags(A_RECOVER); } else if (clear_recovery_bit == FALSE) { controld_set_fsa_action_flags(A_RECOVER); } #if 0 if (original_fsa_actions != controld_globals.fsa_actions) { fsa_dump_actions(original_fsa_actions ^ controld_globals.fsa_actions, "New actions"); } #endif } diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c index 0a6bc218e1..97e6685a28 100644 --- a/daemons/controld/controld_join_client.c +++ b/daemons/controld/controld_join_client.c @@ -1,366 +1,366 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); /*! * \internal * \brief Remember if DC is shutting down as we join * * If we're joining while the current DC is shutting down, update its expected * state, so we don't fence it if we become the new DC. (We weren't a peer * when it broadcast its shutdown request.) * * \param[in] msg A join message from the DC */ static void update_dc_expected(const xmlNode *msg) { if ((controld_globals.dc_name != NULL) && pcmk__xe_attr_is_true(msg, PCMK__XA_DC_LEAVING)) { crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, pcmk__node_search_cluster); pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); } } /* A_CL_JOIN_QUERY */ /* is there a DC out there? */ void do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); sleep(1); // Give the cluster layer time to propagate to the DC update_dc(NULL); /* Unset any existing value so that the result is not discarded */ crm_debug("Querying for a DC"); send_cluster_message(NULL, crm_msg_crmd, req, FALSE); free_xml(req); } /* A_CL_JOIN_ANNOUNCE */ /* this is kind of a workaround for the fact that we may not be around or * are otherwise unable to reply when the DC sends out A_DC_JOIN_OFFER_ALL */ void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* don't announce if we're in one of these states */ if (cur_state != S_PENDING) { crm_warn("Not announcing cluster join because in state %s", fsa_state2string(cur_state)); return; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) { /* send as a broadcast */ xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_debug("Announcing availability"); update_dc(NULL); send_cluster_message(NULL, crm_msg_crmd, req, FALSE); free_xml(req); } else { /* Delay announce until we have finished local startup */ crm_warn("Delaying announce of cluster join until local startup is complete"); return; } } static int query_call_id = 0; /* A_CL_JOIN_REQUEST */ /* aka. accept the welcome offer */ void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { cib_t *cib_conn = controld_globals.cib_conn; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *welcome_from; const char *join_id; CRM_CHECK(input != NULL, return); welcome_from = crm_element_value(input->msg, PCMK__XA_SRC); join_id = crm_element_value(input->msg, PCMK__XA_JOIN_ID); crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s", welcome_from, crm_element_value(input->msg, PCMK__XA_JOIN_ID)); /* we only ever want the last one */ if (query_call_id > 0) { crm_trace("Cancelling previous join query: %d", query_call_id); remove_cib_op_callback(query_call_id, FALSE); query_call_id = 0; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding cluster join offer from node %s (expected %s)", welcome_from, controld_globals.dc_name); return; } update_dc_expected(input->msg); query_call_id = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_scope_local|cib_no_children); fsa_register_cib_callback(query_call_id, pcmk__str_copy(join_id), join_query_callback); crm_trace("Registered join query callback: %d", query_call_id); controld_set_fsa_action_flags(A_DC_TIMER_STOP); controld_trigger_fsa(); } void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *join_id = user_data; xmlNode *generation = pcmk__xe_create(NULL, PCMK__XE_GENERATION_TUPLE); CRM_LOG_ASSERT(join_id != NULL); if (query_call_id != call_id) { crm_trace("Query %d superseded", call_id); goto done; } query_call_id = 0; if(rc != pcmk_ok || output == NULL) { crm_err("Could not retrieve version details for join-%s: %s (%d)", join_id, pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); } else if (controld_globals.dc_name == NULL) { crm_debug("Membership is in flux, not continuing join-%s", join_id); } else { xmlNode *reply = NULL; crm_debug("Respond to join offer join-%s from %s", join_id, controld_globals.dc_name); pcmk__xe_copy_attrs(generation, output, pcmk__xaf_none); reply = create_request(CRM_OP_JOIN_REQUEST, generation, controld_globals.dc_name, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add(reply, PCMK__XA_JOIN_ID, join_id); crm_xml_add(reply, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, pcmk__node_search_cluster), crm_msg_crmd, reply, TRUE); free_xml(reply); } done: free_xml(generation); } void set_join_state(const char *start_state, const char *node_name, const char *node_uuid, bool remote) { if (pcmk__str_eq(start_state, PCMK_VALUE_STANDBY, pcmk__str_casei)) { crm_notice("Forcing node %s to join in %s state per configured " "environment", node_name, start_state); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_sync_call, PCMK_XE_NODES, node_uuid, NULL, NULL, NULL, PCMK_NODE_ATTR_STANDBY, PCMK_VALUE_TRUE, NULL, (remote? PCMK_VALUE_REMOTE : NULL)); } else if (pcmk__str_eq(start_state, PCMK_VALUE_ONLINE, pcmk__str_casei)) { crm_notice("Forcing node %s to join in %s state per configured " "environment", node_name, start_state); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_sync_call, PCMK_XE_NODES, node_uuid, NULL, NULL, NULL, PCMK_NODE_ATTR_STANDBY, PCMK_VALUE_FALSE, NULL, (remote? PCMK_VALUE_REMOTE : NULL)); } else if (pcmk__str_eq(start_state, PCMK_VALUE_DEFAULT, pcmk__str_casei)) { crm_debug("Not forcing a starting state on node %s", node_name); } else { crm_warn("Unrecognized start state '%s', using " "'" PCMK_VALUE_DEFAULT "' (%s)", start_state, node_name); } } static int update_conn_host_cache(xmlNode *node, void *userdata) { const char *remote = crm_element_value(node, PCMK_XA_ID); const char *conn_host = crm_element_value(node, PCMK__XA_CONNECTION_HOST); const char *state = crm_element_value(node, PCMK__XA_NODE_STATE); - crm_node_t *remote_peer = crm_remote_peer_get(remote); + crm_node_t *remote_peer = pcmk__cluster_lookup_remote_node(remote); if (remote_peer == NULL) { return pcmk_rc_ok; } if (conn_host != NULL) { pcmk__str_update(&remote_peer->conn_host, conn_host); } if (state != NULL) { pcmk__update_peer_state(__func__, remote_peer, state, 0); } return pcmk_rc_ok; } /* A_CL_JOIN_RESULT */ /* aka. this is notification that we have (or have not) been accepted */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *tmp1 = NULL; gboolean was_nack = TRUE; static gboolean first_join = TRUE; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); int join_id = -1; const char *op = crm_element_value(input->msg, PCMK__XA_CRM_TASK); const char *welcome_from = crm_element_value(input->msg, PCMK__XA_SRC); if (!pcmk__str_eq(op, CRM_OP_JOIN_ACKNAK, pcmk__str_casei)) { crm_trace("Ignoring op=%s message", op); return; } /* calculate if it was an ack or a nack */ if (pcmk__xe_attr_is_true(input->msg, CRM_OP_JOIN_ACKNAK)) { was_nack = FALSE; } crm_element_value_int(input->msg, PCMK__XA_JOIN_ID, &join_id); if (was_nack) { crm_err("Shutting down because cluster join with leader %s failed " CRM_XS" join-%d NACK'd", welcome_from, join_id); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); controld_set_fsa_input_flags(R_STAYDOWN); return; } if (!AM_I_DC && pcmk__str_eq(welcome_from, controld_globals.our_nodename, pcmk__str_casei)) { crm_warn("Discarding our own welcome - we're no longer the DC"); return; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding %s from node %s (expected from %s)", op, welcome_from, controld_globals.dc_name); return; } update_dc_expected(input->msg); /* record the node's feature set as a transient attribute */ update_attrd(controld_globals.our_nodename, CRM_ATTR_FEATURE_SET, CRM_FEATURE_SET, NULL, FALSE); /* send our status section to the DC */ tmp1 = controld_query_executor_state(); if (tmp1 != NULL) { xmlNode *remotes = NULL; xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, controld_globals.dc_name, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add_int(reply, PCMK__XA_JOIN_ID, join_id); crm_debug("Confirming join-%d: sending local operation history to %s", join_id, controld_globals.dc_name); /* * If this is the node's first join since the controller started on it, * set its initial state (standby or member) according to the user's * preference. * * We do not clear the LRM history here. Even if the DC failed to do it * when we last left, removing them here creates a race condition if the * controller is being recovered. Instead of a list of active resources * from the executor, we may end up with a blank status section. If we * are _NOT_ lucky, we will probe for the "wrong" instance of anonymous * clones and end up with multiple active instances on the machine. */ if (first_join && !pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { first_join = FALSE; if (start_state) { set_join_state(start_state, controld_globals.our_nodename, controld_globals.our_uuid, false); } } send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, pcmk__node_search_cluster), crm_msg_crmd, reply, TRUE); free_xml(reply); if (AM_I_DC == FALSE) { register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __func__); } free_xml(tmp1); /* Update the remote node cache with information about which node * is hosting the connection. */ remotes = pcmk__xe_first_child(input->msg, PCMK_XE_NODES, NULL, NULL); if (remotes != NULL) { pcmk__xe_foreach_child(remotes, PCMK_XE_NODE, update_conn_host_cache, NULL); } } else { crm_err("Could not confirm join-%d with %s: Local operation history " "failed", join_id, controld_globals.dc_name); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index b3d9526db6..bf06422361 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -1,1045 +1,1045 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include static char *max_generation_from = NULL; static xmlNodePtr max_generation_xml = NULL; /*! * \internal * \brief Nodes from which a CIB sync has failed since the peer joined * * This table is of the form (node_name -> join_id). \p node_name is * the name of a client node from which a CIB \p sync_from() call has failed in * \p do_dc_join_finalize() since the client joined the cluster as a peer. * \p join_id is the ID of the join round in which the \p sync_from() failed, * and is intended for use in nack log messages. */ static GHashTable *failed_sync_nodes = NULL; void finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); /* Numeric counter used to identify join rounds (an unsigned int would be * appropriate, except we get and set it in XML as int) */ static int current_join_id = 0; /*! * \internal * \brief Destroy the hash table containing failed sync nodes */ void controld_destroy_failed_sync_table(void) { if (failed_sync_nodes != NULL) { g_hash_table_destroy(failed_sync_nodes); failed_sync_nodes = NULL; } } /*! * \internal * \brief Remove a node from the failed sync nodes table if present * * \param[in] node_name Node name to remove */ void controld_remove_failed_sync_node(const char *node_name) { if (failed_sync_nodes != NULL) { g_hash_table_remove(failed_sync_nodes, (gchar *) node_name); } } /*! * \internal * \brief Add to a hash table a node whose CIB failed to sync * * \param[in] node_name Name of node whose CIB failed to sync * \param[in] join_id Join round when the failure occurred */ static void record_failed_sync_node(const char *node_name, gint join_id) { if (failed_sync_nodes == NULL) { failed_sync_nodes = pcmk__strikey_table(g_free, NULL); } /* If the node is already in the table then we failed to nack it during the * filter offer step */ CRM_LOG_ASSERT(g_hash_table_insert(failed_sync_nodes, g_strdup(node_name), GINT_TO_POINTER(join_id))); } /*! * \internal * \brief Look up a node name in the failed sync table * * \param[in] node_name Name of node to look up * \param[out] join_id Where to store the join ID of when the sync failed * * \return Standard Pacemaker return code. Specifically, \p pcmk_rc_ok if the * node name was found, or \p pcmk_rc_node_unknown otherwise. * \note \p *join_id is set to -1 if the node is not found. */ static int lookup_failed_sync_node(const char *node_name, gint *join_id) { *join_id = -1; if (failed_sync_nodes != NULL) { gpointer result = g_hash_table_lookup(failed_sync_nodes, (gchar *) node_name); if (result != NULL) { *join_id = GPOINTER_TO_INT(result); return pcmk_rc_ok; } } return pcmk_rc_node_unknown; } void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) { enum crm_join_phase last = 0; CRM_CHECK(node != NULL, return); /* Remote nodes do not participate in joins */ if (pcmk_is_set(node->flags, crm_remote_node)) { return; } last = node->join; if(phase == last) { crm_trace("Node %s join-%d phase is still %s " CRM_XS " nodeid=%u source=%s", node->uname, current_join_id, crm_join_phase_str(last), node->id, source); } else if ((phase <= crm_join_none) || (phase == (last + 1))) { node->join = phase; crm_trace("Node %s join-%d phase is now %s (was %s) " CRM_XS " nodeid=%u source=%s", node->uname, current_join_id, crm_join_phase_str(phase), crm_join_phase_str(last), node->id, source); } else { crm_warn("Rejecting join-%d phase update for node %s because " "can't go from %s to %s " CRM_XS " nodeid=%u source=%s", current_join_id, node->uname, crm_join_phase_str(last), crm_join_phase_str(phase), node->id, source); } } static void start_join_round(void) { GHashTableIter iter; crm_node_t *peer = NULL; crm_debug("Starting new join round join-%d", current_join_id); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { crm_update_peer_join(__func__, peer, crm_join_none); } if (max_generation_from != NULL) { free(max_generation_from); max_generation_from = NULL; } if (max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } controld_clear_fsa_input_flags(R_HAVE_CIB); } /*! * \internal * \brief Create a join message from the DC * * \param[in] join_op Join operation name * \param[in] host_to Recipient of message */ static xmlNode * create_dc_message(const char *join_op, const char *host_to) { xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); /* Identify which election this is a part of */ crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id); /* Add a field specifying whether the DC is shutting down. This keeps the * joining node from fencing the old DC if it becomes the new DC. */ pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING, pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)); return msg; } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { xmlNode *offer = NULL; crm_node_t *member = (crm_node_t *)value; CRM_ASSERT(member != NULL); - if (crm_is_peer_active(member) == FALSE) { + if (!pcmk__cluster_is_node_active(member)) { crm_info("Not making join-%d offer to inactive node %s", current_join_id, (member->uname? member->uname : "with unknown name")); if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) { /* You would think this unsafe, but in fact this plus an * active resource is what causes it to be fenced. * * Yes, this does mean that any node that dies at the same * time as the old DC and is not running resource (still) * won't be fenced. * * I'm not happy about this either. */ pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN); } return; } if (member->uname == NULL) { crm_info("Not making join-%d offer to node uuid %s with unknown name", current_join_id, member->uuid); return; } if (controld_globals.membership_id != crm_peer_seq) { controld_globals.membership_id = crm_peer_seq; crm_info("Making join-%d offers based on membership event %llu", current_join_id, crm_peer_seq); } if(user_data && member->join > crm_join_none) { crm_info("Not making join-%d offer to already known node %s (%s)", current_join_id, member->uname, crm_join_phase_str(member->join)); return; } crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none); offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname); // Advertise our feature set so the joining node can bail if not compatible crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); crm_info("Sending join-%d offer to %s", current_join_id, member->uname); send_cluster_message(member, crm_msg_crmd, offer, TRUE); free_xml(offer); crm_update_peer_join(__func__, member, crm_join_welcomed); } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int count; /* Reset everyone's status back to down or in_ccm in the CIB. * Any nodes that are active in the CIB but not in the cluster membership * will be seen as offline by the scheduler anyway. */ current_join_id++; start_join_round(); update_dc(NULL); if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); count = crmd_join_phase_count(crm_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_node_t *member; ha_msg_input_t *welcome = NULL; int count; const char *join_to = NULL; if (msg_data->data == NULL) { crm_info("Making join-%d offers to any unconfirmed nodes " "because an unknown node joined", current_join_id); g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); check_join_state(cur_state, __func__); return; } welcome = fsa_typed_data(fsa_dt_ha_msg); if (welcome == NULL) { // fsa_typed_data() already logged an error return; } join_to = crm_element_value(welcome->msg, PCMK__XA_SRC); if (join_to == NULL) { crm_err("Can't make join-%d offer to unknown node", current_join_id); return; } member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster); /* It is possible that a node will have been sick or starting up when the * original offer was made. However, it will either re-announce itself in * due course, or we can re-store the original offer on the client. */ crm_update_peer_join(__func__, member, crm_join_none); join_make_offer(NULL, member, NULL); /* If the offer isn't to the local node, make an offer to the local node as * well, to ensure the correct value for max_generation_from. */ if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { member = pcmk__get_node(0, controld_globals.our_nodename, NULL, pcmk__node_search_cluster); join_make_offer(NULL, member, NULL); } /* This was a genuine join request; cancel any existing transition and * invoke the scheduler. */ abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join", NULL); count = crmd_join_phase_count(crm_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } static int compare_int_fields(xmlNode * left, xmlNode * right, const char *field) { const char *elem_l = crm_element_value(left, field); const char *elem_r = crm_element_value(right, field); long long int_elem_l; long long int_elem_r; pcmk__scan_ll(elem_l, &int_elem_l, -1LL); pcmk__scan_ll(elem_r, &int_elem_r, -1LL); if (int_elem_l < int_elem_r) { return -1; } else if (int_elem_l > int_elem_r) { return 1; } return 0; } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; int count = 0; gint value = 0; gboolean ack_nack_bool = TRUE; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC); const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE); const char *join_version = crm_element_value(join_ack->msg, PCMK_XA_CRM_FEATURE_SET); crm_node_t *join_node = NULL; if (join_from == NULL) { crm_err("Ignoring invalid join request without node name"); return; } join_node = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster); crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id); if (join_id != current_join_id) { crm_debug("Ignoring join-%d request from %s because we are on join-%d", join_id, join_from, current_join_id); check_join_state(cur_state, __func__); return; } generation = join_ack->xml; if (max_generation_xml != NULL && generation != NULL) { int lpc = 0; const char *attributes[] = { PCMK_XA_ADMIN_EPOCH, PCMK_XA_EPOCH, PCMK_XA_NUM_UPDATES, }; /* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE * element from the join client. The "if" guard is for clarity. */ if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) { for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) { cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); } } else { // Should always be PCMK__XE_GENERATION_TUPLE CRM_LOG_ASSERT(false); } } if (ref == NULL) { ref = "none"; // for logging only } if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) { crm_err("Rejecting join-%d request from node %s because we failed to " "sync its CIB in join-%d " CRM_XS " ref=%s", join_id, join_from, value, ref); ack_nack_bool = FALSE; - } else if (!crm_is_peer_active(join_node)) { + } else if (!pcmk__cluster_is_node_active(join_node)) { if (match_down_event(join_from) != NULL) { /* The join request was received after the node was fenced or * otherwise shutdown in a way that we're aware of. No need to log * an error in this rare occurrence; we know the client was recently * shut down, and receiving a lingering in-flight request is not * cause for alarm. */ crm_debug("Rejecting join-%d request from inactive node %s " CRM_XS " ref=%s", join_id, join_from, ref); } else { crm_err("Rejecting join-%d request from inactive node %s " CRM_XS " ref=%s", join_id, join_from, ref); } ack_nack_bool = FALSE; } else if (generation == NULL) { crm_err("Rejecting invalid join-%d request from node %s " "missing CIB generation " CRM_XS " ref=%s", join_id, join_from, ref); ack_nack_bool = FALSE; } else if ((join_version == NULL) || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { crm_err("Rejecting join-%d request from node %s because feature set %s" " is incompatible with ours (%s) " CRM_XS " ref=%s", join_id, join_from, (join_version? join_version : "pre-3.1.0"), CRM_FEATURE_SET, ref); ack_nack_bool = FALSE; } else if (max_generation_xml == NULL) { const char *validation = crm_element_value(generation, PCMK_XA_VALIDATE_WITH); if (pcmk__get_schema(validation) == NULL) { crm_err("Rejecting join-%d request from %s (with first CIB " "generation) due to unknown schema version %s " CRM_XS " ref=%s", join_id, join_from, pcmk__s(validation, "(missing)"), ref); ack_nack_bool = FALSE; } else { crm_debug("Accepting join-%d request from %s (with first CIB " "generation) " CRM_XS " ref=%s", join_id, join_from, ref); max_generation_xml = pcmk__xml_copy(NULL, generation); pcmk__str_update(&max_generation_from, join_from); } } else if ((cmp < 0) || ((cmp == 0) && pcmk__str_eq(join_from, controld_globals.our_nodename, pcmk__str_casei))) { const char *validation = crm_element_value(generation, PCMK_XA_VALIDATE_WITH); if (pcmk__get_schema(validation) == NULL) { crm_err("Rejecting join-%d request from %s (with better CIB " "generation than current best from %s) due to unknown " "schema version %s " CRM_XS " ref=%s", join_id, join_from, max_generation_from, pcmk__s(validation, "(missing)"), ref); ack_nack_bool = FALSE; } else { crm_debug("Accepting join-%d request from %s (with better CIB " "generation than current best from %s) " CRM_XS " ref=%s", join_id, join_from, max_generation_from, ref); crm_log_xml_debug(max_generation_xml, "Old max generation"); crm_log_xml_debug(generation, "New max generation"); free_xml(max_generation_xml); max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml); pcmk__str_update(&max_generation_from, join_from); } } else { crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s", join_id, join_from, ref); } if (!ack_nack_bool) { if (compare_version(join_version, "3.17.0") < 0) { /* Clients with CRM_FEATURE_SET < 3.17.0 may respawn infinitely * after a nack message, don't send one */ crm_update_peer_join(__func__, join_node, crm_join_nack_quiet); } else { crm_update_peer_join(__func__, join_node, crm_join_nack); } pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK); } else { crm_update_peer_join(__func__, join_node, crm_join_integrated); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); } count = crmd_join_phase_count(crm_join_integrated); crm_debug("%d node%s currently integrated in join-%d", count, pcmk__plural_s(count), join_id); if (check_join_state(cur_state, __func__) == FALSE) { // Don't waste time by invoking the scheduler yet count = crmd_join_phase_count(crm_join_welcomed); crm_debug("Waiting on join-%d requests from %d outstanding node%s", join_id, count, pcmk__plural_s(count)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { char *sync_from = NULL; int rc = pcmk_ok; int count_welcomed = crmd_join_phase_count(crm_join_welcomed); int count_finalizable = crmd_join_phase_count(crm_join_integrated) + crmd_join_phase_count(crm_join_nack) + crmd_join_phase_count(crm_join_nack_quiet); /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ if (count_welcomed != 0) { crm_debug("Waiting on join-%d requests from %d outstanding node%s " "before finalizing join", current_join_id, count_welcomed, pcmk__plural_s(count_welcomed)); crmd_join_phase_log(LOG_DEBUG); /* crmd_fsa_stall(FALSE); Needed? */ return; } else if (count_finalizable == 0) { crm_debug("Finalization not needed for join-%d at the current time", current_join_id); crmd_join_phase_log(LOG_DEBUG); check_join_state(controld_globals.fsa_state, __func__); return; } controld_clear_fsa_input_flags(R_HAVE_CIB); if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename, pcmk__str_null_matches|pcmk__str_casei)) { controld_set_fsa_input_flags(R_HAVE_CIB); } if (!controld_globals.transition_graph->complete) { crm_warn("Delaying join-%d finalization while transition in progress", current_join_id); crmd_join_phase_log(LOG_DEBUG); crmd_fsa_stall(FALSE); return; } if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { // Send our CIB out to everyone sync_from = pcmk__str_copy(controld_globals.our_nodename); crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)", current_join_id, count_finalizable, pcmk__plural_s(count_finalizable)); crm_log_xml_debug(max_generation_xml, "Requested CIB version"); } else { // Ask for the agreed best CIB sync_from = pcmk__str_copy(max_generation_from); crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)", current_join_id, count_finalizable, pcmk__plural_s(count_finalizable), sync_from); crm_log_xml_notice(max_generation_xml, "Requested CIB version"); } crmd_join_phase_log(LOG_DEBUG); rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn, sync_from, NULL, cib_none); fsa_register_cib_callback(rc, sync_from, finalize_sync_callback); } void free_max_generation(void) { free(max_generation_from); max_generation_from = NULL; free_xml(max_generation_xml); max_generation_xml = NULL; } void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { CRM_LOG_ASSERT(-EPERM != rc); if (rc != pcmk_ok) { const char *sync_from = (const char *) user_data; do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR), "Could not sync CIB from %s in join-%d: %s", sync_from, current_join_id, pcmk_strerror(rc)); if (rc != -pcmk_err_old_data) { record_failed_sync_node(sync_from, current_join_id); } /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __func__); } else if (!AM_I_DC) { crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); } else if (controld_globals.fsa_state != S_FINALIZE_JOIN) { crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN " "(%s)", current_join_id, fsa_state2string(controld_globals.fsa_state)); } else { controld_set_fsa_input_flags(R_HAVE_CIB); /* make sure dc_uuid is re-set to us */ if (!check_join_state(controld_globals.fsa_state, __func__)) { int count_finalizable = 0; count_finalizable = crmd_join_phase_count(crm_join_integrated) + crmd_join_phase_count(crm_join_nack) + crmd_join_phase_count(crm_join_nack_quiet); crm_debug("Notifying %d node%s of join-%d results", count_finalizable, pcmk__plural_s(count_finalizable), current_join_id); g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); } } } static void join_node_state_commit_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *node = user_data; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; // for register_fsa_error() macro crm_crit("join-%d node history update (via CIB call %d) for node %s " "failed: %s", current_join_id, call_id, node, pcmk_strerror(rc)); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } crm_debug("join-%d node history update (via CIB call %d) for node %s " "complete", current_join_id, call_id, node); check_join_state(controld_globals.fsa_state, __func__); } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int join_id = -1; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK); char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC); crm_node_t *peer = NULL; enum controld_section_e section = controld_section_lrm; char *xpath = NULL; xmlNode *state = join_ack->xml; xmlNode *execd_state = NULL; cib_t *cib = controld_globals.cib_conn; int rc = pcmk_ok; // Sanity checks if (join_from == NULL) { crm_warn("Ignoring message received without node identification"); goto done; } if (op == NULL) { crm_warn("Ignoring message received from %s without task", join_from); goto done; } if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring '%s' message from %s while waiting for '%s'", op, join_from, CRM_OP_JOIN_CONFIRM); goto done; } if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) { crm_warn("Ignoring join confirmation from %s without valid join ID", join_from); goto done; } peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster); if (peer->join != crm_join_finalized) { crm_info("Ignoring out-of-sequence join-%d confirmation from %s " "(currently %s not %s)", join_id, join_from, crm_join_phase_str(peer->join), crm_join_phase_str(crm_join_finalized)); goto done; } if (join_id != current_join_id) { crm_err("Rejecting join-%d confirmation from %s " "because currently on join-%d", join_id, join_from, current_join_id); crm_update_peer_join(__func__, peer, crm_join_nack); goto done; } crm_update_peer_join(__func__, peer, crm_join_confirmed); /* Update CIB with node's current executor state. A new transition will be * triggered later, when the CIB manager notifies us of the change. * * The delete and modify requests are part of an atomic transaction. */ rc = cib->cmds->init_transaction(cib); if (rc != pcmk_ok) { goto done; } // Delete relevant parts of node's current executor state from CIB if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { section = controld_section_lrm_unlocked; } controld_node_state_deletion_strings(join_from, section, &xpath, NULL); rc = cib->cmds->remove(cib, xpath, NULL, cib_scope_local |cib_xpath |cib_multiple |cib_transaction); if (rc != pcmk_ok) { goto done; } // Update CIB with node's latest known executor state if (pcmk__str_eq(join_from, controld_globals.our_nodename, pcmk__str_casei)) { // Use the latest possible state if processing our own join ack execd_state = controld_query_executor_state(); if (execd_state != NULL) { crm_debug("Updating local node history for join-%d from query " "result", current_join_id); state = execd_state; } else { crm_warn("Updating local node history from join-%d confirmation " "because query failed", current_join_id); } } else { crm_debug("Updating node history for %s from join-%d confirmation", join_from, current_join_id); } rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state, cib_scope_local|cib_can_create|cib_transaction); free_xml(execd_state); if (rc != pcmk_ok) { goto done; } // Commit the transaction rc = cib->cmds->end_transaction(cib, true, cib_scope_local); fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback); if (rc > 0) { // join_from will be freed after callback join_from = NULL; rc = pcmk_ok; } done: if (rc != pcmk_ok) { crm_crit("join-%d node history update for node %s failed: %s", current_join_id, join_from, pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free(join_from); free(xpath); } void finalize_join_for(gpointer key, gpointer value, gpointer user_data) { xmlNode *acknak = NULL; xmlNode *tmp1 = NULL; crm_node_t *join_node = value; const char *join_to = join_node->uname; bool integrated = false; switch (join_node->join) { case crm_join_integrated: integrated = true; break; case crm_join_nack: case crm_join_nack_quiet: break; default: crm_trace("Not updating non-integrated and non-nacked node %s (%s) " "for join-%d", join_to, crm_join_phase_str(join_node->join), current_join_id); return; } /* Update the element with the node's name and UUID, in case they * weren't known before */ crm_trace("Updating node name and UUID in CIB for %s", join_to); tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE); crm_xml_add(tmp1, PCMK_XA_ID, crm_peer_uuid(join_node)); crm_xml_add(tmp1, PCMK_XA_UNAME, join_to); fsa_cib_anon_update(PCMK_XE_NODES, tmp1); free_xml(tmp1); if (join_node->join == crm_join_nack_quiet) { crm_trace("Not sending nack message to node %s with feature set older " "than 3.17.0", join_to); return; } join_node = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster); - if (!crm_is_peer_active(join_node)) { + if (!pcmk__cluster_is_node_active(join_node)) { /* * NACK'ing nodes that the membership layer doesn't know about yet * simply creates more churn * * Better to leave them waiting and let the join restart when * the new membership event comes in * * All other NACKs (due to versions etc) should still be processed */ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING); return; } // Acknowledge or nack node's join request crm_debug("%sing join-%d request from %s", integrated? "Acknowledg" : "Nack", current_join_id, join_to); acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated); if (integrated) { // No change needed for a nacked node crm_update_peer_join(__func__, join_node, crm_join_finalized); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); /* Iterate through the remote peer cache and add information on which * node hosts each to the ACK message. This keeps new controllers in * sync with what has already happened. */ - if (crm_remote_peer_cache_size() != 0) { + if (pcmk__cluster_num_remote_nodes() > 0) { GHashTableIter iter; crm_node_t *node = NULL; xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES); g_hash_table_iter_init(&iter, crm_remote_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *remote = NULL; if (!node->conn_host) { continue; } remote = pcmk__xe_create(remotes, PCMK_XE_NODE); pcmk__xe_set_props(remote, PCMK_XA_ID, node->uname, PCMK__XA_NODE_STATE, node->state, PCMK__XA_CONNECTION_HOST, node->conn_host, NULL); } } } send_cluster_message(join_node, crm_msg_crmd, acknak, TRUE); free_xml(acknak); return; } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { static unsigned long long highest_seq = 0; if (controld_globals.membership_id != crm_peer_seq) { crm_debug("join-%d: Membership changed from %llu to %llu " CRM_XS " highest=%llu state=%s for=%s", current_join_id, controld_globals.membership_id, crm_peer_seq, highest_seq, fsa_state2string(cur_state), source); if(highest_seq < crm_peer_seq) { /* Don't spam the FSA with duplicates */ highest_seq = crm_peer_seq; register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } else if (cur_state == S_INTEGRATION) { if (crmd_join_phase_count(crm_join_welcomed) == 0) { int count = crmd_join_phase_count(crm_join_integrated); crm_debug("join-%d: Integration of %d peer%s complete " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { crm_debug("join-%d: Delaying finalization until we have CIB " CRM_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); return TRUE; } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { int count = crmd_join_phase_count(crm_join_welcomed); crm_debug("join-%d: Still waiting on %d welcomed node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_integrated) != 0) { int count = crmd_join_phase_count(crm_join_integrated); crm_debug("join-%d: Still waiting on %d integrated node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_finalized) != 0) { int count = crmd_join_phase_count(crm_join_finalized); crm_debug("join-%d: Still waiting on %d finalized node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else { crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); return TRUE; } } return FALSE; } void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); crm_update_quorum(crm_have_quorum, TRUE); } int crmd_join_phase_count(enum crm_join_phase phase) { int count = 0; crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { if(peer->join == phase) { count++; } } return count; } void crmd_join_phase_log(int level) { crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname, crm_join_phase_str(peer->join)); } } diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c index 759c280684..ece48688ee 100644 --- a/daemons/controld/controld_membership.c +++ b/daemons/controld/controld_membership.c @@ -1,463 +1,463 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include #include void post_cache_update(int instance); extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); static void reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; - if (crm_is_peer_active(node) == FALSE) { + if (!pcmk__cluster_is_node_active(node)) { crm_update_peer_join(__func__, node, crm_join_none); if(node && node->uname) { if (pcmk__str_eq(controld_globals.our_nodename, node->uname, pcmk__str_casei)) { crm_err("We're not part of the cluster anymore"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); } else if (!AM_I_DC && pcmk__str_eq(node->uname, controld_globals.dc_name, pcmk__str_casei)) { crm_warn("Our DC node (%s) left the cluster", node->uname); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } if ((controld_globals.fsa_state == S_INTEGRATION) || (controld_globals.fsa_state == S_FINALIZE_JOIN)) { check_join_state(controld_globals.fsa_state, __func__); } if ((node != NULL) && (node->uuid != NULL)) { fail_incompletable_actions(controld_globals.transition_graph, node->uuid); } } } void post_cache_update(int instance) { xmlNode *no_op = NULL; crm_peer_seq = instance; crm_debug("Updated cache after membership event %d.", instance); g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL); controld_set_fsa_input_flags(R_MEMBERSHIP); if (AM_I_DC) { populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer | node_update_expected, __func__); } /* * If we lost nodes, we should re-check the election status * Safe to call outside of an election */ controld_set_fsa_action_flags(A_ELECTION_CHECK); controld_trigger_fsa(); /* Membership changed, remind everyone we're here. * This will aid detection of duplicate DCs */ no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL); send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE); free_xml(no_op); } static void crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_trace("Node update %d complete", call_id); } else if(call_id < pcmk_ok) { crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else { crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /*! * \internal * \brief Create an XML node state tag with updates * * \param[in,out] node Node whose state will be used for update * \param[in] flags Bitmask of node_update_flags indicating what to update * \param[in,out] parent XML node to contain update (or NULL) * \param[in] source Who requested the update (only used for logging) * * \return Pointer to created node state tag */ xmlNode * create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, const char *source) { const char *value = NULL; xmlNode *node_state; if (!node->state) { crm_info("Node update for %s cancelled: no state, not seen yet", node->uname); return NULL; } node_state = pcmk__xe_create(parent, PCMK__XE_NODE_STATE); if (pcmk_is_set(node->flags, crm_remote_node)) { pcmk__xe_set_bool_attr(node_state, PCMK_XA_REMOTE_NODE, true); } if (crm_xml_add(node_state, PCMK_XA_ID, crm_peer_uuid(node)) == NULL) { crm_info("Node update for %s cancelled: no ID", node->uname); free_xml(node_state); return NULL; } crm_xml_add(node_state, PCMK_XA_UNAME, node->uname); if ((flags & node_update_cluster) && node->state) { if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) { // A value 0 means the node is not a cluster member. crm_xml_add_ll(node_state, PCMK__XA_IN_CCM, node->when_member); } else { pcmk__xe_set_bool_attr(node_state, PCMK__XA_IN_CCM, pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)); } } if (!pcmk_is_set(node->flags, crm_remote_node)) { if (flags & node_update_peer) { if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) { // A value 0 means the peer is offline in CPG. crm_xml_add_ll(node_state, PCMK_XA_CRMD, node->when_online); } else { // @COMPAT DCs < 2.1.7 use online/offline rather than timestamp value = PCMK_VALUE_OFFLINE; if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { value = PCMK_VALUE_ONLINE; } crm_xml_add(node_state, PCMK_XA_CRMD, value); } } if (flags & node_update_join) { if (node->join <= crm_join_none) { value = CRMD_JOINSTATE_DOWN; } else { value = CRMD_JOINSTATE_MEMBER; } crm_xml_add(node_state, PCMK__XA_JOIN, value); } if (flags & node_update_expected) { crm_xml_add(node_state, PCMK_XA_EXPECTED, node->expected); } } crm_xml_add(node_state, PCMK_XA_CRM_DEBUG_ORIGIN, source); return node_state; } static void remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *node_uuid = user_data; do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)", node_uuid, pcmk_strerror(rc), rc); } static void search_conflicting_node_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *new_node_uuid = user_data; xmlNode *node_xml = NULL; if (rc != pcmk_ok) { if (rc != -ENXIO) { crm_notice("Searching conflicting nodes for %s failed: %s (%d)", new_node_uuid, pcmk_strerror(rc), rc); } return; } else if (output == NULL) { return; } if (pcmk__xe_is(output, PCMK_XE_NODE)) { node_xml = output; } else { node_xml = pcmk__xe_first_child(output, PCMK_XE_NODE, NULL, NULL); } for (; node_xml != NULL; node_xml = pcmk__xe_next_same(node_xml)) { const char *node_uuid = NULL; const char *node_uname = NULL; GHashTableIter iter; crm_node_t *node = NULL; gboolean known = FALSE; node_uuid = crm_element_value(node_xml, PCMK_XA_ID); node_uname = crm_element_value(node_xml, PCMK_XA_UNAME); if (node_uuid == NULL || node_uname == NULL) { continue; } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (node->uuid && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei) && node->uname && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) { known = TRUE; break; } } if (known == FALSE) { cib_t *cib_conn = controld_globals.cib_conn; int delete_call_id = 0; xmlNode *node_state_xml = NULL; crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s", node_uuid, node_uname, new_node_uuid); delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_NODES, node_xml, cib_scope_local); fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid), remove_conflicting_node_callback); node_state_xml = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE); crm_xml_add(node_state_xml, PCMK_XA_ID, node_uuid); crm_xml_add(node_state_xml, PCMK_XA_UNAME, node_uname); delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_STATUS, node_state_xml, cib_scope_local); fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid), remove_conflicting_node_callback); free_xml(node_state_xml); } } } static void node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if(call_id < pcmk_ok) { crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id); crm_log_xml_debug(msg, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else if(rc < pcmk_ok) { crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void populate_cib_nodes(enum node_update_flags flags, const char *source) { cib_t *cib_conn = controld_globals.cib_conn; int call_id = 0; gboolean from_hashtable = TRUE; xmlNode *node_list = pcmk__xe_create(NULL, PCMK_XE_NODES); #if SUPPORT_COROSYNC if (!pcmk_is_set(flags, node_update_quick) && is_corosync_cluster()) { from_hashtable = pcmk__corosync_add_nodes(node_list); } #endif if (from_hashtable) { GHashTableIter iter; crm_node_t *node = NULL; GString *xpath = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *new_node = NULL; if ((node->uuid != NULL) && (node->uname != NULL)) { crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); if (xpath == NULL) { xpath = g_string_sized_new(512); } else { g_string_truncate(xpath, 0); } /* We need both to be valid */ new_node = pcmk__xe_create(node_list, PCMK_XE_NODE); crm_xml_add(new_node, PCMK_XA_ID, node->uuid); crm_xml_add(new_node, PCMK_XA_UNAME, node->uname); /* Search and remove unknown nodes with the conflicting uname from CIB */ pcmk__g_strcat(xpath, "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_NODES "/" PCMK_XE_NODE "[@" PCMK_XA_UNAME "='", node->uname, "']" "[@" PCMK_XA_ID "!='", node->uuid, "']", NULL); call_id = cib_conn->cmds->query(cib_conn, (const char *) xpath->str, NULL, cib_scope_local|cib_xpath); fsa_register_cib_callback(call_id, pcmk__str_copy(node->uuid), search_conflicting_node_callback); } } if (xpath != NULL) { g_string_free(xpath, TRUE); } } crm_trace("Populating section from %s", from_hashtable ? "hashtable" : "cluster"); if ((controld_update_cib(PCMK_XE_NODES, node_list, cib_scope_local, node_list_update_callback) == pcmk_rc_ok) && (crm_peer_cache != NULL) && AM_I_DC) { /* * There is no need to update the local CIB with our values if * we've not seen valid membership data */ GHashTableIter iter; crm_node_t *node = NULL; free_xml(node_list); node_list = pcmk__xe_create(NULL, PCMK_XE_STATUS); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { create_node_state_update(node, flags, node_list, source); } if (crm_remote_peer_cache) { g_hash_table_iter_init(&iter, crm_remote_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { create_node_state_update(node, flags, node_list, source); } } controld_update_cib(PCMK_XE_STATUS, node_list, cib_scope_local, crmd_node_update_complete); } free_xml(node_list); } static void cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_trace("Quorum update %d complete", call_id); } else { crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void crm_update_quorum(gboolean quorum, gboolean force_update) { bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum); if (quorum) { controld_set_global_flags(controld_ever_had_quorum); } else if (pcmk_all_flags_set(controld_globals.flags, controld_ever_had_quorum |controld_no_quorum_suicide)) { pcmk__panic(__func__); } if (AM_I_DC && ((has_quorum && !quorum) || (!has_quorum && quorum) || force_update)) { xmlNode *update = NULL; update = pcmk__xe_create(NULL, PCMK_XE_CIB); crm_xml_add_int(update, PCMK_XA_HAVE_QUORUM, quorum); crm_xml_add(update, PCMK_XA_DC_UUID, controld_globals.our_uuid); crm_debug("Updating quorum status to %s", pcmk__btoa(quorum)); controld_update_cib(PCMK_XE_CIB, update, cib_scope_local, cib_quorum_update_complete); free_xml(update); /* Quorum changes usually cause a new transition via other activity: * quorum gained via a node joining will abort via the node join, * and quorum lost via a node leaving will usually abort via resource * activity and/or fencing. * * However, it is possible that nothing else causes a transition (e.g. * someone forces quorum via corosync-cmaptcl, or quorum is lost due to * a node in standby shutting down cleanly), so here ensure a new * transition is triggered. */ if (quorum) { /* If quorum was gained, abort after a short delay, in case multiple * nodes are joining around the same time, so the one that brings us * to quorum doesn't cause all the remaining ones to be fenced. */ abort_after_delay(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Quorum gained", 5000); } else { abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Quorum lost", NULL); } } if (quorum) { controld_set_global_flags(controld_has_quorum); } else { controld_clear_global_flags(controld_has_quorum); } } diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c index 1d71109fae..3e472136f8 100644 --- a/daemons/controld/controld_messages.c +++ b/daemons/controld/controld_messages.c @@ -1,1358 +1,1358 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include static enum crmd_fsa_input handle_message(xmlNode *msg, enum crmd_fsa_cause cause); static void handle_response(xmlNode *stored_msg); static enum crmd_fsa_input handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause); static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg); static void send_msg_via_ipc(xmlNode * msg, const char *sys); /* debug only, can wrap all it likes */ static int last_data_id = 0; void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if (controld_globals.fsa_actions != A_NOTHING) { register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL, I_NULL, cur_data ? cur_data->data : NULL, controld_globals.fsa_actions, TRUE, __func__); } /* reset the action list */ crm_info("Resetting the current action list"); fsa_dump_actions(controld_globals.fsa_actions, "Drop"); controld_globals.fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); } void register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, uint64_t with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(controld_globals.fsa_message_queue); fsa_data_t *fsa_data = NULL; if (raised_from == NULL) { raised_from = ""; } if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return; } if (input == I_WAIT_FOR_EVENT) { controld_set_global_flags(controld_fsa_is_stalled); crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d", raised_from, fsa_cause2string(cause), data, old_len); if (old_len > 0) { fsa_dump_queue(LOG_TRACE); prepend = FALSE; } if (data == NULL) { controld_set_fsa_action_flags(with_actions); fsa_dump_actions(with_actions, "Restored"); return; } /* Store everything in the new event and reset * controld_globals.fsa_actions */ with_actions |= controld_globals.fsa_actions; controld_globals.fsa_actions = A_NOTHING; } last_data_id++; crm_trace("%s %s FSA input %d (%s) due to %s, %s data", raised_from, (prepend? "prepended" : "appended"), last_data_id, fsa_input2string(input), fsa_cause2string(cause), (data? "with" : "without")); fsa_data = pcmk__assert_alloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if (with_actions != A_NOTHING) { crm_trace("Adding actions %.16llx to input", (unsigned long long) with_actions); } if (data != NULL) { switch (cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); crm_trace("Copying %s data from %s as cluster message data", fsa_cause2string(cause), raised_from); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_trace("Copying %s data from %s as lrmd_event_data_t", fsa_cause2string(cause), raised_from); fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); fsa_data->data_type = fsa_dt_lrm; break; case C_TIMER_POPPED: case C_SHUTDOWN: case C_UNKNOWN: case C_STARTUP: crm_crit("Copying %s data (from %s) is not yet implemented", fsa_cause2string(cause), raised_from); crmd_exit(CRM_EX_SOFTWARE); break; } } /* make sure to free it properly later */ if (prepend) { controld_globals.fsa_message_queue = g_list_prepend(controld_globals.fsa_message_queue, fsa_data); } else { controld_globals.fsa_message_queue = g_list_append(controld_globals.fsa_message_queue, fsa_data); } crm_trace("FSA message queue length is %d", g_list_length(controld_globals.fsa_message_queue)); /* fsa_dump_queue(LOG_TRACE); */ if (old_len == g_list_length(controld_globals.fsa_message_queue)) { crm_err("Couldn't add message to the queue"); } if (input != I_WAIT_FOR_EVENT) { controld_trigger_fsa(); } } void fsa_dump_queue(int log_level) { int offset = 0; for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; iter = iter->next) { fsa_data_t *data = (fsa_data_t *) iter->data; do_crm_log_unlikely(log_level, "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)", offset++, data->id, fsa_input2string(data->fsa_input), data->origin, data->data, data->data_type, fsa_cause2string(data->fsa_cause)); } } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { xmlNode *wrapper = NULL; ha_msg_input_t *copy = pcmk__assert_alloc(1, sizeof(ha_msg_input_t)); copy->msg = (orig != NULL)? pcmk__xml_copy(NULL, orig->msg) : NULL; wrapper = pcmk__xe_first_child(copy->msg, PCMK__XE_CRM_XML, NULL, NULL); copy->xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); return copy; } void delete_fsa_input(fsa_data_t * fsa_data) { lrmd_event_data_t *op = NULL; xmlNode *foo = NULL; if (fsa_data == NULL) { return; } crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if (fsa_data->data != NULL) { switch (fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrmd_event_data_t *) fsa_data->data; lrmd_free_event(op); break; case fsa_dt_none: if (fsa_data->data != NULL) { crm_err("Don't know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); crmd_exit(CRM_EX_SOFTWARE); } break; } crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t *message = (fsa_data_t *) controld_globals.fsa_message_queue->data; controld_globals.fsa_message_queue = g_list_remove(controld_globals.fsa_message_queue, message); crm_trace("Processing input %d", message->id); return message; } void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode * input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if (relay_message(input, cause == C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input, cause); /* done or process later? */ switch (result) { case I_NULL: case I_CIB_OP: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Defering local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if (result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode * msg, gboolean originated_locally) { enum crm_ais_msg_types dest = crm_msg_ais; bool is_for_dc = false; bool is_for_dcib = false; bool is_for_te = false; bool is_for_crm = false; bool is_for_cib = false; bool is_local = false; bool broadcast = false; const char *host_to = NULL; const char *sys_to = NULL; const char *sys_from = NULL; const char *type = NULL; const char *task = NULL; const char *ref = NULL; crm_node_t *node_to = NULL; CRM_CHECK(msg != NULL, return TRUE); host_to = crm_element_value(msg, PCMK__XA_CRM_HOST_TO); sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO); sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM); type = crm_element_value(msg, PCMK__XA_T); task = crm_element_value(msg, PCMK__XA_CRM_TASK); ref = crm_element_value(msg, PCMK_XA_REFERENCE); broadcast = pcmk__str_empty(host_to); if (ref == NULL) { ref = "without reference ID"; } if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) { crm_trace("Received hello %s from %s (no processing needed)", ref, pcmk__s(sys_from, "unidentified source")); crm_log_xml_trace(msg, "hello"); return TRUE; } // Require message type (set by create_request()) if (!pcmk__str_eq(type, PCMK__VALUE_CRMD, pcmk__str_none)) { crm_warn("Ignoring invalid message %s with type '%s' " "(not '" PCMK__VALUE_CRMD "')", ref, pcmk__s(type, "")); crm_log_xml_trace(msg, "ignored"); return TRUE; } // Require a destination subsystem (also set by create_request()) if (sys_to == NULL) { crm_warn("Ignoring invalid message %s with no " PCMK__XA_CRM_SYS_TO, ref); crm_log_xml_trace(msg, "ignored"); return TRUE; } // Get the message type appropriate to the destination subsystem if (is_corosync_cluster()) { dest = text2msg_type(sys_to); if ((dest < crm_msg_ais) || (dest > crm_msg_stonith_ng)) { /* Unrecognized value, use a sane default * * @TODO Maybe we should bail instead */ dest = crm_msg_crmd; } } is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); // Check whether message should be processed locally is_local = false; if (broadcast) { if (is_for_dc || is_for_te) { is_local = false; } else if (is_for_crm) { if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO, PCMK__CONTROLD_CMD_NODES, NULL)) { /* Node info requests do not specify a host, which is normally * treated as "all hosts", because the whole point is that the * client may not know the local node name. Always handle these * requests locally. */ is_local = true; } else { is_local = !originated_locally; } } else { is_local = true; } } else if (pcmk__str_eq(controld_globals.our_nodename, host_to, pcmk__str_casei)) { is_local = true; } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); const char *mode = crm_element_value(msg_data, PCMK__XA_MODE); if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) { // Local delete of an offline node's resource history is_local = true; } } // Check whether message should be relayed if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC) { if (is_for_te) { crm_trace("Route message %s locally as transition request", ref); crm_log_xml_trace(msg, sys_to); send_msg_via_ipc(msg, sys_to); return TRUE; // No further processing of message is needed } crm_trace("Route message %s locally as DC request", ref); return FALSE; // More to be done by caller } if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { crm_trace("Relay message %s to DC (via %s)", ref, pcmk__s(host_to, "broadcast")); crm_log_xml_trace(msg, "relayed"); if (!broadcast) { node_to = pcmk__get_node(0, host_to, NULL, pcmk__node_search_cluster); } send_cluster_message(node_to, dest, msg, TRUE); return TRUE; } /* Transition engine and scheduler messages are sent only to the DC on * the same node. If we are no longer the DC, discard this message. */ crm_trace("Ignoring message %s because we are no longer DC", ref); crm_log_xml_trace(msg, "ignored"); return TRUE; // No further processing of message is needed } if (is_local) { if (is_for_crm || is_for_cib) { crm_trace("Route message %s locally as controller request", ref); return FALSE; // More to be done by caller } crm_trace("Relay message %s locally to %s", ref, sys_to); crm_log_xml_trace(msg, "IPC-relay"); send_msg_via_ipc(msg, sys_to); return TRUE; } if (!broadcast) { node_to = pcmk__search_node_caches(0, host_to, pcmk__node_search_cluster); if (node_to == NULL) { crm_warn("Ignoring message %s because node %s is unknown", ref, host_to); crm_log_xml_trace(msg, "ignored"); return TRUE; } } crm_trace("Relay message %s to %s", ref, pcmk__s(host_to, "all peers")); crm_log_xml_trace(msg, "relayed"); send_cluster_message(node_to, dest, msg, TRUE); return TRUE; } // Return true if field contains a positive integer static bool authorize_version(xmlNode *message_data, const char *field, const char *client_name, const char *ref, const char *uuid) { const char *version = crm_element_value(message_data, field); long long version_num; if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok) || (version_num < 0LL)) { crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s " CRM_XS " ref=%s uuid=%s", client_name, ((version == NULL)? "" : version), field, (ref? ref : "none"), uuid); return false; } return true; } /*! * \internal * \brief Check whether a client IPC message is acceptable * * If a given client IPC message is a hello, "authorize" it by ensuring it has * valid information such as a protocol version, and return false indicating * that nothing further needs to be done with the message. If the message is not * a hello, just return true to indicate it needs further processing. * * \param[in] client_msg XML of IPC message * \param[in,out] curr_client If IPC is not proxied, client that sent message * \param[in] proxy_session If IPC is proxied, the session ID * * \return true if message needs further processing, false if it doesn't */ bool controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client, const char *proxy_session) { xmlNode *wrapper = NULL; xmlNode *message_data = NULL; const char *client_name = NULL; const char *op = crm_element_value(client_msg, PCMK__XA_CRM_TASK); const char *ref = crm_element_value(client_msg, PCMK_XA_REFERENCE); const char *uuid = (curr_client? curr_client->id : proxy_session); if (uuid == NULL) { crm_warn("IPC message from client rejected: No client identifier " CRM_XS " ref=%s", (ref? ref : "none")); goto rejected; } if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) { // Only hello messages need to be authorized return true; } wrapper = pcmk__xe_first_child(client_msg, PCMK__XE_CRM_XML, NULL, NULL); message_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); client_name = crm_element_value(message_data, PCMK__XA_CLIENT_NAME); if (pcmk__str_empty(client_name)) { crm_warn("IPC hello from client rejected: No client name", CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid); goto rejected; } if (!authorize_version(message_data, PCMK__XA_MAJOR_VERSION, client_name, ref, uuid)) { goto rejected; } if (!authorize_version(message_data, PCMK__XA_MINOR_VERSION, client_name, ref, uuid)) { goto rejected; } crm_trace("Validated IPC hello from client %s", client_name); crm_log_xml_trace(client_msg, "hello"); if (curr_client) { curr_client->userdata = pcmk__str_copy(client_name); } controld_trigger_fsa(); return false; rejected: crm_log_xml_trace(client_msg, "rejected"); if (curr_client) { qb_ipcs_disconnect(curr_client->ipcs); } return false; } static enum crmd_fsa_input handle_message(xmlNode *msg, enum crmd_fsa_cause cause) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = crm_element_value(msg, PCMK__XA_SUBT); if (pcmk__str_eq(type, PCMK__VALUE_REQUEST, pcmk__str_none)) { return handle_request(msg, cause); } if (pcmk__str_eq(type, PCMK__VALUE_RESPONSE, pcmk__str_none)) { handle_response(msg); return I_NULL; } crm_warn("Ignoring message with unknown " PCMK__XA_SUBT" '%s'", pcmk__s(type, "")); crm_log_xml_trace(msg, "bad"); return I_NULL; } static enum crmd_fsa_input handle_failcount_op(xmlNode * stored_msg) { const char *rsc = NULL; const char *uname = NULL; const char *op = NULL; char *interval_spec = NULL; guint interval_ms = 0; gboolean is_remote_node = FALSE; xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *xml_op = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); if (xml_op) { xmlNode *xml_rsc = pcmk__xe_first_child(xml_op, PCMK_XE_PRIMITIVE, NULL, NULL); xmlNode *xml_attrs = pcmk__xe_first_child(xml_op, PCMK__XE_ATTRIBUTES, NULL, NULL); if (xml_rsc) { rsc = pcmk__xe_id(xml_rsc); } if (xml_attrs) { op = crm_element_value(xml_attrs, CRM_META "_" PCMK__META_CLEAR_FAILURE_OP); crm_element_value_ms(xml_attrs, CRM_META "_" PCMK__META_CLEAR_FAILURE_INTERVAL, &interval_ms); } } uname = crm_element_value(xml_op, PCMK__META_ON_NODE); if ((rsc == NULL) || (uname == NULL)) { crm_log_xml_warn(stored_msg, "invalid failcount op"); return I_NULL; } if (crm_element_value(xml_op, PCMK__XA_ROUTER_NODE)) { is_remote_node = TRUE; } crm_debug("Clearing failures for %s-interval %s on %s " "from attribute manager, CIB, and executor state", pcmk__readable_interval(interval_ms), rsc, uname); if (interval_ms) { interval_spec = crm_strdup_printf("%ums", interval_ms); } update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node); free(interval_spec); controld_cib_delete_last_failure(rsc, uname, op, interval_ms); lrm_clear_last_failure(rsc, uname, op, interval_ms); return I_NULL; } static enum crmd_fsa_input handle_lrm_delete(xmlNode *stored_msg) { const char *mode = NULL; xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); CRM_CHECK(msg_data != NULL, return I_NULL); /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to * relay the operation to the affected node, which will unregister the * resource from the local executor, clear the resource's history from the * CIB, and do some bookkeeping in the controller. * * However, if the affected node is offline, the client will specify * mode=PCMK__VALUE_CIB which means the controller receiving the operation * should clear the resource's history from the CIB and nothing else. This * is used to clear shutdown locks. */ mode = crm_element_value(msg_data, PCMK__XA_MODE); if (!pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) { // Relay to affected node crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else { // Delete CIB history locally (compare with do_lrm_delete()) const char *from_sys = NULL; const char *user_name = NULL; const char *rsc_id = NULL; const char *node = NULL; xmlNode *rsc_xml = NULL; int rc = pcmk_rc_ok; rsc_xml = pcmk__xe_first_child(msg_data, PCMK_XE_PRIMITIVE, NULL, NULL); CRM_CHECK(rsc_xml != NULL, return I_NULL); rsc_id = pcmk__xe_id(rsc_xml); from_sys = crm_element_value(stored_msg, PCMK__XA_CRM_SYS_FROM); node = crm_element_value(msg_data, PCMK__META_ON_NODE); user_name = pcmk__update_acl_user(stored_msg, PCMK__XA_CRM_USER, NULL); crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s " "(clearing CIB resource history only)", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : "")); rc = controld_delete_resource_history(rsc_id, node, user_name, cib_dryrun|cib_sync_call); if (rc == pcmk_rc_ok) { rc = controld_delete_resource_history(rsc_id, node, user_name, crmd_cib_smart_opt()); } /* Notify client. Also notify tengine if mode=PCMK__VALUE_CIB and * op=CRM_OP_LRM_DELETE. */ if (from_sys) { lrmd_event_data_t *op = NULL; const char *from_host = crm_element_value(stored_msg, PCMK__XA_SRC); const char *transition; if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) { transition = crm_element_value(msg_data, PCMK__XA_TRANSITION_KEY); } else { transition = crm_element_value(stored_msg, PCMK__XA_TRANSITION_KEY); } crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "local node"), rsc_id, ((rc == pcmk_rc_ok)? "" : " not")); op = lrmd_new_event(rsc_id, PCMK_ACTION_DELETE, 0); op->type = lrmd_event_exec_complete; op->user_data = pcmk__str_copy(pcmk__s(transition, FAKE_TE_ID)); op->params = pcmk__strkey_table(free, free); pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); controld_rc2event(op, rc); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } return I_NULL; } } /*! * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_remote_state(const xmlNode *msg) { const char *conn_host = NULL; const char *remote_uname = pcmk__xe_id(msg); crm_node_t *remote_peer; bool remote_is_up = false; int rc = pcmk_rc_ok; rc = pcmk__xe_get_bool_attr(msg, PCMK__XA_IN_CCM, &remote_is_up); CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL); - remote_peer = crm_remote_peer_get(remote_uname); + remote_peer = pcmk__cluster_lookup_remote_node(remote_uname); CRM_CHECK(remote_peer, return I_NULL); pcmk__update_peer_state(__func__, remote_peer, remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST, 0); conn_host = crm_element_value(msg, PCMK__XA_CONNECTION_HOST); if (conn_host) { pcmk__str_update(&remote_peer->conn_host, conn_host); } else if (remote_peer->conn_host) { free(remote_peer->conn_host); remote_peer->conn_host = NULL; } return I_NULL; } /*! * \brief Handle a CRM_OP_PING message * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_ping(const xmlNode *msg) { const char *value = NULL; xmlNode *ping = NULL; xmlNode *reply = NULL; // Build reply ping = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE); value = crm_element_value(msg, PCMK__XA_CRM_SYS_TO); crm_xml_add(ping, PCMK__XA_CRM_SUBSYSTEM, value); // Add controller state value = fsa_state2string(controld_globals.fsa_state); crm_xml_add(ping, PCMK__XA_CRMD_STATE, value); crm_notice("Current ping state: %s", value); // CTS needs this // Add controller health // @TODO maybe do some checks to determine meaningful status crm_xml_add(ping, PCMK_XA_RESULT, "ok"); // Send reply reply = create_reply(msg, ping); free_xml(ping); if (reply != NULL) { (void) relay_message(reply, TRUE); free_xml(reply); } // Nothing further to do return I_NULL; } /*! * \brief Handle a PCMK__CONTROLD_CMD_NODES message * * \param[in] request Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_node_list(const xmlNode *request) { GHashTableIter iter; crm_node_t *node = NULL; xmlNode *reply = NULL; xmlNode *reply_data = NULL; // Create message data for reply reply_data = pcmk__xe_create(NULL, PCMK_XE_NODES); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { xmlNode *xml = pcmk__xe_create(reply_data, PCMK_XE_NODE); crm_xml_add_ll(xml, PCMK_XA_ID, (long long) node->id); // uint32_t crm_xml_add(xml, PCMK_XA_UNAME, node->uname); crm_xml_add(xml, PCMK__XA_IN_CCM, node->state); } // Create and send reply reply = create_reply(request, reply_data); free_xml(reply_data); if (reply) { (void) relay_message(reply, TRUE); free_xml(reply); } // Nothing further to do return I_NULL; } /*! * \brief Handle a CRM_OP_NODE_INFO request * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_node_info_request(const xmlNode *msg) { const char *value = NULL; crm_node_t *node = NULL; int node_id = 0; xmlNode *reply = NULL; xmlNode *reply_data = NULL; // Build reply reply_data = pcmk__xe_create(NULL, PCMK_XE_NODE); crm_xml_add(reply_data, PCMK__XA_CRM_SUBSYSTEM, CRM_SYSTEM_CRMD); // Add whether current partition has quorum pcmk__xe_set_bool_attr(reply_data, PCMK_XA_HAVE_QUORUM, pcmk_is_set(controld_globals.flags, controld_has_quorum)); // Check whether client requested node info by ID and/or name crm_element_value_int(msg, PCMK_XA_ID, &node_id); if (node_id < 0) { node_id = 0; } value = crm_element_value(msg, PCMK_XA_UNAME); // Default to local node if none given if ((node_id == 0) && (value == NULL)) { value = controld_globals.our_nodename; } node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any); if (node) { crm_xml_add(reply_data, PCMK_XA_ID, node->uuid); crm_xml_add(reply_data, PCMK_XA_UNAME, node->uname); crm_xml_add(reply_data, PCMK_XA_CRMD, node->state); pcmk__xe_set_bool_attr(reply_data, PCMK_XA_REMOTE_NODE, pcmk_is_set(node->flags, crm_remote_node)); } // Send reply reply = create_reply(msg, reply_data); free_xml(reply_data); if (reply != NULL) { (void) relay_message(reply, TRUE); free_xml(reply); } // Nothing further to do return I_NULL; } static void verify_feature_set(xmlNode *msg) { const char *dc_version = crm_element_value(msg, PCMK_XA_CRM_FEATURE_SET); if (dc_version == NULL) { /* All we really know is that the DC feature set is older than 3.1.0, * but that's also all that really matters. */ dc_version = "3.0.14"; } if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) { crm_trace("Local feature set (%s) is compatible with DC's (%s)", CRM_FEATURE_SET, dc_version); } else { crm_err("Local feature set (%s) is incompatible with DC's (%s)", CRM_FEATURE_SET, dc_version); // Nothing is likely to improve without administrator involvement controld_set_fsa_input_flags(R_STAYDOWN); crmd_exit(CRM_EX_FATAL); } } // DC gets own shutdown all-clear static enum crmd_fsa_input handle_shutdown_self_ack(xmlNode *stored_msg) { const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC); if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { // The expected case -- we initiated own shutdown sequence crm_info("Shutting down controller"); return I_STOP; } if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) { // Must be logic error -- DC confirming its own unrequested shutdown crm_err("Shutting down controller immediately due to " "unexpected shutdown confirmation"); return I_TERMINATE; } if (controld_globals.fsa_state != S_STOPPING) { // Shouldn't happen -- non-DC confirming unrequested shutdown crm_err("Starting new DC election because %s is " "confirming shutdown we did not request", (host_from? host_from : "another node")); return I_ELECTION; } // Shouldn't happen, but we are already stopping anyway crm_debug("Ignoring unexpected shutdown confirmation from %s", (host_from? host_from : "another node")); return I_NULL; } // Non-DC gets shutdown all-clear from DC static enum crmd_fsa_input handle_shutdown_ack(xmlNode *stored_msg) { const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC); if (host_from == NULL) { crm_warn("Ignoring shutdown request without origin specified"); return I_NULL; } if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_null_matches|pcmk__str_casei)) { if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting down controller after confirmation from %s", host_from); } else { crm_err("Shutting down controller after unexpected " "shutdown request from %s", host_from); controld_set_fsa_input_flags(R_STAYDOWN); } return I_STOP; } crm_warn("Ignoring shutdown request from %s because DC is %s", host_from, controld_globals.dc_name); return I_NULL; } static enum crmd_fsa_input handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) { xmlNode *msg = NULL; const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK); /* Optimize this for the DC - it has the most to do */ crm_log_xml_trace(stored_msg, "request"); if (op == NULL) { crm_warn("Ignoring request without " PCMK__XA_CRM_TASK); return I_NULL; } if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { const char *from = crm_element_value(stored_msg, PCMK__XA_SRC); crm_node_t *node = pcmk__search_node_caches(0, from, pcmk__node_search_cluster); pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); if(AM_I_DC == FALSE) { return I_NULL; /* Done */ } } /*========== DC-Only Actions ==========*/ if (AM_I_DC) { if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { return handle_shutdown_self_ack(stored_msg); } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { // Another controller wants to shut down its node return handle_shutdown_request(stored_msg); } } /*========== common actions ==========*/ if (strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __func__); } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) { /* a remote connection host is letting us know the node state */ return handle_remote_state(stored_msg); } else if (strcmp(op, CRM_OP_THROTTLE) == 0) { throttle_update(stored_msg); if (AM_I_DC && (controld_globals.transition_graph != NULL) && !controld_globals.transition_graph->complete) { crm_debug("The throttle changed. Trigger a graph."); trigger_graph(); } return I_NULL; } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { return handle_failcount_op(stored_msg); } else if (strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __func__); /* Sometimes we _must_ go into S_ELECTION */ if (controld_globals.fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { verify_feature_set(stored_msg); crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, PCMK__XA_JOIN_ID)); return I_JOIN_OFFER; } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, PCMK__XA_JOIN_ID)); return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { return handle_lrm_delete(stored_msg); } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT || (strcmp(op, CRM_OP_REPROBE) == 0)) { crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if (strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if (strcmp(op, CRM_OP_PING) == 0) { return handle_ping(stored_msg); } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) { return handle_node_info_request(stored_msg); } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { int id = 0; const char *name = NULL; crm_element_value_int(stored_msg, PCMK_XA_ID, &id); name = crm_element_value(stored_msg, PCMK_XA_UNAME); if(cause == C_IPC_MESSAGE) { msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { crm_err("Could not instruct peers to remove references to node %s/%u", name, id); } else { crm_notice("Instructing peers to remove references to node %s/%u", name, id); } free_xml(msg); } else { - reap_crm_member(id, name); + pcmk__cluster_forget_cluster_node(id, name); /* If we're forgetting this node, also forget any failures to fence * it, so we don't carry that over to any node added later with the * same name. */ st_fail_count_reset(name); } } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) { xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); remote_ra_process_maintenance_nodes(xml); } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) { return handle_node_list(stored_msg); /*========== (NOT_DC)-Only Actions ==========*/ } else if (!AM_I_DC) { if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { return handle_shutdown_ack(stored_msg); } } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); } return I_NULL; } static void handle_response(xmlNode *stored_msg) { const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK); crm_log_xml_trace(stored_msg, "reply"); if (op == NULL) { crm_warn("Ignoring reply without " PCMK__XA_CRM_TASK); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { // Check whether scheduler answer been superseded by subsequent request const char *msg_ref = crm_element_value(stored_msg, PCMK_XA_REFERENCE); if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, pcmk__str_none)) { ha_msg_input_t fsa_input; controld_stop_sched_timer(); fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if (strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC ? "DC" : "controller"); } } static enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/procedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC); if (host_from == NULL) { /* we're shutting down and the DC */ host_from = controld_globals.our_nodename; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(controld_globals.fsa_state)); crm_log_xml_trace(stored_msg, "message"); now_s = pcmk__ttoa(time(NULL)); update_attrd(host_from, PCMK__NODE_ATTR_SHUTDOWN, now_s, NULL, FALSE); free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } static void send_msg_via_ipc(xmlNode * msg, const char *sys) { pcmk__client_t *client_channel = NULL; CRM_CHECK(sys != NULL, return); client_channel = pcmk__find_client_by_id(sys); if (crm_element_value(msg, PCMK__XA_SRC) == NULL) { crm_xml_add(msg, PCMK__XA_SRC, controld_globals.our_nodename); } if (client_channel != NULL) { /* Transient clients such as crmadmin */ pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event); } else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); process_te_message(msg, data); } else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; xmlNode *wrapper = NULL; fsa_input.msg = msg; wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); fsa_input.xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __func__; fsa_data.data_type = fsa_dt_ha_msg; do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state, I_MESSAGE, &fsa_data); } else if (crmd_is_proxy_session(sys)) { crmd_proxy_send(sys, msg); } else { crm_info("Received invalid request: unknown subsystem '%s'", sys); } } void delete_ha_msg_input(ha_msg_input_t * orig) { if (orig == NULL) { return; } free_xml(orig->msg); free(orig); } /*! * \internal * \brief Notify the cluster of a remote node state change * * \param[in] node_name Node's name * \param[in] node_up true if node is up, false if down */ void broadcast_remote_state_message(const char *node_name, bool node_up) { xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); crm_info("Notifying cluster of Pacemaker Remote node %s %s", node_name, node_up? "coming up" : "going down"); crm_xml_add(msg, PCMK_XA_ID, node_name); pcmk__xe_set_bool_attr(msg, PCMK__XA_IN_CCM, node_up); if (node_up) { crm_xml_add(msg, PCMK__XA_CONNECTION_HOST, controld_globals.our_nodename); } send_cluster_message(NULL, crm_msg_crmd, msg, TRUE); free_xml(msg); } diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c index 6e25103a9d..83b5d2bb1c 100644 --- a/daemons/controld/controld_remote_ra.c +++ b/daemons/controld/controld_remote_ra.c @@ -1,1471 +1,1471 @@ /* * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #define REMOTE_LRMD_RA "remote" /* The max start timeout before cmd retry */ #define MAX_START_TIMEOUT_MS 10000 #define cmd_set_flags(cmd, flags_to_set) do { \ (cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Remote command", (cmd)->rsc_id, (cmd)->status, \ (flags_to_set), #flags_to_set); \ } while (0) #define cmd_clear_flags(cmd, flags_to_clear) do { \ (cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Remote command", (cmd)->rsc_id, (cmd)->status, \ (flags_to_clear), #flags_to_clear); \ } while (0) enum remote_cmd_status { cmd_reported_success = (1 << 0), cmd_cancel = (1 << 1), }; typedef struct remote_ra_cmd_s { /*! the local node the cmd is issued from */ char *owner; /*! the remote node the cmd is executed on */ char *rsc_id; /*! the action to execute */ char *action; /*! some string the client wants us to give it back */ char *userdata; /*! start delay in ms */ int start_delay; /*! timer id used for start delay. */ int delay_id; /*! timeout in ms for cmd */ int timeout; int remaining_timeout; /*! recurring interval in ms */ guint interval_ms; /*! interval timer id */ int interval_id; int monitor_timeout_id; int takeover_timeout_id; /*! action parameters */ lrmd_key_value_t *params; pcmk__action_result_t result; int call_id; time_t start_time; uint32_t status; } remote_ra_cmd_t; #define lrm_remote_set_flags(lrm_state, flags_to_set) do { \ lrm_state_t *lrm = (lrm_state); \ remote_ra_data_t *ra = lrm->remote_ra_data; \ ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \ lrm->node_name, ra->status, \ (flags_to_set), #flags_to_set); \ } while (0) #define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \ lrm_state_t *lrm = (lrm_state); \ remote_ra_data_t *ra = lrm->remote_ra_data; \ ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \ lrm->node_name, ra->status, \ (flags_to_clear), #flags_to_clear); \ } while (0) enum remote_status { expect_takeover = (1 << 0), takeover_complete = (1 << 1), remote_active = (1 << 2), /* Maintenance mode is difficult to determine from the controller's context, * so we have it signalled back with the transition from the scheduler. */ remote_in_maint = (1 << 3), /* Similar for whether we are controlling a guest node or remote node. * Fortunately there is a meta-attribute in the transition already and * as the situation doesn't change over time we can use the * resource start for noting down the information for later use when * the attributes aren't at hand. */ controlling_guest = (1 << 4), }; typedef struct remote_ra_data_s { crm_trigger_t *work; remote_ra_cmd_t *cur_cmd; GList *cmds; GList *recurring_cmds; uint32_t status; } remote_ra_data_t; static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms); static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd); static GList *fail_all_monitor_cmds(GList * list); static void free_cmd(gpointer user_data) { remote_ra_cmd_t *cmd = user_data; if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->interval_id) { g_source_remove(cmd->interval_id); } if (cmd->monitor_timeout_id) { g_source_remove(cmd->monitor_timeout_id); } if (cmd->takeover_timeout_id) { g_source_remove(cmd->takeover_timeout_id); } free(cmd->owner); free(cmd->rsc_id); free(cmd->action); free(cmd->userdata); pcmk__reset_result(&(cmd->result)); lrmd_key_value_freeall(cmd->params); free(cmd); } static int generate_callid(void) { static int remote_ra_callid = 0; remote_ra_callid++; if (remote_ra_callid <= 0) { remote_ra_callid = 1; } return remote_ra_callid; } static gboolean recurring_helper(gpointer data) { remote_ra_cmd_t *cmd = data; lrm_state_t *connection_rsc = NULL; cmd->interval_id = 0; connection_rsc = lrm_state_find(cmd->rsc_id); if (connection_rsc && connection_rsc->remote_ra_data) { remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd); ra_data->cmds = g_list_append(ra_data->cmds, cmd); mainloop_set_trigger(ra_data->work); } return FALSE; } static gboolean start_delay_helper(gpointer data) { remote_ra_cmd_t *cmd = data; lrm_state_t *connection_rsc = NULL; cmd->delay_id = 0; connection_rsc = lrm_state_find(cmd->rsc_id); if (connection_rsc && connection_rsc->remote_ra_data) { remote_ra_data_t *ra_data = connection_rsc->remote_ra_data; mainloop_set_trigger(ra_data->work); } return FALSE; } static bool should_purge_attributes(crm_node_t *node) { bool purge = true; crm_node_t *conn_node = NULL; lrm_state_t *connection_rsc = NULL; if (!node->conn_host) { return purge; } /* Get the node that was hosting the remote connection resource from the * peer cache. That's the one we really care about here. */ conn_node = pcmk__get_node(0, node->conn_host, NULL, pcmk__node_search_cluster); if (conn_node == NULL) { return purge; } /* Check the uptime of connection_rsc. If it hasn't been running long * enough, set purge=true. "Long enough" means it started running earlier * than the timestamp when we noticed it went away in the first place. */ connection_rsc = lrm_state_find(node->uname); if (connection_rsc != NULL) { lrmd_t *lrm = connection_rsc->conn; time_t uptime = lrmd__uptime(lrm); time_t now = time(NULL); /* Add 20s of fuzziness to give corosync a while to notice the remote * host is gone. On various error conditions (failure to get uptime, * peer_lost isn't set) we default to purging. */ if (uptime > 0 && conn_node->peer_lost > 0 && uptime + 20 >= now - conn_node->peer_lost) { purge = false; } } return purge; } static enum controld_section_e section_to_delete(bool purge) { if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { if (purge) { return controld_section_all_unlocked; } else { return controld_section_lrm_unlocked; } } else { if (purge) { return controld_section_all; } else { return controld_section_lrm; } } } static void purge_remote_node_attrs(int call_opt, crm_node_t *node) { bool purge = should_purge_attributes(node); enum controld_section_e section = section_to_delete(purge); /* Purge node from attrd's memory */ if (purge) { update_attrd_remote_node_removed(node->uname, NULL); } controld_delete_node_state(node->uname, section, call_opt); } /*! * \internal * \brief Handle cluster communication related to pacemaker_remote node joining * * \param[in] node_name Name of newly integrated pacemaker_remote node */ static void remote_node_up(const char *node_name) { int call_opt; xmlNode *update, *state; crm_node_t *node; lrm_state_t *connection_rsc = NULL; CRM_CHECK(node_name != NULL, return); crm_info("Announcing Pacemaker Remote node %s", node_name); call_opt = crmd_cib_smart_opt(); /* Delete node's probe_complete attribute. This serves two purposes: * * - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it * - deleting it (or any attribute for that matter) here ensures the * attribute manager learns the node is remote */ update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); /* Ensure node is in the remote peer cache with member status */ - node = crm_remote_peer_get(node_name); + node = pcmk__cluster_lookup_remote_node(node_name); CRM_CHECK(node != NULL, return); purge_remote_node_attrs(call_opt, node); pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); /* Apply any start state that we were given from the environment on the * remote node. */ connection_rsc = lrm_state_find(node->uname); if (connection_rsc != NULL) { lrmd_t *lrm = connection_rsc->conn; const char *start_state = lrmd__node_start_state(lrm); if (start_state) { set_join_state(start_state, node->uname, node->uuid, true); } } /* pacemaker_remote nodes don't participate in the membership layer, * so cluster nodes don't automatically get notified when they come and go. * We send a cluster message to the DC, and update the CIB node state entry, * so the DC will get it sooner (via message) or later (via CIB refresh), * and any other interested parties can query the CIB. */ broadcast_remote_state_message(node_name, true); update = pcmk__xe_create(NULL, PCMK_XE_STATUS); state = create_node_state_update(node, node_update_cluster, update, __func__); /* Clear the PCMK__XA_NODE_FENCED flag in the node state. If the node ever * needs to be fenced, this flag will allow various actions to determine * whether the fencing has happened yet. */ crm_xml_add(state, PCMK__XA_NODE_FENCED, "0"); /* TODO: If the remote connection drops, and this (async) CIB update either * failed or has not yet completed, later actions could mistakenly think the * node has already been fenced (if the PCMK__XA_NODE_FENCED attribute was * previously set, because it won't have been cleared). This could prevent * actual fencing or allow recurring monitor failures to be cleared too * soon. Ideally, we wouldn't rely on the CIB for the fenced status. */ controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL); free_xml(update); } enum down_opts { DOWN_KEEP_LRM, DOWN_ERASE_LRM }; /*! * \internal * \brief Handle cluster communication related to pacemaker_remote node leaving * * \param[in] node_name Name of lost node * \param[in] opts Whether to keep or erase LRM history */ static void remote_node_down(const char *node_name, const enum down_opts opts) { xmlNode *update; int call_opt = crmd_cib_smart_opt(); crm_node_t *node; /* Purge node from attrd's memory */ update_attrd_remote_node_removed(node_name, NULL); /* Normally, only node attributes should be erased, and the resource history * should be kept until the node comes back up. However, after a successful * fence, we want to clear the history as well, so we don't think resources * are still running on the node. */ if (opts == DOWN_ERASE_LRM) { controld_delete_node_state(node_name, controld_section_all, call_opt); } else { controld_delete_node_state(node_name, controld_section_attrs, call_opt); } /* Ensure node is in the remote peer cache with lost state */ - node = crm_remote_peer_get(node_name); + node = pcmk__cluster_lookup_remote_node(node_name); CRM_CHECK(node != NULL, return); pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0); /* Notify DC */ broadcast_remote_state_message(node_name, false); /* Update CIB node state */ update = pcmk__xe_create(NULL, PCMK_XE_STATUS); create_node_state_update(node, node_update_cluster, update, __func__); controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL); free_xml(update); } /*! * \internal * \brief Handle effects of a remote RA command on node state * * \param[in] cmd Completed remote RA command */ static void check_remote_node_state(const remote_ra_cmd_t *cmd) { /* Only successful actions can change node state */ if (!pcmk__result_ok(&(cmd->result))) { return; } if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) { remote_node_up(cmd->rsc_id); } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM, pcmk__str_casei)) { /* After a successful migration, we don't need to do remote_node_up() * because the DC already knows the node is up, and we don't want to * clear LRM history etc. We do need to add the remote node to this * host's remote peer cache, because (unless it happens to be DC) * it hasn't been tracking the remote node, and other code relies on * the cache to distinguish remote nodes from unseen cluster nodes. */ - crm_node_t *node = crm_remote_peer_get(cmd->rsc_id); + crm_node_t *node = pcmk__cluster_lookup_remote_node(cmd->rsc_id); CRM_CHECK(node != NULL, return); pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id); remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL; if (ra_data) { if (!pcmk_is_set(ra_data->status, takeover_complete)) { /* Stop means down if we didn't successfully migrate elsewhere */ remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM); } else if (AM_I_DC == FALSE) { /* Only the connection host and DC track node state, * so if the connection migrated elsewhere and we aren't DC, * un-cache the node, so we don't have stale info */ - crm_remote_peer_cache_remove(cmd->rsc_id); + pcmk__cluster_forget_remote_node(cmd->rsc_id); } } } /* We don't do anything for successful monitors, which is correct for * routine recurring monitors, and for monitors on nodes where the * connection isn't supposed to be (the cluster will stop the connection in * that case). However, if the initial probe finds the connection already * active on the node where we want it, we probably should do * remote_node_up(). Unfortunately, we can't distinguish that case here. * Given that connections have to be initiated by the cluster, the chance of * that should be close to zero. */ } static void report_remote_ra_result(remote_ra_cmd_t * cmd) { lrmd_event_data_t op = { 0, }; check_remote_node_state(cmd); op.type = lrmd_event_exec_complete; op.rsc_id = cmd->rsc_id; op.op_type = cmd->action; op.user_data = cmd->userdata; op.timeout = cmd->timeout; op.interval_ms = cmd->interval_ms; op.t_run = (unsigned int) cmd->start_time; op.t_rcchange = (unsigned int) cmd->start_time; lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status, cmd->result.exit_reason); if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) { op.t_rcchange = (unsigned int) time(NULL); /* This edge case will likely never ever occur, but if it does the * result is that a failure will not be processed correctly. This is only * remotely possible because we are able to detect a connection resource's tcp * connection has failed at any moment after start has completed. The actual * recurring operation is just a connectivity ping. * * basically, we are not guaranteed that the first successful monitor op and * a subsequent failed monitor op will not occur in the same timestamp. We have to * make it look like the operations occurred at separate times though. */ if (op.t_rcchange == op.t_run) { op.t_rcchange++; } } if (cmd->params) { lrmd_key_value_t *tmp; op.params = pcmk__strkey_table(free, free); for (tmp = cmd->params; tmp; tmp = tmp->next) { pcmk__insert_dup(op.params, tmp->key, tmp->value); } } op.call_id = cmd->call_id; op.remote_nodename = cmd->owner; lrm_op_callback(&op); if (op.params) { g_hash_table_destroy(op.params); } lrmd__reset_result(&op); } static void update_remaining_timeout(remote_ra_cmd_t * cmd) { cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000; } static gboolean retry_start_cmd_cb(gpointer data) { lrm_state_t *lrm_state = data; remote_ra_data_t *ra_data = lrm_state->remote_ra_data; remote_ra_cmd_t *cmd = NULL; int rc = ETIME; if (!ra_data || !ra_data->cur_cmd) { return FALSE; } cmd = ra_data->cur_cmd; if (!pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, PCMK_ACTION_MIGRATE_FROM, NULL)) { return FALSE; } update_remaining_timeout(cmd); if (cmd->remaining_timeout > 0) { rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout); } else { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Not enough time remains to retry remote connection"); } if (rc != pcmk_rc_ok) { report_remote_ra_result(cmd); if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } ra_data->cur_cmd = NULL; free_cmd(cmd); } else { /* wait for connection event */ } return FALSE; } static gboolean connection_takeover_timeout_cb(gpointer data) { lrm_state_t *lrm_state = NULL; remote_ra_cmd_t *cmd = data; crm_info("takeover event timed out for node %s", cmd->rsc_id); cmd->takeover_timeout_id = 0; lrm_state = lrm_state_find(cmd->rsc_id); handle_remote_ra_stop(lrm_state, cmd); free_cmd(cmd); return FALSE; } static gboolean monitor_timeout_cb(gpointer data) { lrm_state_t *lrm_state = NULL; remote_ra_cmd_t *cmd = data; lrm_state = lrm_state_find(cmd->rsc_id); crm_info("Timed out waiting for remote poke response from %s%s", cmd->rsc_id, (lrm_state? "" : " (no LRM state)")); cmd->monitor_timeout_id = 0; pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Remote executor did not respond"); if (lrm_state && lrm_state->remote_ra_data) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; if (ra_data->cur_cmd == cmd) { ra_data->cur_cmd = NULL; } if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } } report_remote_ra_result(cmd); free_cmd(cmd); if(lrm_state) { lrm_state_disconnect(lrm_state); } return FALSE; } static void synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type) { lrmd_event_data_t op = { 0, }; if (lrm_state == NULL) { /* if lrm_state not given assume local */ lrm_state = lrm_state_find(controld_globals.our_nodename); } CRM_ASSERT(lrm_state != NULL); op.type = lrmd_event_exec_complete; op.rsc_id = rsc_id; op.op_type = op_type; op.t_run = (unsigned int) time(NULL); op.t_rcchange = op.t_run; op.call_id = generate_callid(); lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); process_lrm_event(lrm_state, &op, NULL, NULL); } void remote_lrm_op_callback(lrmd_event_data_t * op) { gboolean cmd_handled = FALSE; lrm_state_t *lrm_state = NULL; remote_ra_data_t *ra_data = NULL; remote_ra_cmd_t *cmd = NULL; crm_debug("Processing '%s%s%s' event on remote connection to %s: %s " "(%d) status=%s (%d)", (op->op_type? op->op_type : ""), (op->op_type? " " : ""), lrmd_event_type2str(op->type), op->remote_nodename, services_ocf_exitcode_str(op->rc), op->rc, pcmk_exec_status_str(op->op_status), op->op_status); lrm_state = lrm_state_find(op->remote_nodename); if (!lrm_state || !lrm_state->remote_ra_data) { crm_debug("No state information found for remote connection event"); return; } ra_data = lrm_state->remote_ra_data; if (op->type == lrmd_event_new_client) { // Another client has connected to the remote daemon if (pcmk_is_set(ra_data->status, expect_takeover)) { // Great, we knew this was coming lrm_remote_clear_flags(lrm_state, expect_takeover); lrm_remote_set_flags(lrm_state, takeover_complete); } else { crm_err("Disconnecting from Pacemaker Remote node %s due to " "unexpected client takeover", op->remote_nodename); /* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */ /* Do not free lrm_state->conn yet. */ /* It'll be freed in the following stop action. */ lrm_state_disconnect_only(lrm_state); } return; } /* filter all EXEC events up */ if (op->type == lrmd_event_exec_complete) { if (pcmk_is_set(ra_data->status, takeover_complete)) { crm_debug("ignoring event, this connection is taken over by another node"); } else { lrm_op_callback(op); } return; } if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) { if (!pcmk_is_set(ra_data->status, remote_active)) { crm_debug("Disconnection from Pacemaker Remote node %s complete", lrm_state->node_name); } else if (!remote_ra_is_in_maintenance(lrm_state)) { crm_err("Lost connection to Pacemaker Remote node %s", lrm_state->node_name); ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds); ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds); } else { crm_notice("Unmanaged Pacemaker Remote node %s disconnected", lrm_state->node_name); /* Do roughly what a 'stop' on the remote-resource would do */ handle_remote_ra_stop(lrm_state, NULL); remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM); /* now fake the reply of a successful 'stop' */ synthesize_lrmd_success(NULL, lrm_state->node_name, PCMK_ACTION_STOP); } return; } if (!ra_data->cur_cmd) { crm_debug("no event to match"); return; } cmd = ra_data->cur_cmd; /* Start actions and migrate from actions complete after connection * comes back to us. */ if ((op->type == lrmd_event_connect) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, PCMK_ACTION_MIGRATE_FROM, NULL)) { if (op->connection_rc < 0) { update_remaining_timeout(cmd); if ((op->connection_rc == -ENOKEY) || (op->connection_rc == -EKEYREJECTED)) { // Hard error, don't retry pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM, PCMK_EXEC_ERROR, pcmk_strerror(op->connection_rc)); } else if (cmd->remaining_timeout > 3000) { crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout); g_timeout_add(1000, retry_start_cmd_cb, lrm_state); return; } else { crm_trace("can't reschedule start, remaining timeout too small %d", cmd->remaining_timeout); pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "%s without enough time to retry", pcmk_strerror(op->connection_rc)); } } else { lrm_state_reset_tables(lrm_state, TRUE); pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); lrm_remote_set_flags(lrm_state, remote_active); } crm_debug("Remote connection event matched %s action", cmd->action); report_remote_ra_result(cmd); cmd_handled = TRUE; } else if ((op->type == lrmd_event_poke) && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { if (cmd->monitor_timeout_id) { g_source_remove(cmd->monitor_timeout_id); cmd->monitor_timeout_id = 0; } /* Only report success the first time, after that only worry about failures. * For this function, if we get the poke pack, it is always a success. Pokes * only fail if the send fails, or the response times out. */ if (!pcmk_is_set(cmd->status, cmd_reported_success)) { pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); report_remote_ra_result(cmd); cmd_set_flags(cmd, cmd_reported_success); } crm_debug("Remote poke event matched %s action", cmd->action); /* success, keep rescheduling if interval is present. */ if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) { ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd); cmd->interval_id = g_timeout_add(cmd->interval_ms, recurring_helper, cmd); cmd = NULL; /* prevent free */ } cmd_handled = TRUE; } else if ((op->type == lrmd_event_disconnect) && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { if (pcmk_is_set(ra_data->status, remote_active) && !pcmk_is_set(cmd->status, cmd_cancel)) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Remote connection unexpectedly dropped " "during monitor"); report_remote_ra_result(cmd); crm_err("Remote connection to %s unexpectedly dropped during monitor", lrm_state->node_name); } cmd_handled = TRUE; } else if ((op->type == lrmd_event_new_client) && pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) { handle_remote_ra_stop(lrm_state, cmd); cmd_handled = TRUE; } else { crm_debug("Event did not match %s action", ra_data->cur_cmd->action); } if (cmd_handled) { ra_data->cur_cmd = NULL; if (ra_data->cmds) { mainloop_set_trigger(ra_data->work); } free_cmd(cmd); } } static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd) { remote_ra_data_t *ra_data = NULL; CRM_ASSERT(lrm_state); ra_data = lrm_state->remote_ra_data; if (!pcmk_is_set(ra_data->status, takeover_complete)) { /* delete pending ops when ever the remote connection is intentionally stopped */ g_hash_table_remove_all(lrm_state->active_ops); } else { /* we no longer hold the history if this connection has been migrated, * however, we keep metadata cache for future use */ lrm_state_reset_tables(lrm_state, FALSE); } lrm_remote_clear_flags(lrm_state, remote_active); lrm_state_disconnect(lrm_state); if (ra_data->cmds) { g_list_free_full(ra_data->cmds, free_cmd); } if (ra_data->recurring_cmds) { g_list_free_full(ra_data->recurring_cmds, free_cmd); } ra_data->cmds = NULL; ra_data->recurring_cmds = NULL; ra_data->cur_cmd = NULL; if (cmd) { pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); report_remote_ra_result(cmd); } } // \return Standard Pacemaker return code static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms) { const char *server = NULL; lrmd_key_value_t *tmp = NULL; int port = 0; int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms; int rc = pcmk_rc_ok; for (tmp = cmd->params; tmp; tmp = tmp->next) { if (pcmk__strcase_any_of(tmp->key, PCMK_REMOTE_RA_ADDR, PCMK_REMOTE_RA_SERVER, NULL)) { server = tmp->value; } else if (pcmk__str_eq(tmp->key, PCMK_REMOTE_RA_PORT, pcmk__str_none)) { port = atoi(tmp->value); } else if (pcmk__str_eq(tmp->key, CRM_META "_" PCMK__META_CONTAINER, pcmk__str_none)) { lrm_remote_set_flags(lrm_state, controlling_guest); } } rc = controld_connect_remote_executor(lrm_state, server, port, timeout_used); if (rc != pcmk_rc_ok) { pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Could not connect to Pacemaker Remote node %s: %s", lrm_state->node_name, pcmk_rc_str(rc)); } return rc; } static gboolean handle_remote_ra_exec(gpointer user_data) { int rc = 0; lrm_state_t *lrm_state = user_data; remote_ra_data_t *ra_data = lrm_state->remote_ra_data; remote_ra_cmd_t *cmd; GList *first = NULL; if (ra_data->cur_cmd) { /* still waiting on previous cmd */ return TRUE; } while (ra_data->cmds) { first = ra_data->cmds; cmd = first->data; if (cmd->delay_id) { /* still waiting for start delay timer to trip */ return TRUE; } ra_data->cmds = g_list_remove_link(ra_data->cmds, first); g_list_free_1(first); if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START, PCMK_ACTION_MIGRATE_FROM, NULL)) { lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete); if (handle_remote_ra_start(lrm_state, cmd, cmd->timeout) == pcmk_rc_ok) { /* take care of this later when we get async connection result */ crm_debug("Initiated async remote connection, %s action will complete after connect event", cmd->action); ra_data->cur_cmd = cmd; return TRUE; } report_remote_ra_result(cmd); } else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) { if (lrm_state_is_connected(lrm_state) == TRUE) { rc = lrm_state_poke_connection(lrm_state); if (rc < 0) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, pcmk_strerror(rc)); } } else { rc = -1; pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING, PCMK_EXEC_DONE, "Remote connection inactive"); } if (rc == 0) { crm_debug("Poked Pacemaker Remote at node %s, waiting for async response", cmd->rsc_id); ra_data->cur_cmd = cmd; cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd); return TRUE; } report_remote_ra_result(cmd); } else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) { if (pcmk_is_set(ra_data->status, expect_takeover)) { /* briefly wait on stop for the takeover event to occur. If the * takeover event does not occur during the wait period, that's fine. * It just means that the remote-node's lrm_status section is going to get * cleared which will require all the resources running in the remote-node * to be explicitly re-detected via probe actions. If the takeover does occur * successfully, then we can leave the status section intact. */ cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd); ra_data->cur_cmd = cmd; return TRUE; } handle_remote_ra_stop(lrm_state, cmd); } else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) { lrm_remote_clear_flags(lrm_state, takeover_complete); lrm_remote_set_flags(lrm_state, expect_takeover); pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); report_remote_ra_result(cmd); } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { /* Currently the only reloadable parameter is * PCMK_REMOTE_RA_RECONNECT_INTERVAL, which is only used by the * scheduler via the CIB, so reloads are a no-op. * * @COMPAT DC <2.1.0: We only need to check for "reload" in case * we're in a rolling upgrade with a DC scheduling "reload" instead * of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway, * so this would work for that purpose as well. */ pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); report_remote_ra_result(cmd); } free_cmd(cmd); } return TRUE; } static void remote_ra_data_init(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = NULL; if (lrm_state->remote_ra_data) { return; } ra_data = pcmk__assert_alloc(1, sizeof(remote_ra_data_t)); ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state); lrm_state->remote_ra_data = ra_data; } void remote_ra_cleanup(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; if (!ra_data) { return; } if (ra_data->cmds) { g_list_free_full(ra_data->cmds, free_cmd); } if (ra_data->recurring_cmds) { g_list_free_full(ra_data->recurring_cmds, free_cmd); } mainloop_destroy_trigger(ra_data->work); free(ra_data); lrm_state->remote_ra_data = NULL; } gboolean is_remote_lrmd_ra(const char *agent, const char *provider, const char *id) { if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) { return TRUE; } if ((id != NULL) && (lrm_state_find(id) != NULL) && !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) { return TRUE; } return FALSE; } lrmd_rsc_info_t * remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id) { lrmd_rsc_info_t *info = NULL; if ((lrm_state_find(rsc_id))) { info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t)); info->id = pcmk__str_copy(rsc_id); info->type = pcmk__str_copy(REMOTE_LRMD_RA); info->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_OCF); info->provider = pcmk__str_copy("pacemaker"); } return info; } static gboolean is_remote_ra_supported_action(const char *action) { return pcmk__str_any_of(action, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_MONITOR, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, PCMK_ACTION_RELOAD_AGENT, PCMK_ACTION_RELOAD, NULL); } static GList * fail_all_monitor_cmds(GList * list) { GList *rm_list = NULL; remote_ra_cmd_t *cmd = NULL; GList *gIter = NULL; for (gIter = list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { rm_list = g_list_append(rm_list, cmd); } } for (gIter = rm_list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Lost connection to remote executor"); crm_trace("Pre-emptively failing %s %s (interval=%u, %s)", cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata); report_remote_ra_result(cmd); list = g_list_remove(list, cmd); free_cmd(cmd); } /* frees only the list data, not the cmds */ g_list_free(rm_list); return list; } static GList * remove_cmd(GList * list, const char *action, guint interval_ms) { remote_ra_cmd_t *cmd = NULL; GList *gIter = NULL; for (gIter = list; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) { break; } cmd = NULL; } if (cmd) { list = g_list_remove(list, cmd); free_cmd(cmd); } return list; } int remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action, guint interval_ms) { lrm_state_t *connection_rsc = NULL; remote_ra_data_t *ra_data = NULL; connection_rsc = lrm_state_find(rsc_id); if (!connection_rsc || !connection_rsc->remote_ra_data) { return -EINVAL; } ra_data = connection_rsc->remote_ra_data; ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms); ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action, interval_ms); if (ra_data->cur_cmd && (ra_data->cur_cmd->interval_ms == interval_ms) && (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) { cmd_set_flags(ra_data->cur_cmd, cmd_cancel); } return 0; } static remote_ra_cmd_t * handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms, const char *userdata) { GList *gIter = NULL; remote_ra_cmd_t *cmd = NULL; /* there are 3 places a potential duplicate monitor operation * could exist. * 1. recurring_cmds list. where the op is waiting for its next interval * 2. cmds list, where the op is queued to get executed immediately * 3. cur_cmd, which means the monitor op is in flight right now. */ if (interval_ms == 0) { return NULL; } if (ra_data->cur_cmd && !pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) && (ra_data->cur_cmd->interval_ms == interval_ms) && pcmk__str_eq(ra_data->cur_cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { cmd = ra_data->cur_cmd; goto handle_dup; } for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { goto handle_dup; } } for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) { cmd = gIter->data; if ((cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) { goto handle_dup; } } return NULL; handle_dup: crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT, cmd->rsc_id, PCMK_ACTION_MONITOR, interval_ms); /* update the userdata */ if (userdata) { free(cmd->userdata); cmd->userdata = pcmk__str_copy(userdata); } /* if we've already reported success, generate a new call id */ if (pcmk_is_set(cmd->status, cmd_reported_success)) { cmd->start_time = time(NULL); cmd->call_id = generate_callid(); cmd_clear_flags(cmd, cmd_reported_success); } /* if we have an interval_id set, that means we are in the process of * waiting for this cmd's next interval. instead of waiting, cancel * the timer and execute the action immediately */ if (cmd->interval_id) { g_source_remove(cmd->interval_id); cmd->interval_id = 0; recurring_helper(cmd); } return cmd; } /*! * \internal * \brief Execute an action using the (internal) ocf:pacemaker:remote agent * * \param[in] lrm_state Executor state object for remote connection * \param[in] rsc_id Connection resource ID * \param[in] action Action to execute * \param[in] userdata String to copy and pass to execution callback * \param[in] interval_ms Action interval (in milliseconds) * \param[in] timeout_ms Action timeout (in milliseconds) * \param[in] start_delay_ms Delay (in milliseconds) before executing action * \param[in,out] params Connection resource parameters * \param[out] call_id Where to store call ID on success * * \return Standard Pacemaker return code * \note This takes ownership of \p params, which should not be used or freed * after calling this function. */ int controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout_ms, int start_delay_ms, lrmd_key_value_t *params, int *call_id) { lrm_state_t *connection_rsc = NULL; remote_ra_cmd_t *cmd = NULL; remote_ra_data_t *ra_data = NULL; *call_id = 0; CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL) && (userdata != NULL) && (call_id != NULL), lrmd_key_value_freeall(params); return EINVAL); if (!is_remote_ra_supported_action(action)) { lrmd_key_value_freeall(params); return EOPNOTSUPP; } connection_rsc = lrm_state_find(rsc_id); if (connection_rsc == NULL) { lrmd_key_value_freeall(params); return ENOTCONN; } remote_ra_data_init(connection_rsc); ra_data = connection_rsc->remote_ra_data; cmd = handle_dup_monitor(ra_data, interval_ms, userdata); if (cmd) { *call_id = cmd->call_id; lrmd_key_value_freeall(params); return pcmk_rc_ok; } cmd = pcmk__assert_alloc(1, sizeof(remote_ra_cmd_t)); cmd->owner = pcmk__str_copy(lrm_state->node_name); cmd->rsc_id = pcmk__str_copy(rsc_id); cmd->action = pcmk__str_copy(action); cmd->userdata = pcmk__str_copy(userdata); cmd->interval_ms = interval_ms; cmd->timeout = timeout_ms; cmd->start_delay = start_delay_ms; cmd->params = params; cmd->start_time = time(NULL); cmd->call_id = generate_callid(); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } ra_data->cmds = g_list_append(ra_data->cmds, cmd); mainloop_set_trigger(ra_data->work); *call_id = cmd->call_id; return pcmk_rc_ok; } /*! * \internal * \brief Immediately fail all monitors of a remote node, if proxied here * * \param[in] node_name Name of pacemaker_remote node */ void remote_ra_fail(const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (lrm_state && lrm_state_is_connected(lrm_state)) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; crm_info("Failing monitors on Pacemaker Remote node %s", node_name); ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds); ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds); } } /* A guest node fencing implied by host fencing looks like: * * * * * * * */ #define XPATH_PSEUDO_FENCE "/" PCMK__XE_PSEUDO_EVENT \ "[@" PCMK_XA_OPERATION "='stonith']/" PCMK__XE_DOWNED "/" PCMK_XE_NODE /*! * \internal * \brief Check a pseudo-action for Pacemaker Remote node side effects * * \param[in,out] xml XML of pseudo-action to check */ void remote_ra_process_pseudo(xmlNode *xml) { xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE); if (numXpathResults(search) == 1) { xmlNode *result = getXpathResult(search, 0); /* Normally, we handle the necessary side effects of a guest node stop * action when reporting the remote agent's result. However, if the stop * is implied due to fencing, it will be a fencing pseudo-event, and * there won't be a result to report. Handle that case here. * * This will result in a duplicate call to remote_node_down() if the * guest stop was real instead of implied, but that shouldn't hurt. * * There is still one corner case that isn't handled: if a guest node * isn't running any resources when its host is fenced, it will appear * to be cleanly stopped, so there will be no pseudo-fence, and our * peer cache state will be incorrect unless and until the guest is * recovered. */ if (result) { const char *remote = pcmk__xe_id(result); if (remote) { remote_node_down(remote, DOWN_ERASE_LRM); } } } freeXpathObject(search); } static void remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance) { xmlNode *update, *state; int call_opt; crm_node_t *node; call_opt = crmd_cib_smart_opt(); - node = crm_remote_peer_get(lrm_state->node_name); + node = pcmk__cluster_lookup_remote_node(lrm_state->node_name); CRM_CHECK(node != NULL, return); update = pcmk__xe_create(NULL, PCMK_XE_STATUS); state = create_node_state_update(node, node_update_none, update, __func__); crm_xml_add(state, PCMK__XA_NODE_IN_MAINTENANCE, (maintenance? "1" : "0")); if (controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL) == pcmk_rc_ok) { /* TODO: still not 100% sure that async update will succeed ... */ if (maintenance) { lrm_remote_set_flags(lrm_state, remote_in_maint); } else { lrm_remote_clear_flags(lrm_state, remote_in_maint); } } free_xml(update); } #define XPATH_PSEUDO_MAINTENANCE "//" PCMK__XE_PSEUDO_EVENT \ "[@" PCMK_XA_OPERATION "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \ PCMK__XE_MAINTENANCE /*! * \internal * \brief Check a pseudo-action holding updates for maintenance state * * \param[in,out] xml XML of pseudo-action to check */ void remote_ra_process_maintenance_nodes(xmlNode *xml) { xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE); if (numXpathResults(search) == 1) { xmlNode *node; int cnt = 0, cnt_remote = 0; for (node = pcmk__xe_first_child(getXpathResult(search, 0), PCMK_XE_NODE, NULL, NULL); node != NULL; node = pcmk__xe_next_same(node)) { lrm_state_t *lrm_state = lrm_state_find(pcmk__xe_id(node)); cnt++; if (lrm_state && lrm_state->remote_ra_data && pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) { const char *in_maint_s = NULL; int in_maint; cnt_remote++; in_maint_s = crm_element_value(node, PCMK__XA_NODE_IN_MAINTENANCE); pcmk__scan_min_int(in_maint_s, &in_maint, 0); remote_ra_maintenance(lrm_state, in_maint); } } crm_trace("Action holds %d nodes (%d remotes found) adjusting " PCMK_OPT_MAINTENANCE_MODE, cnt, cnt_remote); } freeXpathObject(search); } gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; return pcmk_is_set(ra_data->status, remote_in_maint); } gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state) { remote_ra_data_t *ra_data = lrm_state->remote_ra_data; return pcmk_is_set(ra_data->status, controlling_guest); } diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c index 95045e725f..0a1142d0ea 100644 --- a/daemons/controld/controld_throttle.c +++ b/daemons/controld/controld_throttle.c @@ -1,574 +1,574 @@ /* * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include /* These values don't need to be bits, but these particular values must be kept * for backward compatibility during rolling upgrades. */ enum throttle_state_e { throttle_none = 0x0000, throttle_low = 0x0001, throttle_med = 0x0010, throttle_high = 0x0100, throttle_extreme = 0x1000, }; struct throttle_record_s { int max; enum throttle_state_e mode; char *node; }; static int throttle_job_max = 0; static float throttle_load_target = 0.0; #define THROTTLE_FACTOR_LOW 1.2 #define THROTTLE_FACTOR_MEDIUM 1.6 #define THROTTLE_FACTOR_HIGH 2.0 static GHashTable *throttle_records = NULL; static mainloop_timer_t *throttle_timer = NULL; static const char * load2str(enum throttle_state_e mode) { switch (mode) { case throttle_extreme: return "extreme"; case throttle_high: return "high"; case throttle_med: return "medium"; case throttle_low: return "low"; case throttle_none: return "negligible"; default: return "undetermined"; } } #if HAVE_LINUX_PROCFS /*! * \internal * \brief Return name of /proc file containing the CIB daemon's load statistics * * \return Newly allocated memory with file name on success, NULL otherwise * * \note It is the caller's responsibility to free the return value. * This will return NULL if the daemon is being run via valgrind. * This should be called only on Linux systems. */ static char * find_cib_loadfile(void) { pid_t pid = pcmk__procfs_pid_of("pacemaker-based"); return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL; } static bool throttle_cib_load(float *load) { /* /proc/[pid]/stat Status information about the process. This is used by ps(1). It is defined in /usr/src/linux/fs/proc/array.c. The fields, in order, with their proper scanf(3) format specifiers, are: pid %d (1) The process ID. comm %s (2) The filename of the executable, in parentheses. This is visible whether or not the executable is swapped out. state %c (3) One character from the string "RSDZTW" where R is running, S is sleeping in an interruptible wait, D is waiting in uninterruptible disk sleep, Z is zombie, T is traced or stopped (on a signal), and W is paging. ppid %d (4) The PID of the parent. pgrp %d (5) The process group ID of the process. session %d (6) The session ID of the process. tty_nr %d (7) The controlling terminal of the process. (The minor device number is contained in the combination of bits 31 to 20 and 7 to 0; the major device number is in bits 15 to 8.) tpgid %d (8) The ID of the foreground process group of the controlling terminal of the process. flags %u (%lu before Linux 2.6.22) (9) The kernel flags word of the process. For bit meanings, see the PF_* defines in the Linux kernel source file include/linux/sched.h. Details depend on the kernel version. minflt %lu (10) The number of minor faults the process has made which have not required loading a memory page from disk. cminflt %lu (11) The number of minor faults that the process's waited-for children have made. majflt %lu (12) The number of major faults the process has made which have required loading a memory page from disk. cmajflt %lu (13) The number of major faults that the process's waited-for children have made. utime %lu (14) Amount of time that this process has been scheduled in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). This includes guest time, guest_time (time spent running a virtual CPU, see below), so that applications that are not aware of the guest time field do not lose that time from their calculations. stime %lu (15) Amount of time that this process has been scheduled in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). */ static char *loadfile = NULL; static time_t last_call = 0; static long ticks_per_s = 0; static unsigned long last_utime, last_stime; char buffer[64*1024]; FILE *stream = NULL; time_t now = time(NULL); if(load == NULL) { return FALSE; } else { *load = 0.0; } if(loadfile == NULL) { last_call = 0; last_utime = 0; last_stime = 0; loadfile = find_cib_loadfile(); if (loadfile == NULL) { crm_warn("Couldn't find CIB load file"); return FALSE; } ticks_per_s = sysconf(_SC_CLK_TCK); crm_trace("Found %s", loadfile); } stream = fopen(loadfile, "r"); if(stream == NULL) { int rc = errno; crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc); free(loadfile); loadfile = NULL; return FALSE; } if(fgets(buffer, sizeof(buffer), stream)) { char *comm = pcmk__assert_alloc(1, 256); char state = 0; int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0; unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0; rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu", &pid, comm, &state, &ppid, &pgrp, &session, &tty_nr, &tpgid, &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime); free(comm); if(rc != 15) { crm_err("Only %d of 15 fields found in %s", rc, loadfile); fclose(stream); return FALSE; } else if(last_call > 0 && last_call < now && last_utime <= utime && last_stime <= stime) { time_t elapsed = now - last_call; unsigned long delta_utime = utime - last_utime; unsigned long delta_stime = stime - last_stime; *load = (delta_utime + delta_stime); /* Cast to a float before division */ *load /= ticks_per_s; *load /= elapsed; crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed); } else { crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s); } last_call = now; last_utime = utime; last_stime = stime; fclose(stream); return TRUE; } fclose(stream); return FALSE; } static bool throttle_load_avg(float *load) { char buffer[256]; FILE *stream = NULL; const char *loadfile = "/proc/loadavg"; if(load == NULL) { return FALSE; } stream = fopen(loadfile, "r"); if(stream == NULL) { int rc = errno; crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc); return FALSE; } if(fgets(buffer, sizeof(buffer), stream)) { char *nl = strstr(buffer, "\n"); /* Grab the 1-minute average, ignore the rest */ *load = strtof(buffer, NULL); if(nl) { nl[0] = 0; } fclose(stream); return TRUE; } fclose(stream); return FALSE; } /*! * \internal * \brief Check a load value against throttling thresholds * * \param[in] load Load value to check * \param[in] desc Description of metric (for logging) * \param[in] thresholds Low/medium/high/extreme thresholds * * \return Throttle mode corresponding to load value */ static enum throttle_state_e throttle_check_thresholds(float load, const char *desc, const float thresholds[4]) { if (load > thresholds[3]) { crm_notice("Extreme %s detected: %f", desc, load); return throttle_extreme; } else if (load > thresholds[2]) { crm_notice("High %s detected: %f", desc, load); return throttle_high; } else if (load > thresholds[1]) { crm_info("Moderate %s detected: %f", desc, load); return throttle_med; } else if (load > thresholds[0]) { crm_debug("Noticeable %s detected: %f", desc, load); return throttle_low; } crm_trace("Negligible %s detected: %f", desc, load); return throttle_none; } static enum throttle_state_e throttle_handle_load(float load, const char *desc, int cores) { float normalize; float thresholds[4]; if (cores == 1) { /* On a single core machine, a load of 1.0 is already too high */ normalize = 0.6; } else { /* Normalize the load to be per-core */ normalize = cores; } thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW; thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM; thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH; thresholds[3] = load + 1.0; /* never extreme */ return throttle_check_thresholds(load, desc, thresholds); } #endif // HAVE_LINUX_PROCFS static enum throttle_state_e throttle_mode(void) { enum throttle_state_e mode = throttle_none; #if HAVE_LINUX_PROCFS unsigned int cores; float load; float thresholds[4]; cores = pcmk__procfs_num_cores(); if(throttle_cib_load(&load)) { float cib_max_cpu = 0.95; /* The CIB is a single-threaded task and thus cannot consume * more than 100% of a CPU (and 1/cores of the overall system * load). * * On a many-cored system, the CIB might therefore be maxed out * (causing operations to fail or appear to fail) even though * the overall system load is still reasonable. * * Therefore, the 'normal' thresholds can not apply here, and we * need a special case. */ if(cores == 1) { cib_max_cpu = 0.4; } if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) { cib_max_cpu = throttle_load_target; } thresholds[0] = cib_max_cpu * 0.8; thresholds[1] = cib_max_cpu * 0.9; thresholds[2] = cib_max_cpu; /* Can only happen on machines with a low number of cores */ thresholds[3] = cib_max_cpu * 1.5; mode = throttle_check_thresholds(load, "CIB load", thresholds); } if(throttle_load_target <= 0) { /* If we ever make this a valid value, the cluster will at least behave as expected */ return mode; } if(throttle_load_avg(&load)) { enum throttle_state_e cpu_load; cpu_load = throttle_handle_load(load, "CPU load", cores); if (cpu_load > mode) { mode = cpu_load; } crm_debug("Current load is %f across %u core(s)", load, cores); } #endif // HAVE_LINUX_PROCFS return mode; } static void throttle_send_command(enum throttle_state_e mode) { xmlNode *xml = NULL; static enum throttle_state_e last = -1; if(mode != last) { crm_info("New throttle mode: %s load (was %s)", load2str(mode), load2str(last)); last = mode; xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MODE, mode); crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MAX, throttle_job_max); send_cluster_message(NULL, crm_msg_crmd, xml, TRUE); free_xml(xml); } } static gboolean throttle_timer_cb(gpointer data) { throttle_send_command(throttle_mode()); return TRUE; } static void throttle_record_free(gpointer p) { struct throttle_record_s *r = p; free(r->node); free(r); } static void throttle_set_load_target(float target) { throttle_load_target = target; } /*! * \internal * \brief Update the maximum number of simultaneous jobs * * \param[in] preference Cluster-wide \c PCMK_OPT_NODE_ACTION_LIMIT from the * CIB */ static void throttle_update_job_max(const char *preference) { long long max = 0LL; const char *env_limit = pcmk__env_option(PCMK__ENV_NODE_ACTION_LIMIT); if (env_limit != NULL) { preference = env_limit; // Per-node override } if (preference != NULL) { pcmk__scan_ll(preference, &max, 0LL); } if (max > 0) { throttle_job_max = (max >= INT_MAX)? INT_MAX : (int) max; } else { // Default is based on the number of cores detected throttle_job_max = 2 * pcmk__procfs_num_cores(); } } void throttle_init(void) { if(throttle_records == NULL) { throttle_records = pcmk__strkey_table(NULL, throttle_record_free); throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL); } throttle_update_job_max(NULL); mainloop_timer_start(throttle_timer); } /*! * \internal * \brief Configure throttle options based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_throttle(GHashTable *options) { const char *value = g_hash_table_lookup(options, PCMK_OPT_LOAD_THRESHOLD); if (value != NULL) { throttle_set_load_target(strtof(value, NULL) / 100.0); } value = g_hash_table_lookup(options, PCMK_OPT_NODE_ACTION_LIMIT); throttle_update_job_max(value); } void throttle_fini(void) { if (throttle_timer != NULL) { mainloop_timer_del(throttle_timer); throttle_timer = NULL; } if (throttle_records != NULL) { g_hash_table_destroy(throttle_records); throttle_records = NULL; } } int throttle_get_total_job_limit(int l) { /* Cluster-wide limit */ GHashTableIter iter; int limit = l; - int peers = crm_active_peers(); + int peers = pcmk__cluster_num_active_nodes(); struct throttle_record_s *r = NULL; g_hash_table_iter_init(&iter, throttle_records); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) { switch(r->mode) { case throttle_extreme: if(limit == 0 || limit > peers/4) { limit = QB_MAX(1, peers/4); } break; case throttle_high: if(limit == 0 || limit > peers/2) { limit = QB_MAX(1, peers/2); } break; default: break; } } if(limit == l) { } else if(l == 0) { crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d", limit); } else { crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d instead of %d", limit, l); } return limit; } int throttle_get_job_limit(const char *node) { int jobs = 1; struct throttle_record_s *r = NULL; r = g_hash_table_lookup(throttle_records, node); if(r == NULL) { r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s)); r->node = pcmk__str_copy(node); r->mode = throttle_low; r->max = throttle_job_max; crm_trace("Defaulting to local values for unknown node %s", node); g_hash_table_insert(throttle_records, r->node, r); } switch(r->mode) { case throttle_extreme: case throttle_high: jobs = 1; /* At least one job must always be allowed */ break; case throttle_med: jobs = QB_MAX(1, r->max / 4); break; case throttle_low: jobs = QB_MAX(1, r->max / 2); break; case throttle_none: jobs = QB_MAX(1, r->max); break; default: crm_err("Unknown throttle mode %.4x on %s", r->mode, node); break; } return jobs; } void throttle_update(xmlNode *xml) { int max = 0; int mode = 0; struct throttle_record_s *r = NULL; const char *from = crm_element_value(xml, PCMK__XA_SRC); crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MODE, &mode); crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MAX, &max); r = g_hash_table_lookup(throttle_records, from); if(r == NULL) { r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s)); r->node = pcmk__str_copy(from); g_hash_table_insert(throttle_records, r->node, r); } r->max = max; r->mode = (enum throttle_state_e) mode; crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d", from, load2str((enum throttle_state_e) mode), max, throttle_get_job_limit(from)); } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 3c035c0930..9e0e647192 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3709 +1,3709 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; uint32_t target_nodeid; char *action; char *device; GList *device_list; GList *next_device_iter; // device_list entry for next device to execute void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const stonith_device_t *device, const char *action) { const char *value = NULL; guint delay_max = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { pcmk_parse_interval_spec(value, &delay_max); delay_max /= 1000; } return (int) delay_max; } static int get_action_delay_base(const stonith_device_t *device, const char *action, const char *target) { char *hash_value = NULL; guint delay_base = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = pcmk__str_copy(hash_value); char *valptr = value; if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { pcmk_parse_interval_spec(value, &delay_base); delay_base /= 1000; } free(valptr); } return (int) delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(const stonith_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { long long timeout_ms = crm_get_msec(value); return (int) QB_MIN(timeout_ms / 1000, INT_MAX); } } return default_timeout; } /*! * \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static stonith_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) { return NULL; } return g_hash_table_lookup(device_list, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if (device != NULL) { return pcmk_is_set(device->flags, st_device_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! * \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. */ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; if (msg == NULL) { return NULL; } op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); if (op == NULL) { return NULL; } cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); // All messages must include these cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION); cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP); cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id)); crm_element_value_int(msg, PCMK__XA_ST_CALLOPT, &(cmd->options)); crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay)); crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC); cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP); cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME); cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET); cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! * \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? "" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { if (node_does_watchdog_fencing(stonith_our_uname)) { pcmk__panic(__func__); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if SUPPORT_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->target_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && (cmd->target != NULL)) { crm_node_t *node = pcmk__get_node(0, cmd->target, NULL, pcmk__node_search_cluster); cmd->target_nodeid = node->id; } cmd->device = pcmk__str_copy(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, pcmk__str_copy(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer); } *metadata = pcmk__xml_parse(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = crm_element_value(match, PCMK_XA_NAME); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it * joins. * * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for * PCMK_XA_AUTOMATIC. */ if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC) || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) { device->automatic_unfencing = TRUE; } stonith__set_device_flags(device->flags, device->id, st_device_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) { pcmk__add_word(&(device->on_target_actions), 64, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, pcmk__str_copy(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in] dev XML containing device parameters */ static GHashTable * xml2device_params(const char *name, const xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. */ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) { crm_warn("Ignoring %s='reboot' (see " PCMK_OPT_STONITH_ACTION " cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, PCMK_ACTION_OFF) == 0) { map_action(params, PCMK_ACTION_REBOOT, value); } else { map_action(params, PCMK_ACTION_OFF, value); map_action(params, PCMK_ACTION_REBOOT, value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = PCMK_VALUE_DYNAMIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = PCMK_VALUE_STATUS; } else { check_type = PCMK_VALUE_NONE; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode *dev) { const char *value; stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT); CRM_CHECK(agent != NULL, return device); device = pcmk__assert_alloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, PCMK_XA_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei) && (device->targets != NULL)) { // device->targets is necessary only with PCMK_VALUE_STATIC_LIST g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES); if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = pcmk__str_copy(action); cmd->target = pcmk__str_copy(target); cmd->device = pcmk__str_copy(device->id); cmd->origin = pcmk__str_copy(origin); cmd->client = pcmk__str_copy(crm_system_name); cmd->client_name = pcmk__str_copy(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't * explicitly specify PCMK_VALUE_DYNAMIC_LIST */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK, PCMK_VALUE_STATUS); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(const stonith_device_t *device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode *dev, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(dev); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown " "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device " "named %s !='"STONITH_WATCHDOG_ID"'.", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* this either has an empty list or the targets configured for watchdog-fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (node_does_watchdog_fencing(stonith_our_uname)) { g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(stonith_our_uname); pcmk__insert_dup(device->params, PCMK_STONITH_HOST_LIST, stonith_our_uname); /* proceed as with any other stonith-device */ break; } crm_debug("Skip registration of watchdog fence device on node not in host-list."); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } void stonith_device_remove(const char *id, bool from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return crm_element_value_copy(level, PCMK_XA_TARGET); case fenced_target_by_pattern: return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE), crm_element_value(level, PCMK_XA_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", pcmk__xe_id(level)); } } /*! * \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (crm_element_value(level, PCMK_XA_TARGET) != NULL) { return fenced_target_by_name; } if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } if (!stand_alone /* if standalone, there's no attribute manager */ && (crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL) && (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * \param[out] desc If not NULL, where to store log-friendly level description * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target and \p *desc if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id, char **desc) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) { xml = get_xpath_object("//" PCMK_XE_FENCING_LEVEL, xml, LOG_WARNING); } if (xml == NULL) { if (desc != NULL) { *desc = crm_strdup_printf("missing"); } } else { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); crm_element_value_int(xml, PCMK_XA_INDEX, &local_id); if (desc != NULL) { *desc = crm_strdup_printf("%s[%d]", local_target, local_id); } } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of registration */ void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); level = unpack_level_request(msg, &mode, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(pcmk__xe_id(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", pcmk__xe_id(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", pcmk__xe_id(level)); return; } // Ensure level ID is in allowed range if ((id <= 0) || (id >= ST_LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), pcmk__xe_id(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, PCMK_XA_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of unregistration */ void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; CRM_CHECK(result != NULL, return); level = unpack_level_request(msg, NULL, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure level ID is in allowed range if ((id < 0) || (id >= ST_LEVEL_MAX)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(pcmk__xe_id(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; char *pos = NULL; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = pcmk__assert_alloc(alloc_size, sizeof(char)); pos = rv; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); fenced_set_protocol_error(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_list, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!device->api_registered && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != st_device_supports_none) { stonith_device_t *dev = g_hash_table_lookup(device_list, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, pcmk__str_copy(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! * \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const stonith_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } static void can_fence_host_with_device(stonith_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_suicide)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST, pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = pcmk__assert_alloc(1, sizeof(struct device_search_s)); search->host = pcmk__str_copy(host); search->action = pcmk__str_copy(action); search->per_device_timeout = timeout; search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); // PCMK__XA_ST_REQUIRED is currently used only for unfencing if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1); } // pcmk__timeout if configured action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %ds using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION); crm_xml_add(child, PCMK_XA_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_suicide); } /*! * \internal * \brief Send a reply to a CPG peer or IPC client * * \param[in] reply XML reply to send * \param[in] call_options Send synchronously if st_opt_sync_call is set * \param[in] remote_peer If not NULL, name of peer node to send CPG reply * \param[in,out] client If not NULL, client to send IPC reply */ static void stonith_send_reply(const xmlNode *reply, int call_options, const char *remote_peer, pcmk__client_t *client) { CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), return); if (remote_peer == NULL) { do_local_reply(reply, client, call_options); } else { send_cluster_message(pcmk__get_node(0, remote_peer, NULL, pcmk__node_search_cluster), crm_msg_stonith_ng, reply, FALSE); } } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GList *lpc = NULL; pcmk__client_t *client = NULL; if (query->client_id != NULL) { client = pcmk__find_client_by_id(query->client_id); if ((client == NULL) && (query->remote_peer == NULL)) { crm_trace("Skipping reply to %s: no longer a client", query->client_id); goto done; } } /* Pack the results into XML */ list = pcmk__xe_create(NULL, __func__); crm_xml_add(list, PCMK__XA_ST_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID); crm_xml_add(dev, PCMK_XA_ID, device->id); crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace); crm_xml_add(dev, PCMK_XA_AGENT, device->agent); // Has had successful monitor, list, or status on this node crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified); crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } if (list != NULL) { xmlNode *wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA); crm_log_xml_trace(list, "Add query results"); xmlAddChild(wrapper, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, client); done: free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); g_list_free_full(devices, free); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->next_device_iter); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if (pcmk__result_ok(result)) { log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_DEBUG; } next = NULL; } else { log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->target != NULL) { pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); } if (cmd->device != NULL) { pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); } // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { pcmk__g_strcat(msg, "could not be executed: ", pcmk_exec_status_str(result->execution_status), NULL); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); } if (next != NULL) { pcmk__g_strcat(msg, ", retrying with ", next, NULL); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " CRM_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; pcmk__client_t *client = NULL; CRM_CHECK((cmd != NULL) && (result != NULL), return); log_async_result(cmd, result, pid, NULL, merged); if (cmd->client != NULL) { client = pcmk__find_client_by_id(cmd->client); if ((client == NULL) && (cmd->origin == NULL)) { crm_trace("Skipping reply to %s: no longer a client", cmd->client); return; } } reply = construct_async_reply(cmd, result); if (merged) { pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true); } if (!stand_alone && pcmk__is_fencing_action(cmd->action) && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { /* The target was also the originator, so broadcast the result on its * behalf (since it will be unable to). */ crm_trace("Broadcast '%s' result for %s (target was also originator)", cmd->action, cmd->target); crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else { // Reply only to the originator stonith_send_reply(reply, cmd->options, cmd->origin, client); } crm_log_xml_trace(reply, "Reply"); free_xml(reply); if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_NOTIFY_FENCE); stonith__xe_set_result(notify_data, result); crm_xml_add(notify_data, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(notify_data, PCMK__XA_ST_OP, cmd->op); crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, "localhost"); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, cmd->client); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, result, notify_data); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device = cmd_device(cmd); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } /*! * \internal * \brief Cancel and reply to any duplicates of a just-completed operation * * Check whether any fencing operations are scheduled to do the same thing as * one that just succeeded. If so, rather than performing the same operation * twice, return the result of this operation for all matching pending commands. * * \param[in,out] cmd Fencing operation that just succeeded * \param[in] result Result of \p cmd * \param[in] pid If nonzero, process ID of agent invocation (for logs) * * \note Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to off, * then cmd->action will be "reboot" and will be merged with any other * reboot requests. If the originating fencer remapped a topology reboot * to off then on, we will get here once with cmd->action "off" and once * with "on", and they will be merged separately with similar requests. */ static void reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, int pid) { GList *next = NULL; for (GList *iter = cmd_list; iter != NULL; iter = next) { async_command_t *cmd_other = iter->data; next = iter->next; // We might delete this entry, so grab next now if (cmd == cmd_other) { continue; } /* A pending operation matches if: * 1. The client connections are different. * 2. The target is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } crm_notice("Merging fencing action '%s'%s%s originating from " "client %s with identical fencing request from client %s", cmd_other->action, (cmd_other->target == NULL)? "" : " targeting ", pcmk__s(cmd_other->target, ""), cmd_other->client_name, cmd->client_name); // Stop tracking the duplicate, send its result, and cancel it cmd_list = g_list_remove_link(cmd_list, iter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(iter); } } /*! * \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static stonith_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { stonith_device_t *next_device = g_hash_table_lookup(device_list, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!device->verified && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { device->verified = TRUE; } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { // Device found, schedule it for fencing cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! * \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (device_id != NULL) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; crm_node_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, pcmk__node_search_any |pcmk__node_search_known); if (node != NULL) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! * \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. * \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { PCMK__XA_ST_OP, PCMK__XA_ST_CALLID, PCMK__XA_ST_CLIENTID, PCMK__XA_ST_CLIENTNAME, PCMK__XA_ST_REMOTE_OP, PCMK__XA_ST_CALLOPT, }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client); crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name); crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin); crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id); crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! * \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->uname); return entry->uname; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, PCMK__XA_ST_TARGET); } relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY); op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP); client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); /* Delete RELAY operation. */ if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. * * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) * \param[in] op Requested API operation (for logging only) * * \return true if sender is peer or privileged client, otherwise false */ static inline bool is_privileged(const pcmk__client_t *c, const char *op) { if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { return true; } else { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", pcmk__s(op, ""), pcmk__client_name(c)); return false; } } // CRM_OP_REGISTER static xmlNode * handle_register_request(pcmk__request_t *request) { xmlNode *reply = pcmk__xe_create(NULL, "reply"); CRM_ASSERT(request->ipc_client != NULL); crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return reply; } // STONITH_OP_EXEC static xmlNode * handle_agent_request(pcmk__request_t *request) { execute_agent_action(request->xml, &request->result); if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_TIMEOUT_UPDATE static xmlNode * handle_update_timeout_request(pcmk__request_t *request) { const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID); const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); int op_timeout = 0; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } // STONITH_OP_QUERY static xmlNode * handle_query_request(pcmk__request_t *request) { int timeout = 0; xmlNode *dev = NULL; const char *action = NULL; const char *target = NULL; const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); struct st_query_data *query = NULL; if (request->peer != NULL) { // Record it for the future notification create_remote_stonith_op(client_id, request->xml, TRUE); } /* Delete the DC node RELAY operation. */ remove_relay_op(request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request->xml, LOG_NEVER); if (dev != NULL) { const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { return NULL; // No query or reply necessary } target = crm_element_value(dev, PCMK__XA_ST_TARGET); action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); } crm_log_xml_trace(request->xml, "Query"); query = pcmk__assert_alloc(1, sizeof(struct st_query_data)); query->reply = fenced_construct_reply(request->xml, NULL, &request->result); query->remote_peer = pcmk__str_copy(request->peer); query->client_id = pcmk__str_copy(client_id); query->target = pcmk__str_copy(target); query->action = pcmk__str_copy(action); query->call_options = request->call_options; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout); get_capable_devices(target, action, timeout, pcmk_is_set(query->call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb, st_device_supports_none); return NULL; } // STONITH_OP_NOTIFY static xmlNode * handle_notify_request(pcmk__request_t *request) { const char *flag_name = NULL; CRM_ASSERT(request->ipc_client != NULL); flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE); if (flag_name != NULL) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE); if (flag_name != NULL) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__clear_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_OK); } // STONITH_OP_RELAY static xmlNode * handle_relay_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml, LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", pcmk__request_origin_type(request), pcmk__request_origin(request), crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION), crm_element_value(dev, PCMK__XA_ST_TARGET)); if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); return fenced_construct_reply(request->xml, NULL, &request->result); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); return NULL; } // STONITH_OP_FENCE static xmlNode * handle_fence_request(pcmk__request_t *request) { if ((request->peer != NULL) || stand_alone) { fence_locally(request->xml, &request->result); } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { switch (fenced_handle_manual_confirmation(request->ipc_client, request->xml)) { case pcmk_rc_ok: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); break; case EINPROGRESS: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); break; default: fenced_set_protocol_error(&request->result); break; } } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml, LOG_TRACE); const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET); const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (request->ipc_client != NULL) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__request_origin(request), action, target, (device? device : "any device")); crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; if (request->ipc_client->id == 0) { client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. */ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY); crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id); send_cluster_message(pcmk__get_node(0, alternate_host, NULL, pcmk__node_search_cluster), crm_msg_stonith_ng, request->xml, FALSE); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } free_xml(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = stonith_device_register(dev, FALSE); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_config_notification(op, &request->result, (dev == NULL)? NULL : pcmk__xe_id(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); const char *device_id = crm_element_value(dev, PCMK_XA_ID); const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_config_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; crm_element_value_int(request->xml, PCMK_XA_ID, &node_id); name = crm_element_value(request->xml, PCMK_XA_UNAME); - reap_crm_member(node_id, name); + pcmk__cluster_forget_cluster_node(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { STONITH_OP_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { STONITH_OP_LEVEL_DEL, handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) { if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } free_xml(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = crm_element_value_copy(request, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { int call_options = st_opt_none; bool is_reply = false; CRM_CHECK(message != NULL, return); if (get_xpath_object("//" PCMK__XE_ST_REPLY, message, LOG_NEVER) != NULL) { is_reply = true; } crm_element_value_int(message, PCMK__XA_ST_CALLOPT, &call_options); crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", crm_element_value(message, PCMK__XA_ST_OP), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 372a8bff5e..2515355560 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,680 +1,680 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster" char *stonith_our_uname = NULL; long long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static struct { bool no_cib_connect; gchar **log_files; } options; crm_exit_t exit_code = CRM_EX_OK; static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL); return 0; } op = crm_element_value(request, PCMK__XA_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(request, PCMK__XA_ST_OP, op); crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid); } crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options); crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "ipc-received"); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC); const char *op = crm_element_value(msg, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, PCMK__XA_SRC, from); stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; int rid = 0; uint32_t ipc_flags = crm_ipc_server_event; if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_LOG_ASSERT(client->request_id); rid = client->request_id; client->request_id = 0; ipc_flags = crm_ipc_flags_none; } local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); if (local_rc == pcmk_rc_ok) { crm_trace("Sent response %d to client %s", rid, pcmk__client_name(client)); } else { crm_warn("%synchronous reply to client %s failed: %s", (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"), pcmk__client_name(client), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, pcmk__str_none)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, PCMK__XA_SUBT); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id); crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } /*! * \internal * \brief Notify relevant IPC clients of a fencing operation result * * \param[in] type Notification type * \param[in] result Result of fencing operation (assume success if NULL) * \param[in] data If not NULL, add to notification as call data */ void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); CRM_LOG_ASSERT(type != NULL); crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(update_msg, PCMK__XA_SUBT, type); crm_xml_add(update_msg, PCMK__XA_ST_OP, type); stonith__xe_set_result(update_msg, result); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } /*! * \internal * \brief Send notifications for a configuration change to subscribed clients * * \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD, * \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or * \c STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc Description of what changed (either device ID or string * representation of level * ([])) */ void fenced_send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { xmlNode *notify_data = pcmk__xe_create(NULL, op); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, desc); fenced_send_notification(op, result, notify_data); free_xml(notify_data); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } } static void stonith_cleanup(void) { fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); fenced_unregister_handlers(); free(stonith_our_uname); stonith_our_uname = NULL; } static gboolean stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { stand_alone = FALSE; options.no_cib_connect = true; return TRUE; } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !pcmk_is_set(node->flags, crm_remote_node)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or * crm_resource --list-options=fencing instead of querying daemon metadata. */ static int fencer_metadata(void) { const char *name = "pacemaker-fenced"; const char *desc_short = N_("Instance attributes available for all " "\"stonith\"-class resources"); const char *desc_long = N_("Instance attributes available for all " "\"stonith\"-class resources and used by " "Pacemaker's fence daemon, formerly known as " "stonithd"); return pcmk__daemon_metadata(out, name, desc_short, desc_long, pcmk__opt_fencing); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, N_("Deprecated (will be removed in a future release)"), NULL }, { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_cluster_t *cluster = NULL; crm_ipc_t *old_instance = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = fencer_metadata(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } // Open additional log files pcmk__add_logfiles(options.log_files, out); crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE, (args->verbosity > 0), argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-fenced is already active, aborting startup"); goto done; } else { // Not up or not authentic, we'll proceed either way crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); rc = fenced_scheduler_init(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error initializing scheduler data: %s", pcmk_rc_str(rc)); goto done; } cluster = pcmk_cluster_new(); if (!stand_alone) { #if SUPPORT_COROSYNC if (is_corosync_cluster()) { cluster->destroy = stonith_peer_cs_destroy; cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; } #endif // SUPPORT_COROSYNC crm_set_status_callback(&st_peer_update_callback); - if (crm_cluster_connect(cluster) == FALSE) { + if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; crm_crit("Cannot sign in to the cluster... terminating"); goto done; } pcmk__str_update(&stonith_our_uname, cluster->uname); if (!options.no_cib_connect) { setup_cib(); } } else { pcmk__str_update(&stonith_our_uname, "localhost"); crm_warn("Stand-alone mode is deprecated and will be removed " "in a future release"); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); // Create the mainloop and run it... mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); g_strfreev(options.log_files); stonith_cleanup(); pcmk_cluster_free(cluster); fenced_scheduler_cleanup(); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/include/crm/cluster.h b/include/crm/cluster.h index 9530a7ad3d..13797f47ad 100644 --- a/include/crm/cluster.h +++ b/include/crm/cluster.h @@ -1,244 +1,230 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CLUSTER__H # define PCMK__CRM_CLUSTER__H # include // uint32_t, uint64_t # include // gboolean, GHashTable # include // xmlNode # include # include #ifdef __cplusplus extern "C" { #endif # if SUPPORT_COROSYNC # include # endif extern gboolean crm_have_quorum; extern GHashTable *crm_peer_cache; extern GHashTable *crm_remote_peer_cache; extern unsigned long long crm_peer_seq; #define CRM_NODE_LOST "lost" #define CRM_NODE_MEMBER "member" enum crm_join_phase { /* @COMPAT: crm_join_nack_quiet can be replaced by crm_node_t:user_data * at a compatibility break. */ //! Not allowed to join, but don't send a nack message crm_join_nack_quiet = -2, crm_join_nack = -1, crm_join_none = 0, crm_join_welcomed = 1, crm_join_integrated = 2, crm_join_finalized = 3, crm_join_confirmed = 4, }; enum crm_node_flags { /* node is not a cluster node and should not be considered for cluster membership */ crm_remote_node = 0x0001, /* node's cache entry is dirty */ crm_node_dirty = 0x0010, }; typedef struct crm_peer_node_s { char *uname; // Node name as known to cluster /* @COMPAT This is less than ideal since the value is not a valid XML ID * (for Corosync, it's the string equivalent of the node's numeric node ID, * but XML IDs can't start with a number) and the three elements should have * different IDs. * * Ideally, we would use something like node-NODEID, node_state-NODEID, and * transient_attributes-NODEID as the element IDs. Unfortunately changing it * would be impractical due to backward compatibility; older nodes in a * rolling upgrade will always write and expect the value in the old format. * * This is also named poorly, since the value is not a UUID, but at least * that can be changed at an API compatibility break. */ /*! Value of the PCMK_XA_ID XML attribute to use with the node's * PCMK_XE_NODE, PCMK_XE_NODE_STATE, and PCMK_XE_TRANSIENT_ATTRIBUTES * XML elements in the CIB */ char *uuid; char *state; // @TODO change to enum uint64_t flags; // Bitmask of crm_node_flags uint64_t last_seen; // Only needed by cluster nodes uint32_t processes; // @TODO most not needed, merge into flags /* @TODO When we can break public API compatibility, we can make the rest of * these members separate structs and use void *cluster_data and * void *user_data here instead, to abstract the cluster layer further. */ // Currently only needed by corosync stack uint32_t id; // Node ID time_t when_lost; // When CPG membership was last lost // Only used by controller enum crm_join_phase join; char *expected; time_t peer_lost; char *conn_host; time_t when_member; // Since when node has been a cluster member time_t when_online; // Since when peer has been online in CPG } crm_node_t; void crm_peer_init(void); void crm_peer_destroy(void); typedef struct crm_cluster_s { char *uuid; char *uname; uint32_t nodeid; void (*destroy) (gpointer); # if SUPPORT_COROSYNC /* @TODO When we can break public API compatibility, make these members a * separate struct and use void *cluster_data here instead, to abstract the * cluster layer further. */ struct cpg_name group; cpg_callbacks_t cpg; cpg_handle_t cpg_handle; # endif } crm_cluster_t; -gboolean crm_cluster_connect(crm_cluster_t *cluster); -void crm_cluster_disconnect(crm_cluster_t *cluster); +int pcmk_cluster_connect(crm_cluster_t *cluster); +int pcmk_cluster_disconnect(crm_cluster_t *cluster); crm_cluster_t *pcmk_cluster_new(void); void pcmk_cluster_free(crm_cluster_t *cluster); enum crm_ais_msg_class { crm_class_cluster = 0, }; enum crm_ais_msg_types { crm_msg_none = 0, crm_msg_ais = 1, crm_msg_lrmd = 2, crm_msg_cib = 3, crm_msg_crmd = 4, crm_msg_attrd = 5, crm_msg_stonithd = 6, crm_msg_te = 7, crm_msg_pe = 8, crm_msg_stonith_ng = 9, }; gboolean send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service, const xmlNode *data, gboolean ordered); -int crm_remote_peer_cache_size(void); - -/* Initialize and refresh the remote peer cache from a cib config */ -void crm_remote_peer_cache_refresh(xmlNode *cib); -crm_node_t *crm_remote_peer_get(const char *node_name); -void crm_remote_peer_cache_remove(const char *node_name); - -guint crm_active_peers(void); -gboolean crm_is_peer_active(const crm_node_t * node); -guint reap_crm_member(uint32_t id, const char *name); - # if SUPPORT_COROSYNC uint32_t get_local_nodeid(cpg_handle_t handle); -gboolean cluster_connect_cpg(crm_cluster_t *cluster); -void cluster_disconnect_cpg(crm_cluster_t * cluster); - void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries); gboolean crm_is_corosync_peer_active(const crm_node_t * node); gboolean send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, gboolean local, const crm_node_t *node, enum crm_ais_msg_types dest); char *pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *msg, uint32_t *kind, const char **from); # endif const char *crm_peer_uuid(crm_node_t *node); const char *crm_peer_uname(const char *uuid); enum crm_status_type { crm_status_uname, crm_status_nstate, crm_status_processes, }; enum crm_ais_msg_types text2msg_type(const char *text); void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)); void crm_set_autoreap(gboolean autoreap); enum cluster_type_e { pcmk_cluster_unknown = 0x0001, pcmk_cluster_invalid = 0x0002, // 0x0004 was heartbeat // 0x0010 was corosync 1 with plugin pcmk_cluster_corosync = 0x0020, // 0x0040 was corosync 1 with CMAN }; enum cluster_type_e get_cluster_type(void); const char *name_for_cluster_type(enum cluster_type_e type); gboolean is_corosync_cluster(void); const char *get_local_node_name(void); char *get_node_name(uint32_t nodeid); /*! * \brief Get log-friendly string equivalent of a join phase * * \param[in] phase Join phase * * \return Log-friendly string equivalent of \p phase */ static inline const char * crm_join_phase_str(enum crm_join_phase phase) { switch (phase) { case crm_join_nack_quiet: return "nack_quiet"; case crm_join_nack: return "nack"; case crm_join_none: return "none"; case crm_join_welcomed: return "welcomed"; case crm_join_integrated: return "integrated"; case crm_join_finalized: return "finalized"; case crm_join_confirmed: return "confirmed"; default: return "invalid"; } } #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) #include #endif #ifdef __cplusplus } #endif #endif diff --git a/include/crm/cluster/compat.h b/include/crm/cluster/compat.h index 14c4504972..3470745f20 100644 --- a/include/crm/cluster/compat.h +++ b/include/crm/cluster/compat.h @@ -1,56 +1,97 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CLUSTER_COMPAT__H # define PCMK__CRM_CLUSTER_COMPAT__H +#include // uint32_t + +#include // gboolean, guint #include // xmlNode + #include // crm_node_t #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Deprecated Pacemaker cluster API * \ingroup cluster * \deprecated Do not include this header directly. The cluster APIs in this * header, and the header itself, will be removed in a future * release. */ // \deprecated Do not use enum crm_get_peer_flags { CRM_GET_PEER_CLUSTER = 0x0001, CRM_GET_PEER_REMOTE = 0x0002, CRM_GET_PEER_ANY = CRM_GET_PEER_CLUSTER|CRM_GET_PEER_REMOTE, }; // \deprecated Do not use Pacemaker for cluster node cacheing crm_node_t *crm_get_peer(unsigned int id, const char *uname); // \deprecated Do not use Pacemaker for cluster node cacheing crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags); // \deprecated Use stonith_api_kick() from libstonithd instead int crm_terminate_member(int nodeid, const char *uname, void *unused); // \deprecated Use stonith_api_kick() from libstonithd instead int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection); // \deprecated Use crm_xml_add(xml, attr, crm_peer_uuid(node)) instead void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node); +#if SUPPORT_COROSYNC + +// \deprecated Do not use +gboolean cluster_connect_cpg(crm_cluster_t *cluster); + +// \deprecated Do not use +void cluster_disconnect_cpg(crm_cluster_t *cluster); + +#endif // SUPPORT_COROSYNC + +// \deprecated Use \c pcmk_cluster_connect() instead +gboolean crm_cluster_connect(crm_cluster_t *cluster); + +// \deprecated Use \c pcmk_cluster_disconnect() instead +void crm_cluster_disconnect(crm_cluster_t *cluster); + +// \deprecated Do not use +int crm_remote_peer_cache_size(void); + +// \deprecated Do not use +void crm_remote_peer_cache_refresh(xmlNode *cib); + +// \deprecated Do not use +crm_node_t *crm_remote_peer_get(const char *node_name); + +// \deprecated Do not use +void crm_remote_peer_cache_remove(const char *node_name); + +// \deprecated Do not use +gboolean crm_is_peer_active(const crm_node_t *node); + +// \deprecated Do not use +guint crm_active_peers(void); + +// \deprecated Do not use +guint reap_crm_member(uint32_t id, const char *name); + #ifdef __cplusplus } #endif #endif // PCMK_CLUSTER_COMPAT__H diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h index b75784c256..8549db0bb5 100644 --- a/include/crm/cluster/internal.h +++ b/include/crm/cluster/internal.h @@ -1,151 +1,162 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CLUSTER_INTERNAL__H # define PCMK__CRM_CLUSTER_INTERNAL__H # include // uint32_t, uint64_t + +# include // gboolean + # include /* *INDENT-OFF* */ enum crm_proc_flag { crm_proc_none = 0x00000001, // Cluster layers crm_proc_cpg = 0x04000000, // Daemons crm_proc_execd = 0x00000010, crm_proc_based = 0x00000100, crm_proc_controld = 0x00000200, crm_proc_attrd = 0x00001000, crm_proc_schedulerd = 0x00010000, crm_proc_fenced = 0x00100000, }; /* *INDENT-ON* */ // Used with node cache search functions enum pcmk__node_search_flags { pcmk__node_search_none = 0, pcmk__node_search_cluster = (1 << 0), // Search for cluster nodes pcmk__node_search_remote = (1 << 1), // Search for remote nodes pcmk__node_search_any = pcmk__node_search_cluster |pcmk__node_search_remote, /* @COMPAT The values before this must stay the same until we can drop * support for enum crm_get_peer_flags */ pcmk__node_search_known = (1 << 2), // Search previously known nodes }; /*! * \internal * \brief Return the process bit corresponding to the current cluster stack * * \return Process flag if detectable, otherwise 0 */ static inline uint32_t crm_get_cluster_proc(void) { switch (get_cluster_type()) { case pcmk_cluster_corosync: return crm_proc_cpg; default: break; } return crm_proc_none; } /*! * \internal * \brief Get log-friendly string description of a Corosync return code * * \param[in] error Corosync return code * * \return Log-friendly string description corresponding to \p error */ static inline const char * pcmk__cs_err_str(int error) { # if SUPPORT_COROSYNC switch (error) { case CS_OK: return "OK"; case CS_ERR_LIBRARY: return "Library error"; case CS_ERR_VERSION: return "Version error"; case CS_ERR_INIT: return "Initialization error"; case CS_ERR_TIMEOUT: return "Timeout"; case CS_ERR_TRY_AGAIN: return "Try again"; case CS_ERR_INVALID_PARAM: return "Invalid parameter"; case CS_ERR_NO_MEMORY: return "No memory"; case CS_ERR_BAD_HANDLE: return "Bad handle"; case CS_ERR_BUSY: return "Busy"; case CS_ERR_ACCESS: return "Access error"; case CS_ERR_NOT_EXIST: return "Doesn't exist"; case CS_ERR_NAME_TOO_LONG: return "Name too long"; case CS_ERR_EXIST: return "Exists"; case CS_ERR_NO_SPACE: return "No space"; case CS_ERR_INTERRUPT: return "Interrupt"; case CS_ERR_NAME_NOT_FOUND: return "Name not found"; case CS_ERR_NO_RESOURCES: return "No resources"; case CS_ERR_NOT_SUPPORTED: return "Not supported"; case CS_ERR_BAD_OPERATION: return "Bad operation"; case CS_ERR_FAILED_OPERATION: return "Failed operation"; case CS_ERR_MESSAGE_ERROR: return "Message error"; case CS_ERR_QUEUE_FULL: return "Queue full"; case CS_ERR_QUEUE_NOT_AVAILABLE: return "Queue not available"; case CS_ERR_BAD_FLAGS: return "Bad flags"; case CS_ERR_TOO_BIG: return "Too big"; case CS_ERR_NO_SECTIONS: return "No sections"; } # endif return "Corosync error"; } # if SUPPORT_COROSYNC #if 0 /* This is the new way to do it, but we still support all Corosync 2 versions, * and this isn't always available. A better alternative here would be to check * for support in the configure script and enable this conditionally. */ #define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP) #else #define pcmk__init_cmap(handle) cmap_initialize(handle) #endif char *pcmk__corosync_cluster_name(void); bool pcmk__corosync_add_nodes(xmlNode *xml_parent); # endif crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer, uint32_t flag, const char *status); crm_node_t *pcmk__update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership); void pcmk__update_peer_expected(const char *source, crm_node_t *node, const char *expected); void pcmk__reap_unseen_nodes(uint64_t ring_id); void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy) (gpointer)); + +bool pcmk__cluster_is_node_active(const crm_node_t *node); +unsigned int pcmk__cluster_num_active_nodes(void); +unsigned int pcmk__cluster_num_remote_nodes(void); + +crm_node_t *pcmk__cluster_lookup_remote_node(const char *node_name); +void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name); +void pcmk__cluster_forget_remote_node(const char *node_name); crm_node_t *pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags); crm_node_t *pcmk__search_cluster_node_cache(unsigned int id, const char *uname, const char *uuid); void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id); void pcmk__refresh_node_caches_from_cib(xmlNode *cib); crm_node_t *pcmk__get_node(unsigned int id, const char *uname, const char *uuid, uint32_t flags); #endif // PCMK__CRM_CLUSTER_INTERNAL__H diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c index 68f5512fdc..204154e0e2 100644 --- a/lib/cluster/cluster.c +++ b/lib/cluster/cluster.c @@ -1,404 +1,430 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "crmcluster_private.h" CRM_TRACE_INIT_DATA(cluster); /*! * \brief Get (and set if needed) a node's UUID * * \param[in,out] peer Node to check * * \return Node UUID of \p peer, or NULL if unknown */ const char * crm_peer_uuid(crm_node_t *peer) { char *uuid = NULL; // Check simple cases first, to avoid any calls that might block if (peer == NULL) { return NULL; } if (peer->uuid != NULL) { return peer->uuid; } switch (get_cluster_type()) { case pcmk_cluster_corosync: #if SUPPORT_COROSYNC uuid = pcmk__corosync_uuid(peer); #endif break; case pcmk_cluster_unknown: case pcmk_cluster_invalid: crm_err("Unsupported cluster type"); break; } peer->uuid = uuid; return peer->uuid; } /*! + * \internal * \brief Connect to the cluster layer * - * \param[in,out] Initialized cluster object to connect + * \param[in,out] cluster Initialized cluster object to connect * - * \return TRUE on success, otherwise FALSE + * \return Standard Pacemaker return code */ -gboolean -crm_cluster_connect(crm_cluster_t *cluster) +int +pcmk_cluster_connect(crm_cluster_t *cluster) { enum cluster_type_e type = get_cluster_type(); crm_notice("Connecting to %s cluster infrastructure", name_for_cluster_type(type)); + switch (type) { case pcmk_cluster_corosync: #if SUPPORT_COROSYNC crm_peer_init(); return pcmk__corosync_connect(cluster); #else break; #endif // SUPPORT_COROSYNC default: break; } - return FALSE; + + crm_err("Failed to connect to unsupported cluster type %s", + name_for_cluster_type(type)); + return EPROTONOSUPPORT; } /*! * \brief Disconnect from the cluster layer * * \param[in,out] cluster Cluster object to disconnect + * + * \return Standard Pacemaker return code */ -void -crm_cluster_disconnect(crm_cluster_t *cluster) +int +pcmk_cluster_disconnect(crm_cluster_t *cluster) { enum cluster_type_e type = get_cluster_type(); crm_info("Disconnecting from %s cluster infrastructure", name_for_cluster_type(type)); + switch (type) { case pcmk_cluster_corosync: #if SUPPORT_COROSYNC crm_peer_destroy(); pcmk__corosync_disconnect(cluster); -#endif // SUPPORT_COROSYNC + return pcmk_rc_ok; +#else break; +#endif // SUPPORT_COROSYNC default: break; } + + crm_err("Failed to disconnect from unsupported cluster type %s", + name_for_cluster_type(type)); + return EPROTONOSUPPORT; } /*! * \brief Allocate a new \p crm_cluster_t object * * \return A newly allocated \p crm_cluster_t object (guaranteed not \c NULL) * \note The caller is responsible for freeing the return value using * \p pcmk_cluster_free(). */ crm_cluster_t * pcmk_cluster_new(void) { return (crm_cluster_t *) pcmk__assert_alloc(1, sizeof(crm_cluster_t)); } /*! * \brief Free a \p crm_cluster_t object and its dynamically allocated members * * \param[in,out] cluster Cluster object to free */ void pcmk_cluster_free(crm_cluster_t *cluster) { if (cluster == NULL) { return; } free(cluster->uuid); free(cluster->uname); free(cluster); } /*! * \brief Send an XML message via the cluster messaging layer * * \param[in] node Cluster node to send message to * \param[in] service Message type to use in message host info * \param[in] data XML message to send * \param[in] ordered Ignored for currently supported messaging layers * * \return TRUE on success, otherwise FALSE */ gboolean send_cluster_message(const crm_node_t *node, enum crm_ais_msg_types service, const xmlNode *data, gboolean ordered) { switch (get_cluster_type()) { case pcmk_cluster_corosync: #if SUPPORT_COROSYNC return pcmk__cpg_send_xml(data, node, service); #endif break; default: break; } return FALSE; } /*! * \brief Get the local node's name * * \return Local node's name * \note This will fatally exit if local node name cannot be known. */ const char * get_local_node_name(void) { static char *name = NULL; if (name == NULL) { name = get_node_name(0); } return name; } /*! * \brief Get the node name corresponding to a cluster node ID * * \param[in] nodeid Node ID to check (or 0 for local node) * * \return Node name corresponding to \p nodeid * \note This will fatally exit if \p nodeid is 0 and local node name cannot be * known. */ char * get_node_name(uint32_t nodeid) { char *name = NULL; enum cluster_type_e stack = get_cluster_type(); switch (stack) { case pcmk_cluster_corosync: #if SUPPORT_COROSYNC name = pcmk__corosync_name(0, nodeid); break; #endif // SUPPORT_COROSYNC default: crm_err("Unknown cluster type: %s (%d)", name_for_cluster_type(stack), stack); } if ((name == NULL) && (nodeid == 0)) { name = pcmk_hostname(); if (name == NULL) { // @TODO Maybe let the caller decide what to do crm_err("Could not obtain the local %s node name", name_for_cluster_type(stack)); crm_exit(CRM_EX_FATAL); } crm_notice("Defaulting to uname -n for the local %s node name", name_for_cluster_type(stack)); } if (name == NULL) { crm_notice("Could not obtain a node name for %s node with " PCMK_XA_ID " %u", name_for_cluster_type(stack), nodeid); } return name; } /*! * \brief Get the node name corresponding to a node UUID * * \param[in] uuid UUID of desired node * * \return name of desired node * * \note This relies on the remote peer cache being populated with all * remote nodes in the cluster, so callers should maintain that cache. */ const char * crm_peer_uname(const char *uuid) { GHashTableIter iter; crm_node_t *node = NULL; CRM_CHECK(uuid != NULL, return NULL); /* remote nodes have the same uname and uuid */ if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) { return uuid; } /* avoid blocking calls where possible */ g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) { if (node->uname != NULL) { return node->uname; } break; } } node = NULL; if (is_corosync_cluster()) { long long id; if ((pcmk__scan_ll(uuid, &id, 0LL) != pcmk_rc_ok) || (id < 1LL) || (id > UINT32_MAX)) { crm_err("Invalid Corosync node ID '%s'", uuid); return NULL; } node = pcmk__search_node_caches((uint32_t) id, NULL, pcmk__node_search_cluster); if (node != NULL) { crm_info("Setting uuid for node %s[%u] to %s", node->uname, node->id, uuid); node->uuid = strdup(uuid); return node->uname; } return NULL; } return NULL; } /*! * \brief Get a log-friendly string equivalent of a cluster type * * \param[in] type Cluster type * * \return Log-friendly string corresponding to \p type */ const char * name_for_cluster_type(enum cluster_type_e type) { switch (type) { case pcmk_cluster_corosync: return "corosync"; case pcmk_cluster_unknown: return "unknown"; case pcmk_cluster_invalid: return "invalid"; } crm_err("Invalid cluster type: %d", type); return "invalid"; } /*! * \brief Get (and validate) the local cluster type * * \return Local cluster type * \note This will fatally exit if the local cluster type is invalid. */ enum cluster_type_e get_cluster_type(void) { bool detected = false; const char *cluster = NULL; static enum cluster_type_e cluster_type = pcmk_cluster_unknown; /* Return the previous calculation, if any */ if (cluster_type != pcmk_cluster_unknown) { return cluster_type; } cluster = pcmk__env_option(PCMK__ENV_CLUSTER_TYPE); #if SUPPORT_COROSYNC /* If nothing is defined in the environment, try corosync (if supported) */ if (cluster == NULL) { crm_debug("Testing with Corosync"); cluster_type = pcmk__corosync_detect(); if (cluster_type != pcmk_cluster_unknown) { detected = true; goto done; } } #endif /* Something was defined in the environment, test it against what we support */ crm_info("Verifying cluster type: '%s'", ((cluster == NULL)? "-unspecified-" : cluster)); if (cluster == NULL) { #if SUPPORT_COROSYNC } else if (pcmk__str_eq(cluster, "corosync", pcmk__str_casei)) { cluster_type = pcmk_cluster_corosync; #endif } else { cluster_type = pcmk_cluster_invalid; goto done; /* Keep the compiler happy when no stacks are supported */ } done: if (cluster_type == pcmk_cluster_unknown) { crm_notice("Could not determine the current cluster type"); } else if (cluster_type == pcmk_cluster_invalid) { crm_notice("This installation does not support the '%s' cluster infrastructure: terminating.", cluster); crm_exit(CRM_EX_FATAL); } else { crm_info("%s an active '%s' cluster", (detected? "Detected" : "Assuming"), name_for_cluster_type(cluster_type)); } return cluster_type; } /*! * \brief Check whether the local cluster is a Corosync cluster * * \return TRUE if the local cluster is a Corosync cluster, otherwise FALSE */ gboolean is_corosync_cluster(void) { return get_cluster_type() == pcmk_cluster_corosync; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node) { crm_xml_add(xml, attr, crm_peer_uuid(node)); } +gboolean +crm_cluster_connect(crm_cluster_t *cluster) +{ + return pcmk_cluster_connect(cluster) == pcmk_rc_ok; +} + +void +crm_cluster_disconnect(crm_cluster_t *cluster) +{ + pcmk_cluster_disconnect(cluster); +} + // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index 00e627fcb5..ef263027a2 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,815 +1,820 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include // PRIu64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* PCMK__SPECIAL_PID* */ #include "crmcluster_private.h" static quorum_handle_t pcmk_quorum_handle = 0; static gboolean (*quorum_app_callback)(unsigned long long seq, gboolean quorate) = NULL; /*! * \internal * \brief Get the Corosync UUID associated with a Pacemaker node * * \param[in] node Pacemaker node * * \return Newly allocated string with node's Corosync UUID, or NULL if unknown * \note It is the caller's responsibility to free the result with free(). */ char * pcmk__corosync_uuid(const crm_node_t *node) { if ((node != NULL) && is_corosync_cluster()) { if (node->id > 0) { return crm_strdup_printf("%u", node->id); } else { crm_info("Node %s is not yet known by Corosync", node->uname); } } return NULL; } static bool node_name_is_valid(const char *key, const char *name) { int octet; if (name == NULL) { crm_trace("%s is empty", key); return false; } else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) { crm_trace("%s contains an IPv4 address (%s), ignoring", key, name); return false; } else if (strstr(name, ":") != NULL) { crm_trace("%s contains an IPv6 address (%s), ignoring", key, name); return false; } crm_trace("'%s: %s' is valid", key, name); return true; } /* * \internal * \brief Get Corosync node name corresponding to a node ID * * \param[in] cmap_handle Connection to Corosync CMAP * \param[in] nodeid Node ID to check * * \return Newly allocated string with name or (if no name) IP address * associated with first address assigned to a Corosync node ID (or NULL * if unknown) * \note It is the caller's responsibility to free the result with free(). */ char * pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) { // Originally based on corosync-quorumtool.c:node_name() int lpc = 0; cs_error_t rc = CS_OK; int retries = 0; char *name = NULL; cmap_handle_t local_handle = 0; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; if (nodeid == 0) { nodeid = get_local_nodeid(0); } if (cmap_handle == 0 && local_handle == 0) { retries = 0; crm_trace("Initializing CMAP connection"); do { rc = pcmk__init_cmap(&local_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %s", cs_strerror(rc)); local_handle = 0; } } if (cmap_handle == 0) { cmap_handle = local_handle; rc = cmap_fd_get(cmap_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } } while (name == NULL && cmap_handle != 0) { uint32_t id = 0; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &id); crm_trace("Checking %u vs %u from %s", nodeid, id, key); free(key); if (rc != CS_OK) { break; } if (nodeid == id) { crm_trace("Searching for node name for %u in nodelist.node.%d %s", nodeid, lpc, pcmk__s(name, "")); if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.name", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, pcmk__s(name, "")); free(key); } if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, pcmk__s(name, "")); if (!node_name_is_valid(key, name)) { free(name); name = NULL; } free(key); } break; } lpc++; } bail: if(local_handle) { cmap_finalize(local_handle); } if (name == NULL) { crm_info("Unable to get node name for nodeid %u", nodeid); } return name; } /*! * \internal * \brief Disconnect from Corosync cluster * - * \param[in,out] cluster Cluster connection to disconnect + * \param[in,out] cluster Cluster object to disconnect */ void pcmk__corosync_disconnect(crm_cluster_t *cluster) { - cluster_disconnect_cpg(cluster); - if (pcmk_quorum_handle) { + pcmk__cpg_disconnect(cluster); + + if (pcmk_quorum_handle != 0) { quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; } crm_notice("Disconnected from Corosync"); } /*! * \internal * \brief Dispatch function for quorum connection file descriptor * * \param[in] user_data Ignored * * \return 0 on success, -1 on error (per mainloop_io_t interface) */ static int quorum_dispatch_cb(gpointer user_data) { int rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; return -1; } return 0; } /*! * \internal * \brief Notification callback for Corosync quorum connection * * \param[in] handle Corosync quorum connection * \param[in] quorate Whether cluster is quorate * \param[in] ring_id Corosync ring ID * \param[in] view_list_entries Number of entries in \p view_list * \param[in] view_list Corosync node IDs in membership */ static void quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t *view_list) { int i; GHashTableIter iter; crm_node_t *node = NULL; static gboolean init_phase = TRUE; if (quorate != crm_have_quorum) { if (quorate) { crm_notice("Quorum acquired " CRM_XS " membership=%" PRIu64 " members=%lu", ring_id, (long unsigned int)view_list_entries); } else { crm_warn("Quorum lost " CRM_XS " membership=%" PRIu64 " members=%lu", ring_id, (long unsigned int)view_list_entries); } crm_have_quorum = quorate; } else { crm_info("Quorum %s " CRM_XS " membership=%" PRIu64 " members=%lu", (quorate? "retained" : "still lost"), ring_id, (long unsigned int)view_list_entries); } if (view_list_entries == 0 && init_phase) { crm_info("Corosync membership is still forming, ignoring"); return; } init_phase = FALSE; /* Reset last_seen for all cached nodes so we can tell which ones aren't * in the view list */ g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { node->last_seen = 0; } /* Update the peer cache for each node in view list */ for (i = 0; i < view_list_entries; i++) { uint32_t id = view_list[i]; crm_debug("Member[%d] %u ", i, id); /* Get this node's peer cache entry (adding one if not already there) */ node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster); if (node->uname == NULL) { char *name = pcmk__corosync_name(0, id); crm_info("Obtaining name for new node %u", id); node = pcmk__get_node(id, name, NULL, pcmk__node_search_cluster); free(name); } /* Update the node state (including updating last_seen to ring_id) */ pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id); } /* Remove any peer cache entries we didn't update */ pcmk__reap_unseen_nodes(ring_id); if (quorum_app_callback) { quorum_app_callback(ring_id, quorate); } } /*! * \internal * \brief Connect to Corosync quorum service * * \param[in] dispatch Connection dispatch callback * \param[in] destroy Connection destroy callback */ void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy)(gpointer)) { cs_error_t rc; int fd = 0; int quorate = 0; uint32_t quorum_type = 0; struct mainloop_fd_callbacks quorum_fd_callbacks; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; quorum_fd_callbacks.dispatch = quorum_dispatch_cb; quorum_fd_callbacks.destroy = destroy; crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); { #if 0 // New way but not supported by all Corosync 2 versions quorum_model_v0_data_t quorum_model_data = { .model = QUORUM_MODEL_V0, .quorum_notify_fn = quorum_notification_cb, }; rc = quorum_model_initialize(&pcmk_quorum_handle, QUORUM_MODEL_V0, (quorum_model_data_t *) &quorum_model_data, &quorum_type, NULL); #else quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = quorum_notification_cb, }; rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); #endif } if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %s (%d)", cs_strerror(rc), rc); goto bail; } else if (quorum_type != QUORUM_SET) { crm_err("Corosync quorum is not configured"); goto bail; } rc = quorum_fd_get(pcmk_quorum_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the Quorum API connection: %s (%d)", strerror(rc), rc); goto bail; } /* Quorum provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("Quorum provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = CS_ERR_ACCESS; goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of Quorum provider: %s (%d)", strerror(-rv), -rv); rc = CS_ERR_ACCESS; goto bail; } rc = quorum_getquorate(pcmk_quorum_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d", rc); goto bail; } if (quorate) { crm_notice("Quorum acquired"); } else { crm_warn("No quorum"); } quorum_app_callback = dispatch; crm_have_quorum = quorate; rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); if (rc != CS_OK) { crm_err("Could not setup Quorum API notifications: %d", rc); goto bail; } mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks); pcmk__corosync_add_nodes(NULL); bail: if (rc != CS_OK) { quorum_finalize(pcmk_quorum_handle); } } /*! * \internal * \brief Connect to Corosync cluster layer * - * \param[in,out] cluster Initialized cluster object to connect + * \param[in,out] cluster Initialized cluster object to connect + * + * \return Standard Pacemaker return code */ -gboolean +int pcmk__corosync_connect(crm_cluster_t *cluster) { crm_node_t *peer = NULL; enum cluster_type_e stack = get_cluster_type(); + int rc = pcmk_rc_ok; crm_peer_init(); if (stack != pcmk_cluster_corosync) { crm_err("Invalid cluster type: %s " CRM_XS " stack=%d", name_for_cluster_type(stack), stack); - return FALSE; + return EINVAL; } - if (!cluster_connect_cpg(cluster)) { - // Error message was logged by cluster_connect_cpg() - return FALSE; + rc = pcmk__cpg_connect(cluster); + if (rc != pcmk_rc_ok) { + // Error message was logged by pcmk__cpg_connect() + return rc; } crm_info("Connection to %s established", name_for_cluster_type(stack)); cluster->nodeid = get_local_nodeid(0); if (cluster->nodeid == 0) { crm_err("Could not determine local node ID"); - return FALSE; + return ENXIO; } cluster->uname = get_node_name(0); if (cluster->uname == NULL) { crm_err("Could not determine local node name"); - return FALSE; + return ENXIO; } // Ensure local node always exists in peer cache peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL, pcmk__node_search_cluster); cluster->uuid = pcmk__corosync_uuid(peer); - return TRUE; + return pcmk_rc_ok; } /*! * \internal * \brief Check whether a Corosync cluster is active * * \return pcmk_cluster_corosync if Corosync is found, else pcmk_cluster_unknown */ enum cluster_type_e pcmk__corosync_detect(void) { int rc = CS_OK; cmap_handle_t handle; rc = pcmk__init_cmap(&handle); switch(rc) { case CS_OK: break; case CS_ERR_SECURITY: crm_debug("Failed to initialize the cmap API: Permission denied (%d)", rc); /* It's there, we just can't talk to it. * Good enough for us to identify as 'corosync' */ return pcmk_cluster_corosync; default: crm_info("Failed to initialize the cmap API: %s (%d)", pcmk__cs_err_str(rc), rc); return pcmk_cluster_unknown; } cmap_finalize(handle); return pcmk_cluster_corosync; } /*! * \brief Check whether a Corosync cluster peer is active * * \param[in] node Node to check * * \return TRUE if \p node is an active Corosync peer, otherwise FALSE */ gboolean crm_is_corosync_peer_active(const crm_node_t *node) { if (node == NULL) { crm_trace("Corosync peer inactive: NULL"); return FALSE; } else if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) { crm_trace("Corosync peer %s inactive: state=%s", node->uname, node->state); return FALSE; } else if (!pcmk_is_set(node->processes, crm_proc_cpg)) { crm_trace("Corosync peer %s inactive: processes=%.16x", node->uname, node->processes); return FALSE; } return TRUE; } /*! * \internal * \brief Load Corosync node list (via CMAP) into peer cache and optionally XML * * \param[in,out] xml_parent If not NULL, add entry here for each node * * \return true if any nodes were found, false otherwise */ bool pcmk__corosync_add_nodes(xmlNode *xml_parent) { int lpc = 0; cs_error_t rc = CS_OK; int retries = 0; bool any = false; cmap_handle_t cmap_handle; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; do { rc = pcmk__init_cmap(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return false; } rc = cmap_fd_get(cmap_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } crm_peer_init(); crm_trace("Initializing Corosync node list"); for (lpc = 0; TRUE; lpc++) { uint32_t nodeid = 0; char *name = NULL; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &nodeid); free(key); if (rc != CS_OK) { break; } name = pcmk__corosync_name(cmap_handle, nodeid); if (name != NULL) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node && node->uname && strcasecmp(node->uname, name) == 0) { if (node->id && node->id != nodeid) { crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id, nodeid, name); crm_exit(CRM_EX_FATAL); } } } } if (nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster); } if (nodeid > 0 && name != NULL) { any = true; if (xml_parent) { xmlNode *node = pcmk__xe_create(xml_parent, PCMK_XE_NODE); crm_xml_set_id(node, "%u", nodeid); crm_xml_add(node, PCMK_XA_UNAME, name); } } free(name); } bail: cmap_finalize(cmap_handle); return any; } /*! * \internal * \brief Get cluster name from Corosync configuration (via CMAP) * * \return Newly allocated string with cluster name if configured, or NULL */ char * pcmk__corosync_cluster_name(void) { cmap_handle_t handle; char *cluster_name = NULL; cs_error_t rc = CS_OK; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; rc = pcmk__init_cmap(&handle); if (rc != CS_OK) { crm_info("Failed to initialize the cmap API: %s (%d)", cs_strerror(rc), rc); return NULL; } rc = cmap_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name); if (rc != CS_OK) { crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc); } else { crm_debug("cmap totem.cluster_name = '%s'", cluster_name); } bail: cmap_finalize(handle); return cluster_name; } /*! * \internal * \brief Check (via CMAP) whether Corosync configuration has a node list * * \return true if Corosync has node list, otherwise false */ bool pcmk__corosync_has_nodelist(void) { cs_error_t cs_rc = CS_OK; int retries = 0; cmap_handle_t cmap_handle; cmap_iter_handle_t iter_handle; char key_name[CMAP_KEYNAME_MAXLEN + 1]; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rc = pcmk_ok; static bool got_result = false; static bool result = false; if (got_result) { return result; } // Connect to CMAP do { cs_rc = pcmk__init_cmap(&cmap_handle); if (cs_rc != CS_OK) { retries++; crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)", cs_strerror(cs_rc), cs_rc, retries); sleep(retries); } } while ((retries < 5) && (cs_rc != CS_OK)); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP connection failed (%s) " CRM_XS " rc=%d", cs_strerror(cs_rc), cs_rc); return false; } // Get CMAP connection file descriptor cs_rc = cmap_fd_get(cmap_handle, &fd); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP unusable (%s) " CRM_XS " rc=%d", cs_strerror(cs_rc), cs_rc); goto bail; } // Check whether CMAP connection is authentic (i.e. provided by root) rc = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid, &found_uid, &found_gid); if (rc == 0) { crm_warn("Assuming Corosync does not have node list: " "CMAP provider is inauthentic " CRM_XS " pid=%lld uid=%lld gid=%lld", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rc < 0) { crm_warn("Assuming Corosync does not have node list: " "Could not verify CMAP authenticity (%s) " CRM_XS " rc=%d", pcmk_strerror(rc), rc); goto bail; } // Check whether nodelist section is presetn cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP not readable (%s) " CRM_XS " rc=%d", cs_strerror(cs_rc), cs_rc); goto bail; } cs_rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL); if (cs_rc == CS_OK) { result = true; } cmap_iter_finalize(cmap_handle, iter_handle); got_result = true; crm_debug("Corosync %s node list", (result? "has" : "does not have")); bail: cmap_finalize(cmap_handle); return result; } diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c index 9c13518a83..f09700b8ba 100644 --- a/lib/cluster/cpg.c +++ b/lib/cluster/cpg.c @@ -1,1094 +1,1123 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* PCMK__SPECIAL_PID* */ #include "crmcluster_private.h" /* @TODO Once we can update the public API to require crm_cluster_t* in more * functions, we can ditch this in favor of cluster->cpg_handle. */ static cpg_handle_t pcmk_cpg_handle = 0; // @TODO These could be moved to crm_cluster_t* at that time as well static bool cpg_evicted = false; static GList *cs_message_queue = NULL; static int cs_message_timer = 0; struct pcmk__cpg_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[MAX_NAME]; } __attribute__ ((packed)); typedef struct pcmk__cpg_host_s pcmk__cpg_host_t; struct pcmk__cpg_msg_s { struct qb_ipc_response_header header __attribute__ ((aligned(8))); uint32_t id; gboolean is_compressed; pcmk__cpg_host_t host; pcmk__cpg_host_t sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__ ((packed)); typedef struct pcmk__cpg_msg_s pcmk__cpg_msg_t; static void crm_cs_flush(gpointer data); #define msg_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) #define cs_repeat(rc, counter, max, code) do { \ rc = code; \ if ((rc == CS_ERR_TRY_AGAIN) || (rc == CS_ERR_QUEUE_FULL)) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while (counter < max) -/*! - * \brief Disconnect from Corosync CPG - * - * \param[in,out] cluster Cluster to disconnect - */ -void -cluster_disconnect_cpg(crm_cluster_t *cluster) -{ - pcmk_cpg_handle = 0; - if (cluster->cpg_handle) { - crm_trace("Disconnecting CPG"); - cpg_leave(cluster->cpg_handle, &cluster->group); - cpg_finalize(cluster->cpg_handle); - cluster->cpg_handle = 0; - - } else { - crm_info("No CPG connection"); - } -} - /*! * \brief Get the local Corosync node ID (via CPG) * * \param[in] handle CPG connection to use (or 0 to use new connection) * * \return Corosync ID of local node (or 0 if not known) */ uint32_t get_local_nodeid(cpg_handle_t handle) { cs_error_t rc = CS_OK; int retries = 0; static uint32_t local_nodeid = 0; cpg_handle_t local_handle = handle; cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0}; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; if(local_nodeid != 0) { return local_nodeid; } if(handle == 0) { crm_trace("Creating connection"); cs_repeat(rc, retries, 5, cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *)&cpg_model_info, NULL)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); return 0; } rc = cpg_fd_get(local_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CPG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); goto bail; } } if (rc == CS_OK) { retries = 0; crm_trace("Performing lookup"); cs_repeat(rc, retries, 5, cpg_local_get(local_handle, &local_nodeid)); } if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API: %s (%d)", pcmk__cs_err_str(rc), rc); } bail: if(handle == 0) { crm_trace("Closing connection"); cpg_finalize(local_handle); } crm_debug("Local nodeid is %u", local_nodeid); return local_nodeid; } /*! * \internal * \brief Callback function for Corosync message queue timer * * \param[in] data CPG handle * * \return FALSE (to indicate to glib that timer should not be removed) */ static gboolean crm_cs_flush_cb(gpointer data) { cs_message_timer = 0; crm_cs_flush(data); return FALSE; } // Send no more than this many CPG messages in one flush #define CS_SEND_MAX 200 /*! * \internal * \brief Send messages in Corosync CPG message queue * * \param[in] data CPG handle */ static void crm_cs_flush(gpointer data) { unsigned int sent = 0; guint queue_len = 0; cs_error_t rc = 0; cpg_handle_t *handle = (cpg_handle_t *) data; if (*handle == 0) { crm_trace("Connection is dead"); return; } queue_len = g_list_length(cs_message_queue); if (((queue_len % 1000) == 0) && (queue_len > 1)) { crm_err("CPG queue has grown to %d", queue_len); } else if (queue_len == CS_SEND_MAX) { crm_warn("CPG queue has grown to %d", queue_len); } if (cs_message_timer != 0) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active %d", cs_message_timer); return; } while ((cs_message_queue != NULL) && (sent < CS_SEND_MAX)) { struct iovec *iov = cs_message_queue->data; rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1); if (rc != CS_OK) { break; } sent++; crm_trace("CPG message sent, size=%llu", (unsigned long long) iov->iov_len); cs_message_queue = g_list_remove(cs_message_queue, iov); free(iov->iov_base); free(iov); } queue_len -= sent; do_crm_log((queue_len > 5)? LOG_INFO : LOG_TRACE, "Sent %u CPG message%s (%d still queued): %s (rc=%d)", sent, pcmk__plural_s(sent), queue_len, pcmk__cs_err_str(rc), (int) rc); if (cs_message_queue) { uint32_t delay_ms = 100; if (rc != CS_OK) { /* Proportionally more if sending failed but cap at 1s */ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); } cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data); } } /*! * \internal * \brief Dispatch function for CPG handle * * \param[in,out] user_data Cluster object * * \return 0 on success, -1 on error (per mainloop_io_t interface) */ static int pcmk_cpg_dispatch(gpointer user_data) { cs_error_t rc = CS_OK; crm_cluster_t *cluster = (crm_cluster_t *) user_data; rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %s (%d)", pcmk__cs_err_str(rc), rc); cpg_finalize(cluster->cpg_handle); cluster->cpg_handle = 0; return -1; } else if (cpg_evicted) { crm_err("Evicted from CPG membership"); return -1; } return 0; } static inline const char * ais_dest(const pcmk__cpg_host_t *host) { if (host->local) { return "local"; } else if (host->size > 0) { return host->uname; } else { return ""; } } static inline const char * msg_type2text(enum crm_ais_msg_types type) { const char *text = "unknown"; switch (type) { case crm_msg_none: text = "unknown"; break; case crm_msg_ais: text = "ais"; break; case crm_msg_cib: text = "cib"; break; case crm_msg_crmd: text = "crmd"; break; case crm_msg_pe: text = "pengine"; break; case crm_msg_te: text = "tengine"; break; case crm_msg_lrmd: text = "lrmd"; break; case crm_msg_attrd: text = "attrd"; break; case crm_msg_stonithd: text = "stonithd"; break; case crm_msg_stonith_ng: text = "stonith-ng"; break; } return text; } /*! * \internal * \brief Check whether a Corosync CPG message is valid * * \param[in] msg Corosync CPG message to check * * \return true if \p msg is valid, otherwise false */ static bool check_message_sanity(const pcmk__cpg_msg_t *msg) { int32_t payload_size = msg->header.size - sizeof(pcmk__cpg_msg_t); if (payload_size < 1) { crm_err("%sCPG message %d from %s invalid: " "Claimed size of %d bytes is too small " CRM_XS " from %s[%u] to %s@%s", (msg->is_compressed? "Compressed " : ""), msg->id, ais_dest(&(msg->sender)), (int) msg->header.size, msg_type2text(msg->sender.type), msg->sender.pid, msg_type2text(msg->host.type), ais_dest(&(msg->host))); return false; } if (msg->header.error != CS_OK) { crm_err("%sCPG message %d from %s invalid: " "Sender indicated error %d " CRM_XS " from %s[%u] to %s@%s", (msg->is_compressed? "Compressed " : ""), msg->id, ais_dest(&(msg->sender)), msg->header.error, msg_type2text(msg->sender.type), msg->sender.pid, msg_type2text(msg->host.type), ais_dest(&(msg->host))); return false; } if (msg_data_len(msg) != payload_size) { crm_err("%sCPG message %d from %s invalid: " "Total size %d inconsistent with payload size %d " CRM_XS " from %s[%u] to %s@%s", (msg->is_compressed? "Compressed " : ""), msg->id, ais_dest(&(msg->sender)), (int) msg->header.size, (int) msg_data_len(msg), msg_type2text(msg->sender.type), msg->sender.pid, msg_type2text(msg->host.type), ais_dest(&(msg->host))); return false; } if (!msg->is_compressed && /* msg->size != (strlen(msg->data) + 1) would be a stronger check, * but checking the last byte or two should be quick */ (((msg->size > 1) && (msg->data[msg->size - 2] == '\0')) || (msg->data[msg->size - 1] != '\0'))) { crm_err("CPG message %d from %s invalid: " "Payload does not end at byte %llu " CRM_XS " from %s[%u] to %s@%s", msg->id, ais_dest(&(msg->sender)), (unsigned long long) msg->size, msg_type2text(msg->sender.type), msg->sender.pid, msg_type2text(msg->host.type), ais_dest(&(msg->host))); return false; } crm_trace("Verified %d-byte %sCPG message %d from %s[%u]@%s to %s@%s", (int) msg->header.size, (msg->is_compressed? "compressed " : ""), msg->id, msg_type2text(msg->sender.type), msg->sender.pid, ais_dest(&(msg->sender)), msg_type2text(msg->host.type), ais_dest(&(msg->host))); return true; } /*! * \brief Extract text data from a Corosync CPG message * * \param[in] handle CPG connection (to get local node ID if not known) * \param[in] nodeid Corosync ID of node that sent message * \param[in] pid Process ID of message sender (for logging only) * \param[in,out] content CPG message * \param[out] kind If not NULL, will be set to CPG header ID * (which should be an enum crm_ais_msg_class value, * currently always crm_class_cluster) * \param[out] from If not NULL, will be set to sender uname * (valid for the lifetime of \p content) * * \return Newly allocated string with message data * \note It is the caller's responsibility to free the return value with free(). */ char * pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content, uint32_t *kind, const char **from) { char *data = NULL; pcmk__cpg_msg_t *msg = (pcmk__cpg_msg_t *) content; if(handle) { // Do filtering and field massaging uint32_t local_nodeid = get_local_nodeid(handle); const char *local_name = get_local_node_name(); if (msg->sender.id > 0 && msg->sender.id != nodeid) { crm_err("Nodeid mismatch from %d.%d: claimed nodeid=%u", nodeid, pid, msg->sender.id); return NULL; } else if (msg->host.id != 0 && (local_nodeid != msg->host.id)) { /* Not for us */ crm_trace("Not for us: %u != %u", msg->host.id, local_nodeid); return NULL; } else if (msg->host.size != 0 && !pcmk__str_eq(msg->host.uname, local_name, pcmk__str_casei)) { /* Not for us */ crm_trace("Not for us: %s != %s", msg->host.uname, local_name); return NULL; } msg->sender.id = nodeid; if (msg->sender.size == 0) { crm_node_t *peer = pcmk__get_node(nodeid, NULL, NULL, pcmk__node_search_cluster); if (peer == NULL) { crm_err("Peer with nodeid=%u is unknown", nodeid); } else if (peer->uname == NULL) { crm_err("No uname for peer with nodeid=%u", nodeid); } else { crm_notice("Fixing uname for peer with nodeid=%u", nodeid); msg->sender.size = strlen(peer->uname); memset(msg->sender.uname, 0, MAX_NAME); memcpy(msg->sender.uname, peer->uname, msg->sender.size); } } } crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? " compressed" : "", msg_data_len(msg), msg->size, msg->compressed_size); if (kind != NULL) { *kind = msg->header.id; } if (from != NULL) { *from = msg->sender.uname; } if (msg->is_compressed && msg->size > 0) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (!check_message_sanity(msg)) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = pcmk__assert_alloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); rc = pcmk__bzlib2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Decompression failed: %s " CRM_XS " rc=%d", pcmk_rc_str(rc), rc); free(uncompressed); goto badmsg; } CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (!check_message_sanity(msg)) { goto badmsg; } else { data = strdup(msg->data); } // Is this necessary? pcmk__get_node(msg->sender.id, msg->sender.uname, NULL, pcmk__node_search_cluster); crm_trace("Payload: %.200s", data); return data; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(pcmk__cpg_msg_t), msg->header.size, msg->size, msg->compressed_size); free(data); return NULL; } /*! * \internal * \brief Compare cpg_address objects by node ID * * \param[in] first First cpg_address structure to compare * \param[in] second Second cpg_address structure to compare * * \return Negative number if first's node ID is lower, * positive number if first's node ID is greater, * or 0 if both node IDs are equal */ static int cmp_member_list_nodeid(const void *first, const void *second) { const struct cpg_address *const a = *((const struct cpg_address **) first), *const b = *((const struct cpg_address **) second); if (a->nodeid < b->nodeid) { return -1; } else if (a->nodeid > b->nodeid) { return 1; } /* don't bother with "reason" nor "pid" */ return 0; } /*! * \internal * \brief Get a readable string equivalent of a cpg_reason_t value * * \param[in] reason CPG reason value * * \return Readable string suitable for logging */ static const char * cpgreason2str(cpg_reason_t reason) { switch (reason) { case CPG_REASON_JOIN: return " via cpg_join"; case CPG_REASON_LEAVE: return " via cpg_leave"; case CPG_REASON_NODEDOWN: return " via cluster exit"; case CPG_REASON_NODEUP: return " via cluster join"; case CPG_REASON_PROCDOWN: return " for unknown reason"; default: break; } return ""; } /*! * \internal * \brief Get a log-friendly node name * * \param[in] peer Node to check * * \return Node's uname, or readable string if not known */ static inline const char * peer_name(const crm_node_t *peer) { if (peer == NULL) { return "unknown node"; } else if (peer->uname == NULL) { return "peer node"; } else { return peer->uname; } } /*! * \internal * \brief Process a CPG peer's leaving the cluster * * \param[in] cpg_group_name CPG group name (for logging) * \param[in] event_counter Event number (for logging) * \param[in] local_nodeid Node ID of local node * \param[in] cpg_peer CPG peer that left * \param[in] sorted_member_list List of remaining members, qsort()-ed by ID * \param[in] member_list_entries Number of entries in \p sorted_member_list */ static void node_left(const char *cpg_group_name, int event_counter, uint32_t local_nodeid, const struct cpg_address *cpg_peer, const struct cpg_address **sorted_member_list, size_t member_list_entries) { crm_node_t *peer = pcmk__search_node_caches(cpg_peer->nodeid, NULL, pcmk__node_search_cluster); const struct cpg_address **rival = NULL; /* Most CPG-related Pacemaker code assumes that only one process on a node * can be in the process group, but Corosync does not impose this * limitation, and more than one can be a member in practice due to a * daemon attempting to start while another instance is already running. * * Check for any such duplicate instances, because we don't want to process * their leaving as if our actual peer left. If the peer that left still has * an entry in sorted_member_list (with a different PID), we will ignore the * leaving. * * @TODO Track CPG members' PIDs so we can tell exactly who left. */ if (peer != NULL) { rival = bsearch(&cpg_peer, sorted_member_list, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); } if (rival == NULL) { crm_info("Group %s event %d: %s (node %u pid %u) left%s", cpg_group_name, event_counter, peer_name(peer), cpg_peer->nodeid, cpg_peer->pid, cpgreason2str(cpg_peer->reason)); if (peer != NULL) { crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_OFFLINE); } } else if (cpg_peer->nodeid == local_nodeid) { crm_warn("Group %s event %d: duplicate local pid %u left%s", cpg_group_name, event_counter, cpg_peer->pid, cpgreason2str(cpg_peer->reason)); } else { crm_warn("Group %s event %d: " "%s (node %u) duplicate pid %u left%s (%u remains)", cpg_group_name, event_counter, peer_name(peer), cpg_peer->nodeid, cpg_peer->pid, cpgreason2str(cpg_peer->reason), (*rival)->pid); } } /*! * \brief Handle a CPG configuration change event * * \param[in] handle CPG connection * \param[in] cpg_name CPG group name * \param[in] member_list List of current CPG members * \param[in] member_list_entries Number of entries in \p member_list * \param[in] left_list List of CPG members that left * \param[in] left_list_entries Number of entries in \p left_list * \param[in] joined_list List of CPG members that joined * \param[in] joined_list_entries Number of entries in \p joined_list */ void pcmk_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { int i; gboolean found = FALSE; static int counter = 0; uint32_t local_nodeid = get_local_nodeid(handle); const struct cpg_address **sorted; sorted = pcmk__assert_alloc(member_list_entries, sizeof(const struct cpg_address *)); for (size_t iter = 0; iter < member_list_entries; iter++) { sorted[iter] = member_list + iter; } /* so that the cross-matching multiply-subscribed nodes is then cheap */ qsort(sorted, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); for (i = 0; i < left_list_entries; i++) { node_left(groupName->value, counter, local_nodeid, &left_list[i], sorted, member_list_entries); } free(sorted); sorted = NULL; for (i = 0; i < joined_list_entries; i++) { crm_info("Group %s event %d: node %u pid %u joined%s", groupName->value, counter, joined_list[i].nodeid, joined_list[i].pid, cpgreason2str(joined_list[i].reason)); } for (i = 0; i < member_list_entries; i++) { crm_node_t *peer = pcmk__get_node(member_list[i].nodeid, NULL, NULL, pcmk__node_search_cluster); if (member_list[i].nodeid == local_nodeid && member_list[i].pid != getpid()) { // See the note in node_left() crm_warn("Group %s event %d: detected duplicate local pid %u", groupName->value, counter, member_list[i].pid); continue; } crm_info("Group %s event %d: %s (node %u pid %u) is member", groupName->value, counter, peer_name(peer), member_list[i].nodeid, member_list[i].pid); /* If the caller left auto-reaping enabled, this will also update the * state to member. */ peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE); if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) { /* The node is a CPG member, but we currently think it's not a * cluster member. This is possible only if auto-reaping was * disabled. The node may be joining, and we happened to get the CPG * notification before the quorum notification; or the node may have * just died, and we are processing its final messages; or a bug * has affected the peer cache. */ time_t now = time(NULL); if (peer->when_lost == 0) { // Track when we first got into this contradictory state peer->when_lost = now; } else if (now > (peer->when_lost + 60)) { // If it persists for more than a minute, update the state crm_warn("Node %u is member of group %s but was believed offline", member_list[i].nodeid, groupName->value); pcmk__update_peer_state(__func__, peer, CRM_NODE_MEMBER, 0); } } if (local_nodeid == member_list[i].nodeid) { found = TRUE; } } if (!found) { crm_err("Local node was evicted from group %s", groupName->value); cpg_evicted = true; } counter++; } /*! * \brief Connect to Corosync CPG * - * \param[in,out] cluster Cluster object + * \param[in,out] cluster Initialized cluster object to connect * - * \return TRUE on success, otherwise FALSE + * \return Standard Pacemaker return code */ -gboolean -cluster_connect_cpg(crm_cluster_t *cluster) +int +pcmk__cpg_connect(crm_cluster_t *cluster) { cs_error_t rc; int fd = -1; int retries = 0; uint32_t id = 0; crm_node_t *peer = NULL; cpg_handle_t handle = 0; const char *message_name = pcmk__message_name(crm_system_name); uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = cluster->destroy, }; cpg_model_v1_data_t cpg_model_info = { .model = CPG_MODEL_V1, .cpg_deliver_fn = cluster->cpg.cpg_deliver_fn, .cpg_confchg_fn = cluster->cpg.cpg_confchg_fn, .cpg_totem_confchg_fn = NULL, .flags = 0, }; cpg_evicted = false; cluster->group.length = 0; cluster->group.value[0] = 0; /* group.value is char[128] */ strncpy(cluster->group.value, message_name, 127); cluster->group.value[127] = 0; cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value)); cs_repeat(rc, retries, 30, cpg_model_initialize(&handle, CPG_MODEL_V1, (cpg_model_data_t *)&cpg_model_info, NULL)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); goto bail; } rc = cpg_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CPG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = CS_ERR_ACCESS; goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); rc = CS_ERR_ACCESS; goto bail; } id = get_local_nodeid(handle); if (id == 0) { crm_err("Could not get local node id from the CPG API"); goto bail; } cluster->nodeid = id; retries = 0; cs_repeat(rc, retries, 30, cpg_join(handle, &cluster->group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", message_name, rc); goto bail; } pcmk_cpg_handle = handle; cluster->cpg_handle = handle; mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(handle); - return FALSE; + // @TODO Map rc to more specific Pacemaker return code + return ENOTCONN; } peer = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster); crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE); - return TRUE; + return pcmk_rc_ok; +} + +/*! + * \internal + * \brief Disconnect from Corosync CPG + * + * \param[in,out] cluster Cluster object to disconnect + */ +void +pcmk__cpg_disconnect(crm_cluster_t *cluster) +{ + pcmk_cpg_handle = 0; + if (cluster->cpg_handle != 0) { + crm_trace("Disconnecting CPG"); + cpg_leave(cluster->cpg_handle, &cluster->group); + cpg_finalize(cluster->cpg_handle); + cluster->cpg_handle = 0; + + } else { + crm_info("No CPG connection"); + } } /*! * \internal * \brief Send an XML message via Corosync CPG * * \param[in] msg XML message to send * \param[in] node Cluster node to send message to * \param[in] dest Type of message to send * * \return TRUE on success, otherwise FALSE */ bool pcmk__cpg_send_xml(const xmlNode *msg, const crm_node_t *node, enum crm_ais_msg_types dest) { bool rc = true; GString *data = g_string_sized_new(1024); pcmk__xml_string(msg, 0, data, 0); rc = send_cluster_text(crm_class_cluster, data->str, FALSE, node, dest); g_string_free(data, TRUE); return rc; } /*! * \internal * \brief Send string data via Corosync CPG * * \param[in] msg_class Message class (to set as CPG header ID) * \param[in] data Data to send * \param[in] local What to set as host "local" value (which is never used) * \param[in] node Cluster node to send message to * \param[in] dest Type of message to send * * \return TRUE on success, otherwise FALSE */ gboolean send_cluster_text(enum crm_ais_msg_class msg_class, const char *data, gboolean local, const crm_node_t *node, enum crm_ais_msg_types dest) { static int msg_id = 0; static int local_pid = 0; static int local_name_len = 0; static const char *local_name = NULL; char *target = NULL; struct iovec *iov; pcmk__cpg_msg_t *msg = NULL; enum crm_ais_msg_types sender = text2msg_type(crm_system_name); switch (msg_class) { case crm_class_cluster: break; default: crm_err("Invalid message class: %d", msg_class); return FALSE; } CRM_CHECK(dest != crm_msg_ais, return FALSE); if (local_name == NULL) { local_name = get_local_node_name(); } if ((local_name_len == 0) && (local_name != NULL)) { local_name_len = strlen(local_name); } if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } if (sender == crm_msg_none) { sender = local_pid; } msg = pcmk__assert_alloc(1, sizeof(pcmk__cpg_msg_t)); msg_id++; msg->id = msg_id; msg->header.id = msg_class; msg->header.error = CS_OK; msg->host.type = dest; msg->host.local = local; if (node) { if (node->uname) { target = pcmk__str_copy(node->uname); msg->host.size = strlen(node->uname); memset(msg->host.uname, 0, MAX_NAME); memcpy(msg->host.uname, node->uname, msg->host.size); } else { target = crm_strdup_printf("%u", node->id); } msg->host.id = node->id; } else { target = pcmk__str_copy("all"); } msg->sender.id = 0; msg->sender.type = sender; msg->sender.pid = local_pid; msg->sender.size = local_name_len; memset(msg->sender.uname, 0, MAX_NAME); if ((local_name != NULL) && (msg->sender.size != 0)) { memcpy(msg->sender.uname, local_name, msg->sender.size); } msg->size = 1 + strlen(data); msg->header.size = sizeof(pcmk__cpg_msg_t) + msg->size; if (msg->size < CRM_BZ2_THRESHOLD) { msg = pcmk__realloc(msg, msg->header.size); memcpy(msg->data, data, msg->size); } else { char *compressed = NULL; unsigned int new_size = 0; if (pcmk__compress(data, (unsigned int) msg->size, 0, &compressed, &new_size) == pcmk_rc_ok) { msg->header.size = sizeof(pcmk__cpg_msg_t) + new_size; msg = pcmk__realloc(msg, msg->header.size); memcpy(msg->data, compressed, new_size); msg->is_compressed = TRUE; msg->compressed_size = new_size; } else { // cppcheck seems not to understand the abort logic in pcmk__realloc // cppcheck-suppress memleak msg = pcmk__realloc(msg, msg->header.size); memcpy(msg->data, data, msg->size); } free(compressed); } iov = pcmk__assert_alloc(1, sizeof(struct iovec)); iov->iov_base = msg; iov->iov_len = msg->header.size; if (msg->compressed_size) { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes compressed payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->compressed_size, data); } else { crm_trace("Queueing CPG message %u to %s (%llu bytes, %d bytes payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->size, data); } free(target); cs_message_queue = g_list_append(cs_message_queue, iov); crm_cs_flush(&pcmk_cpg_handle); return TRUE; } /*! * \brief Get the message type equivalent of a string * * \param[in] text String of message type * * \return Message type equivalent of \p text */ enum crm_ais_msg_types text2msg_type(const char *text) { int type = crm_msg_none; CRM_CHECK(text != NULL, return type); text = pcmk__message_name(text); if (pcmk__str_eq(text, "ais", pcmk__str_casei)) { type = crm_msg_ais; } else if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_casei)) { type = crm_msg_cib; } else if (pcmk__strcase_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) { type = crm_msg_crmd; } else if (pcmk__str_eq(text, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { type = crm_msg_te; } else if (pcmk__str_eq(text, CRM_SYSTEM_PENGINE, pcmk__str_casei)) { type = crm_msg_pe; } else if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_casei)) { type = crm_msg_lrmd; } else if (pcmk__str_eq(text, CRM_SYSTEM_STONITHD, pcmk__str_casei)) { type = crm_msg_stonithd; } else if (pcmk__str_eq(text, "stonith-ng", pcmk__str_casei)) { type = crm_msg_stonith_ng; } else if (pcmk__str_eq(text, "attrd", pcmk__str_casei)) { type = crm_msg_attrd; } else { /* This will normally be a transient client rather than * a cluster daemon. Set the type to the pid of the client */ int scan_rc = sscanf(text, "%d", &type); if (scan_rc != 1 || type <= crm_msg_stonith_ng) { /* Ensure it's sane */ type = crm_msg_none; } } return type; } + +// Deprecated functions kept only for backward API compatibility +// LCOV_EXCL_START + +#include + +/*! + * \brief Connect to Corosync CPG + * + * \param[in,out] cluster Cluster object + * + * \return TRUE on success, otherwise FALSE + */ +gboolean +cluster_connect_cpg(crm_cluster_t *cluster) +{ + return pcmk__cpg_connect(cluster) == pcmk_rc_ok; +} + +void +cluster_disconnect_cpg(crm_cluster_t *cluster) +{ + pcmk__cpg_disconnect(cluster); +} + +// LCOV_EXCL_STOP +// End deprecated API diff --git a/lib/cluster/crmcluster_private.h b/lib/cluster/crmcluster_private.h index 370bca5f72..e79320013c 100644 --- a/lib/cluster/crmcluster_private.h +++ b/lib/cluster/crmcluster_private.h @@ -1,47 +1,53 @@ /* - * Copyright 2020-2023 the Pacemaker project contributors + * Copyright 2020-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRMCLUSTER_PRIVATE__H # define PCMK__CRMCLUSTER_PRIVATE__H /* This header is for the sole use of libcrmcluster, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // uint32_t, uint64_t #include // G_GNUC_INTERNAL, gboolean #include // xmlNode #include // cluster_type_e, crm_node_t G_GNUC_INTERNAL enum cluster_type_e pcmk__corosync_detect(void); G_GNUC_INTERNAL bool pcmk__corosync_has_nodelist(void); G_GNUC_INTERNAL char *pcmk__corosync_uuid(const crm_node_t *peer); G_GNUC_INTERNAL char *pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid); G_GNUC_INTERNAL -gboolean pcmk__corosync_connect(crm_cluster_t *cluster); +int pcmk__corosync_connect(crm_cluster_t *cluster); G_GNUC_INTERNAL void pcmk__corosync_disconnect(crm_cluster_t *cluster); +G_GNUC_INTERNAL +int pcmk__cpg_connect(crm_cluster_t *cluster); + +G_GNUC_INTERNAL +void pcmk__cpg_disconnect(crm_cluster_t *cluster); + G_GNUC_INTERNAL bool pcmk__cpg_send_xml(const xmlNode *msg, const crm_node_t *node, enum crm_ais_msg_types dest); #endif // PCMK__CRMCLUSTER_PRIVATE__H diff --git a/lib/cluster/election.c b/lib/cluster/election.c index 3f5ba322e6..02bf024e2c 100644 --- a/lib/cluster/election.c +++ b/lib/cluster/election.c @@ -1,723 +1,723 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #define STORM_INTERVAL 2 /* in seconds */ struct election_s { enum election_result state; guint count; // How many times local node has voted char *name; // Descriptive name for this election char *uname; // Local node's name GSourceFunc cb; // Function to call if election is won GHashTable *voted; // Key = node name, value = how node voted mainloop_timer_t *timeout; // When to abort if all votes not received int election_wins; // Track wins, for storm detection bool wrote_blackbox; // Write a storm blackbox at most once time_t expires; // When storm detection period ends time_t last_election_loss; // When dampening period ends }; static void election_complete(election_t *e) { e->state = election_won; if (e->cb != NULL) { e->cb(e); } election_reset(e); } static gboolean election_timer_cb(gpointer user_data) { election_t *e = user_data; crm_info("%s timed out, declaring local node as winner", e->name); election_complete(e); return FALSE; } /*! * \brief Get current state of an election * * \param[in] e Election object * * \return Current state of \e */ enum election_result election_state(const election_t *e) { return (e == NULL)? election_error : e->state; } /*! * \brief Create a new election object * * Every node that wishes to participate in an election must create an election * object. Typically, this should be done once, at start-up. A caller should * only create a single election object. * * \param[in] name Label for election (for logging) * \param[in] uname Local node's name * \param[in] period_ms How long to wait for all peers to vote * \param[in] cb Function to call if local node wins election * * \return Newly allocated election object on success, NULL on error * \note The caller is responsible for freeing the returned value using * election_fini(). */ election_t * election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb) { election_t *e = NULL; static guint count = 0; CRM_CHECK(uname != NULL, return NULL); e = calloc(1, sizeof(election_t)); if (e == NULL) { crm_perror(LOG_CRIT, "Cannot create election"); return NULL; } e->uname = strdup(uname); if (e->uname == NULL) { crm_perror(LOG_CRIT, "Cannot create election"); free(e); return NULL; } e->name = name? crm_strdup_printf("election-%s", name) : crm_strdup_printf("election-%u", count++); e->cb = cb; e->timeout = mainloop_timer_add(e->name, period_ms, FALSE, election_timer_cb, e); crm_trace("Created %s", e->name); return e; } /*! * \brief Disregard any previous vote by specified peer * * This discards any recorded vote from a specified peer. Election users should * call this whenever a voting peer becomes inactive. * * \param[in,out] e Election object * \param[in] uname Name of peer to disregard */ void election_remove(election_t *e, const char *uname) { if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) { crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname); g_hash_table_remove(e->voted, uname); } } /*! * \brief Stop election timer and disregard all votes * * \param[in,out] e Election object */ void election_reset(election_t *e) { if (e != NULL) { crm_trace("Resetting election %s", e->name); mainloop_timer_stop(e->timeout); if (e->voted) { crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted)); g_hash_table_destroy(e->voted); e->voted = NULL; } } } /*! * \brief Free an election object * * Free all memory associated with an election object, stopping its * election timer (if running). * * \param[in,out] e Election object */ void election_fini(election_t *e) { if (e != NULL) { election_reset(e); crm_trace("Destroying %s", e->name); mainloop_timer_del(e->timeout); free(e->uname); free(e->name); free(e); } } static void election_timeout_start(election_t *e) { if (e != NULL) { mainloop_timer_start(e->timeout); } } /*! * \brief Stop an election's timer, if running * * \param[in,out] e Election object */ void election_timeout_stop(election_t *e) { if (e != NULL) { mainloop_timer_stop(e->timeout); } } /*! * \brief Change an election's timeout (restarting timer if running) * * \param[in,out] e Election object * \param[in] period New timeout */ void election_timeout_set_period(election_t *e, guint period) { if (e != NULL) { mainloop_timer_set_period(e->timeout, period); } else { crm_err("No election defined"); } } static int get_uptime(struct timeval *output) { static time_t expires = 0; static struct rusage info; time_t tm_now = time(NULL); if (expires < tm_now) { int rc = 0; info.ru_utime.tv_sec = 0; info.ru_utime.tv_usec = 0; rc = getrusage(RUSAGE_SELF, &info); output->tv_sec = 0; output->tv_usec = 0; if (rc < 0) { crm_perror(LOG_ERR, "Could not calculate the current uptime"); expires = 0; return -1; } crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec, (long)info.ru_utime.tv_usec); } expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */ output->tv_sec = info.ru_utime.tv_sec; output->tv_usec = info.ru_utime.tv_usec; return 1; } static int compare_age(struct timeval your_age) { struct timeval our_age; get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */ if (our_age.tv_sec > your_age.tv_sec) { crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return 1; } else if (our_age.tv_sec < your_age.tv_sec) { crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return -1; } else if (our_age.tv_usec > your_age.tv_usec) { crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec); return 1; } else if (our_age.tv_usec < your_age.tv_usec) { crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec); return -1; } return 0; } /*! * \brief Start a new election by offering local node's candidacy * * Broadcast a "vote" election message containing the local node's ID, * (incremented) election counter, and uptime, and start the election timer. * * \param[in,out] e Election object * * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if * all active peers do so, or if the election times out, the local node * wins the election. (If we lose to any peer vote, we will stop the * timer, so a timeout means we did not lose -- either some peer did not * vote, or we did not call election_check() in time.) */ void election_vote(election_t *e) { struct timeval age; xmlNode *vote = NULL; crm_node_t *our_node; if (e == NULL) { crm_trace("Election vote requested, but no election available"); return; } our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster); - if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) { + if (!pcmk__cluster_is_node_active(our_node)) { crm_trace("Cannot vote in %s yet: local node not connected to cluster", e->name); return; } election_reset(e); e->state = election_in_progress; vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); e->count++; crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->uuid); crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count); // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds get_uptime(&age); crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC, PCMK__XA_ELECTION_AGE_NANO_SEC, &age); send_cluster_message(NULL, crm_msg_crmd, vote, TRUE); free_xml(vote); crm_debug("Started %s round %d", e->name, e->count); election_timeout_start(e); return; } /*! * \brief Check whether local node has won an election * * If all known peers have sent no-vote messages, stop the election timer, set * the election state to won, and call any registered win callback. * * \param[in,out] e Election object * * \return TRUE if local node has won, FALSE otherwise * \note If all known peers have sent no-vote messages, but the election owner * does not call this function, the election will not be won (and the * callback will not be called) until the election times out. * \note This should be called when election_count_vote() returns * \c election_in_progress. */ bool election_check(election_t *e) { int voted_size = 0; int num_members = 0; if (e == NULL) { crm_trace("Election check requested, but no election available"); return FALSE; } if (e->voted == NULL) { crm_trace("%s check requested, but no votes received yet", e->name); return FALSE; } voted_size = g_hash_table_size(e->voted); - num_members = crm_active_peers(); + num_members = pcmk__cluster_num_active_nodes(); /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to * stabilize */ if (voted_size >= num_members) { /* we won and everyone has voted */ election_timeout_stop(e); if (voted_size > num_members) { GHashTableIter gIter; const crm_node_t *node; char *key = NULL; crm_warn("Received too many votes in %s", e->name); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) { - if (crm_is_peer_active(node)) { + if (pcmk__cluster_is_node_active(node)) { crm_warn("* expected vote: %s", node->uname); } } g_hash_table_iter_init(&gIter, e->voted); while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) { crm_warn("* actual vote: %s", key); } } crm_info("%s won by local node", e->name); election_complete(e); return TRUE; } else { crm_debug("%s still waiting on %d of %d votes", e->name, num_members - voted_size, num_members); } return FALSE; } #define LOSS_DAMPEN 2 /* in seconds */ struct vote { const char *op; const char *from; const char *version; const char *election_owner; int election_id; struct timeval age; }; /*! * \brief Unpack an election message * * \param[in] e Election object (for logging only) * \param[in] message Election message XML * \param[out] vote Parsed fields from message * * \return TRUE if election message and election are valid, FALSE otherwise * \note The parsed struct's pointer members are valid only for the lifetime of * the message argument. */ static bool parse_election_message(const election_t *e, const xmlNode *message, struct vote *vote) { CRM_CHECK(message && vote, return FALSE); vote->election_id = -1; vote->age.tv_sec = -1; vote->age.tv_usec = -1; vote->op = crm_element_value(message, PCMK__XA_CRM_TASK); vote->from = crm_element_value(message, PCMK__XA_SRC); vote->version = crm_element_value(message, PCMK_XA_VERSION); vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER); crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id)); if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL) || (vote->election_owner == NULL) || (vote->election_id < 0)) { crm_warn("Invalid %s message from %s in %s ", (vote->op? vote->op : "election"), (vote->from? vote->from : "unspecified node"), (e? e->name : "election")); return FALSE; } // Op-specific validation if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) { /* Only vote ops have uptime. Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds. */ crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC, PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age)); if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) { crm_warn("Cannot count %s %s from %s because it is missing uptime", (e? e->name : "election"), vote->op, vote->from); return FALSE; } } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) { crm_info("Cannot process %s message from %s because %s is not a known election op", (e? e->name : "election"), vote->from, vote->op); return FALSE; } // Election validation if (e == NULL) { crm_info("Cannot count %s from %s because no election available", vote->op, vote->from); return FALSE; } /* If the membership cache is NULL, we REALLY shouldn't be voting -- * the question is how we managed to get here. */ if (crm_peer_cache == NULL) { crm_info("Cannot count %s %s from %s because no peer information available", e->name, vote->op, vote->from); return FALSE; } return TRUE; } static void record_vote(election_t *e, struct vote *vote) { CRM_ASSERT(e && vote && vote->from && vote->op); if (e->voted == NULL) { e->voted = pcmk__strkey_table(free, free); } pcmk__insert_dup(e->voted, vote->from, vote->op); } static void send_no_vote(crm_node_t *peer, struct vote *vote) { // @TODO probably shouldn't hardcode CRM_SYSTEM_CRMD and crm_msg_crmd xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner); crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id); send_cluster_message(peer, crm_msg_crmd, novote, TRUE); free_xml(novote); } /*! * \brief Process an election message (vote or no-vote) from a peer * * \param[in,out] e Election object * \param[in] message Election message XML from peer * \param[in] can_win Whether local node is eligible to win * * \return Election state after new vote is considered * \note If the peer message is a vote, and we prefer the peer to win, this will * send a no-vote reply to the peer. * \note The situations "we lost to this vote" from "this is a late no-vote * after we've already lost" both return election_lost. If a caller needs * to distinguish them, it should save the current state before calling * this function, and then compare the result. */ enum election_result election_count_vote(election_t *e, const xmlNode *message, bool can_win) { int log_level = LOG_INFO; gboolean done = FALSE; gboolean we_lose = FALSE; const char *reason = "unknown"; bool we_are_owner = FALSE; crm_node_t *our_node = NULL, *your_node = NULL; time_t tm_now = time(NULL); struct vote vote; CRM_CHECK(message != NULL, return election_error); if (parse_election_message(e, message, &vote) == FALSE) { return election_error; } your_node = pcmk__get_node(0, vote.from, NULL, pcmk__node_search_cluster); our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster); we_are_owner = (our_node != NULL) && pcmk__str_eq(our_node->uuid, vote.election_owner, pcmk__str_none); if (!can_win) { reason = "Not eligible"; we_lose = TRUE; - } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) { + } else if (!pcmk__cluster_is_node_active(our_node)) { reason = "We are not part of the cluster"; log_level = LOG_ERR; we_lose = TRUE; } else if (we_are_owner && (vote.election_id != e->count)) { log_level = LOG_TRACE; reason = "Superseded"; done = TRUE; - } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) { + } else if (!pcmk__cluster_is_node_active(your_node)) { /* Possibly we cached the message in the FSA queue at a point that it wasn't */ reason = "Peer is not part of our cluster"; log_level = LOG_WARNING; done = TRUE; } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none) || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) { /* Receiving our own broadcast vote, or a no-vote from peer, is a vote * for us to win */ if (!we_are_owner) { crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)", e->name, vote.election_id, vote.op, vote.from, vote.election_owner); return election_error; } if (e->state != election_in_progress) { // Should only happen if we already lost crm_debug("Not counting %s round %d %s from %s because no election in progress", e->name, vote.election_id, vote.op, vote.from); return e->state; } record_vote(e, &vote); reason = "Recorded"; done = TRUE; } else { // A peer vote requires a comparison to determine which node is better int age_result = compare_age(vote.age); int version_result = compare_version(vote.version, CRM_FEATURE_SET); if (version_result < 0) { reason = "Version"; we_lose = TRUE; } else if (version_result > 0) { reason = "Version"; } else if (age_result < 0) { reason = "Uptime"; we_lose = TRUE; } else if (age_result > 0) { reason = "Uptime"; } else if (strcasecmp(e->uname, vote.from) > 0) { reason = "Host name"; we_lose = TRUE; } else { reason = "Host name"; } } if (e->expires < tm_now) { e->election_wins = 0; e->expires = tm_now + STORM_INTERVAL; } else if (done == FALSE && we_lose == FALSE) { int peers = 1 + g_hash_table_size(crm_peer_cache); /* If every node has to vote down every other node, thats N*(N-1) total elections * Allow some leeway before _really_ complaining */ e->election_wins++; if (e->election_wins > (peers * peers)) { crm_warn("%s election storm detected: %d wins in %d seconds", e->name, e->election_wins, STORM_INTERVAL); e->election_wins = 0; e->expires = tm_now + STORM_INTERVAL; if (e->wrote_blackbox == FALSE) { /* It's questionable whether a black box (from every node in the * cluster) would be truly helpful in diagnosing an election * storm. It's also highly doubtful a production environment * would get multiple election storms from distinct causes, so * saving one blackbox per process lifetime should be * sufficient. Alternatives would be to save a timestamp of the * last blackbox write instead of a boolean, and write a new one * if some amount of time has passed; or to save a storm count, * write a blackbox on every Nth occurrence. */ crm_write_blackbox(0, NULL); e->wrote_blackbox = TRUE; } } } if (done) { do_crm_log(log_level + 1, "Processed %s round %d %s (current round %d) from %s (%s)", e->name, vote.election_id, vote.op, e->count, vote.from, reason); return e->state; } else if (we_lose == FALSE) { /* We track the time of the last election loss to implement an election * dampening period, reducing the likelihood of an election storm. If * this node has lost within the dampening period, don't start a new * election, even if we win against a peer's vote -- the peer we lost to * should win again. * * @TODO This has a problem case: if an election winner immediately * leaves the cluster, and a new election is immediately called, all * nodes could lose, with no new winner elected. The ideal solution * would be to tie the election structure with the peer caches, which * would allow us to clear the dampening when the previous winner * leaves (and would allow other improvements as well). */ if ((e->last_election_loss == 0) || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) { do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)", e->name, vote.election_id, vote.election_owner, vote.op, vote.from, reason); e->last_election_loss = 0; election_timeout_stop(e); /* Start a new election by voting down this, and other, peers */ e->state = election_start; return e->state; } else { char *loss_time = ctime(&e->last_election_loss); if (loss_time) { // Show only HH:MM:SS loss_time += 11; loss_time[8] = '\0'; } crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s", e->name, vote.election_id, vote.election_owner, vote.from, LOSS_DAMPEN, (loss_time? loss_time : "unknown")); } } e->last_election_loss = tm_now; do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)", e->name, vote.election_id, vote.election_owner, vote.op, vote.from, reason); election_reset(e); send_no_vote(your_node, &vote); e->state = election_lost; return e->state; } /*! * \brief Reset any election dampening currently in effect * * \param[in,out] e Election object to clear */ void election_clear_dampening(election_t *e) { e->last_election_loss = 0; } diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index d116370f8f..a32977d664 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,1384 +1,1538 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif +#include // PRIu32 #include #include #include #include #include #include #include #include #include #include #include #include "crmcluster_private.h" /* The peer cache remembers cluster nodes that have been seen. * This is managed mostly automatically by libcluster, based on * cluster membership events. * * Because cluster nodes can have conflicting names or UUIDs, * the hash table key is a uniquely generated ID. */ GHashTable *crm_peer_cache = NULL; /* * The remote peer cache tracks pacemaker_remote nodes. While the * value has the same type as the peer cache's, it is tracked separately for * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs, * so the name (which is also the UUID) is used as the hash table key; there * is no equivalent of membership events, so management is not automatic; and * most users of the peer cache need to exclude pacemaker_remote nodes. * * That said, using a single cache would be more logical and less error-prone, * so it would be a good idea to merge them one day. * * libcluster provides two avenues for populating the cache: - * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it, - * while crm_remote_peer_cache_refresh() populates it via the CIB. + * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node() + * directly manage it, while refresh_remote_nodes() populates it via the CIB. */ GHashTable *crm_remote_peer_cache = NULL; /* * The known node cache tracks cluster and remote nodes that have been seen in * the CIB. It is useful mainly when a caller needs to know about a node that * may no longer be in the membership, but doesn't want to add the node to the * main peer cache tables. */ static GHashTable *known_node_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; static gboolean crm_autoreap = TRUE; // Flag setting and clearing for crm_node_t:flags #define set_peer_flags(peer, flags_to_set) do { \ (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Peer", (peer)->uname, \ (peer)->flags, (flags_to_set), \ #flags_to_set); \ } while (0) #define clear_peer_flags(peer, flags_to_clear) do { \ (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Peer", (peer)->uname, \ (peer)->flags, (flags_to_clear), \ #flags_to_clear); \ } while (0) static void update_peer_uname(crm_node_t *node, const char *uname); static crm_node_t *find_known_node(const char *id, const char *uname); -int -crm_remote_peer_cache_size(void) +/*! + * \internal + * \brief Get the number of Pacemaker Remote nodes that have been seen + * + * \return Number of cached Pacemaker Remote nodes + */ +unsigned int +pcmk__cluster_num_remote_nodes(void) { if (crm_remote_peer_cache == NULL) { - return 0; + return 0U; } return g_hash_table_size(crm_remote_peer_cache); } /*! - * \brief Get a remote node peer cache entry, creating it if necessary + * \internal + * \brief Get a remote node cache entry, creating it if necessary * * \param[in] node_name Name of remote node * - * \return Cache entry for node on success, NULL (and set errno) otherwise + * \return Cache entry for node on success, or \c NULL (and set \c errno) + * otherwise * - * \note When creating a new entry, this will leave the node state undetermined, - * so the caller should also call pcmk__update_peer_state() if the state + * \note When creating a new entry, this will leave the node state undetermined. + * The caller should also call \c pcmk__update_peer_state() if the state * is known. * \note Because this can add and remove cache entries, callers should not * assume any previously obtained cache entry pointers remain valid. */ crm_node_t * -crm_remote_peer_get(const char *node_name) +pcmk__cluster_lookup_remote_node(const char *node_name) { crm_node_t *node; char *node_name_copy = NULL; if (node_name == NULL) { errno = EINVAL; return NULL; } /* It's theoretically possible that the node was added to the cluster peer * cache before it was known to be a Pacemaker Remote node. Remove that * entry unless it has a node ID, which means the name actually is * associated with a cluster node. (@TODO return an error in that case?) */ node = pcmk__search_node_caches(0, node_name, pcmk__node_search_cluster); if ((node != NULL) && (node->uuid == NULL)) { /* node_name could be a pointer into the cache entry being removed, so * reassign it to a copy before the original gets freed */ node_name_copy = strdup(node_name); if (node_name_copy == NULL) { errno = ENOMEM; return NULL; } node_name = node_name_copy; - reap_crm_member(0, node_name); + pcmk__cluster_forget_cluster_node(0, node_name); } /* Return existing cache entry if one exists */ node = g_hash_table_lookup(crm_remote_peer_cache, node_name); if (node) { free(node_name_copy); return node; } /* Allocate a new entry */ node = calloc(1, sizeof(crm_node_t)); if (node == NULL) { free(node_name_copy); return NULL; } /* Populate the essential information */ set_peer_flags(node, crm_remote_node); node->uuid = strdup(node_name); if (node->uuid == NULL) { free(node); errno = ENOMEM; free(node_name_copy); return NULL; } /* Add the new entry to the cache */ g_hash_table_replace(crm_remote_peer_cache, node->uuid, node); crm_trace("added %s to remote cache", node_name); /* Update the entry's uname, ensuring peer status callbacks are called */ update_peer_uname(node, node_name); free(node_name_copy); return node; } /*! + * \internal * \brief Remove a node from the Pacemaker Remote node cache * * \param[in] node_name Name of node to remove from cache * * \note The caller must be careful not to use \p node_name after calling this * function if it might be a pointer into the cache entry being removed. */ void -crm_remote_peer_cache_remove(const char *node_name) +pcmk__cluster_forget_remote_node(const char *node_name) { /* Do a lookup first, because node_name could be a pointer within the entry * being removed -- we can't log it *after* removing it. */ if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) { crm_trace("Removing %s from Pacemaker Remote node cache", node_name); g_hash_table_remove(crm_remote_peer_cache, node_name); } } /*! * \internal * \brief Return node status based on a CIB status entry * * \param[in] node_state XML of node state * * \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is false in * \c PCMK__XE_NODE_STATE, \c CRM_NODE_MEMBER otherwise * \note Unlike most boolean XML attributes, this one defaults to true, for * backward compatibility with older controllers that don't set it. */ static const char * remote_state_from_cib(const xmlNode *node_state) { bool status = false; if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM, &status) == pcmk_rc_ok) && !status) { return CRM_NODE_LOST; } else { return CRM_NODE_MEMBER; } } /* user data for looping through remote node xpath searches */ struct refresh_data { const char *field; /* XML attribute to check for node name */ gboolean has_state; /* whether to update node state based on XML */ }; /*! * \internal * \brief Process one pacemaker_remote node xpath search result * * \param[in] result XML search result * \param[in] user_data what to look for in the XML */ static void remote_cache_refresh_helper(xmlNode *result, void *user_data) { const struct refresh_data *data = user_data; const char *remote = crm_element_value(result, data->field); const char *state = NULL; crm_node_t *node; CRM_CHECK(remote != NULL, return); /* Determine node's state, if the result has it */ if (data->has_state) { state = remote_state_from_cib(result); } /* Check whether cache already has entry for node */ node = g_hash_table_lookup(crm_remote_peer_cache, remote); if (node == NULL) { /* Node is not in cache, so add a new entry for it */ - node = crm_remote_peer_get(remote); + node = pcmk__cluster_lookup_remote_node(remote); CRM_ASSERT(node); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } else if (pcmk_is_set(node->flags, crm_node_dirty)) { /* Node is in cache and hasn't been updated already, so mark it clean */ clear_peer_flags(node, crm_node_dirty); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } } static void mark_dirty(gpointer key, gpointer value, gpointer user_data) { set_peer_flags((crm_node_t *) value, crm_node_dirty); } static gboolean is_dirty(gpointer key, gpointer value, gpointer user_data) { return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty); } /*! - * \brief Repopulate the remote peer cache based on CIB XML + * \internal + * \brief Repopulate the remote node cache based on CIB XML * - * \param[in] xmlNode CIB XML to parse + * \param[in] cib CIB XML to parse */ -void -crm_remote_peer_cache_refresh(xmlNode *cib) +static void +refresh_remote_nodes(xmlNode *cib) { struct refresh_data data; crm_peer_init(); /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. * We don't empty the cache, because we need to detect changes in state. */ g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ data.field = PCMK_XA_ID; data.has_state = TRUE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); /* Look for guest nodes and remote nodes in the configuration section, * because they may have just been added and not have a status entry yet. * In that case, the cached node state will be left NULL, so that the * peer status callback isn't called until we're sure the node started * successfully. */ data.field = PCMK_XA_VALUE; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); data.field = PCMK_XA_ID; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); } -gboolean -crm_is_peer_active(const crm_node_t * node) +/*! + * \internal + * \brief Check whether a node is an active cluster node + * + * Remote nodes are never considered active. This guarantees that they can never + * become DC. + * + * \param[in] node Node to check + * + * \return \c true if the node is an active cluster node, or \c false otherwise + */ +bool +pcmk__cluster_is_node_active(const crm_node_t *node) { - if(node == NULL) { - return FALSE; - } + const enum cluster_type_e type = get_cluster_type(); - if (pcmk_is_set(node->flags, crm_remote_node)) { - /* remote nodes are never considered active members. This - * guarantees they will never be considered for DC membership.*/ - return FALSE; + if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) { + return false; } + + switch (type) { + case pcmk_cluster_corosync: #if SUPPORT_COROSYNC - if (is_corosync_cluster()) { - return crm_is_corosync_peer_active(node); + return crm_is_corosync_peer_active(node); +#else + break; +#endif // SUPPORT_COROSYNC + default: + break; } -#endif - crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); - return FALSE; + + crm_err("Unhandled cluster type: %s", name_for_cluster_type(type)); + return false; } +/*! + * \internal + * \brief Check if a node's entry should be removed from the cluster node cache + * + * A node should be removed from the cache if it's inactive and matches another + * \c crm_node_t (the search object). The node is considered a mismatch if any + * of the following are true: + * * The search object is \c NULL. + * * The search object has an ID set and the cached node's ID does not match it. + * * The search object does not have an ID set, and the cached node's name does + * not match the search node's name. (If both names are \c NULL, it's a + * match.) + * + * Otherwise, the node is considered a match. + * + * Note that if the search object has both an ID and a name set, the name is + * ignored for matching purposes. + * + * \param[in] key Ignored + * \param[in] value \c crm_node_t object from cluster node cache + * \param[in] user_data \c crm_node_t object to match against (search object) + * + * \return \c TRUE if the node entry should be removed from \c crm_peer_cache, + * or \c FALSE otherwise + */ static gboolean -crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) +should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; - - } else if (search->id && node->id != search->id) { + } + if ((search->id != 0) && (node->id != search->id)) { return FALSE; - - } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { + } + if ((search->id == 0) + && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { + // @TODO Consider name even if ID is set? + return FALSE; + } + if (pcmk__cluster_is_node_active(value)) { return FALSE; - - } else if (crm_is_peer_active(value) == FALSE) { - crm_info("Removing node with name %s and " PCMK_XA_ID - " %u from membership cache", - (node->uname? node->uname : "unknown"), node->id); - return TRUE; } - return FALSE; + + crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership " + "cache", + pcmk__s(node->uname, "(unknown)"), node->id); + return TRUE; } /*! - * \brief Remove all peer cache entries matching a node ID and/or uname + * \internal + * \brief Remove one or more inactive nodes from the cluster node cache * - * \param[in] id ID of node to remove (or 0 to ignore) - * \param[in] name Uname of node to remove (or NULL to ignore) + * All inactive nodes matching \p id and \p node_name as described in + * \c should_forget_cluster_node documentation are removed from the cache. * - * \return Number of cache entries removed + * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed + * from the cache regardless of ID and name. This differs from clearing the + * cache, in that entries for active nodes are preserved. * - * \note The caller must be careful not to use \p name after calling this - * function if it might be a pointer into the cache entry being removed. + * \param[in] id ID of node to remove from cache (0 to ignore) + * \param[in] node_name Name of node to remove from cache (ignored if \p id is + * nonzero) + * + * \note \p node_name is not modified directly, but it will be freed if it's a + * pointer into a cache entry that is removed. */ -guint -reap_crm_member(uint32_t id, const char *name) +void +pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name) { - int matches = 0; crm_node_t search = { 0, }; + char *criterion = NULL; // For logging + guint matches = 0; if (crm_peer_cache == NULL) { - crm_trace("Membership cache not initialized, ignoring purge request"); - return 0; + crm_trace("Membership cache not initialized, ignoring removal request"); + return; } search.id = id; - search.uname = pcmk__str_copy(name); - matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); - if(matches) { - crm_notice("Purged %d peer%s with " PCMK_XA_ID - "=%u%s%s from the membership cache", - matches, pcmk__plural_s(matches), search.id, - (search.uname? " and/or uname=" : ""), - (search.uname? search.uname : "")); + search.uname = pcmk__str_copy(node_name); // May log after original freed + + if (id > 0) { + criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id); + + } else if (node_name != NULL) { + criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name); + } + + matches = g_hash_table_foreach_remove(crm_peer_cache, + should_forget_cluster_node, &search); + if (matches > 0) { + if (criterion != NULL) { + crm_notice("Removed %u inactive node%s with %s from the membership " + "cache", + matches, pcmk__plural_s(matches), criterion); + } else { + crm_notice("Removed all (%u) inactive cluster nodes from the " + "membership cache", + matches); + } } else { - crm_info("No peers with " PCMK_XA_ID - "=%u%s%s to purge from the membership cache", - search.id, (search.uname? " and/or uname=" : ""), - (search.uname? search.uname : "")); + crm_info("No inactive cluster nodes%s%s to remove from the membership " + "cache", + ((criterion != NULL)? " with" : ""), pcmk__s(criterion, "")); } free(search.uname); - return matches; + free(criterion); } static void count_peer(gpointer key, gpointer value, gpointer user_data) { - guint *count = user_data; + unsigned int *count = user_data; crm_node_t *node = value; - if (crm_is_peer_active(node)) { + if (pcmk__cluster_is_node_active(node)) { *count = *count + 1; } } -guint -crm_active_peers(void) +/*! + * \internal + * \brief Get the number of active cluster nodes that have been seen + * + * Remote nodes are never considered active. This guarantees that they can never + * become DC. + * + * \return Number of active nodes in the cluster node cache + */ +unsigned int +pcmk__cluster_num_active_nodes(void) { - guint count = 0; + unsigned int count = 0; - if (crm_peer_cache) { + if (crm_peer_cache != NULL) { g_hash_table_foreach(crm_peer_cache, count_peer, &count); } return count; } static void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u: %s", node->id, node->uname); free(node->uname); free(node->state); free(node->uuid); free(node->expected); free(node->conn_host); free(node); } void crm_peer_init(void) { if (crm_peer_cache == NULL) { crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node); } if (crm_remote_peer_cache == NULL) { crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node); } if (known_node_cache == NULL) { known_node_cache = pcmk__strikey_table(free, destroy_crm_node); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_remote_peer_cache != NULL) { - crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache)); + crm_trace("Destroying remote peer cache with %d members", + pcmk__cluster_num_remote_nodes()); g_hash_table_destroy(crm_remote_peer_cache); crm_remote_peer_cache = NULL; } if (known_node_cache != NULL) { crm_trace("Destroying known node cache with %d members", g_hash_table_size(known_node_cache)); g_hash_table_destroy(known_node_cache); known_node_cache = NULL; } } static void (*peer_status_callback)(enum crm_status_type, crm_node_t *, const void *) = NULL; /*! * \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * * \note Previously, client callbacks were responsible for peer cache * management. This is no longer the case, and client callbacks should do * only client-specific handling. Callbacks MUST NOT add or remove entries * in the peer caches. */ void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { peer_status_callback = dispatch; } /*! * \brief Tell the library whether to automatically reap lost nodes * * If TRUE (the default), calling crm_update_peer_proc() will also update the * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and pcmk__update_peer_state() * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. * Callers should leave this enabled unless they plan to manage the cache * separately on their own. * * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable */ void crm_set_autoreap(gboolean autoreap) { crm_autoreap = autoreap; } static void dump_peer_hash(int level, const char *caller) { GHashTableIter iter; const char *id = NULL; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id); } } static gboolean hash_find_by_data(gpointer key, gpointer value, gpointer user_data) { return value == user_data; } /*! * \internal * \brief Search caches for a node (cluster or Pacemaker Remote) * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] flags Group of enum pcmk__node_search_flags * * \return Node cache entry if found, otherwise NULL */ crm_node_t * pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster)) { node = pcmk__search_cluster_node_cache(id, uname, NULL); } if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_known)) { char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id); node = find_known_node(id_str, uname); free(id_str); } return node; } /*! * \internal * \brief Purge a node from cache (both cluster and Pacemaker Remote) * * \param[in] node_name If not NULL, purge only nodes with this name * \param[in] node_id If not 0, purge cluster nodes only if they have this ID * * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged. * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote * nodes that match \p node_name will be purged, and cluster nodes that * match both \p node_name and \p node_id will be purged. * \note The caller must be careful not to use \p node_name after calling this * function if it might be a pointer into a cache entry being removed. */ void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) { char *node_name_copy = NULL; if ((node_name == NULL) && (node_id == 0U)) { return; } // Purge from Pacemaker Remote node cache if ((node_name != NULL) && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) { /* node_name could be a pointer into the cache entry being purged, * so reassign it to a copy before the original gets freed */ node_name_copy = pcmk__str_copy(node_name); node_name = node_name_copy; crm_trace("Purging %s from Pacemaker Remote node cache", node_name); g_hash_table_remove(crm_remote_peer_cache, node_name); } - reap_crm_member(node_id, node_name); + pcmk__cluster_forget_cluster_node(node_id, node_name); free(node_name_copy); } /*! * \internal * \brief Search cluster node cache * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster * node ID to search for * * \return Cluster node cache entry if found, otherwise NULL */ crm_node_t * pcmk__search_cluster_node_cache(unsigned int id, const char *uname, const char *uuid) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id > 0) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->id == id) { crm_trace("ID match: %u = %p", node->id, node); by_id = node; break; } } } else if (uuid != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) { crm_trace("UUID match: %s = %p", node->uuid, node); by_id = node; break; } } } node = by_id; /* Good default */ if(by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); } else if(by_id == NULL && by_name) { crm_trace("Only one: %p for %u/%s", by_name, id, uname); if(id && by_name->id) { dump_peer_hash(LOG_WARNING, __func__); crm_crit("Node %u and %u share the same name '%s'", id, by_name->id, uname); node = NULL; /* Create a new one */ } else { node = by_name; } } else if(by_name == NULL && by_id) { crm_trace("Only one: %p for %u/%s", by_id, id, uname); if(uname && by_id->uname) { dump_peer_hash(LOG_WARNING, __func__); crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", uname, by_id->uname, id, uname); } } else if(uname && by_id->uname) { if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) { crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id); g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); } else { crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname); dump_peer_hash(LOG_INFO, __func__); crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE, TRUE); } } else if(id && by_name->id) { crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); } else { /* Simple merge */ /* Only corosync-based clusters use node IDs. The functions that call * pcmk__update_peer_state() and crm_update_peer_proc() only know * nodeid, so 'by_id' is authoritative when merging. */ dump_peer_hash(LOG_DEBUG, __func__); crm_info("Merging %p into %p", by_name, by_id); g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); } return node; } #if SUPPORT_COROSYNC static guint remove_conflicting_peer(crm_node_t *node) { int matches = 0; GHashTableIter iter; crm_node_t *existing_node = NULL; if (node->id == 0 || node->uname == NULL) { return 0; } if (!pcmk__corosync_has_nodelist()) { return 0; } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) { if (existing_node->id > 0 && existing_node->id != node->id && existing_node->uname != NULL && strcasecmp(existing_node->uname, node->uname) == 0) { - if (crm_is_peer_active(existing_node)) { + if (pcmk__cluster_is_node_active(existing_node)) { continue; } crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u", existing_node->id, existing_node->uname, node->id); g_hash_table_iter_remove(&iter); matches++; } } return matches; } #endif /*! * \brief Get a cluster node cache entry * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster * node ID to search for * \param[in] flags Group of enum pcmk__node_search_flags * * \return (Possibly newly created) cluster node cache entry */ /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * pcmk__get_node(unsigned int id, const char *uname, const char *uuid, uint32_t flags) { crm_node_t *node = NULL; char *uname_lookup = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); // Check the Pacemaker Remote node cache first if (pcmk_is_set(flags, pcmk__node_search_remote)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); if (node != NULL) { return node; } } if (!pcmk_is_set(flags, pcmk__node_search_cluster)) { return NULL; } node = pcmk__search_cluster_node_cache(id, uname, uuid); /* if uname wasn't provided, and find_peer did not turn up a uname based on id. * we need to do a lookup of the node name using the id in the cluster membership. */ if ((node == NULL || node->uname == NULL) && (uname == NULL)) { uname_lookup = get_node_name(id); } if (uname_lookup) { uname = uname_lookup; crm_trace("Inferred a name of '%s' for node %u", uname, id); /* try to turn up the node one more time now that we know the uname. */ if (node == NULL) { node = pcmk__search_cluster_node_cache(id, uname, uuid); } } if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = pcmk__assert_alloc(1, sizeof(crm_node_t)); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); g_hash_table_replace(crm_peer_cache, uniqueid, node); } if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { crm_info("Node %u is now known as %s", id, uname); } if(id > 0 && node->id == 0) { node->id = id; } if (uname && (node->uname == NULL)) { update_peer_uname(node, uname); } if(node->uuid == NULL) { if (uuid == NULL) { uuid = crm_peer_uuid(node); } if (uuid) { crm_info("Node %u has uuid %s", id, uuid); } else { crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname); } } free(uname_lookup); return node; } /*! * \internal * \brief Update a node's uname * * \param[in,out] node Node object to update * \param[in] uname New name to set * * \note This function should not be called within a peer cache iteration, * because in some cases it can remove conflicting cache entries, * which would invalidate the iterator. */ static void update_peer_uname(crm_node_t *node, const char *uname) { CRM_CHECK(uname != NULL, crm_err("Bug: can't update node name without name"); return); CRM_CHECK(node != NULL, crm_err("Bug: can't update node name to %s without node", uname); return); if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) { crm_debug("Node uname '%s' did not change", uname); return; } for (const char *c = uname; *c; ++c) { if ((*c >= 'A') && (*c <= 'Z')) { crm_warn("Node names with capitals are discouraged, consider changing '%s'", uname); break; } } pcmk__str_update(&node->uname, uname); if (peer_status_callback != NULL) { peer_status_callback(crm_status_uname, node, NULL); } #if SUPPORT_COROSYNC if (is_corosync_cluster() && !pcmk_is_set(node->flags, crm_remote_node)) { remove_conflicting_peer(node); } #endif } /*! * \internal * \brief Get log-friendly string equivalent of a process flag * * \param[in] proc Process flag * * \return Log-friendly string equivalent of \p proc */ static inline const char * proc2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch (proc) { case crm_proc_none: text = "none"; break; case crm_proc_based: text = "pacemaker-based"; break; case crm_proc_controld: text = "pacemaker-controld"; break; case crm_proc_schedulerd: text = "pacemaker-schedulerd"; break; case crm_proc_execd: text = "pacemaker-execd"; break; case crm_proc_attrd: text = "pacemaker-attrd"; break; case crm_proc_fenced: text = "pacemaker-fenced"; break; case crm_proc_cpg: text = "corosync-cpg"; break; } return text; } /*! * \internal * \brief Update a node's process information (and potentially state) * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] flag Bitmask of new process information * \param[in] status node status (online, offline, etc.) * * \return NULL if any node was reaped from peer caches, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, otherwise * reaping could invalidate the iterator. */ crm_node_t * crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, proc2text(flag), status); return NULL); /* Pacemaker doesn't spawn processes on remote nodes */ if (pcmk_is_set(node->flags, crm_remote_node)) { return node; } last = node->processes; if (status == NULL) { node->processes = flag; if (node->processes != last) { changed = TRUE; } } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) { if ((node->processes & flag) != flag) { node->processes = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", node->uname, node->processes, flag, "processes"); changed = TRUE; } } else if (node->processes & flag) { node->processes = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", node->uname, node->processes, flag, "processes"); changed = TRUE; } if (changed) { if (status == NULL && flag <= crm_proc_none) { crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname, node->id); } else { crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id, proc2text(flag), status); } if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { node->when_online = time(NULL); } else { node->when_online = 0; } /* Call the client callback first, then update the peer state, * in case the node will be reaped */ if (peer_status_callback != NULL) { peer_status_callback(crm_status_processes, node, &last); } /* The client callback shouldn't touch the peer caches, * but as a safety net, bail if the peer cache was destroyed. */ if (crm_peer_cache == NULL) { return NULL; } if (crm_autoreap) { const char *peer_state = NULL; if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { peer_state = CRM_NODE_MEMBER; } else { peer_state = CRM_NODE_LOST; } node = pcmk__update_peer_state(__func__, node, peer_state, 0); } } else { crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id, proc2text(flag), status); } return node; } /*! * \internal * \brief Update a cluster node cache entry's expected join state * * \param[in] source Caller's function name (for logging) * \param[in,out] node Node to update * \param[in] expected Node's new join state */ void pcmk__update_peer_expected(const char *source, crm_node_t *node, const char *expected) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); /* Remote nodes don't participate in joins */ if (pcmk_is_set(node->flags, crm_remote_node)) { return; } last = node->expected; if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) { node->expected = strdup(expected); changed = TRUE; } if (changed) { crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id, expected, last); free(last); } else { crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname, node->id, expected); } } /*! * \internal * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * \param[in,out] iter If not NULL, pointer to node's peer cache iterator * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function may be called from * within a peer cache iteration if the iterator is supplied. */ static crm_node_t * update_peer_state_iter(const char *source, crm_node_t *node, const char *state, uint64_t membership, GHashTableIter *iter) { gboolean is_member; CRM_CHECK(node != NULL, crm_err("Could not set state for unknown host to %s" CRM_XS " source=%s", state, source); return NULL); is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei); if (is_member) { node->when_lost = 0; if (membership) { node->last_seen = membership; } } if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) { char *last = node->state; if (is_member) { node->when_member = time(NULL); } else { node->when_member = 0; } node->state = strdup(state); crm_notice("Node %s state is now %s " CRM_XS " nodeid=%u previous=%s source=%s", node->uname, state, node->id, (last? last : "unknown"), source); if (peer_status_callback != NULL) { peer_status_callback(crm_status_nstate, node, last); } free(last); if (crm_autoreap && !is_member && !pcmk_is_set(node->flags, crm_remote_node)) { /* We only autoreap from the peer cache, not the remote peer cache, * because the latter should be managed only by - * crm_remote_peer_cache_refresh(). + * refresh_remote_nodes(). */ if(iter) { crm_notice("Purged 1 peer with " PCMK_XA_ID "=%u and/or uname=%s from the membership cache", node->id, node->uname); g_hash_table_iter_remove(iter); } else { - reap_crm_member(node->id, node->uname); + pcmk__cluster_forget_cluster_node(node->id, node->uname); } node = NULL; } } else { crm_trace("Node %s state is unchanged (%s) " CRM_XS " nodeid=%u source=%s", node->uname, state, node->id, source); } return node; } /*! * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, * otherwise reaping could invalidate the iterator. */ crm_node_t * pcmk__update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership) { return update_peer_state_iter(source, node, state, membership, NULL); } /*! * \internal * \brief Reap all nodes from cache whose membership information does not match * * \param[in] membership Membership ID of nodes to keep */ void pcmk__reap_unseen_nodes(uint64_t membership) { GHashTableIter iter; crm_node_t *node = NULL; crm_trace("Reaping unseen nodes..."); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) { if (node->last_seen != membership) { if (node->state) { /* * Calling update_peer_state_iter() allows us to * remove the node from crm_peer_cache without * invalidating our iterator */ update_peer_state_iter(__func__, node, CRM_NODE_LOST, membership, &iter); } else { crm_info("State of node %s[%u] is still unknown", node->uname, node->id); } } } } static crm_node_t * find_known_node(const char *id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; if (uname) { g_hash_table_iter_init(&iter, known_node_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id) { g_hash_table_iter_init(&iter, known_node_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(strcasecmp(node->uuid, id) == 0) { crm_trace("ID match: %s= %p", id, node); by_id = node; break; } } } node = by_id; /* Good default */ if (by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %s/%s", by_id, id, uname); } else if (by_id == NULL && by_name) { crm_trace("Only one: %p for %s/%s", by_name, id, uname); if (id) { node = NULL; } else { node = by_name; } } else if (by_name == NULL && by_id) { crm_trace("Only one: %p for %s/%s", by_id, id, uname); if (uname) { node = NULL; } } else if (uname && by_id->uname && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) { /* Multiple nodes have the same uname in the CIB. * Return by_id. */ } else if (id && by_name->uuid && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) { /* Multiple nodes have the same id in the CIB. * Return by_name. */ node = by_name; } else { node = NULL; } if (node == NULL) { crm_debug("Couldn't find node%s%s%s%s", id? " " : "", id? id : "", uname? " with name " : "", uname? uname : ""); } return node; } static void known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data) { const char *id = crm_element_value(xml_node, PCMK_XA_ID); const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME); crm_node_t * node = NULL; CRM_CHECK(id != NULL && uname !=NULL, return); node = find_known_node(id, uname); if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = pcmk__assert_alloc(1, sizeof(crm_node_t)); node->uname = pcmk__str_copy(uname); node->uuid = pcmk__str_copy(id); g_hash_table_replace(known_node_cache, uniqueid, node); } else if (pcmk_is_set(node->flags, crm_node_dirty)) { pcmk__str_update(&node->uname, uname); /* Node is in cache and hasn't been updated already, so mark it clean */ clear_peer_flags(node, crm_node_dirty); } } static void refresh_known_node_cache(xmlNode *cib) { crm_peer_init(); g_hash_table_foreach(known_node_cache, mark_dirty, NULL); crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG, known_node_cache_refresh_helper, NULL); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL); } void pcmk__refresh_node_caches_from_cib(xmlNode *cib) { - crm_remote_peer_cache_refresh(cib); + refresh_remote_nodes(cib); refresh_known_node_cache(cib); } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include int crm_terminate_member(int nodeid, const char *uname, void *unused) { return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } crm_node_t * crm_get_peer(unsigned int id, const char *uname) { return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster); } crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags) { return pcmk__get_node(id, uname, NULL, flags); } +int +crm_remote_peer_cache_size(void) +{ + unsigned int count = pcmk__cluster_num_remote_nodes(); + + return QB_MIN(count, INT_MAX); +} + +void +crm_remote_peer_cache_refresh(xmlNode *cib) +{ + refresh_remote_nodes(cib); +} + +crm_node_t * +crm_remote_peer_get(const char *node_name) +{ + return pcmk__cluster_lookup_remote_node(node_name); +} + +void +crm_remote_peer_cache_remove(const char *node_name) +{ + pcmk__cluster_forget_remote_node(node_name); +} + +gboolean +crm_is_peer_active(const crm_node_t * node) +{ + return pcmk__cluster_is_node_active(node); +} + +guint +crm_active_peers(void) +{ + return pcmk__cluster_num_active_nodes(); +} + +guint +reap_crm_member(uint32_t id, const char *name) +{ + int matches = 0; + crm_node_t search = { 0, }; + + if (crm_peer_cache == NULL) { + crm_trace("Membership cache not initialized, ignoring purge request"); + return 0; + } + + search.id = id; + search.uname = pcmk__str_copy(name); + matches = g_hash_table_foreach_remove(crm_peer_cache, + should_forget_cluster_node, &search); + if(matches) { + crm_notice("Purged %d peer%s with " PCMK_XA_ID + "=%u%s%s from the membership cache", + matches, pcmk__plural_s(matches), search.id, + (search.uname? " and/or uname=" : ""), + (search.uname? search.uname : "")); + + } else { + crm_info("No peers with " PCMK_XA_ID + "=%u%s%s to purge from the membership cache", + search.id, (search.uname? " and/or uname=" : ""), + (search.uname? search.uname : "")); + } + + free(search.uname); + return matches; +} + // LCOV_EXCL_STOP // End deprecated API