diff --git a/include/crm/cluster.h b/include/crm/cluster.h index bfa827beab..b3e2d8e55a 100644 --- a/include/crm/cluster.h +++ b/include/crm/cluster.h @@ -1,144 +1,143 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CLUSTER__H # define PCMK__CRM_CLUSTER__H # include // uint32_t, uint64_t # include // gboolean, GHashTable # include // xmlNode # include # include #ifdef __cplusplus extern "C" { #endif # if SUPPORT_COROSYNC # include # endif // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) extern GHashTable *crm_peer_cache; // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) extern GHashTable *crm_remote_peer_cache; // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) extern unsigned long long crm_peer_seq; // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) #define CRM_NODE_LOST "lost" // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) #define CRM_NODE_MEMBER "member" // @COMPAT Make this internal when we can break API backward compatibility //!@{ //! \deprecated Do not use (public access will be removed in a future release) enum crm_join_phase { /* @COMPAT: crm_join_nack_quiet can be replaced by * pcmk__node_status_t:user_data at a compatibility break */ //! Not allowed to join, but don't send a nack message crm_join_nack_quiet = -2, crm_join_nack = -1, crm_join_none = 0, crm_join_welcomed = 1, crm_join_integrated = 2, crm_join_finalized = 3, crm_join_confirmed = 4, }; //!@} //! \internal Do not use typedef struct pcmk__cluster_private pcmk__cluster_private_t; // Implementation of pcmk_cluster_t // @COMPAT Make contents internal when we can break API backward compatibility //!@{ //! \deprecated Do not use (public access will be removed in a future release) struct pcmk__cluster { /* @COMPAT Once all members are moved to pcmk__cluster_private_t, we can * make that the pcmk_cluster_t implementation and drop this struct * altogether, leaving pcmk_cluster_t as an opaque public type. */ //! \internal Do not use pcmk__cluster_private_t *priv; - char *uuid; char *uname; uint32_t nodeid; // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call pcmk_cluster_set_destroy_fn() to set this void (*destroy) (gpointer); # if SUPPORT_COROSYNC /* @TODO When we can break public API compatibility, make these members a * separate struct and use void *cluster_data here instead, to abstract the * cluster layer further. */ struct cpg_name group; // NOTE: sbd (as of at least 1.5.2) uses this /*! * \deprecated Call pcmk_cpg_set_deliver_fn() and pcmk_cpg_set_confchg_fn() * to set these */ cpg_callbacks_t cpg; cpg_handle_t cpg_handle; # endif }; //!@} //! Connection to a cluster layer typedef struct pcmk__cluster pcmk_cluster_t; int pcmk_cluster_connect(pcmk_cluster_t *cluster); int pcmk_cluster_disconnect(pcmk_cluster_t *cluster); pcmk_cluster_t *pcmk_cluster_new(void); void pcmk_cluster_free(pcmk_cluster_t *cluster); int pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer)); #if SUPPORT_COROSYNC int pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn); int pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn); #endif // SUPPORT_COROSYNC /*! * \enum pcmk_cluster_layer * \brief Types of cluster layer */ enum pcmk_cluster_layer { pcmk_cluster_layer_unknown = 1, //!< Unknown cluster layer pcmk_cluster_layer_invalid = 2, //!< Invalid cluster layer pcmk_cluster_layer_corosync = 32, //!< Corosync Cluster Engine }; enum pcmk_cluster_layer pcmk_get_cluster_layer(void); const char *pcmk_cluster_layer_text(enum pcmk_cluster_layer layer); #ifdef __cplusplus } #endif #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) #include #endif #endif diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c index f2d28a6cdc..21246be462 100644 --- a/lib/cluster/cluster.c +++ b/lib/cluster/cluster.c @@ -1,489 +1,488 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include // PRIu32 #include #include #include #include #include #include #include #include #include // uname() #include // gboolean #include #include #include #include #include "crmcluster_private.h" CRM_TRACE_INIT_DATA(cluster); /*! * \internal * \brief Get the message type equivalent of a string * * \param[in] text String of message type * * \return Message type equivalent of \p text */ enum pcmk__cluster_msg pcmk__cluster_parse_msg_type(const char *text) { CRM_CHECK(text != NULL, return pcmk__cluster_msg_unknown); text = pcmk__message_name(text); if (pcmk__str_eq(text, "attrd", pcmk__str_none)) { return pcmk__cluster_msg_attrd; } else if (pcmk__str_eq(text, CRM_SYSTEM_CIB, pcmk__str_none)) { return pcmk__cluster_msg_based; } else if (pcmk__str_any_of(text, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL)) { return pcmk__cluster_msg_controld; } else if (pcmk__str_eq(text, CRM_SYSTEM_LRMD, pcmk__str_none)) { return pcmk__cluster_msg_execd; } else if (pcmk__str_eq(text, "stonith-ng", pcmk__str_none)) { return pcmk__cluster_msg_fenced; } else { return pcmk__cluster_msg_unknown; } } /*! * \internal * \brief Get a node's cluster-layer UUID, setting it if not already set * * \param[in,out] node Node to check * * \return Cluster-layer node UUID of \p node, or \c NULL if unknown */ const char * pcmk__cluster_node_uuid(pcmk__node_status_t *node) { const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); if (node == NULL) { return NULL; } if (node->xml_id != NULL) { return node->xml_id; } switch (cluster_layer) { #if SUPPORT_COROSYNC case pcmk_cluster_layer_corosync: node->xml_id = pcmk__corosync_uuid(node); return node->xml_id; #endif // SUPPORT_COROSYNC default: crm_err("Unsupported cluster layer %s", pcmk_cluster_layer_text(cluster_layer)); return NULL; } } /*! * \internal * \brief Connect to the cluster layer * * \param[in,out] cluster Initialized cluster object to connect * * \return Standard Pacemaker return code */ int pcmk_cluster_connect(pcmk_cluster_t *cluster) { const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); // cts-lab looks for this message crm_notice("Connecting to %s cluster layer", cluster_layer_s); switch (cluster_layer) { #if SUPPORT_COROSYNC case pcmk_cluster_layer_corosync: return pcmk__corosync_connect(cluster); #endif // SUPPORT_COROSYNC default: break; } crm_err("Failed to connect to unsupported cluster layer %s", cluster_layer_s); return EPROTONOSUPPORT; } /*! * \brief Disconnect from the cluster layer * * \param[in,out] cluster Cluster object to disconnect * * \return Standard Pacemaker return code */ int pcmk_cluster_disconnect(pcmk_cluster_t *cluster) { const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); crm_info("Disconnecting from %s cluster layer", cluster_layer_s); switch (cluster_layer) { #if SUPPORT_COROSYNC case pcmk_cluster_layer_corosync: pcmk__corosync_disconnect(cluster); pcmk__cluster_destroy_node_caches(); return pcmk_rc_ok; #endif // SUPPORT_COROSYNC default: break; } crm_err("Failed to disconnect from unsupported cluster layer %s", cluster_layer_s); return EPROTONOSUPPORT; } /*! * \brief Allocate a new \p pcmk_cluster_t object * * \return A newly allocated \p pcmk_cluster_t object (guaranteed not \c NULL) * \note The caller is responsible for freeing the return value using * \p pcmk_cluster_free(). */ pcmk_cluster_t * pcmk_cluster_new(void) { pcmk_cluster_t *cluster = pcmk__assert_alloc(1, sizeof(pcmk_cluster_t)); cluster->priv = pcmk__assert_alloc(1, sizeof(pcmk__cluster_private_t)); return cluster; } /*! * \brief Free a \p pcmk_cluster_t object and its dynamically allocated members * * \param[in,out] cluster Cluster object to free */ void pcmk_cluster_free(pcmk_cluster_t *cluster) { if (cluster == NULL) { return; } - free(cluster->uuid); free(cluster->uname); free(cluster->priv); free(cluster); } /*! * \brief Set the destroy function for a cluster object * * \param[in,out] cluster Cluster object * \param[in] fn Destroy function to set * * \return Standard Pacemaker return code */ int pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer)) { if (cluster == NULL) { return EINVAL; } cluster->destroy = fn; return pcmk_rc_ok; } /*! * \internal * \brief Send an XML message via the cluster messaging layer * * \param[in] node Cluster node to send message to * \param[in] service Message type to use in message host info * \param[in] data XML message to send * * \return \c true on success, or \c false otherwise */ bool pcmk__cluster_send_message(const pcmk__node_status_t *node, enum pcmk__cluster_msg service, const xmlNode *data) { // @TODO Return standard Pacemaker return code switch (pcmk_get_cluster_layer()) { #if SUPPORT_COROSYNC case pcmk_cluster_layer_corosync: return pcmk__cpg_send_xml(data, node, service); #endif // SUPPORT_COROSYNC default: break; } return false; } /*! * \internal * \brief Get the node name corresponding to a cluster-layer node ID * * Get the node name from the cluster layer if possible. Otherwise, if for the * local node, call \c uname() and get the \c nodename member from the * struct utsname object. * * \param[in] nodeid Node ID to check (or 0 for the local node) * * \return Node name corresponding to \p nodeid * * \note This will fatally exit if \c uname() fails to get the local node name * or we run out of memory. * \note The caller is responsible for freeing the return value using \c free(). */ char * pcmk__cluster_node_name(uint32_t nodeid) { const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); switch (cluster_layer) { #if SUPPORT_COROSYNC case pcmk_cluster_layer_corosync: return pcmk__corosync_name(0, nodeid); #else break; #endif // SUPPORT_COROSYNC default: crm_err("Unsupported cluster layer: %s", cluster_layer_s); break; } if (nodeid == 0) { struct utsname hostinfo; crm_notice("Could not get local node name from %s cluster layer, " "defaulting to local hostname", cluster_layer_s); if (uname(&hostinfo) < 0) { // @TODO Maybe let the caller decide what to do crm_err("Failed to get the local hostname"); crm_exit(CRM_EX_FATAL); } return pcmk__str_copy(hostinfo.nodename); } crm_notice("Could not obtain a node name for node with " PCMK_XA_ID "=" PRIu32, nodeid); return NULL; } /*! * \internal * \brief Get the local node's cluster-layer node name * * If getting the node name from the cluster layer is impossible, call * \c uname() and get the \c nodename member from the struct utsname * object. * * \return Local node's name * * \note This will fatally exit if \c uname() fails to get the local node name * or we run out of memory. */ const char * pcmk__cluster_local_node_name(void) { // @TODO Refactor to avoid trivially leaking name at exit static char *name = NULL; if (name == NULL) { name = pcmk__cluster_node_name(0); } return name; } /*! * \internal * \brief Get the node name corresonding to a node UUID * * Look for the UUID in both the remote node cache and the cluster member cache. * * \param[in] uuid UUID to search for * * \return Node name corresponding to \p uuid if found, or \c NULL otherwise */ const char * pcmk__node_name_from_uuid(const char *uuid) { /* @TODO There are too many functions in libcrmcluster that look up a node * from the node caches (possibly creating a cache entry if none exists). * There are at least the following: * * pcmk__cluster_lookup_remote_node() * * pcmk__get_node() * * pcmk__node_name_from_uuid() * * pcmk__search_node_caches() * * There's a lot of duplication among them, but they all do slightly * different things. We should try to clean them up and consolidate them to * the extent possible, likely with new helper functions. */ GHashTableIter iter; pcmk__node_status_t *node = NULL; CRM_CHECK(uuid != NULL, return NULL); // Remote nodes have the same uname and uuid if (g_hash_table_lookup(crm_remote_peer_cache, uuid)) { return uuid; } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__str_eq(node->xml_id, uuid, pcmk__str_casei)) { return node->name; } } return NULL; } /*! * \brief Get a log-friendly string equivalent of a cluster layer * * \param[in] layer Cluster layer * * \return Log-friendly string corresponding to \p layer */ const char * pcmk_cluster_layer_text(enum pcmk_cluster_layer layer) { switch (layer) { case pcmk_cluster_layer_corosync: return "corosync"; case pcmk_cluster_layer_unknown: return "unknown"; case pcmk_cluster_layer_invalid: return "invalid"; default: crm_err("Invalid cluster layer: %d", layer); return "invalid"; } } /*! * \brief Get and validate the local cluster layer * * If a cluster layer is not configured via the \c PCMK__ENV_CLUSTER_TYPE local * option, this will try to detect an active cluster from among the supported * cluster layers. * * \return Local cluster layer * * \note This will fatally exit if the configured cluster layer is invalid. */ enum pcmk_cluster_layer pcmk_get_cluster_layer(void) { static enum pcmk_cluster_layer cluster_layer = pcmk_cluster_layer_unknown; const char *cluster = NULL; // Cluster layer is stable once set if (cluster_layer != pcmk_cluster_layer_unknown) { return cluster_layer; } cluster = pcmk__env_option(PCMK__ENV_CLUSTER_TYPE); if (cluster != NULL) { crm_info("Verifying configured cluster layer '%s'", cluster); cluster_layer = pcmk_cluster_layer_invalid; #if SUPPORT_COROSYNC if (pcmk__str_eq(cluster, PCMK_VALUE_COROSYNC, pcmk__str_casei)) { cluster_layer = pcmk_cluster_layer_corosync; } #endif // SUPPORT_COROSYNC if (cluster_layer == pcmk_cluster_layer_invalid) { crm_notice("This installation does not support the '%s' cluster " "infrastructure: terminating", cluster); crm_exit(CRM_EX_FATAL); } crm_info("Assuming an active '%s' cluster", cluster); } else { // Nothing configured, so test supported cluster layers #if SUPPORT_COROSYNC crm_debug("Testing with Corosync"); if (pcmk__corosync_is_active()) { cluster_layer = pcmk_cluster_layer_corosync; } #endif // SUPPORT_COROSYNC if (cluster_layer == pcmk_cluster_layer_unknown) { crm_notice("Could not determine the current cluster layer"); } else { crm_info("Detected an active '%s' cluster", pcmk_cluster_layer_text(cluster_layer)); } } return cluster_layer; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include gboolean crm_cluster_connect(pcmk_cluster_t *cluster) { return pcmk_cluster_connect(cluster) == pcmk_rc_ok; } const char * name_for_cluster_type(enum cluster_type_e type) { switch (type) { case pcmk_cluster_corosync: return "corosync"; case pcmk_cluster_unknown: return "unknown"; case pcmk_cluster_invalid: return "invalid"; } crm_err("Invalid cluster type: %d", type); return "invalid"; } enum cluster_type_e get_cluster_type(void) { return (enum cluster_type_e) pcmk_get_cluster_layer(); } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index c999f73fd6..3505efef1a 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,818 +1,816 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include // PRIu64, etc. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // PCMK__SPECIAL_PID #include #include #include "crmcluster_private.h" static quorum_handle_t pcmk_quorum_handle = 0; static gboolean (*quorum_app_callback)(unsigned long long seq, gboolean quorate) = NULL; /*! * \internal * \brief Get the Corosync UUID associated with a Pacemaker node * * \param[in] node Pacemaker node * * \return Newly allocated string with node's Corosync UUID, or NULL if unknown * \note It is the caller's responsibility to free the result with free(). */ char * pcmk__corosync_uuid(const pcmk__node_status_t *node) { CRM_ASSERT(pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync); if (node != NULL) { if (node->cluster_layer_id > 0) { return crm_strdup_printf("%" PRIu32, node->cluster_layer_id); } else { crm_info("Node %s is not yet known by Corosync", node->name); } } return NULL; } static bool node_name_is_valid(const char *key, const char *name) { int octet; if (name == NULL) { crm_trace("%s is empty", key); return false; } else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) { crm_trace("%s contains an IPv4 address (%s), ignoring", key, name); return false; } else if (strstr(name, ":") != NULL) { crm_trace("%s contains an IPv6 address (%s), ignoring", key, name); return false; } crm_trace("'%s: %s' is valid", key, name); return true; } /* * \internal * \brief Get Corosync node name corresponding to a node ID * * \param[in] cmap_handle Connection to Corosync CMAP * \param[in] nodeid Node ID to check * * \return Newly allocated string with name or (if no name) IP address * associated with first address assigned to a Corosync node ID (or NULL * if unknown) * \note It is the caller's responsibility to free the result with free(). */ char * pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) { // Originally based on corosync-quorumtool.c:node_name() int lpc = 0; cs_error_t rc = CS_OK; int retries = 0; char *name = NULL; cmap_handle_t local_handle = 0; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; if (nodeid == 0) { nodeid = pcmk__cpg_local_nodeid(0); } if (cmap_handle == 0 && local_handle == 0) { retries = 0; crm_trace("Initializing CMAP connection"); do { rc = pcmk__init_cmap(&local_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %s", cs_strerror(rc)); local_handle = 0; } } if (cmap_handle == 0) { cmap_handle = local_handle; rc = cmap_fd_get(cmap_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } } while (name == NULL && cmap_handle != 0) { uint32_t id = 0; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &id); crm_trace("Checking %u vs %u from %s", nodeid, id, key); free(key); if (rc != CS_OK) { break; } if (nodeid == id) { crm_trace("Searching for node name for %u in nodelist.node.%d %s", nodeid, lpc, pcmk__s(name, "")); if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.name", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, pcmk__s(name, "")); free(key); } if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, pcmk__s(name, "")); if (!node_name_is_valid(key, name)) { free(name); name = NULL; } free(key); } break; } lpc++; } bail: if(local_handle) { cmap_finalize(local_handle); } if (name == NULL) { crm_info("Unable to get node name for nodeid %u", nodeid); } return name; } /*! * \internal * \brief Disconnect from Corosync cluster * * \param[in,out] cluster Cluster object to disconnect */ void pcmk__corosync_disconnect(pcmk_cluster_t *cluster) { pcmk__cpg_disconnect(cluster); if (pcmk_quorum_handle != 0) { quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; } crm_notice("Disconnected from Corosync"); } /*! * \internal * \brief Dispatch function for quorum connection file descriptor * * \param[in] user_data Ignored * * \return 0 on success, -1 on error (per mainloop_io_t interface) */ static int quorum_dispatch_cb(gpointer user_data) { int rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; return -1; } return 0; } /*! * \internal * \brief Notification callback for Corosync quorum connection * * \param[in] handle Corosync quorum connection * \param[in] quorate Whether cluster is quorate * \param[in] ring_id Corosync ring ID * \param[in] view_list_entries Number of entries in \p view_list * \param[in] view_list Corosync node IDs in membership */ static void quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t *view_list) { int i; GHashTableIter iter; pcmk__node_status_t *node = NULL; static gboolean init_phase = TRUE; bool is_quorate = (quorate != 0); bool was_quorate = pcmk__cluster_has_quorum(); if (is_quorate && !was_quorate) { crm_notice("Quorum acquired " QB_XS " membership=%" PRIu64 " members=%" PRIu32, ring_id, view_list_entries); pcmk__cluster_set_quorum(true); } else if (!is_quorate && was_quorate) { crm_warn("Quorum lost " QB_XS " membership=%" PRIu64 " members=" PRIu32, ring_id, view_list_entries); pcmk__cluster_set_quorum(false); } else { crm_info("Quorum %s " QB_XS " membership=%" PRIu64 " members=%" PRIu32, (is_quorate? "retained" : "still lost"), ring_id, view_list_entries); } if (view_list_entries == 0 && init_phase) { crm_info("Corosync membership is still forming, ignoring"); return; } init_phase = FALSE; /* Reset membership_id for all cached nodes so we can tell which ones aren't * in the view list */ g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { node->membership_id = 0; } /* Update the peer cache for each node in view list */ for (i = 0; i < view_list_entries; i++) { uint32_t id = view_list[i]; crm_debug("Member[%d] %u ", i, id); /* Get this node's peer cache entry (adding one if not already there) */ node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member); if (node->name == NULL) { char *name = pcmk__corosync_name(0, id); crm_info("Obtaining name for new node %u", id); node = pcmk__get_node(id, name, NULL, pcmk__node_search_cluster_member); free(name); } // Update the node state (including updating membership_id to ring_id) pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id); } /* Remove any peer cache entries we didn't update */ pcmk__reap_unseen_nodes(ring_id); if (quorum_app_callback) { quorum_app_callback(ring_id, is_quorate); } } /*! * \internal * \brief Connect to Corosync quorum service * * \param[in] dispatch Connection dispatch callback * \param[in] destroy Connection destroy callback */ void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy)(gpointer)) { cs_error_t rc; int fd = 0; int quorate = 0; uint32_t quorum_type = 0; struct mainloop_fd_callbacks quorum_fd_callbacks; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; quorum_fd_callbacks.dispatch = quorum_dispatch_cb; quorum_fd_callbacks.destroy = destroy; crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); { #if 0 // New way but not supported by all Corosync 2 versions quorum_model_v0_data_t quorum_model_data = { .model = QUORUM_MODEL_V0, .quorum_notify_fn = quorum_notification_cb, }; rc = quorum_model_initialize(&pcmk_quorum_handle, QUORUM_MODEL_V0, (quorum_model_data_t *) &quorum_model_data, &quorum_type, NULL); #else quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = quorum_notification_cb, }; rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); #endif } if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %s (%d)", cs_strerror(rc), rc); goto bail; } else if (quorum_type != QUORUM_SET) { crm_err("Corosync quorum is not configured"); goto bail; } rc = quorum_fd_get(pcmk_quorum_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the Quorum API connection: %s (%d)", strerror(rc), rc); goto bail; } /* Quorum provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("Quorum provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = CS_ERR_ACCESS; goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of Quorum provider: %s (%d)", strerror(-rv), -rv); rc = CS_ERR_ACCESS; goto bail; } rc = quorum_getquorate(pcmk_quorum_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d", rc); goto bail; } if (quorate) { crm_notice("Quorum acquired"); } else { crm_warn("No quorum"); } quorum_app_callback = dispatch; pcmk__cluster_set_quorum(quorate != 0); rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); if (rc != CS_OK) { crm_err("Could not setup Quorum API notifications: %d", rc); goto bail; } mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks); pcmk__corosync_add_nodes(NULL); bail: if (rc != CS_OK) { quorum_finalize(pcmk_quorum_handle); } } /*! * \internal * \brief Connect to Corosync cluster layer * * \param[in,out] cluster Initialized cluster object to connect * * \return Standard Pacemaker return code */ int pcmk__corosync_connect(pcmk_cluster_t *cluster) { - pcmk__node_status_t *peer = NULL; const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); int rc = pcmk_rc_ok; pcmk__cluster_init_node_caches(); if (cluster_layer != pcmk_cluster_layer_corosync) { crm_err("Invalid cluster layer: %s " QB_XS " cluster_layer=%d", cluster_layer_s, cluster_layer); return EINVAL; } rc = pcmk__cpg_connect(cluster); if (rc != pcmk_rc_ok) { // Error message was logged by pcmk__cpg_connect() return rc; } crm_info("Connection to %s established", cluster_layer_s); cluster->nodeid = pcmk__cpg_local_nodeid(0); if (cluster->nodeid == 0) { crm_err("Could not determine local node ID"); return ENXIO; } cluster->uname = pcmk__cluster_node_name(0); if (cluster->uname == NULL) { crm_err("Could not determine local node name"); return ENXIO; } // Ensure local node always exists in peer cache - peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL, - pcmk__node_search_cluster_member); - cluster->uuid = pcmk__corosync_uuid(peer); + pcmk__get_node(cluster->nodeid, cluster->uname, NULL, + pcmk__node_search_cluster_member); return pcmk_rc_ok; } /*! * \internal * \brief Check whether a Corosync cluster is active * * \return \c true if Corosync is found active, or \c false otherwise */ bool pcmk__corosync_is_active(void) { cmap_handle_t handle; int rc = pcmk__init_cmap(&handle); if (rc == CS_OK) { cmap_finalize(handle); return true; } crm_info("Failed to initialize the cmap API: %s (%d)", pcmk__cs_err_str(rc), rc); return false; } /*! * \internal * \brief Check whether a Corosync cluster peer is active * * \param[in] node Node to check * * \return \c true if \p node is an active Corosync peer, or \c false otherwise */ bool pcmk__corosync_is_peer_active(const pcmk__node_status_t *node) { if (node == NULL) { crm_trace("Corosync peer inactive: NULL"); return false; } if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_none)) { crm_trace("Corosync peer %s inactive: state=%s", node->name, node->state); return false; } if (!pcmk_is_set(node->processes, crm_proc_cpg)) { crm_trace("Corosync peer %s inactive " QB_XS " processes=%.16" PRIx32, node->name, node->processes); return false; } return true; } /*! * \internal * \brief Load Corosync node list (via CMAP) into peer cache and optionally XML * * \param[in,out] xml_parent If not NULL, add entry here for each node * * \return true if any nodes were found, false otherwise */ bool pcmk__corosync_add_nodes(xmlNode *xml_parent) { int lpc = 0; cs_error_t rc = CS_OK; int retries = 0; bool any = false; cmap_handle_t cmap_handle; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; do { rc = pcmk__init_cmap(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return false; } rc = cmap_fd_get(cmap_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } pcmk__cluster_init_node_caches(); crm_trace("Initializing Corosync node list"); for (lpc = 0; TRUE; lpc++) { uint32_t nodeid = 0; char *name = NULL; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &nodeid); free(key); if (rc != CS_OK) { break; } name = pcmk__corosync_name(cmap_handle, nodeid); if (name != NULL) { GHashTableIter iter; pcmk__node_status_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if ((node != NULL) && (node->cluster_layer_id > 0) && (node->cluster_layer_id != nodeid) && pcmk__str_eq(node->name, name, pcmk__str_casei)) { crm_crit("Nodes %" PRIu32 " and %" PRIu32 " share the " "same name '%s': shutting down", node->cluster_layer_id, nodeid, name); crm_exit(CRM_EX_FATAL); } } } if (nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster_member); } if (nodeid > 0 && name != NULL) { any = true; if (xml_parent) { xmlNode *node = pcmk__xe_create(xml_parent, PCMK_XE_NODE); pcmk__xe_set_id(node, "%u", nodeid); crm_xml_add(node, PCMK_XA_UNAME, name); } } free(name); } bail: cmap_finalize(cmap_handle); return any; } /*! * \internal * \brief Get cluster name from Corosync configuration (via CMAP) * * \return Newly allocated string with cluster name if configured, or NULL */ char * pcmk__corosync_cluster_name(void) { cmap_handle_t handle; char *cluster_name = NULL; cs_error_t rc = CS_OK; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; rc = pcmk__init_cmap(&handle); if (rc != CS_OK) { crm_info("Failed to initialize the cmap API: %s (%d)", cs_strerror(rc), rc); return NULL; } rc = cmap_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name); if (rc != CS_OK) { crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc); } else { crm_debug("cmap totem.cluster_name = '%s'", cluster_name); } bail: cmap_finalize(handle); return cluster_name; } /*! * \internal * \brief Check (via CMAP) whether Corosync configuration has a node list * * \return true if Corosync has node list, otherwise false */ bool pcmk__corosync_has_nodelist(void) { cs_error_t cs_rc = CS_OK; int retries = 0; cmap_handle_t cmap_handle; cmap_iter_handle_t iter_handle; char key_name[CMAP_KEYNAME_MAXLEN + 1]; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rc = pcmk_ok; static bool got_result = false; static bool result = false; if (got_result) { return result; } // Connect to CMAP do { cs_rc = pcmk__init_cmap(&cmap_handle); if (cs_rc != CS_OK) { retries++; crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)", cs_strerror(cs_rc), cs_rc, retries); sleep(retries); } } while ((retries < 5) && (cs_rc != CS_OK)); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP connection failed (%s) " QB_XS " rc=%d", cs_strerror(cs_rc), cs_rc); return false; } // Get CMAP connection file descriptor cs_rc = cmap_fd_get(cmap_handle, &fd); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP unusable (%s) " QB_XS " rc=%d", cs_strerror(cs_rc), cs_rc); goto bail; } // Check whether CMAP connection is authentic (i.e. provided by root) rc = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid, &found_uid, &found_gid); if (rc == 0) { crm_warn("Assuming Corosync does not have node list: " "CMAP provider is inauthentic " QB_XS " pid=%lld uid=%lld gid=%lld", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rc < 0) { crm_warn("Assuming Corosync does not have node list: " "Could not verify CMAP authenticity (%s) " QB_XS " rc=%d", pcmk_strerror(rc), rc); goto bail; } // Check whether nodelist section is presetn cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP not readable (%s) " QB_XS " rc=%d", cs_strerror(cs_rc), cs_rc); goto bail; } cs_rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL); if (cs_rc == CS_OK) { result = true; } cmap_iter_finalize(cmap_handle, iter_handle); got_result = true; crm_debug("Corosync %s node list", (result? "has" : "does not have")); bail: cmap_finalize(cmap_handle); return result; }