diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c index 9b74944f60..f8307494c5 100644 --- a/daemons/attrd/attrd_ipc.c +++ b/daemons/attrd/attrd_ipc.c @@ -1,628 +1,628 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static qb_ipcs_service_t *ipcs = NULL; /*! * \internal * \brief Build the XML reply to a client query * * \param[in] attr Name of requested attribute * \param[in] host Name of requested host (or NULL for all hosts) * * \return New XML reply * \note Caller is responsible for freeing the resulting XML */ static xmlNode *build_query_reply(const char *attr, const char *host) { xmlNode *reply = pcmk__xe_create(NULL, __func__); attribute_t *a; crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(reply, PCMK__XA_SUBT, PCMK__ATTRD_CMD_QUERY); crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); /* If desired attribute exists, add its value(s) to the reply */ a = g_hash_table_lookup(attributes, attr); if (a) { attribute_value_t *v; xmlNode *host_value; crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr); /* Allow caller to use "localhost" to refer to local node */ if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) { host = attrd_cluster->priv->node_name; crm_trace("Mapped localhost to %s", host); } /* If a specific node was requested, add its value */ if (host) { v = g_hash_table_lookup(a->values, host); host_value = pcmk__xe_create(reply, PCMK_XE_NODE); pcmk__xe_add_node(host_value, host, 0); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, (v? v->current : NULL)); /* Otherwise, add all nodes' values */ } else { GHashTableIter iter; g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { host_value = pcmk__xe_create(reply, PCMK_XE_NODE); pcmk__xe_add_node(host_value, v->nodename, 0); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current); } } } return reply; } xmlNode * attrd_client_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc, *op, *interval_spec; if (minimum_protocol_version >= 2) { /* Propagate to all peers (including ourselves). * This ends up at attrd_peer_message(). 
*/ attrd_send_message(NULL, xml, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); op = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_OPERATION); interval_spec = crm_element_value(xml, PCMK__XA_ATTR_CLEAR_INTERVAL); /* Map this to an update */ crm_xml_add(xml, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Add regular expression matching desired attributes */ if (rsc) { char *pattern; if (op == NULL) { pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); } else { guint interval_ms = 0U; pcmk_parse_interval_spec(interval_spec, &interval_ms); pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms); } crm_xml_add(xml, PCMK__XA_ATTR_REGEX, pattern); free(pattern); } else { crm_xml_add(xml, PCMK__XA_ATTR_REGEX, ATTRD_RE_CLEAR_ALL); } /* Make sure attribute and value are not set, so we delete via regex */ pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_NAME); pcmk__xe_remove_attr(xml, PCMK__XA_ATTR_VALUE); return attrd_client_update(request); } xmlNode * attrd_client_peer_remove(pcmk__request_t *request) { xmlNode *xml = request->xml; // Host and ID are not used in combination, rather host has precedence const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST); char *host_alloc = NULL; attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); if (host == NULL) { int nodeid = 0; crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID, &nodeid); if (nodeid > 0) { pcmk__node_status_t *node = NULL; char *host_alloc = NULL; node = pcmk__search_node_caches(nodeid, NULL, pcmk__node_search_cluster_member); if ((node != NULL) && (node->name != NULL)) { // Use cached name if available host = node->name; } else { // Otherwise ask cluster layer host_alloc = pcmk__cluster_node_name(nodeid); host = host_alloc; } pcmk__xe_add_node(xml, host, 0); } } if (host) { crm_info("Client %s is requesting all values for %s be removed", pcmk__client_name(request->ipc_client), host); attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ free(host_alloc); } else { crm_info("Ignoring request by client %s to remove all peer values without specifying peer", pcmk__client_name(request->ipc_client)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } xmlNode * attrd_client_query(pcmk__request_t *request) { xmlNode *query = request->xml; xmlNode *reply = NULL; const char *attr = NULL; crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client)); /* Request must specify attribute name to query */ attr = crm_element_value(query, PCMK__XA_ATTR_NAME); if (attr == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Ignoring malformed query from %s (no attribute name given)", pcmk__client_name(request->ipc_client)); return NULL; } /* Build the XML reply */ reply = build_query_reply(attr, crm_element_value(query, PCMK__XA_ATTR_HOST)); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Could not respond to query from %s: could not create XML reply", pcmk__client_name(request->ipc_client)); return NULL; } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } request->ipc_client->request_id = 0; return reply; } xmlNode * attrd_client_refresh(pcmk__request_t *request) { crm_info("Updating all attributes"); attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); attrd_write_attributes(attrd_write_all|attrd_write_no_delay); pcmk__set_result(&request->result, 
CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static void handle_missing_host(xmlNode *xml) { const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST); if (host == NULL) { crm_trace("Inferring host"); pcmk__xe_add_node(xml, attrd_cluster->priv->node_name, - attrd_cluster->nodeid); + attrd_cluster->priv->node_id); } } /* Convert a single IPC message with a regex into one with multiple children, one * for each regex match. */ static int expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex) { if (attr == NULL && regex) { bool matched = false; GHashTableIter aIter; regex_t r_patt; crm_debug("Setting %s to %s", regex, value); if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) { return EINVAL; } g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { int status = regexec(&r_patt, attr, 0, NULL, 0); if (status == 0) { xmlNode *child = pcmk__xe_create(xml, PCMK_XE_OP); crm_trace("Matched %s with %s", attr, regex); matched = true; /* Copy all the non-conflicting attributes from the parent over, * but remove the regex and replace it with the name. */ pcmk__xe_copy_attrs(child, xml, pcmk__xaf_no_overwrite); pcmk__xe_remove_attr(child, PCMK__XA_ATTR_REGEX); crm_xml_add(child, PCMK__XA_ATTR_NAME, attr); } } regfree(&r_patt); /* Return a code if we never matched anything. This should not be treated * as an error. It indicates there was a regex, and it was a valid regex, * but simply did not match anything and the caller should not continue * doing any regex-related processing. */ if (!matched) { return pcmk_rc_op_unsatisfied; } } else if (attr == NULL) { return pcmk_rc_bad_nvpair; } return pcmk_rc_ok; } static int handle_regexes(pcmk__request_t *request) { xmlNode *xml = request->xml; int rc = pcmk_rc_ok; const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); const char *regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX); rc = expand_regexes(xml, attr, value, regex); if (rc == EINVAL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Bad regex '%s' for update from client %s", regex, pcmk__client_name(request->ipc_client)); } else if (rc == pcmk_rc_bad_nvpair) { crm_err("Update request did not specify attribute or regular expression"); pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Client %s update request did not specify attribute or regular expression", pcmk__client_name(request->ipc_client)); } return rc; } static int handle_value_expansion(const char **value, xmlNode *xml, const char *op, const char *attr) { attribute_t *a = g_hash_table_lookup(attributes, attr); if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { return EINVAL; } if (*value && attrd_value_needs_expansion(*value)) { int int_value; attribute_value_t *v = NULL; if (a) { const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST); v = g_hash_table_lookup(a->values, host); } int_value = attrd_expand_value(*value, (v? 
v->current : NULL)); crm_info("Expanded %s=%s to %d", attr, *value, int_value); crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value); /* Replacing the value frees the previous memory, so re-query it */ *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); } return pcmk_rc_ok; } static void send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) { if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { /* The client is waiting on the cluster-wide sync point. In this case, * the response ACK is not sent until this attrd broadcasts the update * and receives its own confirmation back from all peers. */ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ } else { /* The client is either waiting on the local sync point or was not * waiting on any sync point at all. For the local sync point, the * response ACK is sent in attrd_peer_update. For clients not * waiting on any sync point, the response ACK is sent in * handle_update_request immediately before this function was called. */ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ } } static int send_child_update(xmlNode *child, void *data) { pcmk__request_t *request = (pcmk__request_t *) data; /* Calling pcmk__set_result is handled by one of these calls to * attrd_client_update, so no need to do it again here. */ request->xml = child; attrd_client_update(request); return pcmk_rc_ok; } xmlNode * attrd_client_update(pcmk__request_t *request) { xmlNode *xml = NULL; const char *attr, *value, *regex; CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL); xml = request->xml; /* If the message has children, that means it is a message from a newer * client that supports sending multiple operations at a time. There are * two ways we can handle that. */ if (xml->children != NULL) { if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { /* First, if all peers support a certain protocol version, we can * just broadcast the big message and they'll handle it. However, * we also need to apply all the transformations in this function * to the children since they don't happen anywhere else. */ for (xmlNode *child = pcmk__xe_first_child(xml, PCMK_XE_OP, NULL, NULL); child != NULL; child = pcmk__xe_next_same(child)) { attr = crm_element_value(child, PCMK__XA_ATTR_NAME); value = crm_element_value(child, PCMK__XA_ATTR_VALUE); handle_missing_host(child); if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) { pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, "Attribute %s does not exist", attr); return NULL; } } send_update_msg_to_cluster(request, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { /* Save the original xml node pointer so it can be restored after iterating * over all the children. */ xmlNode *orig_xml = request->xml; /* Second, if they do not support that protocol version, split it * up into individual messages and call attrd_client_update on * each one. */ pcmk__xe_foreach_child(xml, PCMK_XE_OP, send_child_update, request); request->xml = orig_xml; } return NULL; } attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); regex = crm_element_value(xml, PCMK__XA_ATTR_REGEX); if (handle_regexes(request) != pcmk_rc_ok) { /* Error handling was already dealt with in handle_regexes, so just return. 
*/ return NULL; } else if (regex) { /* Recursively call attrd_client_update on the new message with regexes * expanded. If supported by the attribute daemon, this means that all * matches can also be handled atomically. */ return attrd_client_update(request); } handle_missing_host(xml); if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) { pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, "Attribute %s does not exist", attr); return NULL; } crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_HOST), value, (attrd_election_won()? " (writer)" : "")); send_update_msg_to_cluster(request, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } /*! * \internal * \brief Accept a new client IPC connection * * \param[in,out] c New connection * \param[in] uid Client user id * \param[in] gid Client group id * * \return pcmk_ok on success, -errno otherwise */ static int32_t attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("New client connection %p", c); if (attrd_shutting_down(false)) { crm_info("Ignoring new connection from pid %d during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return pcmk_ok; } /*! * \internal * \brief Destroy a client IPC connection * * \param[in] c Connection to destroy * * \return FALSE (i.e. do not re-run this callback) */ static int32_t attrd_ipc_closed(qb_ipcs_connection_t *c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { crm_trace("Ignoring request to clean up unknown connection %p", c); } else { crm_trace("Cleaning up closed client connection %p", c); /* Remove the client from the sync point waitlist if it's present. */ attrd_remove_client_from_waitlist(client); /* And no longer wait for confirmations from any peers. */ attrd_do_not_wait_for_client(client); pcmk__free_client(client); } return FALSE; } /*! * \internal * \brief Destroy a client IPC connection * * \param[in,out] c Connection to destroy * * \note We handle a destroyed connection the same as a closed one, * but we need a separate handler because the return type is different. 
*/ static void attrd_ipc_destroy(qb_ipcs_connection_t *c) { crm_trace("Destroying client connection %p", c); attrd_ipc_closed(c); } static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *xml = NULL; // Sanity-check, and parse XML from IPC data CRM_CHECK((c != NULL) && (client != NULL), return 0); if (data == NULL) { crm_debug("No IPC data from PID %d", pcmk__client_pid(c)); return 0; } xml = pcmk__client_data2xml(client, data, &id, &flags); if (xml == NULL) { crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c)); pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL, CRM_EX_PROTOCOL); return 0; } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = NULL, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user); request.op = crm_element_value_copy(request.xml, PCMK_XA_TASK); CRM_CHECK(request.op != NULL, return 0); attrd_handle_request(&request); pcmk__reset_request(&request); } pcmk__xml_free(xml); return 0; } static struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = attrd_ipc_accept, .connection_created = NULL, .msg_process = attrd_ipc_dispatch, .connection_closed = attrd_ipc_closed, .connection_destroyed = attrd_ipc_destroy }; void attrd_ipc_fini(void) { if (ipcs != NULL) { pcmk__drop_all_clients(ipcs); qb_ipcs_destroy(ipcs); ipcs = NULL; } attrd_unregister_handlers(); pcmk__client_cleanup(); } /*! * \internal * \brief Set up attrd IPC communication */ void attrd_init_ipc(void) { pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks); } diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c index 6ef12af424..908bfc2bf8 100644 --- a/daemons/attrd/attrd_messages.c +++ b/daemons/attrd/attrd_messages.c @@ -1,346 +1,346 @@ /* * Copyright 2022-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include // pcmk__get_node() #include #include "pacemaker-attrd.h" int minimum_protocol_version = -1; static GHashTable *attrd_handlers = NULL; static bool is_sync_point_attr(xmlAttrPtr attr, void *data) { return pcmk__str_eq((const char *) attr->name, PCMK__XA_ATTR_SYNC_POINT, pcmk__str_none); } static int remove_sync_point_attribute(xmlNode *xml, void *data) { pcmk__xe_remove_matching_attrs(xml, is_sync_point_attr, NULL); pcmk__xe_foreach_child(xml, PCMK_XE_OP, remove_sync_point_attribute, NULL); return pcmk_rc_ok; } /* Sync points on a multi-update IPC message to an attrd too old to support * multi-update messages won't work. Strip the sync point attribute off here * so we don't pretend to support this situation and instead ACK the client * immediately. 
*/ static void remove_unsupported_sync_points(pcmk__request_t *request) { if (request->xml->children != NULL && !ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version) && attrd_request_has_sync_point(request->xml)) { crm_warn("Ignoring sync point in request from %s because not all nodes support it", pcmk__request_origin(request)); remove_sync_point_attribute(request->xml, NULL); } } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown request type '%s' (bug?)", request->op); return NULL; } static xmlNode * handle_clear_failure_request(pcmk__request_t *request) { if (request->peer != NULL) { /* It is not currently possible to receive this as a peer command, * but will be, if we one day enable propagating this operation. */ attrd_peer_clear_failure(request); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } else { remove_unsupported_sync_points(request); if (attrd_request_has_sync_point(request->xml)) { /* If this client supplied a sync point it wants to wait for, add it to * the wait list. Clients on this list will not receive an ACK until * their sync point is hit which will result in the client stalled there * until it receives a response. * * All other clients will receive the expected response as normal. */ attrd_add_client_to_waitlist(request); } else { /* If the client doesn't want to wait for a sync point, go ahead and send * the ACK immediately. Otherwise, we'll send the ACK when the appropriate * sync point is reached. */ attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); } return attrd_client_clear_failure(request); } } static xmlNode * handle_confirm_request(pcmk__request_t *request) { if (request->peer != NULL) { int callid; crm_debug("Received confirmation from %s", request->peer); if (crm_element_value_int(request->xml, PCMK__XA_CALL_ID, &callid) == -1) { pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Could not get callid from XML"); } else { attrd_handle_confirmation(callid, request->peer); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } else { return handle_unknown_request(request); } } static xmlNode * handle_query_request(pcmk__request_t *request) { if (request->peer != NULL) { return handle_unknown_request(request); } else { return attrd_client_query(request); } } static xmlNode * handle_remove_request(pcmk__request_t *request) { if (request->peer != NULL) { const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_HOST); bool reap = false; if (pcmk__xe_get_bool_attr(request->xml, PCMK__XA_REAP, &reap) != pcmk_rc_ok) { reap = true; // Default to true for backward compatibility } attrd_peer_remove(host, reap, request->peer); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } else { return attrd_client_peer_remove(request); } } static xmlNode * handle_refresh_request(pcmk__request_t *request) { if (request->peer != NULL) { return handle_unknown_request(request); } else { return attrd_client_refresh(request); } } static xmlNode * handle_sync_response_request(pcmk__request_t *request) { if (request->ipc_client != NULL) { return handle_unknown_request(request); } else { if (request->peer != NULL) { pcmk__node_status_t *peer = pcmk__get_node(0, request->peer, NULL, pcmk__node_search_cluster_member); 
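The request handlers above (clear-failure, confirm, query, remove, refresh, sync-response) are all selected by operation name through a lookup table that is registered further down via pcmk__register_handlers() and consulted by pcmk__process_request(). As a rough illustration only, here is a minimal, self-contained sketch of that op-name-to-handler dispatch pattern; all names in it are hypothetical and it does not use the real Pacemaker types or APIs beyond what the diff itself shows.

/* Hedged sketch: table-driven dispatch with a NULL-op catch-all entry,
 * analogous to handle_unknown_request() acting as the fallback handler. */
#include <stdio.h>
#include <string.h>

typedef struct {
    const char *op;                /* operation name; NULL marks the fallback */
    void (*handler)(const char *); /* handler invoked for that operation */
} command_t;

static void handle_update(const char *op)  { printf("handled: %s\n", op); }
static void handle_unknown(const char *op) { printf("unknown op: %s\n", op); }

static const command_t commands[] = {
    { "update", handle_update },
    { NULL,     handle_unknown },  /* catch-all, always matches */
};

static void dispatch(const char *op)
{
    /* Walk the table until an entry matches or the catch-all is reached */
    for (const command_t *c = commands; ; c++) {
        if ((c->op == NULL) || (strcmp(c->op, op) == 0)) {
            c->handler(op);
            return;
        }
    }
}

int main(void)
{
    dispatch("update");  /* matches the named entry */
    dispatch("bogus");   /* falls through to the catch-all */
    return 0;
}

The design keeps per-operation logic out of the IPC/CPG plumbing: adding a new command only means adding a table entry, which is how the UPDATE, UPDATE_DELAY, and UPDATE_BOTH commands below can share one handler.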
bool peer_won = attrd_check_for_new_writer(peer, request->xml); if (!pcmk__str_eq(peer->name, attrd_cluster->priv->node_name, pcmk__str_casei)) { attrd_peer_sync_response(peer, peer_won, request->xml); } } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } } static xmlNode * handle_update_request(pcmk__request_t *request) { if (request->peer != NULL) { const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_HOST); pcmk__node_status_t *peer = pcmk__get_node(0, request->peer, NULL, pcmk__node_search_cluster_member); attrd_peer_update(peer, request->xml, host, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } else { remove_unsupported_sync_points(request); if (attrd_request_has_sync_point(request->xml)) { /* If this client supplied a sync point it wants to wait for, add it to * the wait list. Clients on this list will not receive an ACK until * their sync point is hit which will result in the client stalled there * until it receives a response. * * All other clients will receive the expected response as normal. */ attrd_add_client_to_waitlist(request); } else { /* If the client doesn't want to wait for a sync point, go ahead and send * the ACK immediately. Otherwise, we'll send the ACK when the appropriate * sync point is reached. * * In the normal case, attrd_client_update can be called recursively which * makes where to send the ACK tricky. Doing it here ensures the client * only ever receives one. */ attrd_send_ack(request->ipc_client, request->ipc_id, request->flags|crm_ipc_client_response); } return attrd_client_update(request); } } static void attrd_register_handlers(void) { pcmk__server_command_t handlers[] = { { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request }, { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request }, { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request }, { PCMK__ATTRD_CMD_QUERY, handle_query_request }, { PCMK__ATTRD_CMD_REFRESH, handle_refresh_request }, { PCMK__ATTRD_CMD_SYNC_RESPONSE, handle_sync_response_request }, { PCMK__ATTRD_CMD_UPDATE, handle_update_request }, { PCMK__ATTRD_CMD_UPDATE_DELAY, handle_update_request }, { PCMK__ATTRD_CMD_UPDATE_BOTH, handle_update_request }, { NULL, handle_unknown_request }, }; attrd_handlers = pcmk__register_handlers(handlers); } void attrd_unregister_handlers(void) { if (attrd_handlers != NULL) { g_hash_table_destroy(attrd_handlers); attrd_handlers = NULL; } } void attrd_handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; char *log_msg = NULL; const char *reason = NULL; if (attrd_handlers == NULL) { attrd_register_handlers(); } reply = pcmk__process_request(request, attrd_handlers); if (reply != NULL) { crm_log_xml_trace(reply, "Reply"); if (request->ipc_client != NULL) { pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); } else { crm_err("Not sending CPG reply to client"); } pcmk__xml_free(reply); } reason = request->result.exit_reason; log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", pcmk__s(reason, ""), (reason == NULL)? "" : ")"); if (!pcmk__result_ok(&request->result)) { crm_warn("%s", log_msg); } else { crm_debug("%s", log_msg); } free(log_msg); pcmk__reset_request(request); } /*! 
\internal \brief Broadcast private attribute for local node with protocol version */ void attrd_broadcast_protocol(void) { xmlNode *attrd_op = pcmk__xe_create(NULL, __func__); crm_xml_add(attrd_op, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(attrd_op, PCMK__XA_SRC, crm_system_name); crm_xml_add(attrd_op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL); crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION); crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1); pcmk__xe_add_node(attrd_op, attrd_cluster->priv->node_name, - attrd_cluster->nodeid); + attrd_cluster->priv->node_id); crm_debug("Broadcasting attrd protocol version %s for node %s", ATTRD_PROTOCOL_VERSION, attrd_cluster->priv->node_name); attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */ pcmk__xml_free(attrd_op); } gboolean attrd_send_message(pcmk__node_status_t *node, xmlNode *data, bool confirm) { const char *op = crm_element_value(data, PCMK_XA_TASK); crm_xml_add(data, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); /* Request a confirmation from the destination peer node (which could * be all if node is NULL) that the message has been received and * acted upon. */ if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); } attrd_xml_add_writer(data); return pcmk__cluster_send_message(node, pcmk__cluster_msg_attrd, data); } diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index daca127ff2..3034a4c929 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -1,697 +1,697 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include #include #include #include #include #include #include static qb_ipcs_service_t *ipcs = NULL; static crm_trigger_t *config_read_trigger = NULL; #if SUPPORT_COROSYNC extern gboolean crm_connect_corosync(pcmk_cluster_t *cluster); #endif static void crm_shutdown(int nsig); static gboolean crm_read_options(gpointer user_data); /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static pcmk_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = pcmk_cluster_new(); } if (action & A_HA_DISCONNECT) { pcmk_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); controld_set_fsa_input_flags(R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { pcmk__cluster_set_status_callback(&peer_update_callback); pcmk__cluster_set_autoreap(false); #if SUPPORT_COROSYNC if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { registered = crm_connect_corosync(cluster); } #endif // SUPPORT_COROSYNC if (registered) { pcmk__node_status_t *node = - pcmk__get_node(cluster->nodeid, cluster->priv->node_name, NULL, - pcmk__node_search_cluster_member); + pcmk__get_node(cluster->priv->node_id, cluster->priv->node_name, + NULL, pcmk__node_search_cluster_member); controld_election_init(cluster->priv->node_name); controld_globals.our_nodename = cluster->priv->node_name; free(controld_globals.our_uuid); controld_globals.our_uuid = pcmk__str_copy(pcmk__cluster_node_uuid(node)); if (controld_globals.our_uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } } if (!registered) { controld_set_fsa_input_flags(R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __func__); controld_clear_fsa_input_flags(R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ controld_set_fsa_input_flags(R_SHUTDOWN); controld_disconnect_fencer(FALSE); } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; controld_set_fsa_input_flags(R_SHUTDOWN); //controld_set_fsa_input_flags(R_STAYDOWN); crm_info("Sending shutdown request to all peers (DC is %s)", pcmk__s(controld_globals.dc_name, "not set")); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (!pcmk__cluster_send_message(NULL, pcmk__cluster_msg_controld, msg)) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } pcmk__xml_free(msg); } void crmd_fast_exit(crm_exit_t exit_code) { if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn " QB_XS " remapping exit code %d to %d", exit_code, CRM_EX_FATAL); exit_code = CRM_EX_FATAL; } else if ((exit_code == CRM_EX_OK) && pcmk_is_set(controld_globals.fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = CRM_EX_ERROR; } if (controld_globals.logger_out != NULL) { controld_globals.logger_out->finish(controld_globals.logger_out, exit_code, true, NULL); 
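Much of the controller logic above and below (do_ha_control(), do_shutdown_req(), crmd_fast_exit()) revolves around a bit-flag "input register" that is set, cleared, and tested with helpers such as controld_set_fsa_input_flags() and pcmk_is_set(). The following is a minimal, standalone sketch of that pattern under assumed names; it is illustrative only and does not reproduce the actual Pacemaker macros or flag values.

/* Hedged sketch: a bit-flag register with set/clear/test helpers, mirroring
 * how flags like R_SHUTDOWN and R_STAYDOWN gate controller behavior. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_SHUTDOWN  (UINT64_C(1) << 0)  /* hypothetical stand-in for R_SHUTDOWN */
#define FLAG_STAYDOWN  (UINT64_C(1) << 1)  /* hypothetical stand-in for R_STAYDOWN */

static uint64_t input_register = 0;

static void set_flags(uint64_t flags)   { input_register |= flags; }
static void clear_flags(uint64_t flags) { input_register &= ~flags; }
static bool is_set(uint64_t flags)      { return (input_register & flags) == flags; }

int main(void)
{
    set_flags(FLAG_SHUTDOWN);
    if (is_set(FLAG_STAYDOWN)) {
        /* analogous to crmd_fast_exit() remapping the exit code to fatal
         * so the init system does not respawn the daemon */
        printf("staydown set: remap exit code to fatal\n");
    } else {
        printf("staydown not set: normal shutdown path\n");
    }
    clear_flags(FLAG_SHUTDOWN);
    return 0;
}

Testing with is_set() before acting, as crmd_fast_exit() does with R_STAYDOWN, keeps the policy decision (stay down versus respawn) separate from the code paths that merely record state.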
pcmk__output_free(controld_globals.logger_out); controld_globals.logger_out = NULL; } crm_exit(exit_code); } crm_exit_t crmd_exit(crm_exit_t exit_code) { GMainLoop *mloop = controld_globals.mainloop; static bool in_progress = FALSE; if (in_progress && (exit_code == CRM_EX_OK)) { crm_debug("Exit is already in progress"); return exit_code; } else if(in_progress) { crm_notice("Error during shutdown process, exiting now with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } in_progress = TRUE; crm_trace("Preparing to exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); /* Suppress secondary errors resulting from us disconnecting everything */ controld_set_fsa_input_flags(R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } controld_close_attrd_ipc(); controld_shutdown_schedulerd_ipc(); controld_disconnect_fencer(TRUE); if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) { crm_debug("No mainloop detected"); exit_code = CRM_EX_ERROR; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors stand out */ if (exit_code != CRM_EX_OK) { crm_notice("Forcing immediate exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } /* Clean up as much memory as possible for valgrind */ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; iter = iter->next) { fsa_data_t *fsa_data = (fsa_data_t *) iter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(controld_globals.fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } controld_clear_fsa_input_flags(R_MEMBERSHIP); g_list_free(controld_globals.fsa_message_queue); controld_globals.fsa_message_queue = NULL; controld_free_node_pending_timers(); controld_election_fini(); /* Tear down the CIB manager connection, but don't free it yet -- it could * be used when we drain the mainloop later. 
*/ controld_disconnect_cib_manager(); verify_stopped(controld_globals.fsa_state, LOG_WARNING); controld_clear_fsa_input_flags(R_LRM_CONNECTED); lrm_state_destroy_all(); mainloop_destroy_trigger(config_read_trigger); config_read_trigger = NULL; controld_destroy_fsa_trigger(); controld_destroy_transition_trigger(); pcmk__client_cleanup(); pcmk__cluster_destroy_node_caches(); controld_free_fsa_timers(); te_cleanup_stonith_history_sync(NULL, TRUE); controld_free_sched_timer(); free(controld_globals.our_nodename); controld_globals.our_nodename = NULL; free(controld_globals.our_uuid); controld_globals.our_uuid = NULL; free(controld_globals.dc_name); controld_globals.dc_name = NULL; free(controld_globals.dc_version); controld_globals.dc_version = NULL; free(controld_globals.cluster_name); controld_globals.cluster_name = NULL; free(controld_globals.te_uuid); controld_globals.te_uuid = NULL; free_max_generation(); controld_destroy_failed_sync_table(); controld_destroy_outside_events_table(); mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); /* leave SIGCHLD engaged as we might still want to drain some service-actions */ if (mloop) { GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop); /* Don't re-enter this block */ controld_globals.mainloop = NULL; /* no signals on final draining anymore */ mainloop_destroy_signal(SIGCHLD); crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); { int lpc = 0; while((g_main_context_pending(ctx) && lpc < 10)) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); } else { mainloop_destroy_signal(SIGCHLD); } cib_delete(controld_globals.cib_conn); controld_globals.cib_conn = NULL; throttle_fini(); /* Graceful */ crm_trace("Done preparing for exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); return exit_code; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_exit_t exit_code = CRM_EX_OK; if (pcmk_is_set(action, A_EXIT_1)) { exit_code = CRM_EX_ERROR; crm_err("Exiting now due to errors"); } verify_stopped(cur_state, LOG_ERR); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); controld_init_fsa_trigger(); controld_init_transition_trigger(); crm_debug("Creating CIB manager and executor objects"); controld_globals.cib_conn = cib_new(); lrm_state_init_local(); if (controld_init_fsa_timers() == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } // \return libqb error code (0 on success, -errno on error) static int32_t accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("Accepting new IPC client connection"); if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } // \return libqb error code (0 on success, 
-errno on error) static int32_t dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags); if (msg == NULL) { pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL, CRM_EX_PROTOCOL); return 0; } pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(msg, PCMK__XA_CRM_USER, client->user); crm_xml_add(msg, PCMK__XA_CRM_SYS_FROM, client->id); if (controld_authorize_ipc_message(msg, client, NULL)) { crm_trace("Processing IPC message from client %s", pcmk__client_name(client)); route_message(C_IPC_MESSAGE, msg); } controld_trigger_fsa(); pcmk__xml_free(msg); return 0; } static int32_t ipc_client_disconnected(qb_ipcs_connection_t *c) { pcmk__client_t *client = pcmk__find_client(c); if (client) { crm_trace("Disconnecting %sregistered client %s (%p/%p)", (client->userdata? "" : "un"), pcmk__client_name(client), c, client); free(client->userdata); pcmk__free_client(client); controld_trigger_fsa(); } return 0; } static void ipc_connection_destroyed(qb_ipcs_connection_t *c) { crm_trace("Connection %p", c); ipc_client_disconnected(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = accept_controller_client, .connection_created = NULL, .msg_process = dispatch_controller_ipc, .connection_closed = ipc_client_disconnected, .connection_destroyed = ipc_connection_destroyed }; if (cur_state != S_STARTING) { crm_err("Start cancelled... 
%s", fsa_state2string(cur_state)); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_MEMBERSHIP)) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_READ_CONFIG)) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) { crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = pcmk__serve_controld_ipc(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else { crm_notice("Pacemaker controller successfully started and accepting connections"); } controld_set_fsa_input_flags(R_ST_REQUIRED); controld_timer_fencer_connect(GINT_TO_POINTER(TRUE)); controld_clear_fsa_input_flags(R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { controld_set_fsa_input_flags(R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); controld_set_fsa_input_flags(R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig != NULL) && !pcmk__xe_is(crmconfig, PCMK_XE_CRM_CONFIG)) { crmconfig = pcmk__xe_first_child(crmconfig, PCMK_XE_CRM_CONFIG, NULL, NULL); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " PCMK_XE_CRM_CONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = pcmk__strkey_table(free, free); pe_unpack_nvpairs(crmconfig, crmconfig, PCMK_XE_CLUSTER_PROPERTY_SET, NULL, config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, FALSE, now, NULL); // Validate all options, and use defaults if not already present in hash pcmk__validate_cluster_options(config_hash); /* Validate the watchdog timeout in the context of the local node * environment. If invalid, the controller will exit with a fatal error. * * We do this via a wrapper in the controller, so that we call * pcmk__valid_stonith_watchdog_timeout() only if watchdog fencing is * enabled for the local node. Otherwise, we may exit unnecessarily. 
* * A validator function in libcrmcommon can't act as such a wrapper, because * it doesn't have a stonith API connection or the local node name. */ value = g_hash_table_lookup(config_hash, PCMK_OPT_STONITH_WATCHDOG_TIMEOUT); controld_verify_stonith_watchdog_timeout(value); value = g_hash_table_lookup(config_hash, PCMK_OPT_NO_QUORUM_POLICY); if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei) && (pcmk__locate_sbd() != 0)) { controld_set_global_flags(controld_no_quorum_suicide); } value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK); if (crm_is_true(value)) { controld_set_global_flags(controld_shutdown_lock_enabled); } else { controld_clear_global_flags(controld_shutdown_lock_enabled); } value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT); pcmk_parse_interval_spec(value, &controld_globals.shutdown_lock_limit); controld_globals.shutdown_lock_limit /= 1000; value = g_hash_table_lookup(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT); pcmk_parse_interval_spec(value, &controld_globals.node_pending_timeout); controld_globals.node_pending_timeout /= 1000; value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME); pcmk__str_update(&(controld_globals.cluster_name), value); // Let subcomponents initialize their own static variables controld_configure_election(config_hash); controld_configure_fencing(config_hash); controld_configure_fsa_timers(config_hash); controld_configure_throttle(config_hash); alerts = pcmk__xe_first_child(output, PCMK_XE_ALERTS, NULL, NULL); crmd_unpack_alerts(alerts); controld_set_fsa_input_flags(R_READ_CONFIG); controld_trigger_fsa(); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } /*! * \internal * \brief Trigger read and processing of the configuration * * \param[in] fn Calling function name * \param[in] line Line number where call occurred */ void controld_trigger_config_as(const char *fn, int line) { if (config_read_trigger != NULL) { crm_trace("%s:%d - Triggered config processing", fn, line); mainloop_set_trigger(config_read_trigger); } } gboolean crm_read_options(gpointer user_data) { cib_t *cib_conn = controld_globals.cib_conn; int call_id = cib_conn->cmds->query(cib_conn, "//" PCMK_XE_CRM_CONFIG " | //" PCMK_XE_ALERTS, NULL, cib_xpath); fsa_register_cib_callback(call_id, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); controld_trigger_config(); } static void crm_shutdown(int nsig) { const char *value = NULL; guint default_period_ms = 0; if ((controld_globals.mainloop == NULL) || !g_main_loop_is_running(controld_globals.mainloop)) { crmd_exit(CRM_EX_OK); return; } if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); return; } controld_set_fsa_input_flags(R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); /* If shutdown timer doesn't have a period set, use the default * * @TODO: Evaluate whether this is still necessary. As long as * config_query_callback() has been run at least once, it doesn't look like * anything could have changed the timer period since then. 
*/ value = pcmk__cluster_option(NULL, PCMK_OPT_SHUTDOWN_ESCALATION); pcmk_parse_interval_spec(value, &default_period_ms); controld_shutdown_start_countdown(default_period_ms); } diff --git a/include/crm/cluster.h b/include/crm/cluster.h index 43d53e5b6f..ac46f5af50 100644 --- a/include/crm/cluster.h +++ b/include/crm/cluster.h @@ -1,142 +1,140 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CLUSTER__H # define PCMK__CRM_CLUSTER__H # include // uint32_t, uint64_t # include // gboolean, GHashTable # include // xmlNode # include # include #ifdef __cplusplus extern "C" { #endif # if SUPPORT_COROSYNC # include # endif // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) extern GHashTable *crm_peer_cache; // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) extern GHashTable *crm_remote_peer_cache; // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) extern unsigned long long crm_peer_seq; // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) #define CRM_NODE_LOST "lost" // @COMPAT Make this internal when we can break API backward compatibility //! \deprecated Do not use (public access will be removed in a future release) #define CRM_NODE_MEMBER "member" // @COMPAT Make this internal when we can break API backward compatibility //!@{ //! \deprecated Do not use (public access will be removed in a future release) enum crm_join_phase { /* @COMPAT: crm_join_nack_quiet can be replaced by * pcmk__node_status_t:user_data at a compatibility break */ //! Not allowed to join, but don't send a nack message crm_join_nack_quiet = -2, crm_join_nack = -1, crm_join_none = 0, crm_join_welcomed = 1, crm_join_integrated = 2, crm_join_finalized = 3, crm_join_confirmed = 4, }; //!@} //! \internal Do not use typedef struct pcmk__cluster_private pcmk__cluster_private_t; // Implementation of pcmk_cluster_t // @COMPAT Make contents internal when we can break API backward compatibility //!@{ //! \deprecated Do not use (public access will be removed in a future release) struct pcmk__cluster { /* @COMPAT Once all members are moved to pcmk__cluster_private_t, we can * make that the pcmk_cluster_t implementation and drop this struct * altogether, leaving pcmk_cluster_t as an opaque public type. */ //! \internal Do not use pcmk__cluster_private_t *priv; - uint32_t nodeid; - // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call pcmk_cluster_set_destroy_fn() to set this void (*destroy) (gpointer); # if SUPPORT_COROSYNC /* @TODO When we can break public API compatibility, make these members a * separate struct and use void *cluster_data here instead, to abstract the * cluster layer further. */ struct cpg_name group; // NOTE: sbd (as of at least 1.5.2) uses this /*! * \deprecated Call pcmk_cpg_set_deliver_fn() and pcmk_cpg_set_confchg_fn() * to set these */ cpg_callbacks_t cpg; cpg_handle_t cpg_handle; # endif }; //!@} //! 
Connection to a cluster layer typedef struct pcmk__cluster pcmk_cluster_t; int pcmk_cluster_connect(pcmk_cluster_t *cluster); int pcmk_cluster_disconnect(pcmk_cluster_t *cluster); pcmk_cluster_t *pcmk_cluster_new(void); void pcmk_cluster_free(pcmk_cluster_t *cluster); int pcmk_cluster_set_destroy_fn(pcmk_cluster_t *cluster, void (*fn)(gpointer)); #if SUPPORT_COROSYNC int pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn); int pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn); #endif // SUPPORT_COROSYNC /*! * \enum pcmk_cluster_layer * \brief Types of cluster layer */ enum pcmk_cluster_layer { pcmk_cluster_layer_unknown = 1, //!< Unknown cluster layer pcmk_cluster_layer_invalid = 2, //!< Invalid cluster layer pcmk_cluster_layer_corosync = 32, //!< Corosync Cluster Engine }; enum pcmk_cluster_layer pcmk_get_cluster_layer(void); const char *pcmk_cluster_layer_text(enum pcmk_cluster_layer layer); #ifdef __cplusplus } #endif #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) #include #endif #endif diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h index 0c15006212..cea5d83fb2 100644 --- a/include/crm/cluster/internal.h +++ b/include/crm/cluster/internal.h @@ -1,315 +1,318 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CLUSTER_INTERNAL__H #define PCMK__CRM_CLUSTER_INTERNAL__H #include #include // uint32_t, uint64_t #include // gboolean #include #ifdef __cplusplus extern "C" { #endif /*! * \internal * \enum pcmk__cluster_msg * \brief Types of message sent via the cluster layer */ enum pcmk__cluster_msg { pcmk__cluster_msg_unknown, pcmk__cluster_msg_attrd, pcmk__cluster_msg_based, pcmk__cluster_msg_controld, pcmk__cluster_msg_execd, pcmk__cluster_msg_fenced, }; enum crm_proc_flag { /* @COMPAT When pcmk__node_status_t:processes is made internal, we can merge * this into node flags or turn it into a boolean. Until then, in theory * something could depend on these particular numeric values. */ crm_proc_none = 0x00000001, // Cluster layers crm_proc_cpg = 0x04000000, }; /*! * \internal * \enum pcmk__node_status_flags * \brief Boolean flags for a \c pcmk__node_status_t object * * Some flags may not be related to status specifically. However, we keep these * separate from enum pcmk__node_flags because they're used with * different object types. */ enum pcmk__node_status_flags { /*! * Node is a Pacemaker Remote node and should not be considered for cluster * membership */ pcmk__node_status_remote = (UINT32_C(1) << 0), //! Node's cache entry is dirty pcmk__node_status_dirty = (UINT32_C(1) << 1), }; // Used with node cache search functions enum pcmk__node_search_flags { //! Does not affect search pcmk__node_search_none = 0, //! Search for cluster nodes from membership cache pcmk__node_search_cluster_member = (1 << 0), //! Search for remote nodes pcmk__node_search_remote = (1 << 1), //! Search for cluster member nodes and remote nodes pcmk__node_search_any = pcmk__node_search_cluster_member |pcmk__node_search_remote, //! Search for cluster nodes from CIB (as of last cache refresh) pcmk__node_search_cluster_cib = (1 << 2), }; /*! 
* \internal * \enum pcmk__node_update * \brief Type of update to a \c pcmk__node_status_t object */ enum pcmk__node_update { pcmk__node_update_name, //!< Node name updated pcmk__node_update_state, //!< Node connection state updated pcmk__node_update_processes, //!< Node process group membership updated }; //! Implementation of pcmk__cluster_private_t struct pcmk__cluster_private { + // @TODO Drop and replace with per-daemon cluster-layer ID global variables? + uint32_t node_id; //!< Local node ID at cluster layer + // @TODO Drop and replace with per-daemon node name global variables? char *node_name; //!< Local node name at cluster layer }; //! Node status data (may be a cluster node or a Pacemaker Remote node) typedef struct pcmk__node_status { //! Node name as known to cluster layer, or Pacemaker Remote node name char *name; /* @COMPAT This is less than ideal since the value is not a valid XML ID * (for Corosync, it's the string equivalent of the node's numeric node ID, * but XML IDs can't start with a number) and the three elements should have * different IDs. * * Ideally, we would use something like node-NODEID, node_state-NODEID, and * transient_attributes-NODEID as the element IDs. Unfortunately changing it * would be impractical due to backward compatibility; older nodes in a * rolling upgrade will always write and expect the value in the old format. */ /*! * Value of the PCMK_XA_ID XML attribute to use with the node's * PCMK_XE_NODE, PCMK_XE_NODE_STATE, and PCMK_XE_TRANSIENT_ATTRIBUTES * XML elements in the CIB */ char *xml_id; char *state; // @TODO change to enum //! Group of enum pcmk__node_status_flags uint32_t flags; /*! * Most recent cluster membership in which node was seen (0 for Pacemaker * Remote nodes) */ uint64_t membership_id; uint32_t processes; // @TODO most not needed, merge into flags /* @TODO When we can break public API compatibility, we can make the rest of * these members separate structs and use void *cluster_data and * void *user_data here instead, to abstract the cluster layer further. */ // Only used by controller enum crm_join_phase join; char *expected; time_t peer_lost; char *conn_host; time_t when_member; // Since when node has been a cluster member time_t when_online; // Since when peer has been online in CPG /* @TODO The following are currently needed only by the Corosync stack. * Eventually consider moving them to a cluster-layer-specific data object. */ uint32_t cluster_layer_id; //!< Cluster-layer numeric node ID time_t when_lost; //!< When CPG membership was last lost } pcmk__node_status_t; /*! * \internal * \brief Return the process bit corresponding to the current cluster stack * * \return Process flag if detectable, otherwise 0 */ static inline uint32_t crm_get_cluster_proc(void) { switch (pcmk_get_cluster_layer()) { case pcmk_cluster_layer_corosync: return crm_proc_cpg; default: break; } return crm_proc_none; } /*! 
* \internal * \brief Get log-friendly string description of a Corosync return code * * \param[in] error Corosync return code * * \return Log-friendly string description corresponding to \p error */ static inline const char * pcmk__cs_err_str(int error) { # if SUPPORT_COROSYNC switch (error) { case CS_OK: return "OK"; case CS_ERR_LIBRARY: return "Library error"; case CS_ERR_VERSION: return "Version error"; case CS_ERR_INIT: return "Initialization error"; case CS_ERR_TIMEOUT: return "Timeout"; case CS_ERR_TRY_AGAIN: return "Try again"; case CS_ERR_INVALID_PARAM: return "Invalid parameter"; case CS_ERR_NO_MEMORY: return "No memory"; case CS_ERR_BAD_HANDLE: return "Bad handle"; case CS_ERR_BUSY: return "Busy"; case CS_ERR_ACCESS: return "Access error"; case CS_ERR_NOT_EXIST: return "Doesn't exist"; case CS_ERR_NAME_TOO_LONG: return "Name too long"; case CS_ERR_EXIST: return "Exists"; case CS_ERR_NO_SPACE: return "No space"; case CS_ERR_INTERRUPT: return "Interrupt"; case CS_ERR_NAME_NOT_FOUND: return "Name not found"; case CS_ERR_NO_RESOURCES: return "No resources"; case CS_ERR_NOT_SUPPORTED: return "Not supported"; case CS_ERR_BAD_OPERATION: return "Bad operation"; case CS_ERR_FAILED_OPERATION: return "Failed operation"; case CS_ERR_MESSAGE_ERROR: return "Message error"; case CS_ERR_QUEUE_FULL: return "Queue full"; case CS_ERR_QUEUE_NOT_AVAILABLE: return "Queue not available"; case CS_ERR_BAD_FLAGS: return "Bad flags"; case CS_ERR_TOO_BIG: return "Too big"; case CS_ERR_NO_SECTIONS: return "No sections"; } # endif return "Corosync error"; } # if SUPPORT_COROSYNC #if 0 /* This is the new way to do it, but we still support all Corosync 2 versions, * and this isn't always available. A better alternative here would be to check * for support in the configure script and enable this conditionally. 
*/ #define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP) #else #define pcmk__init_cmap(handle) cmap_initialize(handle) #endif char *pcmk__corosync_cluster_name(void); bool pcmk__corosync_add_nodes(xmlNode *xml_parent); void pcmk__cpg_confchg_cb(cpg_handle_t handle, const struct cpg_name *group_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries); char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id, uint32_t pid, void *content, const char **from); # endif const char *pcmk__cluster_node_uuid(pcmk__node_status_t *node); char *pcmk__cluster_node_name(uint32_t nodeid); const char *pcmk__cluster_local_node_name(void); const char *pcmk__node_name_from_uuid(const char *uuid); pcmk__node_status_t *crm_update_peer_proc(const char *source, pcmk__node_status_t *peer, uint32_t flag, const char *status); pcmk__node_status_t *pcmk__update_peer_state(const char *source, pcmk__node_status_t *node, const char *state, uint64_t membership); void pcmk__update_peer_expected(const char *source, pcmk__node_status_t *node, const char *expected); void pcmk__reap_unseen_nodes(uint64_t ring_id); void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy) (gpointer)); enum pcmk__cluster_msg pcmk__cluster_parse_msg_type(const char *text); bool pcmk__cluster_send_message(const pcmk__node_status_t *node, enum pcmk__cluster_msg service, const xmlNode *data); // Membership bool pcmk__cluster_has_quorum(void); void pcmk__cluster_init_node_caches(void); void pcmk__cluster_destroy_node_caches(void); void pcmk__cluster_set_autoreap(bool enable); void pcmk__cluster_set_status_callback(void (*dispatch)(enum pcmk__node_update, pcmk__node_status_t *, const void *)); bool pcmk__cluster_is_node_active(const pcmk__node_status_t *node); unsigned int pcmk__cluster_num_active_nodes(void); unsigned int pcmk__cluster_num_remote_nodes(void); pcmk__node_status_t *pcmk__cluster_lookup_remote_node(const char *node_name); void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name); void pcmk__cluster_forget_remote_node(const char *node_name); pcmk__node_status_t *pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags); void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id); void pcmk__refresh_node_caches_from_cib(xmlNode *cib); pcmk__node_status_t *pcmk__get_node(unsigned int id, const char *uname, const char *uuid, uint32_t flags); #ifdef __cplusplus } #endif #endif // PCMK__CRM_CLUSTER_INTERNAL__H diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index 71d95925e2..1c4a4df62a 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,816 +1,816 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include // PRIu64, etc. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // PCMK__SPECIAL_PID #include #include #include "crmcluster_private.h" static quorum_handle_t pcmk_quorum_handle = 0; static gboolean (*quorum_app_callback)(unsigned long long seq, gboolean quorate) = NULL; /*! 
* \internal * \brief Get the Corosync UUID associated with a Pacemaker node * * \param[in] node Pacemaker node * * \return Newly allocated string with node's Corosync UUID, or NULL if unknown * \note It is the caller's responsibility to free the result with free(). */ char * pcmk__corosync_uuid(const pcmk__node_status_t *node) { CRM_ASSERT(pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync); if (node != NULL) { if (node->cluster_layer_id > 0) { return crm_strdup_printf("%" PRIu32, node->cluster_layer_id); } else { crm_info("Node %s is not yet known by Corosync", node->name); } } return NULL; } static bool node_name_is_valid(const char *key, const char *name) { int octet; if (name == NULL) { crm_trace("%s is empty", key); return false; } else if (sscanf(name, "%d.%d.%d.%d", &octet, &octet, &octet, &octet) == 4) { crm_trace("%s contains an IPv4 address (%s), ignoring", key, name); return false; } else if (strstr(name, ":") != NULL) { crm_trace("%s contains an IPv6 address (%s), ignoring", key, name); return false; } crm_trace("'%s: %s' is valid", key, name); return true; } /* * \internal * \brief Get Corosync node name corresponding to a node ID * * \param[in] cmap_handle Connection to Corosync CMAP * \param[in] nodeid Node ID to check * * \return Newly allocated string with name or (if no name) IP address * associated with first address assigned to a Corosync node ID (or NULL * if unknown) * \note It is the caller's responsibility to free the result with free(). */ char * pcmk__corosync_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) { // Originally based on corosync-quorumtool.c:node_name() int lpc = 0; cs_error_t rc = CS_OK; int retries = 0; char *name = NULL; cmap_handle_t local_handle = 0; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; if (nodeid == 0) { nodeid = pcmk__cpg_local_nodeid(0); } if (cmap_handle == 0 && local_handle == 0) { retries = 0; crm_trace("Initializing CMAP connection"); do { rc = pcmk__init_cmap(&local_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %s", cs_strerror(rc)); local_handle = 0; } } if (cmap_handle == 0) { cmap_handle = local_handle; rc = cmap_fd_get(cmap_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? 
*/ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } } while (name == NULL && cmap_handle != 0) { uint32_t id = 0; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &id); crm_trace("Checking %u vs %u from %s", nodeid, id, key); free(key); if (rc != CS_OK) { break; } if (nodeid == id) { crm_trace("Searching for node name for %u in nodelist.node.%d %s", nodeid, lpc, pcmk__s(name, "")); if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.name", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, pcmk__s(name, "")); free(key); } if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, pcmk__s(name, "")); if (!node_name_is_valid(key, name)) { free(name); name = NULL; } free(key); } break; } lpc++; } bail: if(local_handle) { cmap_finalize(local_handle); } if (name == NULL) { crm_info("Unable to get node name for nodeid %u", nodeid); } return name; } /*! * \internal * \brief Disconnect from Corosync cluster * * \param[in,out] cluster Cluster object to disconnect */ void pcmk__corosync_disconnect(pcmk_cluster_t *cluster) { pcmk__cpg_disconnect(cluster); if (pcmk_quorum_handle != 0) { quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; } crm_notice("Disconnected from Corosync"); } /*! * \internal * \brief Dispatch function for quorum connection file descriptor * * \param[in] user_data Ignored * * \return 0 on success, -1 on error (per mainloop_io_t interface) */ static int quorum_dispatch_cb(gpointer user_data) { int rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; return -1; } return 0; } /*! * \internal * \brief Notification callback for Corosync quorum connection * * \param[in] handle Corosync quorum connection * \param[in] quorate Whether cluster is quorate * \param[in] ring_id Corosync ring ID * \param[in] view_list_entries Number of entries in \p view_list * \param[in] view_list Corosync node IDs in membership */ static void quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t *view_list) { int i; GHashTableIter iter; pcmk__node_status_t *node = NULL; static gboolean init_phase = TRUE; bool is_quorate = (quorate != 0); bool was_quorate = pcmk__cluster_has_quorum(); if (is_quorate && !was_quorate) { crm_notice("Quorum acquired " QB_XS " membership=%" PRIu64 " members=%" PRIu32, ring_id, view_list_entries); pcmk__cluster_set_quorum(true); } else if (!is_quorate && was_quorate) { crm_warn("Quorum lost " QB_XS " membership=%" PRIu64 " members=%" PRIu32, ring_id, view_list_entries); pcmk__cluster_set_quorum(false); } else { crm_info("Quorum %s " QB_XS " membership=%" PRIu64 " members=%" PRIu32, (is_quorate?
"retained" : "still lost"), ring_id, view_list_entries); } if (view_list_entries == 0 && init_phase) { crm_info("Corosync membership is still forming, ignoring"); return; } init_phase = FALSE; /* Reset membership_id for all cached nodes so we can tell which ones aren't * in the view list */ g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { node->membership_id = 0; } /* Update the peer cache for each node in view list */ for (i = 0; i < view_list_entries; i++) { uint32_t id = view_list[i]; crm_debug("Member[%d] %u ", i, id); /* Get this node's peer cache entry (adding one if not already there) */ node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member); if (node->name == NULL) { char *name = pcmk__corosync_name(0, id); crm_info("Obtaining name for new node %u", id); node = pcmk__get_node(id, name, NULL, pcmk__node_search_cluster_member); free(name); } // Update the node state (including updating membership_id to ring_id) pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, ring_id); } /* Remove any peer cache entries we didn't update */ pcmk__reap_unseen_nodes(ring_id); if (quorum_app_callback) { quorum_app_callback(ring_id, is_quorate); } } /*! * \internal * \brief Connect to Corosync quorum service * * \param[in] dispatch Connection dispatch callback * \param[in] destroy Connection destroy callback */ void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy)(gpointer)) { cs_error_t rc; int fd = 0; int quorate = 0; uint32_t quorum_type = 0; struct mainloop_fd_callbacks quorum_fd_callbacks; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; quorum_fd_callbacks.dispatch = quorum_dispatch_cb; quorum_fd_callbacks.destroy = destroy; crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); { #if 0 // New way but not supported by all Corosync 2 versions quorum_model_v0_data_t quorum_model_data = { .model = QUORUM_MODEL_V0, .quorum_notify_fn = quorum_notification_cb, }; rc = quorum_model_initialize(&pcmk_quorum_handle, QUORUM_MODEL_V0, (quorum_model_data_t *) &quorum_model_data, &quorum_type, NULL); #else quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = quorum_notification_cb, }; rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); #endif } if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %s (%d)", cs_strerror(rc), rc); goto bail; } else if (quorum_type != QUORUM_SET) { crm_err("Corosync quorum is not configured"); goto bail; } rc = quorum_fd_get(pcmk_quorum_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the Quorum API connection: %s (%d)", strerror(rc), rc); goto bail; } /* Quorum provider run as root (in given user namespace, anyway)? 
*/ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("Quorum provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = CS_ERR_ACCESS; goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of Quorum provider: %s (%d)", strerror(-rv), -rv); rc = CS_ERR_ACCESS; goto bail; } rc = quorum_getquorate(pcmk_quorum_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d", rc); goto bail; } if (quorate) { crm_notice("Quorum acquired"); } else { crm_warn("No quorum"); } quorum_app_callback = dispatch; pcmk__cluster_set_quorum(quorate != 0); rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); if (rc != CS_OK) { crm_err("Could not setup Quorum API notifications: %d", rc); goto bail; } mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks); pcmk__corosync_add_nodes(NULL); bail: if (rc != CS_OK) { quorum_finalize(pcmk_quorum_handle); } } /*! * \internal * \brief Connect to Corosync cluster layer * * \param[in,out] cluster Initialized cluster object to connect * * \return Standard Pacemaker return code */ int pcmk__corosync_connect(pcmk_cluster_t *cluster) { const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); int rc = pcmk_rc_ok; pcmk__cluster_init_node_caches(); if (cluster_layer != pcmk_cluster_layer_corosync) { crm_err("Invalid cluster layer: %s " QB_XS " cluster_layer=%d", cluster_layer_s, cluster_layer); return EINVAL; } rc = pcmk__cpg_connect(cluster); if (rc != pcmk_rc_ok) { // Error message was logged by pcmk__cpg_connect() return rc; } crm_info("Connection to %s established", cluster_layer_s); - cluster->nodeid = pcmk__cpg_local_nodeid(0); - if (cluster->nodeid == 0) { + cluster->priv->node_id = pcmk__cpg_local_nodeid(0); + if (cluster->priv->node_id == 0) { crm_err("Could not determine local node ID"); return ENXIO; } cluster->priv->node_name = pcmk__cluster_node_name(0); if (cluster->priv->node_name == NULL) { crm_err("Could not determine local node name"); return ENXIO; } // Ensure local node always exists in peer cache - pcmk__get_node(cluster->nodeid, cluster->priv->node_name, NULL, + pcmk__get_node(cluster->priv->node_id, cluster->priv->node_name, NULL, pcmk__node_search_cluster_member); return pcmk_rc_ok; } /*! * \internal * \brief Check whether a Corosync cluster is active * * \return \c true if Corosync is found active, or \c false otherwise */ bool pcmk__corosync_is_active(void) { cmap_handle_t handle; int rc = pcmk__init_cmap(&handle); if (rc == CS_OK) { cmap_finalize(handle); return true; } crm_info("Failed to initialize the cmap API: %s (%d)", pcmk__cs_err_str(rc), rc); return false; } /*! 
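 * Illustrative sketch (not part of this patch) for
 * pcmk__corosync_is_peer_active() below; "node1" is a made-up node name:
 * \code
 * pcmk__node_status_t *peer =
 *     pcmk__search_node_caches(0, "node1", pcmk__node_search_cluster_member);
 *
 * if ((peer != NULL) && pcmk__corosync_is_peer_active(peer)) {
 *     crm_info("node1 is an active Corosync peer");
 * }
 * \endcode
 *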
* \internal * \brief Check whether a Corosync cluster peer is active * * \param[in] node Node to check * * \return \c true if \p node is an active Corosync peer, or \c false otherwise */ bool pcmk__corosync_is_peer_active(const pcmk__node_status_t *node) { if (node == NULL) { crm_trace("Corosync peer inactive: NULL"); return false; } if (!pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_none)) { crm_trace("Corosync peer %s inactive: state=%s", node->name, node->state); return false; } if (!pcmk_is_set(node->processes, crm_proc_cpg)) { crm_trace("Corosync peer %s inactive " QB_XS " processes=%.16" PRIx32, node->name, node->processes); return false; } return true; } /*! * \internal * \brief Load Corosync node list (via CMAP) into peer cache and optionally XML * * \param[in,out] xml_parent If not NULL, add entry here for each node * * \return true if any nodes were found, false otherwise */ bool pcmk__corosync_add_nodes(xmlNode *xml_parent) { int lpc = 0; cs_error_t rc = CS_OK; int retries = 0; bool any = false; cmap_handle_t cmap_handle; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; do { rc = pcmk__init_cmap(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return false; } rc = cmap_fd_get(cmap_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } pcmk__cluster_init_node_caches(); crm_trace("Initializing Corosync node list"); for (lpc = 0; TRUE; lpc++) { uint32_t nodeid = 0; char *name = NULL; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &nodeid); free(key); if (rc != CS_OK) { break; } name = pcmk__corosync_name(cmap_handle, nodeid); if (name != NULL) { GHashTableIter iter; pcmk__node_status_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if ((node != NULL) && (node->cluster_layer_id > 0) && (node->cluster_layer_id != nodeid) && pcmk__str_eq(node->name, name, pcmk__str_casei)) { crm_crit("Nodes %" PRIu32 " and %" PRIu32 " share the " "same name '%s': shutting down", node->cluster_layer_id, nodeid, name); crm_exit(CRM_EX_FATAL); } } } if (nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster_member); } if (nodeid > 0 && name != NULL) { any = true; if (xml_parent) { xmlNode *node = pcmk__xe_create(xml_parent, PCMK_XE_NODE); pcmk__xe_set_id(node, "%u", nodeid); crm_xml_add(node, PCMK_XA_UNAME, name); } } free(name); } bail: cmap_finalize(cmap_handle); return any; } /*! 
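 * Illustrative sketch (not part of this patch) for
 * pcmk__corosync_cluster_name() below; the caller must free the result:
 * \code
 * char *cluster_name = pcmk__corosync_cluster_name();
 *
 * if (cluster_name != NULL) {
 *     crm_info("Corosync cluster name is '%s'", cluster_name);
 *     free(cluster_name);
 * }
 * \endcode
 *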
* \internal * \brief Get cluster name from Corosync configuration (via CMAP) * * \return Newly allocated string with cluster name if configured, or NULL */ char * pcmk__corosync_cluster_name(void) { cmap_handle_t handle; char *cluster_name = NULL; cs_error_t rc = CS_OK; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; rc = pcmk__init_cmap(&handle); if (rc != CS_OK) { crm_info("Failed to initialize the cmap API: %s (%d)", cs_strerror(rc), rc); return NULL; } rc = cmap_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CMAP API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CMAP provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CMAP provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CMAP provider: %s (%d)", strerror(-rv), -rv); goto bail; } rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name); if (rc != CS_OK) { crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc); } else { crm_debug("cmap totem.cluster_name = '%s'", cluster_name); } bail: cmap_finalize(handle); return cluster_name; } /*! * \internal * \brief Check (via CMAP) whether Corosync configuration has a node list * * \return true if Corosync has node list, otherwise false */ bool pcmk__corosync_has_nodelist(void) { cs_error_t cs_rc = CS_OK; int retries = 0; cmap_handle_t cmap_handle; cmap_iter_handle_t iter_handle; char key_name[CMAP_KEYNAME_MAXLEN + 1]; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rc = pcmk_ok; static bool got_result = false; static bool result = false; if (got_result) { return result; } // Connect to CMAP do { cs_rc = pcmk__init_cmap(&cmap_handle); if (cs_rc != CS_OK) { retries++; crm_debug("CMAP connection failed: %s (rc=%d, retrying in %ds)", cs_strerror(cs_rc), cs_rc, retries); sleep(retries); } } while ((retries < 5) && (cs_rc != CS_OK)); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP connection failed (%s) " QB_XS " rc=%d", cs_strerror(cs_rc), cs_rc); return false; } // Get CMAP connection file descriptor cs_rc = cmap_fd_get(cmap_handle, &fd); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP unusable (%s) " QB_XS " rc=%d", cs_strerror(cs_rc), cs_rc); goto bail; } // Check whether CMAP connection is authentic (i.e. 
provided by root) rc = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid, &found_uid, &found_gid); if (rc == 0) { crm_warn("Assuming Corosync does not have node list: " "CMAP provider is inauthentic " QB_XS " pid=%lld uid=%lld gid=%lld", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rc < 0) { crm_warn("Assuming Corosync does not have node list: " "Could not verify CMAP authenticity (%s) " QB_XS " rc=%d", pcmk_strerror(rc), rc); goto bail; } // Check whether nodelist section is present cs_rc = cmap_iter_init(cmap_handle, "nodelist", &iter_handle); if (cs_rc != CS_OK) { crm_warn("Assuming Corosync does not have node list: " "CMAP not readable (%s) " QB_XS " rc=%d", cs_strerror(cs_rc), cs_rc); goto bail; } cs_rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL); if (cs_rc == CS_OK) { result = true; } cmap_iter_finalize(cmap_handle, iter_handle); got_result = true; crm_debug("Corosync %s node list", (result? "has" : "does not have")); bail: cmap_finalize(cmap_handle); return result; } diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c index 2bda2a1b37..87f43bae97 100644 --- a/lib/cluster/cpg.c +++ b/lib/cluster/cpg.c @@ -1,1058 +1,1058 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include // PRIu32 #include #include #include #include // uint32_t #include #include // size_t #include #include #include #include #include #include #include #include #include #include #include #include // PCMK__SPECIAL_PID #include #include #include "crmcluster_private.h" /* @TODO Once we can update the public API to require pcmk_cluster_t* in more * functions, we can ditch this in favor of cluster->cpg_handle. */ static cpg_handle_t pcmk_cpg_handle = 0; // @TODO These could be moved to pcmk_cluster_t* at that time as well static bool cpg_evicted = false; static GList *cs_message_queue = NULL; static int cs_message_timer = 0; struct pcmk__cpg_host_s { uint32_t id; uint32_t pid; enum pcmk__cluster_msg type; uint32_t size; char uname[MAX_NAME]; } __attribute__ ((packed)); typedef struct pcmk__cpg_host_s pcmk__cpg_host_t; struct pcmk__cpg_msg_s { struct qb_ipc_response_header header __attribute__ ((aligned(8))); uint32_t id; gboolean is_compressed; pcmk__cpg_host_t host; pcmk__cpg_host_t sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__ ((packed)); typedef struct pcmk__cpg_msg_s pcmk__cpg_msg_t; static void crm_cs_flush(gpointer data); #define msg_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) #define cs_repeat(rc, counter, max, code) do { \ rc = code; \ if ((rc == CS_ERR_TRY_AGAIN) || (rc == CS_ERR_QUEUE_FULL)) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while (counter < max) /*!
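 * Illustrative sketch (not part of this patch) for pcmk__cpg_local_nodeid()
 * below; passing 0 makes the function open and close its own CPG connection:
 * \code
 * uint32_t local_id = pcmk__cpg_local_nodeid(0);
 *
 * if (local_id == 0) {
 *     crm_warn("Local Corosync node ID is not (yet) known");
 * }
 * \endcode
 *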
* \internal * \brief Get the local Corosync node ID (via CPG) * * \param[in] handle CPG connection to use (or 0 to use new connection) * * \return Corosync ID of local node (or 0 if not known) */ uint32_t pcmk__cpg_local_nodeid(cpg_handle_t handle) { cs_error_t rc = CS_OK; int retries = 0; static uint32_t local_nodeid = 0; cpg_handle_t local_handle = handle; cpg_model_v1_data_t cpg_model_info = {CPG_MODEL_V1, NULL, NULL, NULL, 0}; int fd = -1; uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv = 0; if (local_nodeid != 0) { return local_nodeid; } if (handle == 0) { crm_trace("Creating connection"); cs_repeat(rc, retries, 5, cpg_model_initialize(&local_handle, CPG_MODEL_V1, (cpg_model_data_t *) &cpg_model_info, NULL)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); return 0; } rc = cpg_fd_get(local_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } // CPG provider run as root (at least in given user namespace)? rv = crm_ipc_is_authentic_process(fd, (uid_t) 0, (gid_t) 0, &found_pid, &found_uid, &found_gid); if (rv == 0) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); goto bail; } } if (rc == CS_OK) { retries = 0; crm_trace("Performing lookup"); cs_repeat(rc, retries, 5, cpg_local_get(local_handle, &local_nodeid)); } if (rc != CS_OK) { crm_err("Could not get local node id from the CPG API: %s (%d)", pcmk__cs_err_str(rc), rc); } bail: if (handle == 0) { crm_trace("Closing connection"); cpg_finalize(local_handle); } crm_debug("Local nodeid is %u", local_nodeid); return local_nodeid; } /*! * \internal * \brief Callback function for Corosync message queue timer * * \param[in] data CPG handle * * \return FALSE (to indicate to glib that timer should not be removed) */ static gboolean crm_cs_flush_cb(gpointer data) { cs_message_timer = 0; crm_cs_flush(data); return FALSE; } // Send no more than this many CPG messages in one flush #define CS_SEND_MAX 200 /*! * \internal * \brief Send messages in Corosync CPG message queue * * \param[in] data CPG handle */ static void crm_cs_flush(gpointer data) { unsigned int sent = 0; guint queue_len = 0; cs_error_t rc = 0; cpg_handle_t *handle = (cpg_handle_t *) data; if (*handle == 0) { crm_trace("Connection is dead"); return; } queue_len = g_list_length(cs_message_queue); if (((queue_len % 1000) == 0) && (queue_len > 1)) { crm_err("CPG queue has grown to %d", queue_len); } else if (queue_len == CS_SEND_MAX) { crm_warn("CPG queue has grown to %d", queue_len); } if (cs_message_timer != 0) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active %d", cs_message_timer); return; } while ((cs_message_queue != NULL) && (sent < CS_SEND_MAX)) { struct iovec *iov = cs_message_queue->data; rc = cpg_mcast_joined(*handle, CPG_TYPE_AGREED, iov, 1); if (rc != CS_OK) { break; } sent++; crm_trace("CPG message sent, size=%llu", (unsigned long long) iov->iov_len); cs_message_queue = g_list_remove(cs_message_queue, iov); free(iov->iov_base); free(iov); } queue_len -= sent; do_crm_log((queue_len > 5)? 
LOG_INFO : LOG_TRACE, "Sent %u CPG message%s (%d still queued): %s (rc=%d)", sent, pcmk__plural_s(sent), queue_len, pcmk__cs_err_str(rc), (int) rc); if (cs_message_queue) { uint32_t delay_ms = 100; if (rc != CS_OK) { /* Proportionally more if sending failed but cap at 1s */ delay_ms = QB_MIN(1000, CS_SEND_MAX + (10 * queue_len)); } cs_message_timer = g_timeout_add(delay_ms, crm_cs_flush_cb, data); } } /*! * \internal * \brief Dispatch function for CPG handle * * \param[in,out] user_data Cluster object * * \return 0 on success, -1 on error (per mainloop_io_t interface) */ static int pcmk_cpg_dispatch(gpointer user_data) { cs_error_t rc = CS_OK; pcmk_cluster_t *cluster = (pcmk_cluster_t *) user_data; rc = cpg_dispatch(cluster->cpg_handle, CS_DISPATCH_ONE); if (rc != CS_OK) { crm_err("Connection to the CPG API failed: %s (%d)", pcmk__cs_err_str(rc), rc); cpg_finalize(cluster->cpg_handle); cluster->cpg_handle = 0; return -1; } else if (cpg_evicted) { crm_err("Evicted from CPG membership"); return -1; } return 0; } static inline const char * ais_dest(const pcmk__cpg_host_t *host) { return (host->size > 0)? host->uname : ""; } static inline const char * msg_type2text(enum pcmk__cluster_msg type) { switch (type) { case pcmk__cluster_msg_attrd: return "attrd"; case pcmk__cluster_msg_based: return "cib"; case pcmk__cluster_msg_controld: return "crmd"; case pcmk__cluster_msg_execd: return "lrmd"; case pcmk__cluster_msg_fenced: return "stonith-ng"; default: return "unknown"; } } /*! * \internal * \brief Check whether a Corosync CPG message is valid * * \param[in] msg Corosync CPG message to check * * \return true if \p msg is valid, otherwise false */ static bool check_message_sanity(const pcmk__cpg_msg_t *msg) { int32_t payload_size = msg->header.size - sizeof(pcmk__cpg_msg_t); if (payload_size < 1) { crm_err("%sCPG message %d from %s invalid: " "Claimed size of %d bytes is too small " QB_XS " from %s[%u] to %s@%s", (msg->is_compressed? "Compressed " : ""), msg->id, ais_dest(&(msg->sender)), (int) msg->header.size, msg_type2text(msg->sender.type), msg->sender.pid, msg_type2text(msg->host.type), ais_dest(&(msg->host))); return false; } if (msg->header.error != CS_OK) { crm_err("%sCPG message %d from %s invalid: " "Sender indicated error %d " QB_XS " from %s[%u] to %s@%s", (msg->is_compressed? "Compressed " : ""), msg->id, ais_dest(&(msg->sender)), msg->header.error, msg_type2text(msg->sender.type), msg->sender.pid, msg_type2text(msg->host.type), ais_dest(&(msg->host))); return false; } if (msg_data_len(msg) != payload_size) { crm_err("%sCPG message %d from %s invalid: " "Total size %d inconsistent with payload size %d " QB_XS " from %s[%u] to %s@%s", (msg->is_compressed? 
"Compressed " : ""), msg->id, ais_dest(&(msg->sender)), (int) msg->header.size, (int) msg_data_len(msg), msg_type2text(msg->sender.type), msg->sender.pid, msg_type2text(msg->host.type), ais_dest(&(msg->host))); return false; } if (!msg->is_compressed && /* msg->size != (strlen(msg->data) + 1) would be a stronger check, * but checking the last byte or two should be quick */ (((msg->size > 1) && (msg->data[msg->size - 2] == '\0')) || (msg->data[msg->size - 1] != '\0'))) { crm_err("CPG message %d from %s invalid: " "Payload does not end at byte %llu " QB_XS " from %s[%u] to %s@%s", msg->id, ais_dest(&(msg->sender)), (unsigned long long) msg->size, msg_type2text(msg->sender.type), msg->sender.pid, msg_type2text(msg->host.type), ais_dest(&(msg->host))); return false; } crm_trace("Verified %d-byte %sCPG message %d from %s[%u]@%s to %s@%s", (int) msg->header.size, (msg->is_compressed? "compressed " : ""), msg->id, msg_type2text(msg->sender.type), msg->sender.pid, ais_dest(&(msg->sender)), msg_type2text(msg->host.type), ais_dest(&(msg->host))); return true; } /*! * \internal * \brief Extract text data from a Corosync CPG message * * \param[in] handle CPG connection (to get local node ID if not known) * \param[in] sender_id Corosync ID of node that sent message * \param[in] pid Process ID of message sender (for logging only) * \param[in,out] content CPG message * \param[out] from If not \c NULL, will be set to sender uname * (valid for the lifetime of \p content) * * \return Newly allocated string with message data * * \note The caller is responsible for freeing the return value using \c free(). */ char * pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id, uint32_t pid, void *content, const char **from) { char *data = NULL; pcmk__cpg_msg_t *msg = content; if (handle != 0) { // Do filtering and field massaging uint32_t local_nodeid = pcmk__cpg_local_nodeid(handle); const char *local_name = pcmk__cluster_local_node_name(); if ((msg->sender.id != 0) && (msg->sender.id != sender_id)) { crm_err("Nodeid mismatch from %" PRIu32 ".%" PRIu32 ": claimed nodeid=%" PRIu32, sender_id, pid, msg->sender.id); return NULL; } if ((msg->host.id != 0) && (local_nodeid != msg->host.id)) { crm_trace("Not for us: %" PRIu32" != %" PRIu32, msg->host.id, local_nodeid); return NULL; } if ((msg->host.size > 0) && !pcmk__str_eq(msg->host.uname, local_name, pcmk__str_casei)) { crm_trace("Not for us: %s != %s", msg->host.uname, local_name); return NULL; } msg->sender.id = sender_id; if (msg->sender.size == 0) { const pcmk__node_status_t *peer = pcmk__get_node(sender_id, NULL, NULL, pcmk__node_search_cluster_member); if (peer->name == NULL) { crm_err("No node name for peer with nodeid=%u", sender_id); } else { crm_notice("Fixing node name for peer with nodeid=%u", sender_id); msg->sender.size = strlen(peer->name); memset(msg->sender.uname, 0, MAX_NAME); memcpy(msg->sender.uname, peer->name, msg->sender.size); } } } crm_trace("Got new%s message (size=%d, %d, %d)", msg->is_compressed ? 
" compressed" : "", msg_data_len(msg), msg->size, msg->compressed_size); if (from != NULL) { *from = msg->sender.uname; } if (msg->is_compressed && (msg->size > 0)) { int rc = BZ_OK; char *uncompressed = NULL; unsigned int new_size = msg->size + 1; if (!check_message_sanity(msg)) { goto badmsg; } crm_trace("Decompressing message data"); uncompressed = pcmk__assert_alloc(1, new_size); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &new_size, msg->data, msg->compressed_size, 1, 0); rc = pcmk__bzlib2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Decompression failed: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); free(uncompressed); goto badmsg; } CRM_ASSERT(new_size == msg->size); data = uncompressed; } else if (!check_message_sanity(msg)) { goto badmsg; } else { data = strdup(msg->data); } // Is this necessary? pcmk__get_node(msg->sender.id, msg->sender.uname, NULL, pcmk__node_search_cluster_member); crm_trace("Payload: %.200s", data); return data; badmsg: crm_err("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" " min=%d, total=%d, size=%d, bz2_size=%d", msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, (int)sizeof(pcmk__cpg_msg_t), msg->header.size, msg->size, msg->compressed_size); free(data); return NULL; } /*! * \internal * \brief Compare cpg_address objects by node ID * * \param[in] first First cpg_address structure to compare * \param[in] second Second cpg_address structure to compare * * \return Negative number if first's node ID is lower, * positive number if first's node ID is greater, * or 0 if both node IDs are equal */ static int cmp_member_list_nodeid(const void *first, const void *second) { const struct cpg_address *const a = *((const struct cpg_address **) first), *const b = *((const struct cpg_address **) second); if (a->nodeid < b->nodeid) { return -1; } else if (a->nodeid > b->nodeid) { return 1; } /* don't bother with "reason" nor "pid" */ return 0; } /*! * \internal * \brief Get a readable string equivalent of a cpg_reason_t value * * \param[in] reason CPG reason value * * \return Readable string suitable for logging */ static const char * cpgreason2str(cpg_reason_t reason) { switch (reason) { case CPG_REASON_JOIN: return " via cpg_join"; case CPG_REASON_LEAVE: return " via cpg_leave"; case CPG_REASON_NODEDOWN: return " via cluster exit"; case CPG_REASON_NODEUP: return " via cluster join"; case CPG_REASON_PROCDOWN: return " for unknown reason"; default: break; } return ""; } /*! * \internal * \brief Get a log-friendly node name * * \param[in] peer Node to check * * \return Node's uname, or readable string if not known */ static inline const char * peer_name(const pcmk__node_status_t *peer) { return (peer != NULL)? pcmk__s(peer->name, "peer node") : "unknown node"; } /*! 
* \internal * \brief Process a CPG peer's leaving the cluster * * \param[in] cpg_group_name CPG group name (for logging) * \param[in] event_counter Event number (for logging) * \param[in] local_nodeid Node ID of local node * \param[in] cpg_peer CPG peer that left * \param[in] sorted_member_list List of remaining members, qsort()-ed by ID * \param[in] member_list_entries Number of entries in \p sorted_member_list */ static void node_left(const char *cpg_group_name, int event_counter, uint32_t local_nodeid, const struct cpg_address *cpg_peer, const struct cpg_address **sorted_member_list, size_t member_list_entries) { pcmk__node_status_t *peer = pcmk__search_node_caches(cpg_peer->nodeid, NULL, pcmk__node_search_cluster_member); const struct cpg_address **rival = NULL; /* Most CPG-related Pacemaker code assumes that only one process on a node * can be in the process group, but Corosync does not impose this * limitation, and more than one can be a member in practice due to a * daemon attempting to start while another instance is already running. * * Check for any such duplicate instances, because we don't want to process * their leaving as if our actual peer left. If the peer that left still has * an entry in sorted_member_list (with a different PID), we will ignore the * leaving. * * @TODO Track CPG members' PIDs so we can tell exactly who left. */ if (peer != NULL) { rival = bsearch(&cpg_peer, sorted_member_list, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); } if (rival == NULL) { crm_info("Group %s event %d: %s (node %u pid %u) left%s", cpg_group_name, event_counter, peer_name(peer), cpg_peer->nodeid, cpg_peer->pid, cpgreason2str(cpg_peer->reason)); if (peer != NULL) { crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_OFFLINE); } } else if (cpg_peer->nodeid == local_nodeid) { crm_warn("Group %s event %d: duplicate local pid %u left%s", cpg_group_name, event_counter, cpg_peer->pid, cpgreason2str(cpg_peer->reason)); } else { crm_warn("Group %s event %d: " "%s (node %u) duplicate pid %u left%s (%u remains)", cpg_group_name, event_counter, peer_name(peer), cpg_peer->nodeid, cpg_peer->pid, cpgreason2str(cpg_peer->reason), (*rival)->pid); } } /*! * \internal * \brief Handle a CPG configuration change event * * \param[in] handle CPG connection * \param[in] group_name CPG group name * \param[in] member_list List of current CPG members * \param[in] member_list_entries Number of entries in \p member_list * \param[in] left_list List of CPG members that left * \param[in] left_list_entries Number of entries in \p left_list * \param[in] joined_list List of CPG members that joined * \param[in] joined_list_entries Number of entries in \p joined_list * * \note This is of type \c cpg_confchg_fn_t, intended to be used in a * \c cpg_callbacks_t object. 
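 *
 * \par
 * Illustrative sketch (not part of this patch) of registering this callback
 * before connecting; \c my_deliver_fn is a made-up deliver callback, and
 * error handling is omitted:
 * \code
 * pcmk_cluster_t *cluster = pcmk_cluster_new();
 *
 * pcmk_cpg_set_deliver_fn(cluster, my_deliver_fn);
 * pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb);
 * pcmk_cluster_connect(cluster);  // assumes the public connect entry point
 * \endcode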
*/ void pcmk__cpg_confchg_cb(cpg_handle_t handle, const struct cpg_name *group_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { static int counter = 0; bool found = false; uint32_t local_nodeid = pcmk__cpg_local_nodeid(handle); const struct cpg_address **sorted = NULL; sorted = pcmk__assert_alloc(member_list_entries, sizeof(const struct cpg_address *)); for (size_t iter = 0; iter < member_list_entries; iter++) { sorted[iter] = member_list + iter; } // So that the cross-matching of multiply-subscribed nodes is then cheap qsort(sorted, member_list_entries, sizeof(const struct cpg_address *), cmp_member_list_nodeid); for (int i = 0; i < left_list_entries; i++) { node_left(group_name->value, counter, local_nodeid, &left_list[i], sorted, member_list_entries); } free(sorted); sorted = NULL; for (int i = 0; i < joined_list_entries; i++) { crm_info("Group %s event %d: node %u pid %u joined%s", group_name->value, counter, joined_list[i].nodeid, joined_list[i].pid, cpgreason2str(joined_list[i].reason)); } for (int i = 0; i < member_list_entries; i++) { pcmk__node_status_t *peer = pcmk__get_node(member_list[i].nodeid, NULL, NULL, pcmk__node_search_cluster_member); if (member_list[i].nodeid == local_nodeid && member_list[i].pid != getpid()) { // See the note in node_left() crm_warn("Group %s event %d: detected duplicate local pid %u", group_name->value, counter, member_list[i].pid); continue; } crm_info("Group %s event %d: %s (node %u pid %u) is member", group_name->value, counter, peer_name(peer), member_list[i].nodeid, member_list[i].pid); /* If the caller left auto-reaping enabled, this will also update the * state to member. */ peer = crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE); if (peer && peer->state && strcmp(peer->state, CRM_NODE_MEMBER)) { /* The node is a CPG member, but we currently think it's not a * cluster member. This is possible only if auto-reaping was * disabled. The node may be joining, and we happened to get the CPG * notification before the quorum notification; or the node may have * just died, and we are processing its final messages; or a bug * has affected the peer cache. */ time_t now = time(NULL); if (peer->when_lost == 0) { // Track when we first got into this contradictory state peer->when_lost = now; } else if (now > (peer->when_lost + 60)) { // If it persists for more than a minute, update the state crm_warn("Node %u is member of group %s but was believed " "offline", member_list[i].nodeid, group_name->value); pcmk__update_peer_state(__func__, peer, CRM_NODE_MEMBER, 0); } } if (local_nodeid == member_list[i].nodeid) { found = true; } } if (!found) { crm_err("Local node was evicted from group %s", group_name->value); cpg_evicted = true; } counter++; } /*! * \brief Set the CPG deliver callback function for a cluster object * * \param[in,out] cluster Cluster object * \param[in] fn Deliver callback function to set * * \return Standard Pacemaker return code */ int pcmk_cpg_set_deliver_fn(pcmk_cluster_t *cluster, cpg_deliver_fn_t fn) { if (cluster == NULL) { return EINVAL; } cluster->cpg.cpg_deliver_fn = fn; return pcmk_rc_ok; } /*! 
* \brief Set the CPG config change callback function for a cluster object * * \param[in,out] cluster Cluster object * \param[in] fn Configuration change callback function to set * * \return Standard Pacemaker return code */ int pcmk_cpg_set_confchg_fn(pcmk_cluster_t *cluster, cpg_confchg_fn_t fn) { if (cluster == NULL) { return EINVAL; } cluster->cpg.cpg_confchg_fn = fn; return pcmk_rc_ok; } /*! * \brief Connect to Corosync CPG * * \param[in,out] cluster Initialized cluster object to connect * * \return Standard Pacemaker return code */ int pcmk__cpg_connect(pcmk_cluster_t *cluster) { cs_error_t rc; int fd = -1; int retries = 0; uint32_t id = 0; pcmk__node_status_t *peer = NULL; cpg_handle_t handle = 0; const char *message_name = pcmk__message_name(crm_system_name); uid_t found_uid = 0; gid_t found_gid = 0; pid_t found_pid = 0; int rv; struct mainloop_fd_callbacks cpg_fd_callbacks = { .dispatch = pcmk_cpg_dispatch, .destroy = cluster->destroy, }; cpg_model_v1_data_t cpg_model_info = { .model = CPG_MODEL_V1, .cpg_deliver_fn = cluster->cpg.cpg_deliver_fn, .cpg_confchg_fn = cluster->cpg.cpg_confchg_fn, .cpg_totem_confchg_fn = NULL, .flags = 0, }; cpg_evicted = false; cluster->group.length = 0; cluster->group.value[0] = 0; /* group.value is char[128] */ strncpy(cluster->group.value, message_name, 127); cluster->group.value[127] = 0; cluster->group.length = 1 + QB_MIN(127, strlen(cluster->group.value)); cs_repeat(rc, retries, 30, cpg_model_initialize(&handle, CPG_MODEL_V1, (cpg_model_data_t *)&cpg_model_info, NULL)); if (rc != CS_OK) { crm_err("Could not connect to the CPG API: %s (%d)", cs_strerror(rc), rc); goto bail; } rc = cpg_fd_get(handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the CPG API connection: %s (%d)", cs_strerror(rc), rc); goto bail; } /* CPG provider run as root (in given user namespace, anyway)? */ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, &found_uid, &found_gid))) { crm_err("CPG provider is not authentic:" " process %lld (uid: %lld, gid: %lld)", (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = CS_ERR_ACCESS; goto bail; } else if (rv < 0) { crm_err("Could not verify authenticity of CPG provider: %s (%d)", strerror(-rv), -rv); rc = CS_ERR_ACCESS; goto bail; } id = pcmk__cpg_local_nodeid(handle); if (id == 0) { crm_err("Could not get local node id from the CPG API"); goto bail; } - cluster->nodeid = id; + cluster->priv->node_id = id; retries = 0; cs_repeat(rc, retries, 30, cpg_join(handle, &cluster->group)); if (rc != CS_OK) { crm_err("Could not join the CPG group '%s': %d", message_name, rc); goto bail; } pcmk_cpg_handle = handle; cluster->cpg_handle = handle; mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks); bail: if (rc != CS_OK) { cpg_finalize(handle); // @TODO Map rc to more specific Pacemaker return code return ENOTCONN; } peer = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster_member); crm_update_peer_proc(__func__, peer, crm_proc_cpg, PCMK_VALUE_ONLINE); return pcmk_rc_ok; } /*! * \internal * \brief Disconnect from Corosync CPG * * \param[in,out] cluster Cluster object to disconnect */ void pcmk__cpg_disconnect(pcmk_cluster_t *cluster) { pcmk_cpg_handle = 0; if (cluster->cpg_handle != 0) { crm_trace("Disconnecting CPG"); cpg_leave(cluster->cpg_handle, &cluster->group); cpg_finalize(cluster->cpg_handle); cluster->cpg_handle = 0; } else { crm_info("No CPG connection"); } } /*! 
* \internal * \brief Send string data via Corosync CPG * * \param[in] data Data to send * \param[in] node Cluster node to send message to * \param[in] dest Type of message to send * * \return \c true on success, or \c false otherwise */ static bool send_cpg_text(const char *data, const pcmk__node_status_t *node, enum pcmk__cluster_msg dest) { static int msg_id = 0; static int local_pid = 0; static int local_name_len = 0; static const char *local_name = NULL; char *target = NULL; struct iovec *iov; pcmk__cpg_msg_t *msg = NULL; if (local_name == NULL) { local_name = pcmk__cluster_local_node_name(); } if ((local_name_len == 0) && (local_name != NULL)) { local_name_len = strlen(local_name); } if (data == NULL) { data = ""; } if (local_pid == 0) { local_pid = getpid(); } msg = pcmk__assert_alloc(1, sizeof(pcmk__cpg_msg_t)); msg_id++; msg->id = msg_id; msg->header.error = CS_OK; msg->host.type = dest; if (node != NULL) { if (node->name != NULL) { target = pcmk__str_copy(node->name); msg->host.size = strlen(node->name); memset(msg->host.uname, 0, MAX_NAME); memcpy(msg->host.uname, node->name, msg->host.size); } else { target = crm_strdup_printf("%" PRIu32, node->cluster_layer_id); } msg->host.id = node->cluster_layer_id; } else { target = pcmk__str_copy("all"); } msg->sender.id = 0; msg->sender.type = pcmk__cluster_parse_msg_type(crm_system_name); msg->sender.pid = local_pid; msg->sender.size = local_name_len; memset(msg->sender.uname, 0, MAX_NAME); if ((local_name != NULL) && (msg->sender.size != 0)) { memcpy(msg->sender.uname, local_name, msg->sender.size); } msg->size = 1 + strlen(data); msg->header.size = sizeof(pcmk__cpg_msg_t) + msg->size; if (msg->size < CRM_BZ2_THRESHOLD) { msg = pcmk__realloc(msg, msg->header.size); memcpy(msg->data, data, msg->size); } else { char *compressed = NULL; unsigned int new_size = 0; if (pcmk__compress(data, (unsigned int) msg->size, 0, &compressed, &new_size) == pcmk_rc_ok) { msg->header.size = sizeof(pcmk__cpg_msg_t) + new_size; msg = pcmk__realloc(msg, msg->header.size); memcpy(msg->data, compressed, new_size); msg->is_compressed = TRUE; msg->compressed_size = new_size; } else { // cppcheck seems not to understand the abort logic in pcmk__realloc // cppcheck-suppress memleak msg = pcmk__realloc(msg, msg->header.size); memcpy(msg->data, data, msg->size); } free(compressed); } iov = pcmk__assert_alloc(1, sizeof(struct iovec)); iov->iov_base = msg; iov->iov_len = msg->header.size; if (msg->compressed_size > 0) { crm_trace("Queueing CPG message %u to %s " "(%llu bytes, %d bytes compressed payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->compressed_size, data); } else { crm_trace("Queueing CPG message %u to %s " "(%llu bytes, %d bytes payload): %.200s", msg->id, target, (unsigned long long) iov->iov_len, msg->size, data); } free(target); cs_message_queue = g_list_append(cs_message_queue, iov); crm_cs_flush(&pcmk_cpg_handle); return true; } /*! * \internal * \brief Send an XML message via Corosync CPG * * \param[in] msg XML message to send * \param[in] node Cluster node to send message to * \param[in] dest Type of message to send * * \return TRUE on success, otherwise FALSE */ bool pcmk__cpg_send_xml(const xmlNode *msg, const pcmk__node_status_t *node, enum pcmk__cluster_msg dest) { bool rc = true; GString *data = g_string_sized_new(1024); pcmk__xml_string(msg, 0, data, 0); rc = send_cpg_text(data->str, node, dest); g_string_free(data, TRUE); return rc; }
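/* Illustrative sketch (not part of this patch): broadcast a small XML message
 * to the controller on all nodes using the helpers above.  It assumes the
 * cluster object was already connected (for example with
 * pcmk_cluster_connect()) so that a CPG connection exists; the element and
 * attribute names are made up.
 */
static void
example_broadcast_hello(void)
{
    xmlNode *hello = pcmk__xe_create(NULL, "hello");

    crm_xml_add(hello, "origin", "example");

    // A NULL node means "send to all nodes" (see send_cpg_text() above)
    if (!pcmk__cpg_send_xml(hello, NULL, pcmk__cluster_msg_controld)) {
        crm_err("Could not queue example CPG message");
    }

    pcmk__xml_free(hello);  // assumes this tree's XML free helper
}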