diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
index 88f7ac1489..b716440701 100644
--- a/daemons/controld/controld_schedulerd.c
+++ b/daemons/controld/controld_schedulerd.c
@@ -1,518 +1,518 @@
/*
 * Copyright 2004-2025 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU General Public License version 2
 * or later (GPLv2+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>
#include <unistd.h>  /* pid_t, sleep, ssize_t */

#include
#include
#include
#include
#include
#include
#include
#include <libxml/xpath.h>   // xmlXPathObject, etc.

#include

static void handle_disconnect(void);

static pcmk_ipc_api_t *schedulerd_api = NULL;

/*!
 * \internal
 * \brief Close any scheduler connection and free associated memory
 */
void
controld_shutdown_schedulerd_ipc(void)
{
    controld_clear_fsa_input_flags(R_PE_REQUIRED);
    pcmk_disconnect_ipc(schedulerd_api);
    handle_disconnect();

    pcmk_free_ipc_api(schedulerd_api);
    schedulerd_api = NULL;
}

/*!
 * \internal
 * \brief Save CIB query result to file, raising FSA error
 *
 * \param[in] msg        Ignored
 * \param[in] call_id    Call ID of CIB query
 * \param[in] rc         Return code of CIB query
 * \param[in] output     Result of CIB query
 * \param[in] user_data  Unique identifier for filename
 *
 * \note This is intended to be called after a scheduler connection fails.
 */
static void
save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
                  void *user_data)
{
    const char *id = user_data;

    register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
    CRM_CHECK(id != NULL, return);

    if (rc == pcmk_ok) {
        char *filename = crm_strdup_printf(PCMK_SCHEDULER_INPUT_DIR
                                           "/pe-core-%s.bz2", id);

        if (pcmk__xml_write_file(output, filename, true) != pcmk_rc_ok) {
            crm_err("Could not save Cluster Information Base to %s after scheduler crash",
                    filename);
        } else {
            crm_notice("Saved Cluster Information Base to %s after scheduler crash",
                       filename);
        }
        free(filename);
    }
}

/*!
 * \internal
 * \brief Respond to scheduler connection failure
 */
static void
handle_disconnect(void)
{
    // If we aren't connected to the scheduler, we can't expect a reply
    controld_expect_sched_reply(NULL);

    if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
        int rc = pcmk_ok;
        char *uuid_str = crm_generate_uuid();

        crm_crit("Lost connection to the scheduler "
                 QB_XS " CIB will be saved to " PCMK_SCHEDULER_INPUT_DIR
                 "/pe-core-%s.bz2", uuid_str);

        /*
         * The scheduler died...
         *
         * Save the current CIB so that we have a chance of
         * figuring out what killed it.
         *
         * Delay raising the I_ERROR until the query below completes or
         * 5s is up, whichever comes first.
         *
         */
        rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
                                                    NULL, NULL, cib_none);
        fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
    }

    controld_clear_fsa_input_flags(R_PE_CONNECTED);
    controld_trigger_fsa();
    return;
}

static void
handle_reply(pcmk_schedulerd_api_reply_t *reply)
{
    const char *msg_ref = NULL;

    if (!AM_I_DC) {
        return;
    }

    msg_ref = reply->data.graph.reference;

    if (msg_ref == NULL) {
        crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);

    } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
                            pcmk__str_none)) {
        ha_msg_input_t fsa_input;
        xmlNode *crm_data_node;

        controld_stop_sched_timer();

        /* do_te_invoke (which will eventually process the fsa_input we are
         * constructing here) requires that fsa_input.xml be non-NULL.
         * That will only happen if copy_ha_msg_input (which is called by
         * register_fsa_input_adv) sees the fsa_input.msg that it is expecting.
         * The scheduler's IPC dispatch function gave us the values we need, we
         * just need to put them into XML.
         *
         * The name of the top level element here is irrelevant. Nothing checks
         * it.
         */
        fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply");
        crm_xml_add(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref);
        crm_xml_add(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN,
                    reply->data.graph.input);

        crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML);
        pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph);
        register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);

        pcmk__xml_free(fsa_input.msg);

    } else {
        crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
    }
}

static void
scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
                         crm_exit_t status, void *event_data, void *user_data)
{
    pcmk_schedulerd_api_reply_t *reply = event_data;

    switch (event_type) {
        case pcmk_ipc_event_disconnect:
            handle_disconnect();
            break;

        case pcmk_ipc_event_reply:
            handle_reply(reply);
            break;

        default:
            break;
    }
}

static bool
new_schedulerd_ipc_connection(void)
{
    int rc;

    controld_set_fsa_input_flags(R_PE_REQUIRED);

    if (schedulerd_api == NULL) {
        rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);

        if (rc != pcmk_rc_ok) {
            crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
            return false;
        }
    }

    pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);

-    rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3);
+    rc = pcmk__connect_ipc_retry_conrefused(schedulerd_api, pcmk_ipc_dispatch_main, 3);
    if (rc != pcmk_rc_ok) {
        crm_err("Error connecting to %s: %s",
                pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
        return false;
    }

    controld_set_fsa_input_flags(R_PE_CONNECTED);
    return true;
}

static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
                                  xmlNode *output, void *user_data);

/* A_PE_START, A_PE_STOP, O_PE_RESTART */
void
do_pe_control(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state,
              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    if (pcmk_is_set(action, A_PE_STOP)) {
        controld_clear_fsa_input_flags(R_PE_REQUIRED);
        pcmk_disconnect_ipc(schedulerd_api);
        handle_disconnect();
    }
    if (pcmk_is_set(action, A_PE_START)
        && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {

        if (cur_state == S_STOPPING) {
            crm_info("Ignoring request to connect to scheduler while shutting down");

        } else if (!new_schedulerd_ipc_connection()) {
            crm_warn("Could not connect to scheduler");
            register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
        }
    }
}

static int fsa_pe_query = 0;
static mainloop_timer_t *controld_sched_timer = NULL;

// @TODO Make this a configurable cluster option if there's demand for it
#define SCHED_TIMEOUT_MS (120000)

/*!
 * \internal
 * \brief Handle a timeout waiting for scheduler reply
 *
 * \param[in] user_data  Ignored
 *
 * \return FALSE (indicating that timer should not be restarted)
 */
static gboolean
controld_sched_timeout(gpointer user_data)
{
    if (AM_I_DC) {
        /* If this node is the DC but can't communicate with the scheduler, just
         * exit (and likely get fenced) so this node doesn't interfere with any
         * further DC elections.
         *
         * @TODO We could try something less drastic first, like disconnecting
         * and reconnecting to the scheduler, but something is likely going
         * seriously wrong, so perhaps it's better to just fail as quickly as
         * possible.
         */
        crmd_exit(CRM_EX_FATAL);
    }
    return FALSE;
}

void
controld_stop_sched_timer(void)
{
    if ((controld_sched_timer != NULL)
        && (controld_globals.fsa_pe_ref != NULL)) {
        crm_trace("Stopping timer for scheduler reply %s",
                  controld_globals.fsa_pe_ref);
    }
    mainloop_timer_stop(controld_sched_timer);
}

/*!
 * \internal
 * \brief Set the scheduler request currently being waited on
 *
 * \param[in] ref  Request to expect reply to (or NULL for none)
 *
 * \note This function takes ownership of \p ref.
 */
void
controld_expect_sched_reply(char *ref)
{
    if (ref) {
        if (controld_sched_timer == NULL) {
            controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
                                                      SCHED_TIMEOUT_MS, FALSE,
                                                      controld_sched_timeout,
                                                      NULL);
        }
        mainloop_timer_start(controld_sched_timer);
    } else {
        controld_stop_sched_timer();
    }
    free(controld_globals.fsa_pe_ref);
    controld_globals.fsa_pe_ref = ref;
}

/*!
 * \internal
 * \brief Free the scheduler reply timer
 */
void
controld_free_sched_timer(void)
{
    if (controld_sched_timer != NULL) {
        mainloop_timer_del(controld_sched_timer);
        controld_sched_timer = NULL;
    }
}

/* A_PE_INVOKE */
void
do_pe_invoke(long long action,
             enum crmd_fsa_cause cause,
             enum crmd_fsa_state cur_state,
             enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    cib_t *cib_conn = controld_globals.cib_conn;

    if (AM_I_DC == FALSE) {
        crm_err("Not invoking scheduler because not DC: %s",
                fsa_action2string(action));
        return;
    }

    if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
        if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
            crm_err("Cannot shut down gracefully without the scheduler");
            register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);

        } else {
            crm_info("Waiting for the scheduler to connect");
            crmd_fsa_stall(FALSE);
            controld_set_fsa_action_flags(A_PE_START);
            controld_trigger_fsa();
        }
        return;
    }

    if (cur_state != S_POLICY_ENGINE) {
        crm_notice("Not invoking scheduler because in state %s",
                   fsa_state2string(cur_state));
        return;
    }
    if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
        crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");

        /* start the join from scratch */
        register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
        return;
    }

    fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_none);

    crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
              fsa_state2string(controld_globals.fsa_state));

    controld_expect_sched_reply(NULL);
    fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
}

static void
force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
{
    int max = 0;
    int lpc = 0;
    const char *xpath_base = NULL;
    char *xpath_string = NULL;
    xmlXPathObject *xpathObj = NULL;

    xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG);
    if (xpath_base == NULL) {
        crm_err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)");
        return;
    }

    xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
                                     xpath_base, PCMK_XE_CLUSTER_PROPERTY_SET,
                                     attr_name);
    xpathObj = pcmk__xpath_search(xml->doc, xpath_string);
    max = pcmk__xpath_num_results(xpathObj);
    free(xpath_string);

    for (lpc = 0; lpc < max; lpc++) {
        xmlNode *match = pcmk__xpath_result(xpathObj, lpc);

        if (match == NULL) {
            continue;
        }
        crm_trace("Forcing %s/%s = %s", pcmk__xe_id(match), attr_name,
                  attr_value);
        crm_xml_add(match, PCMK_XA_VALUE, attr_value);
    }

    if (max == 0) {
        xmlNode *configuration = NULL;
        xmlNode *crm_config = NULL;
        xmlNode *cluster_property_set = NULL;

        crm_trace("Creating %s-%s for %s=%s",
                  PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name,
                  attr_name, attr_value);

        configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL,
                                             NULL);
        if (configuration == NULL) {
            configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION);
        }

        crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG,
                                          NULL, NULL);
        if (crm_config == NULL) {
            crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG);
        }

        cluster_property_set =
            pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET,
                                 NULL, NULL);
        if (cluster_property_set == NULL) {
            cluster_property_set =
                pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET);
            crm_xml_add(cluster_property_set, PCMK_XA_ID,
                        PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS);
        }

        xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR);

        pcmk__xe_set_id(xml, "%s-%s", PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS,
                        attr_name);
        crm_xml_add(xml, PCMK_XA_NAME, attr_name);
        crm_xml_add(xml, PCMK_XA_VALUE, attr_value);
    }
    xmlXPathFreeObject(xpathObj);
}

static void
do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output,
                      void *user_data)
{
    char *ref = NULL;
    pid_t watchdog = pcmk__locate_sbd();

    if (rc != pcmk_ok) {
        crm_err("Could not retrieve the Cluster Information Base: %s "
                QB_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
        return;

    } else if (call_id != fsa_pe_query) {
        crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id,
                  fsa_pe_query);
        return;

    } else if (!AM_I_DC
               || !pcmk_is_set(controld_globals.fsa_input_register,
                               R_PE_CONNECTED)) {
        crm_debug("No need to invoke the scheduler anymore");
        return;

    } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
        crm_debug("Discarding scheduler request in state: %s",
                  fsa_state2string(controld_globals.fsa_state));
        return;

    /* this callback counts as 1 */
    } else if (num_cib_op_callbacks() > 1) {
        crm_debug("Re-asking for the CIB: %d other peer updates still pending",
                  (num_cib_op_callbacks() - 1));
        sleep(1);
        controld_set_fsa_action_flags(A_PE_INVOKE);
        controld_trigger_fsa();
        return;
    }

    CRM_LOG_ASSERT(output != NULL);

    /* Refresh the remote node cache and the known node cache when the
     * scheduler is invoked */
    pcmk__refresh_node_caches_from_cib(output);

    crm_xml_add(output, PCMK_XA_DC_UUID, controld_globals.our_uuid);
    pcmk__xe_set_bool_attr(output, PCMK_XA_HAVE_QUORUM,
                           pcmk_is_set(controld_globals.flags,
                                       controld_has_quorum));

    force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));

    if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
        && !pcmk__cluster_has_quorum()) {
        crm_xml_add_int(output, PCMK_XA_NO_QUORUM_PANIC, 1);
    }

    rc = pcmk_schedulerd_api_graph(schedulerd_api, output, &ref);

    if (rc != pcmk_rc_ok) {
        free(ref);
        crm_err("Could not contact the scheduler: %s " QB_XS " rc=%d",
                pcmk_rc_str(rc), rc);
        register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
    } else {
        pcmk__assert(ref != NULL);
        controld_expect_sched_reply(ref);
        crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
                  "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref,
                  controld_globals.peer_seq,
                  pcmk__flag_text(controld_globals.flags,
                                  controld_has_quorum));
    }
}
diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h
index 108210cb78..942a7bbda3 100644
--- a/include/crm/common/ipc_internal.h
+++ b/include/crm/common/ipc_internal.h
@@ -1,288 +1,290 @@
/*
 * Copyright 2013-2025 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#ifndef PCMK__CRM_COMMON_IPC_INTERNAL__H
#define PCMK__CRM_COMMON_IPC_INTERNAL__H

#include <stdbool.h>            // bool
#include <stdint.h>             // uint32_t, uint64_t, UINT64_C()
#include <sys/uio.h>            // struct iovec
#include <sys/types.h>          // uid_t, gid_t, pid_t, size_t

#include <gnutls/gnutls.h>      // gnutls_session_t

#include <glib.h>               // guint, gpointer, GQueue, ...
#include <libxml/tree.h>        // xmlNode
#include <qb/qbipcs.h>          // qb_ipcs_connection_t, ...

#include <crm_config.h>         // HAVE_GETPEEREID
#include                        // crm_system_name
#include
#include <crm/common/ipc_controld.h>    // pcmk_controld_api_reply
#include <crm/common/ipc_pacemakerd.h>  // pcmk_pacemakerd_{api_reply,state}
#include <crm/common/mainloop.h>        // mainloop_io_t

#ifdef __cplusplus
extern "C" {
#endif

/* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that
   require a delicate handling anyway (socket-based activation with systemd);
   we can be reasonably sure that this PID is never possessed by the actual
   child daemon, as it gets taken either by the proper init, or by pacemakerd
   itself (i.e. this precludes anything else); note that value of zero
   is meant to carry "unset" meaning, and better not to bet on/conditionalize
   over signedness of pid_t */
#define PCMK__SPECIAL_PID  1

// Timeout (in seconds) to use for IPC client sends, reply waits, etc.
#define PCMK__IPC_TIMEOUT 120

#if defined(HAVE_GETPEEREID)
/* on FreeBSD, we don't want to expose "non-yieldable PID" (leading to
   "IPC liveness check only") as its nominal representation, which could
   cause confusion -- this is unambiguous as long as there's no
   socket-based activation like with systemd (very improbable) */
#define PCMK__SPECIAL_PID_AS_0(p)  (((p) == PCMK__SPECIAL_PID) ? 0 : (p))
#else
#define PCMK__SPECIAL_PID_AS_0(p)  (p)
#endif

/*!
 * \internal
 * \brief Check the authenticity and liveness of the process via IPC end-point
 *
 * When an IPC daemon is detected under the given IPC end-point (name), its
 * authenticity is verified by comparing its credentials against the provided
 * referential UID and GID, and the result of this check can be deduced from
 * the return value. As an exception, a referential UID of 0 (root) matches
 * any detected daemon's credentials.
 *
 * \param[in]  name    IPC name to base the search on
 * \param[in]  refuid  referential UID to check against
 * \param[in]  refgid  referential GID to check against
 * \param[out] gotpid  to optionally store obtained PID of the found process
 *                     upon returning 1 or -2
 *                     (not available on FreeBSD, special value of 1,
 *                     see PCMK__SPECIAL_PID, used instead, and the caller
 *                     is required to special case this value respectively)
 *
 * \return Standard Pacemaker return code
 *
 * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive
 *       indicating that no trace of IPC liveness was detected, and
 *       pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked
 *       by an unauthorized process.
 * \note This function emits a log message for return codes other than
 *       pcmk_rc_ok and pcmk_rc_ipc_unresponsive, and when there isn't a
 *       perfect match in respect to \p refuid and/or \p refgid, for a
 *       possible least privilege principle violation.
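 * \note A hypothetical invocation (the IPC name here is illustrative):
 *           pid_t found = 0;
 *           int rc = pcmk__ipc_is_authentic_process_active("some-ipc-name",
 *                                                          0, 0, &found);
 *       A referential UID of 0 accepts any daemon's credentials, as noted
 *       above.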
 *
 * \see crm_ipc_is_authentic_process
 */
int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
                                          gid_t refgid, pid_t *gotpid);

int pcmk__connect_generic_ipc(crm_ipc_t *ipc);
int pcmk__ipc_fd(crm_ipc_t *ipc, int *fd);
int pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type,
                      int attempts);
-
+int pcmk__connect_ipc_retry_conrefused(pcmk_ipc_api_t *api,
+                                       enum pcmk_ipc_dispatch dispatch_type,
+                                       int attempts);

/*
 * Server-related
 */

typedef struct pcmk__client_s pcmk__client_t;

struct pcmk__remote_s {
    /* Shared */
    char *buffer;
    size_t buffer_size;
    size_t buffer_offset;
    int auth_timeout;
    int tcp_socket;
    mainloop_io_t *source;
    time_t uptime;
    char *start_state;

    /* CIB-only */
    char *token;

    /* TLS only */
    // Must be created by pcmk__new_tls_session()
    gnutls_session_t tls_session;
};

enum pcmk__client_flags {
    // Lower 32 bits are reserved for server (not library) use

    // Next 8 bits are reserved for client type (sort of a cheap enum)

    //! Client uses plain IPC
    pcmk__client_ipc                    = (UINT64_C(1) << 32),

    //! Client uses TCP connection
    pcmk__client_tcp                    = (UINT64_C(1) << 33),

    //! Client uses TCP with TLS
    pcmk__client_tls                    = (UINT64_C(1) << 34),

    // The rest are client attributes

    //! Client IPC is proxied
    pcmk__client_proxied                = (UINT64_C(1) << 40),

    //! Client is run by root or cluster user
    pcmk__client_privileged             = (UINT64_C(1) << 41),

    //! Local client to be proxied
    pcmk__client_to_proxy               = (UINT64_C(1) << 42),

    /*!
     * \brief Client IPC connection accepted
     *
     * Used only for remote CIB connections via \c PCMK_XA_REMOTE_TLS_PORT.
     */
    pcmk__client_authenticated          = (UINT64_C(1) << 43),

    //! Client TLS handshake is complete
    pcmk__client_tls_handshake_complete = (UINT64_C(1) << 44),
};

#define PCMK__CLIENT_TYPE(client) ((client)->flags & UINT64_C(0xff00000000))
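
/* Illustrative check (not from this patch): PCMK__CLIENT_TYPE() masks the
 * type bits (32-39), so the transport can be tested independently of
 * attribute flags such as pcmk__client_privileged:
 *
 *     if (PCMK__CLIENT_TYPE(client) == pcmk__client_tls) {
 *         // connection arrived over TCP with TLS
 *     }
 */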

struct pcmk__client_s {
    unsigned int pid;

    char *id;
    char *name;
    char *user;
    uint64_t flags;             // Group of pcmk__client_flags

    int request_id;
    void *userdata;

    int event_timer;
    GQueue *event_queue;

    /* Depending on the client type, only some of the following will be
     * populated/valid. @TODO Maybe convert to a union.
     */

    qb_ipcs_connection_t *ipcs;         /* IPC */

    struct pcmk__remote_s *remote;      /* TCP/TLS */

    unsigned int queue_backlog; /* IPC queue length after last flush */
    unsigned int queue_max;     /* Evict client whose queue grows this big */
};

#define pcmk__set_client_flags(client, flags_to_set) do {                   \
        (client)->flags = pcmk__set_flags_as(__func__, __LINE__,            \
            LOG_TRACE,                                                      \
            "Client", pcmk__client_name(client),                            \
            (client)->flags, (flags_to_set), #flags_to_set);                \
    } while (0)

#define pcmk__clear_client_flags(client, flags_to_clear) do {               \
        (client)->flags = pcmk__clear_flags_as(__func__, __LINE__,          \
            LOG_TRACE,                                                      \
            "Client", pcmk__client_name(client),                            \
            (client)->flags, (flags_to_clear), #flags_to_clear);            \
    } while (0)

#define pcmk__set_ipc_flags(ipc_flags, ipc_name, flags_to_set) do {         \
        ipc_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,       \
                                       "IPC", (ipc_name),                   \
                                       (ipc_flags), (flags_to_set),         \
                                       #flags_to_set);                      \
    } while (0)

#define pcmk__clear_ipc_flags(ipc_flags, ipc_name, flags_to_clear) do {     \
        ipc_flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,     \
                                         "IPC", (ipc_name),                 \
                                         (ipc_flags), (flags_to_clear),     \
                                         #flags_to_clear);                  \
    } while (0)

guint pcmk__ipc_client_count(void);
void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data);

void pcmk__client_cleanup(void);

pcmk__client_t *pcmk__find_client(const qb_ipcs_connection_t *c);
pcmk__client_t *pcmk__find_client_by_id(const char *id);
const char *pcmk__client_name(const pcmk__client_t *c);
const char *pcmk__client_type_str(uint64_t client_type);

pcmk__client_t *pcmk__new_unauth_client(void *key);
pcmk__client_t *pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid);
void pcmk__free_client(pcmk__client_t *c);
void pcmk__drop_all_clients(qb_ipcs_service_t *s);
void pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax);

xmlNode *pcmk__ipc_create_ack_as(const char *function, int line, uint32_t flags,
                                 const char *tag, const char *ver,
                                 crm_exit_t status);
#define pcmk__ipc_create_ack(flags, tag, ver, st)   \
    pcmk__ipc_create_ack_as(__func__, __LINE__, (flags), (tag), (ver), (st))

int pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c,
                          uint32_t request, uint32_t flags, const char *tag,
                          const char *ver, crm_exit_t status);
#define pcmk__ipc_send_ack(c, req, flags, tag, ver, st) \
    pcmk__ipc_send_ack_as(__func__, __LINE__, (c), (req), (flags), (tag), (ver), (st))

int pcmk__ipc_prepare_iov(uint32_t request, const GString *message,
                          uint16_t index, struct iovec **result,
                          ssize_t *bytes);
int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request,
                       const xmlNode *message, uint32_t flags);
int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags);
void pcmk__ipc_free_client_buffer(crm_ipc_t *client);
int pcmk__ipc_msg_append(GByteArray **buffer, guint8 *data);
xmlNode *pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id,
                               uint32_t *flags);

int pcmk__client_pid(qb_ipcs_connection_t *c);

void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs,
                           struct qb_ipcs_service_handlers *cb);
void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs,
                            struct qb_ipcs_service_handlers *cb);
void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs,
                                struct qb_ipcs_service_handlers *cb);
qb_ipcs_service_t *pcmk__serve_schedulerd_ipc(struct qb_ipcs_service_handlers *cb);
qb_ipcs_service_t *pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb);
void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro,
                           qb_ipcs_service_t **ipcs_rw,
                           qb_ipcs_service_t **ipcs_shm,
                           struct qb_ipcs_service_handlers *ro_cb,
                           struct qb_ipcs_service_handlers *rw_cb);
void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro,
                          qb_ipcs_service_t *ipcs_rw,
                          qb_ipcs_service_t *ipcs_shm);

static inline const char *
pcmk__ipc_sys_name(const char *ipc_name, const char *fallback)
{
    return ipc_name ? ipc_name : ((crm_system_name ? crm_system_name : fallback));
}

const char *pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state);

const char *pcmk__controld_api_reply2str(enum pcmk_controld_api_reply reply);
const char *pcmk__pcmkd_api_reply2str(enum pcmk_pacemakerd_api_reply reply);

#ifdef __cplusplus
}
#endif

#endif // PCMK__CRM_COMMON_IPC_INTERNAL__H
diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c
index 394bf75c1d..7ff0ca7873 100644
--- a/lib/common/ipc_attrd.c
+++ b/lib/common/ipc_attrd.c
@@ -1,488 +1,499 @@
/*
 * Copyright 2011-2025 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#include
#include <libxml/xmlstring.h>   // xmlChar

#include
#include
#include
#include
#include

#include "crmcommon_private.h"

static void
set_pairs_data(pcmk__attrd_api_reply_t *data, xmlNode *msg_data)
{
    const char *name = NULL;
    pcmk__attrd_query_pair_t *pair;

    name = crm_element_value(msg_data, PCMK__XA_ATTR_NAME);

    for (xmlNode *node = pcmk__xe_first_child(msg_data, PCMK_XE_NODE, NULL,
                                              NULL);
         node != NULL; node = pcmk__xe_next(node, PCMK_XE_NODE)) {

        pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));

        pair->node = crm_element_value(node, PCMK__XA_ATTR_HOST);
        pair->name = name;
        pair->value = crm_element_value(node, PCMK__XA_ATTR_VALUE);
        data->data.pairs = g_list_prepend(data->data.pairs, pair);
    }
}

static bool
reply_expected(pcmk_ipc_api_t *api, const xmlNode *request)
{
    const char *command = crm_element_value(request, PCMK_XA_TASK);

    return pcmk__str_any_of(command,
                            PCMK__ATTRD_CMD_CLEAR_FAILURE,
                            PCMK__ATTRD_CMD_QUERY,
                            PCMK__ATTRD_CMD_REFRESH,
                            PCMK__ATTRD_CMD_UPDATE,
                            PCMK__ATTRD_CMD_UPDATE_BOTH,
                            PCMK__ATTRD_CMD_UPDATE_DELAY,
                            NULL);
}

static bool
dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
{
    const char *value = NULL;
    crm_exit_t status = CRM_EX_OK;
    pcmk__attrd_api_reply_t reply_data = {
        pcmk__attrd_reply_unknown
    };

    if (pcmk__xe_is(reply, PCMK__XE_ACK)) {
        return false;
    }

    /* Do some basic validation of the reply */
    value = crm_element_value(reply, PCMK__XA_T);
    if (pcmk__str_empty(value)
        || !pcmk__str_eq(value, PCMK__VALUE_ATTRD, pcmk__str_none)) {
        crm_info("Unrecognizable message from attribute manager: "
                 "message type '%s' not '" PCMK__VALUE_ATTRD "'",
                 pcmk__s(value, ""));
        status = CRM_EX_PROTOCOL;
        goto done;
    }

    value = crm_element_value(reply, PCMK__XA_SUBT);

    /* Only the query command gets a reply for now. NULL counts as query for
     * backward compatibility with attribute managers <2.1.3 that didn't set it.
     */
    if (pcmk__str_eq(value, PCMK__ATTRD_CMD_QUERY, pcmk__str_null_matches)) {
        if (!xmlHasProp(reply, (const xmlChar *) PCMK__XA_ATTR_NAME)) {
            status = ENXIO; // Most likely, the attribute doesn't exist
            goto done;
        }
        reply_data.reply_type = pcmk__attrd_reply_query;
        set_pairs_data(&reply_data, reply);

    } else {
        crm_info("Unrecognizable message from attribute manager: "
                 "message subtype '%s' unknown", pcmk__s(value, ""));
        status = CRM_EX_PROTOCOL;
        goto done;
    }

done:
    pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);

    /* Free any reply data that was allocated */
    if (reply_data.data.pairs) {
        g_list_free_full(reply_data.data.pairs, free);
    }

    return false;
}

pcmk__ipc_methods_t *
pcmk__attrd_api_methods(void)
{
    pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t));

    if (cmds != NULL) {
        cmds->new_data = NULL;
        cmds->free_data = NULL;
        cmds->post_connect = NULL;
        cmds->reply_expected = reply_expected;
        cmds->dispatch = dispatch;
    }
    return cmds;
}

/*!
 * \internal
 * \brief Create a generic attribute manager operation
 *
 * \param[in] user_name  If not NULL, ACL user to set for operation
 *
 * \return XML of attribute manager operation
 */
static xmlNode *
create_attrd_op(const char *user_name)
{
    xmlNode *attrd_op = pcmk__xe_create(NULL, __func__);

    crm_xml_add(attrd_op, PCMK__XA_T, PCMK__VALUE_ATTRD);
    crm_xml_add(attrd_op, PCMK__XA_SRC, pcmk__s(crm_system_name, "unknown"));
    crm_xml_add(attrd_op, PCMK__XA_ATTR_USER, user_name);

    return attrd_op;
}

static int
connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request)
{
+    static const int max_retries = 5;
+    int remaining_attempts = max_retries;
    int rc = pcmk_rc_ok;
    bool created_api = false;
    enum pcmk_ipc_dispatch dispatch = pcmk_ipc_dispatch_sync;

    if (api == NULL) {
        rc = pcmk_new_ipc_api(&api, pcmk_ipc_attrd);
        if (rc != pcmk_rc_ok) {
            return rc;
        }
        created_api = true;
    } else {
        dispatch = api->dispatch_type;
    }

-    rc = pcmk__connect_ipc(api, dispatch, 5);
-    if (rc == pcmk_rc_ok) {
-        rc = pcmk__send_ipc_request(api, request);
-    }
+    // If attrd has been killed and is being restarted, we will temporarily
+    // get ECONNREFUSED on connect (if it is already dead) or ENOTCONN (if it
+    // died after we connected to it). Wait a bit and retry in those cases.
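+    //
+    // With max_retries == 5, the waits below work out to 1s, 2s, 3s, and 4s
+    // between the retry rounds (the first round runs immediately), so a
+    // restarting attrd gets roughly 10 seconds to come back.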
+    do {
+        if (rc == ENOTCONN || rc == ECONNREFUSED) {
+            sleep(max_retries - remaining_attempts);
+        }
+        rc = pcmk__connect_ipc(api, dispatch, remaining_attempts);
+        if (rc == pcmk_rc_ok) {
+            rc = pcmk__send_ipc_request(api, request);
+        }
+        remaining_attempts--;
+    } while ((rc == ENOTCONN || rc == ECONNREFUSED) && remaining_attempts > 0);

    if (created_api) {
        pcmk_free_ipc_api(api);
    }

    return rc;
}

int
pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node,
                               const char *resource, const char *operation,
                               const char *interval_spec, const char *user_name,
                               uint32_t options)
{
    int rc = pcmk_rc_ok;
    xmlNode *request = create_attrd_op(user_name);
    const char *interval_desc = NULL;
    const char *op_desc = NULL;
    const char *target = pcmk__node_attr_target(node);

    if (target != NULL) {
        node = target;
    }

    if (operation) {
        interval_desc = pcmk__s(interval_spec, "nonrecurring");
        op_desc = operation;
    } else {
        interval_desc = "all";
        op_desc = "operations";
    }
    crm_debug("Asking %s to clear failure of %s %s for %s on %s",
              pcmk_ipc_name(api, true), interval_desc, op_desc,
              pcmk__s(resource, "all resources"), pcmk__s(node, "all nodes"));

    crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_CLEAR_FAILURE);
    crm_xml_add(request, PCMK__XA_ATTR_HOST, node);
    crm_xml_add(request, PCMK__XA_ATTR_RESOURCE, resource);
    crm_xml_add(request, PCMK__XA_ATTR_CLEAR_OPERATION, operation);
    crm_xml_add(request, PCMK__XA_ATTR_CLEAR_INTERVAL, interval_spec);
    crm_xml_add_int(request, PCMK__XA_ATTR_IS_REMOTE,
                    pcmk_is_set(options, pcmk__node_attr_remote));

    rc = connect_and_send_attrd_request(api, request);
    pcmk__xml_free(request);

    return rc;
}

int
pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *name,
                       uint32_t options)
{
    const char *target = NULL;

    if (name == NULL) {
        return EINVAL;
    }

    target = pcmk__node_attr_target(node);

    if (target != NULL) {
        node = target;
    }

    /* Make sure the right update option is set. */
    options &= ~pcmk__node_attr_delay;
    options |= pcmk__node_attr_value;

    return pcmk__attrd_api_update(api, node, name, NULL, NULL, NULL, NULL,
                                  options);
}

int
pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap)
{
    int rc = pcmk_rc_ok;
    xmlNode *request = NULL;
    const char *target = pcmk__node_attr_target(node);

    if (target != NULL) {
        node = target;
    }

    crm_debug("Asking %s to purge transient attributes%s for %s",
              pcmk_ipc_name(api, true),
              (reap? " and node cache entries" : ""),
              pcmk__s(node, "local node"));

    request = create_attrd_op(NULL);

    crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE);
    pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, reap);
    crm_xml_add(request, PCMK__XA_ATTR_HOST, node);

    rc = connect_and_send_attrd_request(api, request);
    pcmk__xml_free(request);

    return rc;
}

int
pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name,
                      uint32_t options)
{
    int rc = pcmk_rc_ok;
    xmlNode *request = NULL;
    const char *target = NULL;

    if (name == NULL) {
        return EINVAL;
    }

    if (pcmk_is_set(options, pcmk__node_attr_query_all)) {
        node = NULL;
    } else {
        target = pcmk__node_attr_target(node);

        if (target != NULL) {
            node = target;
        } else if (node == NULL) {
            node = "localhost";
        }
    }

    crm_debug("Querying %s for value of '%s'%s%s",
              pcmk_ipc_name(api, true), name,
              ((node == NULL)? "" : " on "), pcmk__s(node, ""));

    request = create_attrd_op(NULL);

    crm_xml_add(request, PCMK__XA_ATTR_NAME, name);
    crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_QUERY);
    crm_xml_add(request, PCMK__XA_ATTR_HOST, node);

    rc = connect_and_send_attrd_request(api, request);
    pcmk__xml_free(request);

    return rc;
}

int
pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node)
{
    int rc = pcmk_rc_ok;
    xmlNode *request = NULL;
    const char *target = pcmk__node_attr_target(node);

    if (target != NULL) {
        node = target;
    }

    crm_debug("Asking %s to write all transient attributes for %s to CIB",
              pcmk_ipc_name(api, true), pcmk__s(node, "local node"));

    request = create_attrd_op(NULL);

    crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_REFRESH);
    crm_xml_add(request, PCMK__XA_ATTR_HOST, node);

    rc = connect_and_send_attrd_request(api, request);
    pcmk__xml_free(request);

    return rc;
}

static void
add_op_attr(xmlNode *op, uint32_t options)
{
    if (pcmk_all_flags_set(options,
                           pcmk__node_attr_value | pcmk__node_attr_delay)) {
        crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE_BOTH);
    } else if (pcmk_is_set(options, pcmk__node_attr_value)) {
        crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE);
    } else if (pcmk_is_set(options, pcmk__node_attr_delay)) {
        crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE_DELAY);
    }
}

static void
populate_update_op(xmlNode *op, const char *node, const char *name,
                   const char *value, const char *dampen, const char *set,
                   uint32_t options)
{
    if (pcmk_is_set(options, pcmk__node_attr_pattern)) {
        crm_xml_add(op, PCMK__XA_ATTR_REGEX, name);
    } else {
        crm_xml_add(op, PCMK__XA_ATTR_NAME, name);
    }

    if (pcmk_is_set(options, pcmk__node_attr_utilization)) {
        crm_xml_add(op, PCMK__XA_ATTR_SET_TYPE, PCMK_XE_UTILIZATION);
    } else {
        crm_xml_add(op, PCMK__XA_ATTR_SET_TYPE, PCMK_XE_INSTANCE_ATTRIBUTES);
    }

    add_op_attr(op, options);

    crm_xml_add(op, PCMK__XA_ATTR_VALUE, value);
    crm_xml_add(op, PCMK__XA_ATTR_DAMPENING, dampen);
    crm_xml_add(op, PCMK__XA_ATTR_HOST, node);
    crm_xml_add(op, PCMK__XA_ATTR_SET, set);
    crm_xml_add_int(op, PCMK__XA_ATTR_IS_REMOTE,
                    pcmk_is_set(options, pcmk__node_attr_remote));
    crm_xml_add_int(op, PCMK__XA_ATTR_IS_PRIVATE,
                    pcmk_is_set(options, pcmk__node_attr_private));

    if (pcmk_is_set(options, pcmk__node_attr_sync_local)) {
        crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_LOCAL);
    } else if (pcmk_is_set(options, pcmk__node_attr_sync_cluster)) {
        crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_CLUSTER);
    }
}

int
pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name,
                       const char *value, const char *dampen, const char *set,
                       const char *user_name, uint32_t options)
{
    int rc = pcmk_rc_ok;
    xmlNode *request = NULL;
    const char *target = NULL;

    if (name == NULL) {
        return EINVAL;
    }

    target = pcmk__node_attr_target(node);

    if (target != NULL) {
        node = target;
    }

    crm_debug("Asking %s to update '%s' to '%s' for %s",
              pcmk_ipc_name(api, true), name, pcmk__s(value, "(null)"),
              pcmk__s(node, "local node"));

    request = create_attrd_op(user_name);
    populate_update_op(request, node, name, value, dampen, set, options);

    rc = connect_and_send_attrd_request(api, request);
    pcmk__xml_free(request);

    return rc;
}

int
pcmk__attrd_api_update_list(pcmk_ipc_api_t *api, GList *attrs,
                            const char *dampen, const char *set,
                            const char *user_name, uint32_t options)
{
    int rc = pcmk_rc_ok;
    xmlNode *request = NULL;

    if (attrs == NULL) {
        return EINVAL;
    }

    /* There are two different ways of handling a list of attributes:
     *
     * (1) For messages originating from some command line tool, we have to
     *     send them one at a time. In this loop, we just call
     *     pcmk__attrd_api_update for each, letting it deal with creating the
     *     API object if it doesn't already exist.
     *
     *     The reason we can't use a single message in this case is that we
     *     can't trust that the server supports it. Remote nodes could be
     *     involved here, and there's no guarantee that a newer client running
     *     on a remote node is talking to (or proxied through) a cluster node
     *     with a newer attrd. We also can't just try sending a single message
     *     and then falling back on multiple. There's no handshake with the
     *     attrd server to determine its version. And then we would need to do
     *     that fallback in the dispatch function for this to work for all
     *     connection types (mainloop in particular), and at that point we
     *     won't know what the original message was in order to break it apart
     *     and resend as individual messages.
     *
     * (2) For messages between daemons, we can be assured that the local attrd
     *     will support the new message and that it can send to the other
     *     attrds as one request or split up according to the minimum supported
     *     version.
     */
    for (GList *iter = attrs; iter != NULL; iter = iter->next) {
        pcmk__attrd_query_pair_t *pair = (pcmk__attrd_query_pair_t *) iter->data;

        if (pcmk__is_daemon) {
            const char *target = NULL;
            xmlNode *child = NULL;

            /* First time through this loop - create the basic request. */
            if (request == NULL) {
                request = create_attrd_op(user_name);
                add_op_attr(request, options);
            }

            /* Add a child node for this operation. We add the task to the top
             * level XML node so attrd_ipc_dispatch doesn't need changes. And
             * then we also add the task to each child node in
             * populate_update_op so attrd_client_update knows what form of
             * update is taking place.
             */
            child = pcmk__xe_create(request, PCMK_XE_OP);
            target = pcmk__node_attr_target(pair->node);

            if (target != NULL) {
                pair->node = target;
            }

            populate_update_op(child, pair->node, pair->name, pair->value,
                               dampen, set, options);
        } else {
            rc = pcmk__attrd_api_update(api, pair->node, pair->name,
                                        pair->value, dampen, set, user_name,
                                        options);
        }
    }

    /* If we were doing multiple attributes at once, we still need to send the
     * request. Do that now, creating and destroying the API object if needed.
     */
    if (pcmk__is_daemon) {
        rc = connect_and_send_attrd_request(api, request);
        pcmk__xml_free(request);
    }

    return rc;
}
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index 12a85ff607..f47290fb47 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -1,1680 +1,1711 @@
/*
 * Copyright 2004-2025 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
 *
 * This source code is licensed under the GNU Lesser General Public License
 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
 */

#include <crm_internal.h>

#if defined(HAVE_UCRED) || defined(HAVE_SOCKPEERCRED)
#include <sys/socket.h>
#elif defined(HAVE_GETPEERUCRED)
#include <ucred.h>
#endif

#include
#include
#include
#include
#include      /* indirectly: pcmk_err_generic */
#include
#include
#include
#include

#include "crmcommon_private.h"

static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock,
                                    uid_t refuid, gid_t refgid, pid_t *gotpid,
                                    uid_t *gotuid, gid_t *gotgid);

/*!
 * \brief Create a new object for using Pacemaker daemon IPC
 *
 * \param[out] api     Where to store new IPC object
 * \param[in]  server  Which Pacemaker daemon the object is for
 *
 * \return Standard Pacemaker result code
 *
 * \note The caller is responsible for freeing *api using pcmk_free_ipc_api().
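 * \note A typical lifecycle, sketched (the server choice is illustrative):
 *           pcmk_ipc_api_t *api = NULL;
 *           int rc = pcmk_new_ipc_api(&api, pcmk_ipc_controld);
 *           // ... connect, send requests, dispatch replies ...
 *           pcmk_free_ipc_api(api);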
 * \note This is intended to supersede crm_ipc_new() but currently only
 *       supports the attribute manager, controller, pacemakerd, and
 *       schedulerd IPC APIs.
 */
int
pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
{
    if (api == NULL) {
        return EINVAL;
    }

    *api = calloc(1, sizeof(pcmk_ipc_api_t));
    if (*api == NULL) {
        return errno;
    }

    (*api)->server = server;
    if (pcmk_ipc_name(*api, false) == NULL) {
        pcmk_free_ipc_api(*api);
        *api = NULL;
        return EOPNOTSUPP;
    }

    // Set server methods
    switch (server) {
        case pcmk_ipc_attrd:
            (*api)->cmds = pcmk__attrd_api_methods();
            break;

        case pcmk_ipc_based:
            break;

        case pcmk_ipc_controld:
            (*api)->cmds = pcmk__controld_api_methods();
            break;

        case pcmk_ipc_execd:
            break;

        case pcmk_ipc_fenced:
            break;

        case pcmk_ipc_pacemakerd:
            (*api)->cmds = pcmk__pacemakerd_api_methods();
            break;

        case pcmk_ipc_schedulerd:
            (*api)->cmds = pcmk__schedulerd_api_methods();
            break;

        default: // pcmk_ipc_unknown
            pcmk_free_ipc_api(*api);
            *api = NULL;
            return EINVAL;
    }
    if ((*api)->cmds == NULL) {
        pcmk_free_ipc_api(*api);
        *api = NULL;
        return ENOMEM;
    }

    (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), 0);
    if ((*api)->ipc == NULL) {
        pcmk_free_ipc_api(*api);
        *api = NULL;
        return ENOMEM;
    }

    // If daemon API has its own data to track, allocate it
    if ((*api)->cmds->new_data != NULL) {
        if ((*api)->cmds->new_data(*api) != pcmk_rc_ok) {
            pcmk_free_ipc_api(*api);
            *api = NULL;
            return ENOMEM;
        }
    }
    crm_trace("Created %s API IPC object", pcmk_ipc_name(*api, true));
    return pcmk_rc_ok;
}

static void
free_daemon_specific_data(pcmk_ipc_api_t *api)
{
    if ((api != NULL) && (api->cmds != NULL)) {
        if ((api->cmds->free_data != NULL) && (api->api_data != NULL)) {
            api->cmds->free_data(api->api_data);
            api->api_data = NULL;
        }
        free(api->cmds);
        api->cmds = NULL;
    }
}

/*!
 * \internal
 * \brief Call an IPC API event callback, if one is registered
 *
 * \param[in,out] api         IPC API connection
 * \param[in]     event_type  The type of event that occurred
 * \param[in]     status      Event status
 * \param[in,out] event_data  Event-specific data
 */
void
pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
                        crm_exit_t status, void *event_data)
{
    if ((api != NULL) && (api->cb != NULL)) {
        api->cb(api, event_type, status, event_data, api->user_data);
    }
}

/*!
 * \internal
 * \brief Clean up after an IPC disconnect
 *
 * \param[in,out] user_data  IPC API connection that disconnected
 *
 * \note This function can be used as a main loop IPC destroy callback.
 */
static void
ipc_post_disconnect(gpointer user_data)
{
    pcmk_ipc_api_t *api = user_data;

    crm_info("Disconnected from %s", pcmk_ipc_name(api, true));

    // Perform any daemon-specific handling needed
    if ((api->cmds != NULL) && (api->cmds->post_disconnect != NULL)) {
        api->cmds->post_disconnect(api);
    }

    // Call client's registered event callback
    pcmk__call_ipc_callback(api, pcmk_ipc_event_disconnect, CRM_EX_DISCONNECT,
                            NULL);

    /* If this is being called from a running main loop, mainloop_gio_destroy()
     * will free ipc and mainloop_io immediately after calling this function.
     * If this is called from a stopped main loop, these will leak, so the best
     * practice is to close the connection before stopping the main loop.
     */
    api->ipc = NULL;
    api->mainloop_io = NULL;

    if (api->free_on_disconnect) {
        /* pcmk_free_ipc_api() has already been called, but did not free api
         * or api->cmds because this function needed them. Do that now.
         */
        free_daemon_specific_data(api);
        crm_trace("Freeing IPC API object after disconnect");
        free(api);
    }
}

/*!
 * \brief Free the contents of an IPC API object
 *
 * \param[in,out] api  IPC API object to free
 */
void
pcmk_free_ipc_api(pcmk_ipc_api_t *api)
{
    bool free_on_disconnect = false;

    if (api == NULL) {
        return;
    }
    crm_debug("Releasing %s IPC API", pcmk_ipc_name(api, true));

    if (api->ipc != NULL) {
        if (api->mainloop_io != NULL) {
            /* We need to keep the api pointer itself around, because it is the
             * user data for the IPC client destroy callback. That will be
             * triggered by the pcmk_disconnect_ipc() call below, but it might
             * happen later in the main loop (if still running).
             *
             * This flag tells the destroy callback to free the object. It
             * can't do that unconditionally, because the application might
             * call this function after a disconnect that happened by other
             * means.
             */
            free_on_disconnect = api->free_on_disconnect = true;
        }
        pcmk_disconnect_ipc(api); // Frees api if free_on_disconnect is true
    }
    if (!free_on_disconnect) {
        free_daemon_specific_data(api);
        crm_trace("Freeing IPC API object");
        free(api);
    }
}

/*!
 * \brief Get the IPC name used with an IPC API connection
 *
 * \param[in] api      IPC API connection
 * \param[in] for_log  If true, return human-friendly name instead of IPC name
 *
 * \return IPC API's human-friendly or connection name, or if none is
 *         available, "Pacemaker" if for_log is true and NULL if for_log is
 *         false
 */
const char *
pcmk_ipc_name(const pcmk_ipc_api_t *api, bool for_log)
{
    if (api == NULL) {
        return for_log? "Pacemaker" : NULL;
    }
    if (for_log) {
        const char *name = pcmk__server_log_name(api->server);

        return pcmk__s(name, "Pacemaker");
    }
    switch (api->server) {
        // These servers do not have pcmk_ipc_api_t implementations yet
        case pcmk_ipc_based:
        case pcmk_ipc_execd:
        case pcmk_ipc_fenced:
            return NULL;

        default:
            return pcmk__server_ipc_name(api->server);
    }
}

/*!
 * \brief Check whether an IPC API connection is active
 *
 * \param[in,out] api  IPC API connection
 *
 * \return true if IPC is connected, false otherwise
 */
bool
pcmk_ipc_is_connected(pcmk_ipc_api_t *api)
{
    return (api != NULL) && crm_ipc_connected(api->ipc);
}

/*!
 * \internal
 * \brief Call the daemon-specific API's dispatch function
 *
 * Perform daemon-specific handling of IPC reply dispatch. It is the daemon
 * method's responsibility to call the client's registered event callback, as
 * well as allocate and free any event data.
 *
 * \param[in,out] api      IPC API connection
 * \param[in,out] message  IPC reply XML to dispatch
 */
static bool
call_api_dispatch(pcmk_ipc_api_t *api, xmlNode *message)
{
    crm_log_xml_trace(message, "ipc-received");
    if ((api->cmds != NULL) && (api->cmds->dispatch != NULL)) {
        return api->cmds->dispatch(api, message);
    }
    return false;
}

/*!
 * \internal
 * \brief Dispatch previously read IPC data
 *
 * \param[in]     buffer  Data read from IPC
 * \param[in,out] api     IPC object
 *
 * \return Standard Pacemaker return code. In particular:
 *
 * pcmk_rc_ok: There are no more messages expected from the server. Quit
 *             reading.
 * EINPROGRESS: There are more messages expected from the server. Keep
 *              reading.
 *
 * All other values indicate an error.
 */
static int
dispatch_ipc_data(const char *buffer, pcmk_ipc_api_t *api)
{
    bool more = false;
    xmlNode *msg;

    if (buffer == NULL) {
        crm_warn("Empty message received from %s IPC",
                 pcmk_ipc_name(api, true));
        return ENOMSG;
    }

    msg = pcmk__xml_parse(buffer);
    if (msg == NULL) {
        crm_warn("Malformed message received from %s IPC",
                 pcmk_ipc_name(api, true));
        return EPROTO;
    }

    more = call_api_dispatch(api, msg);
    pcmk__xml_free(msg);

    if (more) {
        return EINPROGRESS;
    } else {
        return pcmk_rc_ok;
    }
}

/*!
 * \internal
 * \brief Dispatch data read from IPC source
 *
 * \param[in]     buffer     Data read from IPC
 * \param[in]     length     Number of bytes of data in buffer (ignored)
 * \param[in,out] user_data  IPC object
 *
 * \return Always 0 (meaning connection is still required)
 *
 * \note This function can be used as a main loop IPC dispatch callback.
 */
static int
dispatch_ipc_source_data(const char *buffer, ssize_t length, gpointer user_data)
{
    pcmk_ipc_api_t *api = user_data;

    CRM_CHECK(api != NULL, return 0);
    dispatch_ipc_data(buffer, api);
    return 0;
}

/*!
 * \brief Check whether an IPC connection has data available (without main loop)
 *
 * \param[in] api         IPC API connection
 * \param[in] timeout_ms  If less than 0, poll indefinitely; if 0, poll once
 *                        and return immediately; otherwise, poll for up to
 *                        this many milliseconds
 *
 * \return Standard Pacemaker return code
 *
 * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call
 *       this function to check whether IPC data is available. Return values of
 *       interest include pcmk_rc_ok meaning data is available, and EAGAIN
 *       meaning no data is available; all other values indicate errors.
 * \todo This does not allow the caller to poll multiple file descriptors at
 *       once. If there is demand for that, we could add a wrapper for
 *       pcmk__ipc_fd(api->ipc), so the caller can call poll() themselves.
 */
int
pcmk_poll_ipc(const pcmk_ipc_api_t *api, int timeout_ms)
{
    int rc;
    struct pollfd pollfd = { 0, };

    if ((api == NULL) || (api->dispatch_type != pcmk_ipc_dispatch_poll)) {
        return EINVAL;
    }

    rc = pcmk__ipc_fd(api->ipc, &(pollfd.fd));
    if (rc != pcmk_rc_ok) {
        crm_debug("Could not obtain file descriptor for %s IPC: %s",
                  pcmk_ipc_name(api, true), pcmk_rc_str(rc));
        return rc;
    }

    pollfd.events = POLLIN;
    rc = poll(&pollfd, 1, timeout_ms);
    if (rc < 0) {
        /* Some UNIX systems return negative and set EAGAIN for failure to
         * allocate memory; standardize the return code in that case
         */
        return (errno == EAGAIN)? ENOMEM : errno;
    } else if (rc == 0) {
        return EAGAIN;
    }
    return pcmk_rc_ok;
}

/*!
 * \brief Dispatch available messages on an IPC connection (without main loop)
 *
 * \param[in,out] api  IPC API connection
 *
 * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call
 *       this function when IPC data is available.
 */
void
pcmk_dispatch_ipc(pcmk_ipc_api_t *api)
{
    if (api == NULL) {
        return;
    }
    while (crm_ipc_ready(api->ipc) > 0) {
        if (crm_ipc_read(api->ipc) > 0) {
            dispatch_ipc_data(crm_ipc_buffer(api->ipc), api);
            pcmk__ipc_free_client_buffer(api->ipc);
        }
    }
}
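
/* Editor's sketch (hypothetical): a poll-dispatch consumer drives the two
 * functions above in a loop:
 *
 *     while (pcmk_ipc_is_connected(api)) {
 *         int rc = pcmk_poll_ipc(api, 1000); // wait up to 1s for data
 *
 *         if (rc == pcmk_rc_ok) {
 *             pcmk_dispatch_ipc(api);        // invoke registered callbacks
 *         } else if (rc != EAGAIN) {
 *             break;                         // hard error
 *         }
 *     }
 */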

// \return Standard Pacemaker return code
static int
connect_with_main_loop(pcmk_ipc_api_t *api)
{
    int rc;

    struct ipc_client_callbacks callbacks = {
        .dispatch = dispatch_ipc_source_data,
        .destroy = ipc_post_disconnect,
    };

    rc = pcmk__add_mainloop_ipc(api->ipc, G_PRIORITY_DEFAULT, api,
                                &callbacks, &(api->mainloop_io));
    if (rc != pcmk_rc_ok) {
        return rc;
    }
    crm_debug("Connected to %s IPC (attached to main loop)",
              pcmk_ipc_name(api, true));
    /* After this point, api->mainloop_io owns api->ipc, so api->ipc
     * should not be explicitly freed.
     */
    return pcmk_rc_ok;
}

// \return Standard Pacemaker return code
static int
connect_without_main_loop(pcmk_ipc_api_t *api)
{
    int rc = pcmk__connect_generic_ipc(api->ipc);

    if (rc != pcmk_rc_ok) {
        crm_ipc_close(api->ipc);
    } else {
        crm_debug("Connected to %s IPC (without main loop)",
                  pcmk_ipc_name(api, true));
    }
    return rc;
}

+/*!
+ * \internal
+ * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors
+ *        and ECONNREFUSED)
+ *
+ * \param[in,out] api            IPC API instance
+ * \param[in]     dispatch_type  How IPC replies should be dispatched
+ * \param[in]     attempts       How many times to try (in case of soft error)
+ *
+ * \return Standard Pacemaker return code
+ */
+int
+pcmk__connect_ipc_retry_conrefused(pcmk_ipc_api_t *api,
+                                   enum pcmk_ipc_dispatch dispatch_type,
+                                   int attempts)
+{
+    int remaining = attempts;
+    int rc = pcmk_rc_ok;
+
+    do {
+        if (rc == ECONNREFUSED) {
+            pcmk__sleep_ms((attempts - remaining) * 500);
+        }
+        rc = pcmk__connect_ipc(api, dispatch_type, remaining);
+        remaining--;
+    } while (rc == ECONNREFUSED && remaining > 0);
+
+    return rc;
+}
+
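/* Editor's sketch (hypothetical caller): code swaps a plain
 * pcmk__connect_ipc() call for the wrapper, as controld_schedulerd.c does in
 * this patch:
 *
 *     rc = pcmk__connect_ipc_retry_conrefused(api, pcmk_ipc_dispatch_main, 3);
 *     if (rc != pcmk_rc_ok) {
 *         crm_err("Error connecting to %s: %s",
 *                 pcmk_ipc_name(api, true), pcmk_rc_str(rc));
 *     }
 *
 * With attempts == 3, ECONNREFUSED failures trigger 500ms and 1000ms pauses
 * before the second and third connection rounds.
 */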

/*!
 * \internal
 * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors)
 *
 * \param[in,out] api            IPC API instance
 * \param[in]     dispatch_type  How IPC replies should be dispatched
 * \param[in]     attempts       How many times to try (in case of soft error)
 *
 * \return Standard Pacemaker return code
 */
int
pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type,
                  int attempts)
{
    int rc = pcmk_rc_ok;

    if ((api == NULL) || (attempts < 1)) {
        return EINVAL;
    }

    if (api->ipc == NULL) {
        api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), 0);
        if (api->ipc == NULL) {
            return ENOMEM;
        }
    }

    if (crm_ipc_connected(api->ipc)) {
        crm_trace("Already connected to %s", pcmk_ipc_name(api, true));
        return pcmk_rc_ok;
    }

    api->dispatch_type = dispatch_type;

    crm_debug("Attempting connection to %s (up to %d time%s)",
              pcmk_ipc_name(api, true), attempts, pcmk__plural_s(attempts));
    for (int remaining = attempts - 1; remaining >= 0; --remaining) {
        switch (dispatch_type) {
            case pcmk_ipc_dispatch_main:
                rc = connect_with_main_loop(api);
                break;

            case pcmk_ipc_dispatch_sync:
            case pcmk_ipc_dispatch_poll:
                rc = connect_without_main_loop(api);
                break;
        }

        if ((remaining == 0) || ((rc != EAGAIN) && (rc != EALREADY))) {
            break; // Result is final
        }

        // Retry after soft error (interrupted by signal, etc.)
        pcmk__sleep_ms((attempts - remaining) * 500);
        crm_debug("Re-attempting connection to %s (%d attempt%s remaining)",
                  pcmk_ipc_name(api, true), remaining,
                  pcmk__plural_s(remaining));
    }

    if (rc != pcmk_rc_ok) {
        return rc;
    }

    if ((api->cmds != NULL) && (api->cmds->post_connect != NULL)) {
        rc = api->cmds->post_connect(api);
        if (rc != pcmk_rc_ok) {
            crm_ipc_close(api->ipc);
        }
    }
    return rc;
}

/*!
 * \brief Connect to a Pacemaker daemon via IPC
 *
 * \param[in,out] api            IPC API instance
 * \param[in]     dispatch_type  How IPC replies should be dispatched
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type)
{
    int rc = pcmk__connect_ipc(api, dispatch_type, 2);

    if (rc != pcmk_rc_ok) {
        crm_err("Connection to %s failed: %s",
                pcmk_ipc_name(api, true), pcmk_rc_str(rc));
    }
    return rc;
}

/*!
 * \brief Disconnect an IPC API instance
 *
 * \param[in,out] api  IPC API connection
 *
 * \note If the connection is attached to a main loop, this function should be
 *       called before quitting the main loop, to ensure that all memory is
 *       freed.
 */
void
pcmk_disconnect_ipc(pcmk_ipc_api_t *api)
{
    if ((api == NULL) || (api->ipc == NULL)) {
        return;
    }
    switch (api->dispatch_type) {
        case pcmk_ipc_dispatch_main:
            {
                mainloop_io_t *mainloop_io = api->mainloop_io;

                // Make sure no code with access to api can use these again
                api->mainloop_io = NULL;
                api->ipc = NULL;

                mainloop_del_ipc_client(mainloop_io);
                // After this point api might have already been freed
            }
            break;

        case pcmk_ipc_dispatch_poll:
        case pcmk_ipc_dispatch_sync:
            {
                crm_ipc_t *ipc = api->ipc;

                // Make sure no code with access to api can use ipc again
                api->ipc = NULL;

                // This should always be the case already, but to be safe
                api->free_on_disconnect = false;

                crm_ipc_close(ipc);
                crm_ipc_destroy(ipc);
                ipc_post_disconnect(api);
            }
            break;
    }
}

/*!
 * \brief Register a callback for IPC API events
 *
 * \param[in,out] api       IPC API connection
 * \param[in]     callback  Callback to register
 * \param[in]     userdata  Caller data to pass to callback
 *
 * \note This function may be called multiple times to update the callback
 *       and/or user data. The caller remains responsible for freeing
 *       userdata in any case (after the IPC is disconnected, if the
 *       user data is still registered with the IPC).
 */
void
pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb,
                           void *user_data)
{
    if (api == NULL) {
        return;
    }
    api->cb = cb;
    api->user_data = user_data;
}

/*!
 * \internal
 * \brief Send an XML request across an IPC API connection
 *
 * \param[in,out] api      IPC API connection
 * \param[in]     request  XML request to send
 *
 * \return Standard Pacemaker return code
 *
 * \note Daemon-specific IPC API functions should call this function to send
 *       requests, because it handles different dispatch types appropriately.
 */
int
pcmk__send_ipc_request(pcmk_ipc_api_t *api, const xmlNode *request)
{
    int rc;
    xmlNode *reply = NULL;
    enum crm_ipc_flags flags = crm_ipc_flags_none;

    if ((api == NULL) || (api->ipc == NULL) || (request == NULL)) {
        return EINVAL;
    }
    crm_log_xml_trace(request, "ipc-sent");

    // Synchronous dispatch requires waiting for a reply
    if ((api->dispatch_type == pcmk_ipc_dispatch_sync)
        && (api->cmds != NULL)
        && (api->cmds->reply_expected != NULL)
        && (api->cmds->reply_expected(api, request))) {
        flags = crm_ipc_client_response;
    }

    /* The 0 here means a default timeout of 5 seconds
     *
     * @TODO Maybe add a timeout_ms member to pcmk_ipc_api_t and a
     * pcmk_set_ipc_timeout() setter for it, then use it here.
     */
    rc = crm_ipc_send(api->ipc, request, flags, 0, &reply);
    if (rc < 0) {
        return pcmk_legacy2rc(rc);
    } else if (rc == 0) {
        return ENODATA;
    }

    // With synchronous dispatch, we dispatch any reply now
    if (reply != NULL) {
        bool more = call_api_dispatch(api, reply);

        pcmk__xml_free(reply);

        while (more) {
            rc = crm_ipc_read(api->ipc);

            if (rc == -EAGAIN) {
                continue;
            } else if (rc == -ENOMSG || rc == pcmk_ok) {
                return pcmk_rc_ok;
            } else if (rc < 0) {
                return -rc;
            }

            rc = dispatch_ipc_data(crm_ipc_buffer(api->ipc), api);
            pcmk__ipc_free_client_buffer(api->ipc);

            if (rc == pcmk_rc_ok) {
                more = false;
            } else if (rc == EINPROGRESS) {
                more = true;
            } else {
                continue;
            }
        }
    }
    return pcmk_rc_ok;
}

/*!
 * \internal
 * \brief Create the XML for an IPC request to purge a node from the peer cache
 *
 * \param[in] api        IPC API connection
 * \param[in] node_name  If not NULL, name of node to purge
 * \param[in] nodeid     If not 0, node ID of node to purge
 *
 * \return Newly allocated IPC request XML
 *
 * \note The controller, fencer, and pacemakerd use the same request syntax,
 *       but the attribute manager uses a different one.
 *       The CIB manager doesn't have any syntax for it. The executor and
 *       scheduler don't connect to the cluster layer and thus don't have or
 *       need any syntax for it.
 *
 * \todo Modify the attribute manager to accept the common syntax (as well
 *       as its current one, for compatibility with older clients). Modify
 *       the CIB manager to accept and honor the common syntax. Modify the
 *       executor and scheduler to accept the syntax (immediately returning
 *       success), just for consistency. Modify this function to use the
 *       common syntax with all daemons if their version supports it.
 */
static xmlNode *
create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name,
                          uint32_t nodeid)
{
    xmlNode *request = NULL;
    const char *client = crm_system_name? crm_system_name : "client";

    switch (api->server) {
        case pcmk_ipc_attrd:
            request = pcmk__xe_create(NULL, __func__);
            crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_ATTRD);
            crm_xml_add(request, PCMK__XA_SRC, crm_system_name);
            crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE);
            pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, true);
            crm_xml_add(request, PCMK__XA_ATTR_HOST, node_name);
            if (nodeid > 0) {
                crm_xml_add_int(request, PCMK__XA_ATTR_HOST_ID, nodeid);
            }
            break;

        case pcmk_ipc_controld:
        case pcmk_ipc_fenced:
        case pcmk_ipc_pacemakerd:
            request = pcmk__new_request(api->server, client, NULL,
                                        pcmk_ipc_name(api, false),
                                        CRM_OP_RM_NODE_CACHE, NULL);
            if (nodeid > 0) {
                crm_xml_add_ll(request, PCMK_XA_ID, (long long) nodeid);
            }
            crm_xml_add(request, PCMK_XA_UNAME, node_name);
            break;

        case pcmk_ipc_based:
        case pcmk_ipc_execd:
        case pcmk_ipc_schedulerd:
            break;

        default: // pcmk_ipc_unknown (shouldn't be possible)
            return NULL;
    }
    return request;
}

/*!
 * \brief Ask a Pacemaker daemon to purge a node from its peer cache
 *
 * \param[in,out] api        IPC API connection
 * \param[in]     node_name  If not NULL, name of node to purge
 * \param[in]     nodeid     If not 0, node ID of node to purge
 *
 * \return Standard Pacemaker return code
 *
 * \note At least one of node_name or nodeid must be specified.
 */
int
pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid)
{
    int rc = 0;
    xmlNode *request = NULL;

    if (api == NULL) {
        return EINVAL;
    }
    if ((node_name == NULL) && (nodeid == 0)) {
        return EINVAL;
    }

    request = create_purge_node_request(api, node_name, nodeid);
    if (request == NULL) {
        return EOPNOTSUPP;
    }
    rc = pcmk__send_ipc_request(api, request);
    pcmk__xml_free(request);

    crm_debug("%s peer cache purge of node %s[%lu]: rc=%d",
              pcmk_ipc_name(api, true), node_name, (unsigned long) nodeid, rc);
    return rc;
}
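
/* Editor's sketch (hypothetical): purging a node by name only, letting the
 * daemon resolve the ID:
 *
 *     rc = pcmk_ipc_purge_node(api, "node1", 0);
 */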
*/ crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); if (client == NULL) { crm_err("Could not create IPC connection: %s", strerror(errno)); return NULL; } client->server_name = strdup(name); if (client->server_name == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client); return NULL; } client->buf_size = crm_ipc_default_buffer_size(); client->buffer = NULL; client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } /*! * \internal * \brief Connect a generic (not daemon-specific) IPC object * * \param[in,out] ipc Generic IPC object to connect * * \return Standard Pacemaker return code */ int pcmk__connect_generic_ipc(crm_ipc_t *ipc) { uid_t cl_uid = 0; gid_t cl_gid = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; int rc = pcmk_rc_ok; if (ipc == NULL) { return EINVAL; } ipc->need_reply = FALSE; ipc->ipc = qb_ipcc_connect(ipc->server_name, ipc->buf_size); if (ipc->ipc == NULL) { return errno; } rc = qb_ipcc_fd_get(ipc->ipc, &ipc->pfd.fd); if (rc < 0) { // -errno crm_ipc_close(ipc); return -rc; } rc = pcmk_daemon_user(&cl_uid, &cl_gid); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { crm_ipc_close(ipc); return rc; } rc = is_ipc_provider_expected(ipc->ipc, ipc->pfd.fd, cl_uid, cl_gid, &found_pid, &found_uid, &found_gid); if (rc != pcmk_rc_ok) { if (rc == pcmk_rc_ipc_unauthorized) { crm_info("%s IPC provider authentication failed: process %lld has " "uid %lld (expected %lld) and gid %lld (expected %lld)", ipc->server_name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) cl_uid, (long long) found_gid, (long long) cl_gid); } crm_ipc_close(ipc); return rc; } return pcmk_rc_ok; } void crm_ipc_close(crm_ipc_t * client) { if (client) { if (client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } } void crm_ipc_destroy(crm_ipc_t * client) { if (client) { if (client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying active %s IPC connection", client->server_name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } else { crm_trace("Destroying inactive %s IPC connection", client->server_name); } if (client->buffer != NULL) { pcmk__ipc_free_client_buffer(client); } free(client->server_name); free(client); } } /*! * \internal * \brief Get the file descriptor for a generic IPC object * * \param[in,out] ipc Generic IPC object to get file descriptor for * \param[out] fd Where to store file descriptor * * \return Standard Pacemaker return code */ int pcmk__ipc_fd(crm_ipc_t *ipc, int *fd) { if ((ipc == NULL) || (fd == NULL)) { return EINVAL; } if ((ipc->ipc == NULL) || (ipc->pfd.fd < 0)) { return ENOTCONN; } *fd = ipc->pfd.fd; return pcmk_rc_ok; } int crm_ipc_get_fd(crm_ipc_t * client) { int fd = -1; if (pcmk__ipc_fd(client, &fd) != pcmk_rc_ok) { crm_err("Could not obtain file descriptor for %s IPC", ((client == NULL)? 
"unspecified" : client->server_name)); errno = EINVAL; return -EINVAL; } return fd; } bool crm_ipc_connected(crm_ipc_t * client) { bool rc = FALSE; if (client == NULL) { crm_trace("No client"); return FALSE; } else if (client->ipc == NULL) { crm_trace("No connection"); return FALSE; } else if (client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if (rc == FALSE) { client->pfd.fd = -EINVAL; } return rc; } /*! * \brief Check whether an IPC connection is ready to be read * * \param[in,out] client Connection to check * * \return Positive value if ready to be read, 0 if not ready, -errno on error */ int crm_ipc_ready(crm_ipc_t *client) { int rc; pcmk__assert(client != NULL); if (!crm_ipc_connected(client)) { return -ENOTCONN; } client->pfd.revents = 0; rc = poll(&(client->pfd), 1, 0); return (rc < 0)? -errno : rc; } long crm_ipc_read(crm_ipc_t *client) { guint8 *buffer = NULL; long rc = -ENOMSG; pcmk__assert((client != NULL) && (client->ipc != NULL)); buffer = g_malloc0(crm_ipc_default_buffer_size()); do { pcmk__ipc_header_t *header = NULL; ssize_t bytes = qb_ipcc_event_recv(client->ipc, buffer, crm_ipc_default_buffer_size(), 0); header = (pcmk__ipc_header_t *)(void *) buffer; if (bytes <= 0) { crm_trace("No message received from %s IPC: %s", client->server_name, strerror(-bytes)); if (!crm_ipc_connected(client) || bytes == -ENOTCONN) { crm_err("Connection to %s IPC failed", client->server_name); rc = -ENOTCONN; pcmk__ipc_free_client_buffer(client); } else if (bytes == -EAGAIN) { rc = -EAGAIN; } goto done; } else if (bytes != header->size + sizeof(pcmk__ipc_header_t)) { crm_trace("Message size does not match header"); rc = -EBADMSG; pcmk__ipc_free_client_buffer(client); goto done; } crm_trace("Received %s IPC event %" PRId32 " size=%" PRIu32 " rc=%zu", client->server_name, header->qb.id, header->qb.size, bytes); rc = pcmk__ipc_msg_append(&client->buffer, buffer); if (rc == pcmk_rc_ok) { break; } else if (rc == pcmk_rc_ipc_more) { continue; } else { pcmk__ipc_free_client_buffer(client); rc = pcmk_rc2legacy(rc); goto done; } } while (true); if (client->buffer->len > 0) { /* Data length excluding the header */ rc = client->buffer->len - sizeof(pcmk__ipc_header_t); } done: g_free(buffer); return rc; } void pcmk__ipc_free_client_buffer(crm_ipc_t *client) { pcmk__assert(client != NULL); if (client->buffer != NULL) { g_byte_array_free(client->buffer, TRUE); client->buffer = NULL; } } const char * crm_ipc_buffer(crm_ipc_t * client) { pcmk__assert(client != NULL); CRM_CHECK(client->buffer != NULL, return NULL); return (const char *) (client->buffer->data + sizeof(pcmk__ipc_header_t)); } uint32_t crm_ipc_buffer_flags(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; pcmk__assert(client != NULL); if (client->buffer == NULL) { return 0; } header = (pcmk__ipc_header_t *)(void*) client->buffer->data; return header->flags; } const char * crm_ipc_name(crm_ipc_t * client) { pcmk__assert(client != NULL); return client->server_name; } // \return Standard Pacemaker return code static int internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, ssize_t *bytes, xmlNode **reply) { guint8 *buffer = NULL; pcmk__ipc_header_t *hdr = NULL; time_t timeout = 0; int32_t qb_timeout = -1; int rc = pcmk_rc_ok; if (ms_timeout > 0) { timeout = time(NULL) + 1 + pcmk__timeout_ms2s(ms_timeout); qb_timeout = 1000; } /* get the reply */ crm_trace("Expecting reply to %s IPC message %d", client->server_name, request_id); buffer = g_malloc0(client->buf_size); 
do { guint8 *data = NULL; xmlNode *xml = NULL; *bytes = qb_ipcc_recv(client->ipc, buffer, client->buf_size, qb_timeout); hdr = (pcmk__ipc_header_t *) (void *) buffer; if (*bytes <= 0) { if (!crm_ipc_connected(client)) { crm_err("%s IPC provider disconnected while waiting for message %d", client->server_name, request_id); break; } continue; } else if (*bytes != hdr->size + sizeof(pcmk__ipc_header_t)) { crm_trace("Message size does not match header"); rc = -EBADMSG; break; } if (hdr->qb.id == request_id) { /* Got the reply we were expecting. */ if (client->buffer != NULL) { pcmk__ipc_free_client_buffer(client); } client->buffer = g_byte_array_sized_new(client->buf_size); g_byte_array_append(client->buffer, (const guint8 *) buffer, *bytes); break; } data = buffer + sizeof(pcmk__ipc_header_t); xml = pcmk__xml_parse((const char *) data); if (hdr->qb.id < request_id) { crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(xml, "OldIpcReply"); } else if (hdr->qb.id > request_id) { crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(xml, "ImpossibleReply"); pcmk__assert(hdr->qb.id <= request_id); } } while (time(NULL) < timeout || (timeout == 0 && *bytes == -EAGAIN)); if (*bytes > 0) { crm_trace("Received %zd-byte reply %" PRId32 " to %s IPC %d: %.100s", *bytes, hdr->qb.id, client->server_name, request_id, crm_ipc_buffer(client)); if (reply != NULL) { *reply = pcmk__xml_parse(crm_ipc_buffer(client)); } } else if (*bytes < 0) { rc = (int) -*bytes; // System errno crm_trace("No reply to %s IPC %d: %s " QB_XS " rc=%d", client->server_name, request_id, pcmk_rc_str(rc), rc); } /* If bytes == 0, we'll return that to crm_ipc_send which will interpret * that as pcmk_rc_ok, log that the IPC request failed (since we did not * give it a valid reply), and return that 0 to its callers. It's up to * the callers to take appropriate action after that. */ /* Once we've parsed the client buffer as XML and saved it to reply, * there's no need to keep the client buffer around anymore. Free it here * to avoid having to do this anywhere crm_ipc_send is called. */ pcmk__ipc_free_client_buffer(client); g_free(buffer); return rc; } /*! * \brief Send an IPC XML message * * \param[in,out] client Connection to IPC server * \param[in] message XML message to send * \param[in] flags Bitmask of crm_ipc_flags * \param[in] ms_timeout Give up if not sent within this much time * (5 seconds if 0, or no timeout if negative) * \param[out] reply Reply from server (or NULL if none) * * \return Negative errno on error, otherwise size of reply received in bytes * if reply was needed, otherwise number of bytes sent */ int crm_ipc_send(crm_ipc_t *client, const xmlNode *message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode **reply) { int rc = 0; time_t timeout = 0; ssize_t qb_rc = 0; ssize_t bytes = 0; struct iovec *iov; static uint32_t id = 0; pcmk__ipc_header_t *header; GString *iov_buffer = NULL; if (client == NULL) { crm_notice("Can't send IPC request without connection (bug?): %.100s", message); return -ENOTCONN; } else if (!crm_ipc_connected(client)) { /* Don't even bother */ crm_notice("Can't send %s IPC requests: Connection closed", client->server_name); return -ENOTCONN; } if (ms_timeout == 0) { ms_timeout = 5000; } /* A reply to an earlier request may still be pending. We don't care about * its contents since it's too late to do anything with them, so we just * read the old reply and throw it away.
*/ if (client->need_reply) { char *buffer = pcmk__assert_alloc(crm_ipc_default_buffer_size(), sizeof(char)); qb_rc = qb_ipcc_recv(client->ipc, buffer, client->buf_size, ms_timeout); free(buffer); if (qb_rc < 0) { crm_warn("Sending %s IPC disabled until pending reply received", client->server_name); return -EALREADY; } else { crm_notice("Sending %s IPC re-enabled after pending reply received", client->server_name); client->need_reply = FALSE; } } id++; CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ iov_buffer = g_string_sized_new(1024); pcmk__xml_string(message, 0, iov_buffer, 0); rc = pcmk__ipc_prepare_iov(id, iov_buffer, 0, &iov, &bytes); if ((rc != pcmk_rc_ok) && (rc != pcmk_rc_ipc_more)) { crm_warn("Couldn't prepare %s IPC request: %s " QB_XS " rc=%d", client->server_name, pcmk_rc_str(rc), rc); g_string_free(iov_buffer, TRUE); return pcmk_rc2legacy(rc); } header = iov[0].iov_base; pcmk__set_ipc_flags(header->flags, client->server_name, flags); if (pcmk_is_set(flags, crm_ipc_proxied)) { /* Don't look for a synchronous response */ pcmk__clear_ipc_flags(flags, "client", crm_ipc_client_response); } crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", client->server_name, header->qb.id, header->qb.size, ms_timeout); /* Send the IPC request, respecting any timeout we were passed */ if (ms_timeout > 0) { timeout = time(NULL) + 1 + pcmk__timeout_ms2s(ms_timeout); } do { qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); } while ((qb_rc == -EAGAIN) && ((timeout == 0) || (time(NULL) < timeout))); rc = (int) qb_rc; // Negative of system errno, or bytes sent if (qb_rc <= 0) { goto send_cleanup; } /* If we should not wait for a response, bail now */ if (!pcmk_is_set(flags, crm_ipc_client_response)) { crm_trace("Not waiting for reply to %s IPC request %d", client->server_name, header->qb.id); goto send_cleanup; } rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes, reply); if (rc == pcmk_rc_ok) { rc = (int) bytes; // Size of reply received } else { /* rc is either a positive system errno or a negative standard Pacemaker * return code. If it's an errno, we need to convert it back to a * negative number for comparison and return at the end of this function. */ rc = pcmk_rc2legacy(rc); if (ms_timeout > 0) { /* We didn't get the reply in time, so disable future sends for now. * The only alternative would be to close the connection since we * don't know how to detect and discard out-of-sequence replies. * * @TODO Implement out-of-sequence detection */ client->need_reply = TRUE; } } send_cleanup: if (!crm_ipc_connected(client)) { crm_notice("Couldn't send %s IPC request %d: Connection closed " QB_XS " rc=%d", client->server_name, header->qb.id, rc); } else if (rc == -ETIMEDOUT) { crm_warn("%s IPC request %d failed: %s after %dms " QB_XS " rc=%d", client->server_name, header->qb.id, pcmk_strerror(rc), ms_timeout, rc); crm_write_blackbox(0, NULL); } else if (rc <= 0) { crm_warn("%s IPC request %d failed: %s " QB_XS " rc=%d", client->server_name, header->qb.id, ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc); } g_string_free(iov_buffer, TRUE); pcmk_free_ipc_event(iov); return rc; } /*! 
* \brief Ensure an IPC provider has expected user or group * * \param[in] qb_ipc libqb client connection if available * \param[in] sock Connected Unix socket for IPC * \param[in] refuid Expected user ID * \param[in] refgid Expected group ID * \param[out] gotpid If not NULL, where to store provider's actual process ID * (or 1 on platforms where ID is not available) * \param[out] gotuid If not NULL, where to store provider's actual user ID * \param[out] gotgid If not NULL, where to store provider's actual group ID * * \return Standard Pacemaker return code * \note An actual user ID of 0 (root) will always be considered authorized, * regardless of the expected values provided. The caller can use the * output arguments to be stricter than this function. */ static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int rc = EOPNOTSUPP; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; #ifdef HAVE_QB_IPCC_AUTH_GET if (qb_ipc != NULL) { rc = qb_ipcc_auth_get(qb_ipc, &found_pid, &found_uid, &found_gid); rc = -rc; // libqb returns 0 or -errno if (rc == pcmk_rc_ok) { goto found; } } #endif #ifdef HAVE_UCRED { struct ucred ucred; socklen_t ucred_len = sizeof(ucred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) < 0) { rc = errno; } else if (ucred_len != sizeof(ucred)) { rc = EOPNOTSUPP; } else { found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; goto found; } } #endif #ifdef HAVE_SOCKPEERCRED { struct sockpeercred sockpeercred; socklen_t sockpeercred_len = sizeof(sockpeercred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &sockpeercred, &sockpeercred_len) < 0) { rc = errno; } else if (sockpeercred_len != sizeof(sockpeercred)) { rc = EOPNOTSUPP; } else { found_pid = sockpeercred.pid; found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; goto found; } } #endif #ifdef HAVE_GETPEEREID // For example, FreeBSD if (getpeereid(sock, &found_uid, &found_gid) < 0) { rc = errno; } else { found_pid = PCMK__SPECIAL_PID; goto found; } #endif #ifdef HAVE_GETPEERUCRED { ucred_t *ucred = NULL; if (getpeerucred(sock, &ucred) < 0) { rc = errno; } else { found_pid = ucred_getpid(ucred); found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ucred_free(ucred); goto found; } } #endif return rc; // If we get here, nothing succeeded found: if (gotpid != NULL) { *gotpid = found_pid; } if (gotuid != NULL) { *gotuid = found_uid; } if (gotgid != NULL) { *gotgid = found_gid; } if ((found_uid != 0) && (found_uid != refuid) && (found_gid != refgid)) { return pcmk_rc_ipc_unauthorized; } return pcmk_rc_ok; } int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int ret = is_ipc_provider_expected(NULL, sock, refuid, refgid, gotpid, gotuid, gotgid); /* The old function had some very odd return codes*/ if (ret == 0) { return 1; } else if (ret == pcmk_rc_ipc_unauthorized) { return 0; } else { return pcmk_rc2legacy(ret); } } int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid) { static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ int fd; int rc = pcmk_rc_ipc_unresponsive; int auth_rc = 0; int32_t qb_rc; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; qb_ipcc_connection_t *c; #ifdef HAVE_QB_IPCC_CONNECT_ASYNC struct pollfd pollfd = { 0, }; int poll_rc; c = qb_ipcc_connect_async(name, 0, &(pollfd.fd)); #else c = 
qb_ipcc_connect(name, 0); #endif if (c == NULL) { crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); rc = pcmk_rc_ipc_unresponsive; goto bail; } #ifdef HAVE_QB_IPCC_CONNECT_ASYNC pollfd.events = POLLIN; do { poll_rc = poll(&pollfd, 1, 2000); } while ((poll_rc == -1) && (errno == EINTR)); /* If poll() failed, given that disconnect function is not registered yet, * qb_ipcc_disconnect() won't clean up the socket. In any case, call * qb_ipcc_connect_continue() here so that it may fail and do the cleanup * for us. */ if (qb_ipcc_connect_continue(c) != 0) { crm_info("Could not connect to %s IPC: %s", name, (poll_rc == 0)?"timeout":strerror(errno)); rc = pcmk_rc_ipc_unresponsive; c = NULL; // qb_ipcc_connect_continue cleaned up for us goto bail; } #endif qb_rc = qb_ipcc_fd_get(c, &fd); if (qb_rc != 0) { rc = (int) -qb_rc; // System errno crm_err("Could not get fd from %s IPC: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } auth_rc = is_ipc_provider_expected(c, fd, refuid, refgid, &found_pid, &found_uid, &found_gid); if (auth_rc == pcmk_rc_ipc_unauthorized) { crm_err("Daemon (IPC %s) effectively blocked with unauthorized" " process %lld (uid: %lld, gid: %lld)", name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = pcmk_rc_ipc_unauthorized; goto bail; } if (auth_rc != pcmk_rc_ok) { rc = auth_rc; crm_err("Could not get peer credentials from %s IPC: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } if (gotpid != NULL) { *gotpid = found_pid; } rc = pcmk_rc_ok; if ((found_uid != refuid || found_gid != refgid) && strncmp(last_asked_name, name, sizeof(last_asked_name))) { if ((found_uid == 0) && (refuid != 0)) { crm_warn("Daemon (IPC %s) runs as root, whereas the expected" " credentials are %lld:%lld, hazard of violating" " the least privilege principle", name, (long long) refuid, (long long) refgid); } else { crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" " expected credentials are %lld:%lld, which may" " mean a different set of privileges than expected", name, (long long) found_uid, (long long) found_gid, (long long) refuid, (long long) refgid); } memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); } bail: if (c != NULL) { qb_ipcc_disconnect(c); } return rc; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include bool crm_ipc_connect(crm_ipc_t *client) { int rc = pcmk__connect_generic_ipc(client); if (rc == pcmk_rc_ok) { return true; } if ((client != NULL) && (client->ipc == NULL)) { errno = (rc > 0)? rc : ENOTCONN; crm_debug("Could not establish %s IPC connection: %s (%d)", client->server_name, pcmk_rc_str(errno), errno); } else if (rc == pcmk_rc_ipc_unauthorized) { crm_err("%s IPC provider authentication failed", (client == NULL)? "Pacemaker" : client->server_name); errno = ECONNABORTED; } else { crm_err("Could not verify authenticity of %s IPC provider", (client == NULL)? "Pacemaker" : client->server_name); errno = ENOTCONN; } return false; } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c index 2f91a68738..8a08d99180 100644 --- a/lib/pacemaker/pcmk_cluster_queries.c +++ b/lib/pacemaker/pcmk_cluster_queries.c @@ -1,905 +1,905 @@ /* * Copyright 2020-2025 the Pacemaker project contributors * * The version control history for this file may have further details. 
* * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // xmlNode #include #include #include #include #include #include #include #include #include #include #include //! Object to store node info from the controller API typedef struct { /* Adapted from pcmk_controld_api_reply_t:data:node_info. * (char **) are convenient here for use within callbacks: we can skip * copying strings unless the caller passes a non-NULL value. */ uint32_t id; char **node_name; char **uuid; char **state; bool have_quorum; bool is_remote; } node_info_t; //! Object to store API results, a timeout, and an output object typedef struct { pcmk__output_t *out; bool show_output; int rc; unsigned int message_timeout_ms; enum pcmk_pacemakerd_state pcmkd_state; node_info_t node_info; } data_t; /*! * \internal * \brief Validate that an IPC API event is a good reply * * \param[in,out] data API results and options * \param[in] api IPC API connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * * \return Standard Pacemaker return code */ static int validate_reply_event(data_t *data, const pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status) { pcmk__output_t *out = data->out; switch (event_type) { case pcmk_ipc_event_reply: break; case pcmk_ipc_event_disconnect: if (data->rc == ECONNRESET) { // Unexpected out->err(out, "error: Lost connection to %s", pcmk_ipc_name(api, true)); } // Nothing bad but not the reply we're looking for return ENOTSUP; default: // Ditto return ENOTSUP; } if (status != CRM_EX_OK) { out->err(out, "error: Bad reply from %s: %s", pcmk_ipc_name(api, true), crm_exit_str(status)); data->rc = EBADMSG; return data->rc; } return pcmk_rc_ok; } /*! * \internal * \brief Validate that a controller API event is a good reply of expected type * * \param[in,out] data API results and options * \param[in] api Controller connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in] event_data Event-specific data * \param[in] expected_type Expected reply type * * \return Standard Pacemaker return code */ static int validate_controld_reply(data_t *data, const pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, const void *event_data, enum pcmk_controld_api_reply expected_type) { pcmk__output_t *out = data->out; int rc = pcmk_rc_ok; const pcmk_controld_api_reply_t *reply = NULL; rc = validate_reply_event(data, api, event_type, status); if (rc != pcmk_rc_ok) { return rc; } reply = (const pcmk_controld_api_reply_t *) event_data; if (reply->reply_type != expected_type) { out->err(out, "error: Unexpected reply type '%s' from controller", pcmk__controld_api_reply2str(reply->reply_type)); data->rc = EBADMSG; return data->rc; } return pcmk_rc_ok; } /*! 
* \internal * \brief Validate that a \p pacemakerd API event is a good reply of expected * type * * \param[in,out] data API results and options * \param[in] api \p pacemakerd connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in] event_data Event-specific data * \param[in] expected_type Expected reply type * * \return Standard Pacemaker return code */ static int validate_pcmkd_reply(data_t *data, const pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, const void *event_data, enum pcmk_pacemakerd_api_reply expected_type) { pcmk__output_t *out = data->out; const pcmk_pacemakerd_api_reply_t *reply = NULL; int rc = validate_reply_event(data, api, event_type, status); if (rc != pcmk_rc_ok) { return rc; } reply = (const pcmk_pacemakerd_api_reply_t *) event_data; if (reply->reply_type != expected_type) { out->err(out, "error: Unexpected reply type '%s' from pacemakerd", pcmk__pcmkd_api_reply2str(reply->reply_type)); data->rc = EBADMSG; return data->rc; } return pcmk_rc_ok; } /*! * \internal * \brief Process a controller status IPC event * * \param[in,out] controld_api Controller connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in,out] event_data \p pcmk_controld_api_reply_t object containing * event-specific data * \param[in,out] user_data \p data_t object for API results and options */ static void controller_status_event_cb(pcmk_ipc_api_t *controld_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { data_t *data = (data_t *) user_data; pcmk__output_t *out = data->out; const pcmk_controld_api_reply_t *reply = NULL; int rc = validate_controld_reply(data, controld_api, event_type, status, event_data, pcmk_controld_reply_ping); if (rc != pcmk_rc_ok) { return; } reply = (const pcmk_controld_api_reply_t *) event_data; out->message(out, "health", reply->data.ping.sys_from, reply->host_from, reply->data.ping.fsa_state, reply->data.ping.result); data->rc = pcmk_rc_ok; } /*! * \internal * \brief Process a designated controller IPC event * * \param[in,out] controld_api Controller connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in,out] event_data \p pcmk_controld_api_reply_t object containing * event-specific data * \param[in,out] user_data \p data_t object for API results and options */ static void designated_controller_event_cb(pcmk_ipc_api_t *controld_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { data_t *data = (data_t *) user_data; pcmk__output_t *out = data->out; const pcmk_controld_api_reply_t *reply = NULL; int rc = validate_controld_reply(data, controld_api, event_type, status, event_data, pcmk_controld_reply_ping); if (rc != pcmk_rc_ok) { return; } reply = (const pcmk_controld_api_reply_t *) event_data; out->message(out, "dc", reply->host_from); data->rc = reply->host_from ? pcmk_rc_ok : pcmk_rc_no_dc; } /*! 
* \internal * \brief Process a node info IPC event * * \param[in,out] controld_api Controller connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in,out] event_data \p pcmk_controld_api_reply_t object containing * event-specific data * \param[in,out] user_data \p data_t object for API results and options */ static void node_info_event_cb(pcmk_ipc_api_t *controld_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { data_t *data = (data_t *) user_data; pcmk__output_t *out = data->out; const pcmk_controld_api_reply_t *reply = NULL; int rc = validate_controld_reply(data, controld_api, event_type, status, event_data, pcmk_controld_reply_info); if (rc != pcmk_rc_ok) { return; } reply = (const pcmk_controld_api_reply_t *) event_data; if (reply->data.node_info.uname == NULL) { out->err(out, "Node is not known to cluster"); data->rc = pcmk_rc_node_unknown; return; } data->node_info.have_quorum = reply->data.node_info.have_quorum; data->node_info.is_remote = reply->data.node_info.is_remote; data->node_info.id = (uint32_t) reply->data.node_info.id; pcmk__str_update(data->node_info.node_name, reply->data.node_info.uname); pcmk__str_update(data->node_info.uuid, reply->data.node_info.uuid); pcmk__str_update(data->node_info.state, reply->data.node_info.state); if (data->show_output) { out->message(out, "node-info", (uint32_t) reply->data.node_info.id, reply->data.node_info.uname, reply->data.node_info.uuid, reply->data.node_info.state, reply->data.node_info.have_quorum, reply->data.node_info.is_remote); } data->rc = pcmk_rc_ok; } /*! * \internal * \brief Process a \p pacemakerd status IPC event * * \param[in,out] pacemakerd_api \p pacemakerd connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in,out] event_data \p pcmk_pacemakerd_api_reply_t object * containing event-specific data * \param[in,out] user_data \p data_t object for API results and options */ static void pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { data_t *data = user_data; pcmk__output_t *out = data->out; const pcmk_pacemakerd_api_reply_t *reply = NULL; int rc = validate_pcmkd_reply(data, pacemakerd_api, event_type, status, event_data, pcmk_pacemakerd_reply_ping); if (rc != pcmk_rc_ok) { return; } // Parse desired information from reply reply = (const pcmk_pacemakerd_api_reply_t *) event_data; data->pcmkd_state = reply->data.ping.state; data->rc = pcmk_rc_ok; if (!data->show_output) { return; } if (reply->data.ping.status == pcmk_rc_ok) { out->message(out, "pacemakerd-health", reply->data.ping.sys_from, reply->data.ping.state, NULL, reply->data.ping.last_good); } else { out->message(out, "pacemakerd-health", reply->data.ping.sys_from, reply->data.ping.state, "query failed", time(NULL)); } } static pcmk_ipc_api_t * ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, enum pcmk_ipc_dispatch dispatch_type, bool eremoteio_ok) { int rc; pcmk__output_t *out = data->out; pcmk_ipc_api_t *api = NULL; rc = pcmk_new_ipc_api(&api, server); if (api == NULL) { out->err(out, "error: Could not connect to %s: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); data->rc = rc; return NULL; } if (cb != NULL) { pcmk_register_ipc_callback(api, cb, data); } - rc = pcmk__connect_ipc(api, dispatch_type, 5); + rc = pcmk__connect_ipc_retry_conrefused(api, dispatch_type, 5); if (rc != pcmk_rc_ok) { if (rc == 
EREMOTEIO) { data->pcmkd_state = pcmk_pacemakerd_state_remote; if (eremoteio_ok) { /* EREMOTEIO may be expected and acceptable for some callers * on a Pacemaker Remote node */ crm_debug("Ignoring %s connection failure: No " "Pacemaker Remote connection", pcmk_ipc_name(api, true)); rc = pcmk_rc_ok; } else { out->err(out, "error: Could not connect to %s: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); } } data->rc = rc; pcmk_free_ipc_api(api); return NULL; } return api; } /*! * \internal * \brief Poll an IPC API connection until timeout or a reply is received * * \param[in,out] data API results and options * \param[in,out] api IPC API connection * \param[in] on_node If not \p NULL, name of the node to poll (used only * for logging) * * \note Sets the \p rc member of \p data on error */ static void poll_until_reply(data_t *data, pcmk_ipc_api_t *api, const char *on_node) { pcmk__output_t *out = data->out; uint64_t start_nsec = qb_util_nano_current_get(); uint64_t end_nsec = 0; uint64_t elapsed_ms = 0; uint64_t remaining_ms = data->message_timeout_ms; while (remaining_ms > 0) { int rc = pcmk_poll_ipc(api, remaining_ms); if (rc == EAGAIN) { // Poll timed out break; } if (rc != pcmk_rc_ok) { out->err(out, "error: Failed to poll %s API%s%s: %s", pcmk_ipc_name(api, true), (on_node != NULL)? " on " : "", pcmk__s(on_node, ""), pcmk_rc_str(rc)); data->rc = rc; return; } pcmk_dispatch_ipc(api); if (data->rc != EAGAIN) { // Received a reply return; } end_nsec = qb_util_nano_current_get(); elapsed_ms = (end_nsec - start_nsec) / QB_TIME_NS_IN_MSEC; remaining_ms = data->message_timeout_ms - elapsed_ms; } out->err(out, "error: Timed out after %ums waiting for reply from %s API%s%s", data->message_timeout_ms, pcmk_ipc_name(api, true), (on_node != NULL)? " on " : "", pcmk__s(on_node, "")); data->rc = EAGAIN; } /*! * \internal * \brief Get and output controller status * * \param[in,out] out Output object * \param[in] node_name Name of node whose status is desired * (\p NULL for DC) * \param[in] message_timeout_ms How long to wait for a reply from the * controller API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code */ int pcmk__controller_status(pcmk__output_t *out, const char *node_name, unsigned int message_timeout_ms) { data_t data = { .out = out, .rc = EAGAIN, .message_timeout_ms = message_timeout_ms, }; enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; pcmk_ipc_api_t *controld_api = NULL; if (message_timeout_ms == 0) { dispatch_type = pcmk_ipc_dispatch_sync; } controld_api = ipc_connect(&data, pcmk_ipc_controld, controller_status_event_cb, dispatch_type, false); if (controld_api != NULL) { int rc = pcmk_controld_api_ping(controld_api, node_name); if (rc != pcmk_rc_ok) { out->err(out, "error: Could not ping controller API on %s: %s", pcmk__s(node_name, "DC"), pcmk_rc_str(rc)); data.rc = rc; } if (dispatch_type == pcmk_ipc_dispatch_poll) { poll_until_reply(&data, controld_api, pcmk__s(node_name, "DC")); } pcmk_free_ipc_api(controld_api); } return data.rc; } // Documented in header int pcmk_controller_status(xmlNodePtr *xml, const char *node_name, unsigned int message_timeout_ms) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__controller_status(out, node_name, message_timeout_ms); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } /*!
* \internal * \brief Get and output designated controller node name * * \param[in,out] out Output object * \param[in] message_timeout_ms How long to wait for a reply from the * controller API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code */ int pcmk__designated_controller(pcmk__output_t *out, unsigned int message_timeout_ms) { data_t data = { .out = out, .rc = EAGAIN, .message_timeout_ms = message_timeout_ms, }; enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; pcmk_ipc_api_t *controld_api = NULL; if (message_timeout_ms == 0) { dispatch_type = pcmk_ipc_dispatch_sync; } controld_api = ipc_connect(&data, pcmk_ipc_controld, designated_controller_event_cb, dispatch_type, false); if (controld_api != NULL) { int rc = pcmk_controld_api_ping(controld_api, NULL); if (rc != pcmk_rc_ok) { out->err(out, "error: Could not ping controller API on DC: %s", pcmk_rc_str(rc)); data.rc = rc; } if (dispatch_type == pcmk_ipc_dispatch_poll) { poll_until_reply(&data, controld_api, "DC"); } pcmk_free_ipc_api(controld_api); } return data.rc; } // Documented in header int pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__designated_controller(out, message_timeout_ms); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } /*! * \internal * \brief Get and optionally output node info corresponding to a node ID from * the controller * * \param[in,out] out Output object * \param[in,out] node_id ID of node whose info to get. If \p NULL * or 0, get the local node's info. If not * \c NULL, store the true node ID here on * success. * \param[out] node_name If not \c NULL, where to store the node * name * \param[out] uuid If not \c NULL, where to store the node * UUID * \param[out] state If not \c NULL, where to store the * membership state * \param[out] is_remote If not \c NULL, where to store whether the * node is a Pacemaker Remote node * \param[out] have_quorum If not \c NULL, where to store whether the * node has quorum * \param[in] show_output Whether to show the node info * \param[in] message_timeout_ms How long to wait for a reply from the * controller API. If 0, * \c pcmk_ipc_dispatch_sync will be used. * Otherwise, \c pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code * * \note The caller is responsible for freeing \p *node_name, \p *uuid, and * \p *state using \p free(). */ int pcmk__query_node_info(pcmk__output_t *out, uint32_t *node_id, char **node_name, char **uuid, char **state, bool *have_quorum, bool *is_remote, bool show_output, unsigned int message_timeout_ms) { data_t data = { .out = out, .show_output = show_output, .rc = EAGAIN, .message_timeout_ms = message_timeout_ms, .node_info = { .id = (node_id == NULL)?
0 : *node_id, .node_name = node_name, .uuid = uuid, .state = state, }, }; enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; pcmk_ipc_api_t *controld_api = NULL; if (node_name != NULL) { *node_name = NULL; } if (uuid != NULL) { *uuid = NULL; } if (state != NULL) { *state = NULL; } if (message_timeout_ms == 0) { dispatch_type = pcmk_ipc_dispatch_sync; } controld_api = ipc_connect(&data, pcmk_ipc_controld, node_info_event_cb, dispatch_type, false); if (controld_api != NULL) { int rc = pcmk_controld_api_node_info(controld_api, (node_id != NULL)? *node_id : 0); if (rc != pcmk_rc_ok) { out->err(out, "error: Could not send request to controller API on local " "node: %s", pcmk_rc_str(rc)); data.rc = rc; } if (dispatch_type == pcmk_ipc_dispatch_poll) { poll_until_reply(&data, controld_api, "local node"); } pcmk_free_ipc_api(controld_api); } if (data.rc != pcmk_rc_ok) { return data.rc; } // String outputs are set in callback if (node_id != NULL) { *node_id = data.node_info.id; } if (have_quorum != NULL) { *have_quorum = data.node_info.have_quorum; } if (is_remote != NULL) { *is_remote = data.node_info.is_remote; } return data.rc; } // Documented in header int pcmk_query_node_info(xmlNodePtr *xml, uint32_t *node_id, char **node_name, char **uuid, char **state, bool *have_quorum, bool *is_remote, bool show_output, unsigned int message_timeout_ms) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; pcmk__assert(node_name != NULL); rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__query_node_info(out, node_id, node_name, uuid, state, have_quorum, is_remote, show_output, message_timeout_ms); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } /*! * \internal * \brief Get and optionally output \p pacemakerd status * * \param[in,out] out Output object * \param[in] ipc_name IPC name for request * \param[in] message_timeout_ms How long to wait for a reply from the * \p pacemakerd API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * \param[in] show_output Whether to output the \p pacemakerd state * \param[out] state Where to store the \p pacemakerd state, if * not \p NULL * * \return Standard Pacemaker return code * * \note This function sets \p state to \p pcmk_pacemakerd_state_remote and * returns \p pcmk_rc_ok if the IPC connection attempt returns * \p EREMOTEIO. That code indicates that this is a Pacemaker Remote node * with the remote executor running. The node may be connected to the * cluster. 
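* * \par Example
* A hypothetical caller sketch (assumes an output object \p out was created
* elsewhere, e.g. via pcmk__xml_output_new()):
* \code
* enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid;
* int rc = pcmk__pacemakerd_status(out, crm_system_name, 0, false, &state);
*
* if ((rc == pcmk_rc_ok) && (state == pcmk_pacemakerd_state_running)) {
*     // pacemakerd reports that the cluster stack is up
* }
* \endcode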
*/ int pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, unsigned int message_timeout_ms, bool show_output, enum pcmk_pacemakerd_state *state) { data_t data = { .out = out, .show_output = show_output, .rc = EAGAIN, .message_timeout_ms = message_timeout_ms, .pcmkd_state = pcmk_pacemakerd_state_invalid, }; enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; pcmk_ipc_api_t *pacemakerd_api = NULL; if (message_timeout_ms == 0) { dispatch_type = pcmk_ipc_dispatch_sync; } pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, pacemakerd_event_cb, dispatch_type, true); if (pacemakerd_api != NULL) { int rc = pcmk_pacemakerd_api_ping(pacemakerd_api, ipc_name); if (rc != pcmk_rc_ok) { out->err(out, "error: Could not ping launcher API: %s", pcmk_rc_str(rc)); data.rc = rc; } if (dispatch_type == pcmk_ipc_dispatch_poll) { poll_until_reply(&data, pacemakerd_api, NULL); } pcmk_free_ipc_api(pacemakerd_api); } else if ((data.pcmkd_state == pcmk_pacemakerd_state_remote) && show_output) { // No API connection so the callback wasn't run out->message(out, "pacemakerd-health", NULL, data.pcmkd_state, NULL, time(NULL)); } if (state != NULL) { *state = data.pcmkd_state; } return data.rc; } // Documented in header int pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, unsigned int message_timeout_ms) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__pacemakerd_status(out, ipc_name, message_timeout_ms, true, NULL); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } /* user data for looping through remote node xpath searches */ struct node_data { pcmk__output_t *out; int found; const char *field; /* XML attribute to check for node name */ const char *type; bool bash_export; }; static void remote_node_print_helper(xmlNode *result, void *user_data) { struct node_data *data = user_data; pcmk__output_t *out = data->out; const char *name = crm_element_value(result, PCMK_XA_UNAME); const char *id = crm_element_value(result, data->field); // node name and node id are the same for remote/guest nodes out->message(out, "crmadmin-node", data->type, pcmk__s(name, id), id, data->bash_export); data->found++; } // \return Standard Pacemaker return code int pcmk__list_nodes(pcmk__output_t *out, const char *node_types, bool bash_export) { xmlNode *xml_node = NULL; int rc; rc = cib__signon_query(out, NULL, &xml_node); if (rc == pcmk_rc_ok) { struct node_data data = { .out = out, .found = 0, .bash_export = bash_export }; /* PCMK_XE_NODES acts as the list's element name for CLI tools that * use pcmk__output_enable_list_element. Otherwise PCMK_XE_NODES is * the value of the list's PCMK_XA_NAME attribute. 
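* * For example (output shapes assumed here for illustration), XML output
* would wrap the entries as <nodes>...</nodes> when the list element is
* enabled, and as <list name="nodes">...</list> otherwise.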
*/ out->begin_list(out, NULL, NULL, PCMK_XE_NODES); if (!pcmk__str_empty(node_types) && strstr(node_types, "all")) { node_types = NULL; } if (pcmk__str_empty(node_types) || strstr(node_types, "cluster")) { data.field = PCMK_XA_ID; data.type = "cluster"; pcmk__xpath_foreach_result(xml_node->doc, PCMK__XP_MEMBER_NODE_CONFIG, remote_node_print_helper, &data); } if (pcmk__str_empty(node_types) || strstr(node_types, "guest")) { data.field = PCMK_XA_VALUE; data.type = "guest"; pcmk__xpath_foreach_result(xml_node->doc, PCMK__XP_GUEST_NODE_CONFIG, remote_node_print_helper, &data); } if (pcmk__str_empty(node_types) || pcmk__str_eq(node_types, ",|^remote", pcmk__str_regex)) { data.field = PCMK_XA_ID; data.type = "remote"; pcmk__xpath_foreach_result(xml_node->doc, PCMK__XP_REMOTE_NODE_CONFIG, remote_node_print_helper, &data); } out->end_list(out); if (data.found == 0) { out->info(out, "No nodes configured"); } pcmk__xml_free(xml_node); } return rc; } int pcmk_list_nodes(xmlNodePtr *xml, const char *node_types) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__list_nodes(out, node_types, FALSE); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; }