diff --git a/daemons/based/based_notify.c b/daemons/based/based_notify.c index 04b8ae4843..243c1f7db7 100644 --- a/daemons/based/based_notify.c +++ b/daemons/based/based_notify.c @@ -1,198 +1,198 @@ /* - * Copyright 2004-2024 the Pacemaker project contributors + * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include // PRIx64 #include #include #include #include #include #include #include #include #include #include #include struct cib_notification_s { const xmlNode *msg; struct iovec *iov; int32_t iov_size; }; static void cib_notify_send_one(gpointer key, gpointer value, gpointer user_data) { const char *type = NULL; gboolean do_send = FALSE; int rc = pcmk_rc_ok; pcmk__client_t *client = value; struct cib_notification_s *update = user_data; if (client->ipcs == NULL && client->remote == NULL) { crm_warn("Skipping client with NULL channel"); return; } type = crm_element_value(update->msg, PCMK__XA_SUBT); CRM_LOG_ASSERT(type != NULL); if (pcmk_is_set(client->flags, cib_notify_diff) && pcmk__str_eq(type, PCMK__VALUE_CIB_DIFF_NOTIFY, pcmk__str_none)) { do_send = TRUE; } else if (pcmk_is_set(client->flags, cib_notify_confirm) && pcmk__str_eq(type, PCMK__VALUE_CIB_UPDATE_CONFIRMATION, pcmk__str_none)) { do_send = TRUE; } else if (pcmk_is_set(client->flags, cib_notify_pre) && pcmk__str_eq(type, PCMK__VALUE_CIB_PRE_NOTIFY, pcmk__str_none)) { do_send = TRUE; } else if (pcmk_is_set(client->flags, cib_notify_post) && pcmk__str_eq(type, PCMK__VALUE_CIB_POST_NOTIFY, pcmk__str_none)) { do_send = TRUE; } if (do_send) { switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: rc = pcmk__ipc_send_iov(client, update->iov, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("Could not notify client %s: %s " QB_XS " id=%s", pcmk__client_name(client), pcmk_rc_str(rc), client->id); } break; case pcmk__client_tls: case pcmk__client_tcp: crm_debug("Sent %s notification to client %s (id %s)", type, pcmk__client_name(client), client->id); pcmk__remote_send_xml(client->remote, update->msg); break; default: crm_err("Unknown transport for client %s " QB_XS " flags=%#016" PRIx64, pcmk__client_name(client), client->flags); } } } static void cib_notify_send(const xmlNode *xml) { struct iovec *iov; struct cib_notification_s update; ssize_t bytes = 0; - int rc = pcmk__ipc_prepare_iov(0, xml, 0, &iov, &bytes); + int rc = pcmk__ipc_prepare_iov(0, xml, &iov, &bytes); if (rc == pcmk_rc_ok) { update.msg = xml; update.iov = iov; update.iov_size = bytes; pcmk__foreach_ipc_client(cib_notify_send_one, &update); + pcmk_free_ipc_event(iov); } else { crm_notice("Could not notify clients: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); } - pcmk_free_ipc_event(iov); } void cib_diff_notify(const char *op, int result, const char *call_id, const char *client_id, const char *client_name, const char *origin, xmlNode *update, xmlNode *diff) { int add_updates = 0; int add_epoch = 0; int add_admin_epoch = 0; int del_updates = 0; int del_epoch = 0; int del_admin_epoch = 0; uint8_t log_level = LOG_TRACE; xmlNode *update_msg = NULL; xmlNode *wrapper = NULL; if (diff == NULL) { return; } if (result != pcmk_ok) { log_level = LOG_WARNING; } cib_diff_version_details(diff, &add_admin_epoch, &add_epoch, &add_updates, &del_admin_epoch, &del_epoch, &del_updates); if ((add_admin_epoch != del_admin_epoch) || (add_epoch != del_epoch) || (add_updates != del_updates)) { do_crm_log(log_level, "Updated CIB generation %d.%d.%d to %d.%d.%d from client " "%s%s%s (%s) (%s)", del_admin_epoch, del_epoch, del_updates, add_admin_epoch, add_epoch, add_updates, client_name, ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""), pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); } else if ((add_admin_epoch != 0) || (add_epoch != 0) || (add_updates != 0)) { do_crm_log(log_level, "Local-only change to CIB generation %d.%d.%d from client " "%s%s%s (%s) (%s)", add_admin_epoch, add_epoch, add_updates, client_name, ((call_id != NULL)? " call " : ""), pcmk__s(call_id, ""), pcmk__s(origin, "unspecified peer"), pcmk_strerror(result)); } update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_CIB_NOTIFY); crm_xml_add(update_msg, PCMK__XA_SUBT, PCMK__VALUE_CIB_DIFF_NOTIFY); crm_xml_add(update_msg, PCMK__XA_CIB_OP, op); crm_xml_add(update_msg, PCMK__XA_CIB_CLIENTID, client_id); crm_xml_add(update_msg, PCMK__XA_CIB_CLIENTNAME, client_name); crm_xml_add(update_msg, PCMK__XA_CIB_CALLID, call_id); crm_xml_add(update_msg, PCMK__XA_SRC, origin); crm_xml_add_int(update_msg, PCMK__XA_CIB_RC, result); wrapper = pcmk__xe_create(update_msg, PCMK__XE_CIB_UPDATE_RESULT); pcmk__xml_copy(wrapper, diff); crm_log_xml_trace(update_msg, "diff-notify"); cib_notify_send(update_msg); pcmk__xml_free(update_msg); } diff --git a/doc/sphinx/Pacemaker_Explained/local-options.rst b/doc/sphinx/Pacemaker_Explained/local-options.rst index fe7ab9f585..64e45d0a02 100644 --- a/doc/sphinx/Pacemaker_Explained/local-options.rst +++ b/doc/sphinx/Pacemaker_Explained/local-options.rst @@ -1,793 +1,780 @@ Host-Local Configuration ------------------------ .. index:: pair: XML element; configuration .. note:: Directory and file paths below may differ on your system depending on your Pacemaker build settings. Check your Pacemaker configuration file to find the correct paths. Configuration Value Types ######################### Throughout this document, configuration values will be designated as having one of the following types: .. list-table:: **Configuration Value Types** :class: longtable :widths: 1 3 :header-rows: 1 * - Type - Description * - .. _boolean: .. index:: pair: type; boolean boolean - Case-insensitive text value where ``1``, ``yes``, ``y``, ``on``, and ``true`` evaluate as true and ``0``, ``no``, ``n``, ``off``, ``false``, and unset evaluate as false * - .. _date_time: .. index:: pair: type; date/time date/time - Textual timestamp like ``Sat Dec 21 11:47:45 2013`` * - .. _duration: .. index:: pair: type; duration duration - A nonnegative time duration, specified either like a :ref:`timeout ` or an `ISO 8601 duration `_. A duration may be up to approximately 49 days but is intended for much smaller time periods. * - .. _enumeration: .. index:: pair: type; enumeration enumeration - Text that must be one of a set of defined values (which will be listed in the description) * - .. _epoch_time: .. index:: pair: type; epoch_time epoch_time - Time as the integer number of seconds since the Unix epoch, ``1970-01-01 00:00:00 +0000 (UTC)``. * - .. _id: .. index:: pair: type; id id - A text string starting with a letter or underbar, followed by any combination of letters, numbers, dashes, dots, and/or underbars; when used for a property named ``id``, the string must be unique across all ``id`` properties in the CIB * - .. _integer: .. index:: pair: type; integer integer - 32-bit signed integer value (-2,147,483,648 to 2,147,483,647) * - .. _iso8601: .. index:: pair: type; iso8601 ISO 8601 - An `ISO 8601 `_ date/time. * - .. _nonnegative_integer: .. index:: pair: type; nonnegative integer nonnegative integer - 32-bit nonnegative integer value (0 to 2,147,483,647) * - .. _percentage: .. index:: pair: type; percentage percentage - Floating-point number followed by an optional percent sign ('%') * - .. _port: .. index:: pair: type; port port - Integer TCP port number (0 to 65535) * - .. _range: .. index:: pair: type; range range - A range may be a single nonnegative integer or a dash-separated range of nonnegative integers. Either the first or last value may be omitted to leave the range open-ended. Examples: ``0``, ``3-``, ``-5``, ``4-6``. * - .. _score: .. index:: pair: type; score score - A Pacemaker score can be an integer between -1,000,000 and 1,000,000, or a string alias: ``INFINITY`` or ``+INFINITY`` is equivalent to 1,000,000, ``-INFINITY`` is equivalent to -1,000,000, and ``red``, ``yellow``, and ``green`` are equivalent to integers as described in :ref:`node-health`. * - .. _text: .. index:: pair: type; text text - A text string * - .. _timeout: .. index:: pair: type; timeout timeout - A time duration, specified as a bare number (in which case it is considered to be in seconds) or a number with a unit (``ms`` or ``msec`` for milliseconds, ``us`` or ``usec`` for microseconds, ``s`` or ``sec`` for seconds, ``m`` or ``min`` for minutes, ``h`` or ``hr`` for hours) optionally with whitespace before and/or after the number. * - .. _version: .. index:: pair: type; version version - Version number (any combination of alphanumeric characters, dots, and dashes, starting with a number). Scores ______ Scores are integral to how Pacemaker works. Practically everything from moving a resource to deciding which resource to stop in a degraded cluster is achieved by manipulating scores in some way. Scores are calculated per resource and node. Any node with a negative score for a resource can't run that resource. The cluster places a resource on the node with the highest score for it. Score addition and subtraction follow these rules: * Any value (including ``INFINITY``) - ``INFINITY`` = ``-INFINITY`` * ``INFINITY`` + any value other than ``-INFINITY`` = ``INFINITY`` .. note:: What if you want to use a score higher than 1,000,000? Typically this possibility arises when someone wants to base the score on some external metric that might go above 1,000,000. The short answer is you can't. The long answer is it is sometimes possible work around this limitation creatively. You may be able to set the score to some computed value based on the external metric rather than use the metric directly. For nodes, you can store the metric as a node attribute, and query the attribute when computing the score (possibly as part of a custom resource agent). Local Options ############# Most Pacemaker configuration is in the cluster-wide CIB, but some host-local configuration options either are needed at startup (before the CIB is read) or provide per-host overrides of cluster-wide options. These options are configured as environment variables set when Pacemaker is started, in the format ``=""``. These are typically set in a file whose location varies by OS (most commonly ``/etc/sysconfig/pacemaker`` or ``/etc/default/pacemaker``; this documentation was generated on a system using |PCMK_CONFIG_FILE|). .. list-table:: **Local Options** :class: longtable :widths: 2 2 2 5 :header-rows: 1 * - Name - Type - Default - Description * - .. _cib_pam_service: .. index:: pair: node option; CIB_pam_service CIB_pam_service - :ref:`text ` - login - PAM service to use for remote CIB client authentication (passed to ``pam_start``). * - .. _pcmk_logfacility: .. index:: pair: node option; PCMK_logfacility PCMK_logfacility - :ref:`enumeration ` - daemon - Enable logging via the system log or journal, using the specified log facility. Messages sent here are of value to all Pacemaker administrators. This can be disabled using ``none``, but that is not recommended. Allowed values: * ``none`` * ``daemon`` * ``user`` * ``local0`` * ``local1`` * ``local2`` * ``local3`` * ``local4`` * ``local5`` * ``local6`` * ``local7`` * - .. _pcmk_logpriority: .. index:: pair: node option; PCMK_logpriority PCMK_logpriority - :ref:`enumeration ` - notice - Unless system logging is disabled using ``PCMK_logfacility=none``, messages of the specified log severity and higher will be sent to the system log. The default is appropriate for most installations. Allowed values: * ``emerg`` * ``alert`` * ``crit`` * ``error`` * ``warning`` * ``notice`` * ``info`` * ``debug`` * - .. _pcmk_logfile: .. index:: pair: node option; PCMK_logfile PCMK_logfile - :ref:`text ` - |PCMK_LOG_FILE| - Unless set to ``none``, more detailed log messages will be sent to the specified file (in addition to the system log, if enabled). These messages may have extended information, and will include messages of info severity. This log is of more use to developers and advanced system administrators, and when reporting problems. Note: The default is |PCMK_CONTAINER_LOG_FILE| (inside the container) for bundled container nodes; this would typically be mapped to a different path on the host running the container. * - .. _pcmk_logfile_mode: .. index:: pair: node option; PCMK_logfile_mode PCMK_logfile_mode - :ref:`text ` - 0660 - Pacemaker will set the permissions on the detail log to this value (see ``chmod(1)``). * - .. _pcmk_debug: .. index:: pair: node option; PCMK_debug PCMK_debug - :ref:`enumeration ` - no - Whether to send debug severity messages to the detail log. This may be set for all subsystems (``yes`` or ``no``) or for specific (comma- separated) subsystems. Allowed subsystems are: * ``pacemakerd`` * ``pacemaker-attrd`` * ``pacemaker-based`` * ``pacemaker-controld`` * ``pacemaker-execd`` * ``pacemaker-fenced`` * ``pacemaker-schedulerd`` Example: ``PCMK_debug="pacemakerd,pacemaker-execd"`` * - .. _pcmk_stderr: .. index:: pair: node option; PCMK_stderr PCMK_stderr - :ref:`boolean ` - no - *Advanced Use Only:* Whether to send daemon log messages to stderr. This would be useful only during troubleshooting, when starting Pacemaker manually on the command line. Setting this option in the configuration file is pointless, since the file is not read when starting Pacemaker manually. However, it can be set directly as an environment variable on the command line. * - .. _pcmk_trace_functions: .. index:: pair: node option; PCMK_trace_functions PCMK_trace_functions - :ref:`text ` - - *Advanced Use Only:* Send debug and trace severity messages from these (comma-separated) source code functions to the detail log. Example: ``PCMK_trace_functions="func1,func2"`` * - .. _pcmk_trace_files: .. index:: pair: node option; PCMK_trace_files PCMK_trace_files - :ref:`text ` - - *Advanced Use Only:* Send debug and trace severity messages from all functions in these (comma-separated) source file names to the detail log. Example: ``PCMK_trace_files="file1.c,file2.c"`` * - .. _pcmk_trace_formats: .. index:: pair: node option; PCMK_trace_formats PCMK_trace_formats - :ref:`text ` - - *Advanced Use Only:* Send trace severity messages that are generated by these (comma-separated) format strings in the source code to the detail log. Example: ``PCMK_trace_formats="Error: %s (%d)"`` * - .. _pcmk_trace_tags: .. index:: pair: node option; PCMK_trace_tags PCMK_trace_tags - :ref:`text ` - - *Advanced Use Only:* Send debug and trace severity messages related to these (comma-separated) resource IDs to the detail log. Example: ``PCMK_trace_tags="client-ip,dbfs"`` * - .. _pcmk_blackbox: .. index:: pair: node option; PCMK_blackbox PCMK_blackbox - :ref:`enumeration ` - no - *Advanced Use Only:* Enable blackbox logging globally (``yes`` or ``no``) or by subsystem. A blackbox contains a rolling buffer of all logs (of all severities). Blackboxes are stored under |CRM_BLACKBOX_DIR| by default, by default, and their contents can be viewed using the ``qb-blackbox(8)`` command. The blackbox recorder can be enabled at start using this variable, or at runtime by sending a Pacemaker subsystem daemon process a ``SIGUSR1`` or ``SIGTRAP`` signal, and disabled by sending ``SIGUSR2`` (see ``kill(1)``). The blackbox will be written after a crash, assertion failure, or ``SIGTRAP`` signal. See :ref:`PCMK_debug ` for allowed subsystems. Example: ``PCMK_blackbox="pacemakerd,pacemaker-execd"`` * - .. _pcmk_trace_blackbox: .. index:: pair: node option; PCMK_trace_blackbox PCMK_trace_blackbox - :ref:`enumeration ` - - *Advanced Use Only:* Write a blackbox whenever the message at the specified function and line is logged. Multiple entries may be comma- separated. Example: ``PCMK_trace_blackbox="remote.c:144,remote.c:149"`` * - .. _pcmk_node_start_state: .. index:: pair: node option; PCMK_node_start_state PCMK_node_start_state - :ref:`enumeration ` - default - By default, the local host will join the cluster in an online or standby state when Pacemaker first starts depending on whether it was previously put into standby mode. If this variable is set to ``standby`` or ``online``, it will force the local host to join in the specified state. * - .. _pcmk_node_action_limit: .. index:: pair: node option; PCMK_node_action_limit PCMK_node_action_limit - :ref:`nonnegative integer ` - - If set, this overrides the :ref:`node-action-limit ` cluster option on this node to specify the maximum number of jobs that can be scheduled on this node (or 0 to use twice the number of CPU cores). * - .. _pcmk_fail_fast: .. index:: pair: node option; PCMK_fail_fast PCMK_fail_fast - :ref:`boolean ` - no - By default, if a Pacemaker subsystem crashes, the main ``pacemakerd`` process will attempt to restart it. If this variable is set to ``yes``, ``pacemakerd`` will panic the local host instead. * - .. _pcmk_panic_action: .. index:: pair: node option; PCMK_panic_action PCMK_panic_action - :ref:`enumeration ` - reboot - Pacemaker will panic the local host under certain conditions. By default, this means rebooting the host. This variable can change that behavior: if ``crash``, trigger a kernel crash (useful if you want a kernel dump to investigate); if ``sync-reboot`` or ``sync-crash``, synchronize filesystems before rebooting the host or triggering a kernel crash. The sync values are more likely to preserve log messages, but with the risk that the host may be left active if the synchronization hangs. * - .. _pcmk_remote_address: .. index:: pair: node option; PCMK_remote_address PCMK_remote_address - :ref:`text ` - - By default, if the :ref:`Pacemaker Remote ` service is run on the local node, it will listen for connections on all IP addresses. This may be set to one address to listen on instead, as a resolvable hostname or as a numeric IPv4 or IPv6 address. When resolving names or listening on all addresses, IPv6 will be preferred if available. When listening on an IPv6 address, IPv4 clients will be supported via IPv4-mapped IPv6 addresses. Example: ``PCMK_remote_address="192.0.2.1"`` * - .. _pcmk_remote_port: .. index:: pair: node option; PCMK_remote_port PCMK_remote_port - :ref:`port ` - 3121 - Use this TCP port number for :ref:`Pacemaker Remote ` node connections. This value must be the same on all nodes. * - .. _pcmk_ca_file: .. index:: pair: node option; PCMK_ca_file PCMK_ca_file - :ref:`text ` - - The location of a file containing trusted Certificate Authorities, used to verify client or server certificates. This file must be in PEM format and must be readable by Pacemaker daemons (that is, it must allow read permissions to either the |CRM_DAEMON_USER| user or the |CRM_DAEMON_GROUP| group). If set, along with :ref:`PCMK_key_file ` and :ref:`PCMK_cert_file `, X509 authentication will be enabled for :ref:`Pacemaker Remote ` and remote CIB connections. Example: ``PCMK_ca_file="/etc/pacemaker/ca.cert.pem"`` * - .. _pcmk_cert_file: .. index:: pair: node option; PCMK_cert_file PCMK_cert_file - :ref:`text ` - - The location of a file containing the signed certificate for the server side of the connection. This file must be in PEM format and must be readable by Pacemaker daemons (that is, it must allow read permissions to either the |CRM_DAEMON_USER| user or the |CRM_DAEMON_GROUP| group). If set, along with :ref:`PCMK_ca_file ` and :ref:`PCMK_key_file `, X509 authentication will be enabled for :ref:`Pacemaker Remote ` and remote CIB connections. Example: ``PCMK_cert_file="/etc/pacemaker/server.cert.pem"`` * - .. _pcmk_crl_file: .. index:: pair: node option; PCMK_crl_file PCMK_crl_file - :ref:`text ` - - The location of a Certificate Revocation List file, in PEM format. This setting is optional for X509 authentication. Example: ``PCMK_cr1_file="/etc/pacemaker/crl.pem"`` * - .. _pcmk_key_file: .. index:: pair: node option; PCMK_key_file PCMK_key_file - :ref:`text ` - - The location of a file containing the private key for the matching :ref:`PCMK_cert_file `, in PEM format. This file must be readble by Pacemaker daemons (that is, it must allow read permissions to either the |CRM_DAEMON_USER| user or the |CRM_DAEMON_GROUP| group). If set, along with :ref:`PCMK_ca_file ` and :ref:`PCMK_cert_file `, X509 authentication will be enabled for :ref:`Pacemaker Remote ` and remote CIB connections. Example: ``PCMK_key_file="/etc/pacemaker/server.key.pem"`` * - .. _pcmk_authkey_location: .. index:: pair: node option; PCMK_authkey_location PCMK_authkey_location - :ref:`text ` - |PCMK_AUTHKEY_FILE| - As an alternative to using X509 authentication for :ref:`Pacemaker Remote ` connections, use the contents of this file as the authorization key. This file must be readable by Pacemaker daemons (that is, it must allow read permissions to either the |CRM_DAEMON_USER| user or the |CRM_DAEMON_GROUP| group), and its contents must be identical on all nodes. This is an alternative to using X509 certificates. * - .. _pcmk_remote_pid1: .. index:: pair: node option; PCMK_remote_pid1 PCMK_remote_pid1 - :ref:`enumeration ` - default - *Advanced Use Only:* When a bundle resource's ``run-command`` option is left to default, :ref:`Pacemaker Remote ` runs as PID 1 in the bundle's containers. When it does so, it loads environment variables from the container's |PCMK_INIT_ENV_FILE| and performs the PID 1 responsibility of reaping dead subprocesses. This option controls whether those actions are performed when Pacemaker Remote is not running as PID 1. It is intended primarily for developer testing but can be useful when ``run-command`` is set to a separate, custom PID 1 process that launches Pacemaker Remote. * ``full``: Pacemaker Remote loads environment variables from |PCMK_INIT_ENV_FILE| and reaps dead subprocesses. * ``vars``: Pacemaker Remote loads environment variables from |PCMK_INIT_ENV_FILE| but does not reap dead subprocesses. * ``default``: Pacemaker Remote performs neither action. If Pacemaker Remote is running as PID 1, this option is ignored, and the behavior is the same as for ``full``. * - .. _pcmk_tls_priorities: .. index:: pair: node option; PCMK_tls_priorities PCMK_tls_priorities - :ref:`text ` - |PCMK__GNUTLS_PRIORITIES| - *Advanced Use Only:* These `GnuTLS cipher priorities `_ will be used for TLS connections (whether for :ref:`Pacemaker Remote ` connections or remote CIB access, when enabled). Pacemaker will append ``":+ANON-DH"`` for remote CIB access and ``":+DHE-PSK:+PSK"`` for Pacemaker Remote connections, as they are required for the respective functionality. Example: ``PCMK_tls_priorities="SECURE128:+SECURE192"`` * - .. _pcmk_dh_max_bits: .. index:: pair: node option; PCMK_dh_max_bits PCMK_dh_max_bits - :ref:`nonnegative integer ` - 0 (no maximum) - *Advanced Use Only:* Set an upper bound on the bit length of the prime number generated for Diffie-Hellman parameters needed by TLS connections. The default is no maximum. The server (:ref:`Pacemaker Remote ` daemon, or CIB manager configured to accept remote clients) will use this value to provide a ceiling for the value recommended by the GnuTLS library. The library will only accept a limited number of specific values, which vary by library version, so setting these is recommended only when required for compatibility with specific client versions. Clients do not use ``PCMK_dh_max_bits``. * - .. _pcmk_ipc_type: .. index:: pair: node option; PCMK_ipc_type PCMK_ipc_type - :ref:`enumeration ` - shared-mem - *Advanced Use Only:* Force use of a particular IPC method. Allowed values: * ``shared-mem`` * ``socket`` * ``posix`` * ``sysv`` - * - .. _pcmk_ipc_buffer: - - .. index:: - pair: node option; PCMK_ipc_buffer - - PCMK_ipc_buffer - - :ref:`nonnegative integer ` - - 131072 - - *Advanced Use Only:* Specify an IPC buffer size in bytes. This can be - useful when connecting to large clusters that result in messages - exceeding the default size (which will also result in log messages - referencing this variable). - * - .. _pcmk_cluster_type: .. index:: pair: node option; PCMK_cluster_type PCMK_cluster_type - :ref:`enumeration ` - corosync - *Advanced Use Only:* Specify the cluster layer to be used. If unset, Pacemaker will detect and use a supported cluster layer, if available. Currently, ``"corosync"`` is the only supported cluster layer. If multiple layers are supported in the future, this will allow overriding Pacemaker's automatic detection to select a specific one. * - .. _pcmk_schema_directory: .. index:: pair: node option; PCMK_schema_directory PCMK_schema_directory - :ref:`text ` - |PCMK_SCHEMA_DIR| - *Advanced Use Only:* Specify an alternate location for RNG schemas and XSL transforms. * - .. _pcmk_remote_schema_directory: .. index:: pair: node option; PCMK_remote_schema_directory PCMK_remote_schema_directory - :ref:`text ` - |PCMK__REMOTE_SCHEMA_DIR| - *Advanced Use Only:* Specify an alternate location on :ref:`Pacemaker Remote ` nodes for storing newer RNG schemas and XSL transforms fetched from the cluster. * - .. _pcmk_valgrind_enabled: .. index:: pair: node option; PCMK_valgrind_enabled PCMK_valgrind_enabled - :ref:`enumeration ` - no - *Advanced Use Only:* Whether subsystem daemons should be run under ``valgrind``. Allowed values are the same as for ``PCMK_debug``. * - .. _pcmk_callgrind_enabled: .. index:: pair: node option; PCMK_callgrind_enabled PCMK_callgrind_enabled - :ref:`enumeration ` - no - *Advanced Use Only:* Whether subsystem daemons should be run under ``valgrind`` with the ``callgrind`` tool enabled. Allowed values are the same as for ``PCMK_debug``. * - .. _sbd_sync_resource_startup: .. index:: pair: node option; SBD_SYNC_RESOURCE_STARTUP SBD_SYNC_RESOURCE_STARTUP - :ref:`boolean ` - - If true, ``pacemakerd`` waits for a ping from ``sbd`` during startup before starting other Pacemaker daemons, and during shutdown after stopping other Pacemaker daemons but before exiting. Default value is set based on the ``--with-sbd-sync-default`` configure script option. * - .. _sbd_watchdog_timeout: .. index:: pair: node option; SBD_WATCHDOG_TIMEOUT SBD_WATCHDOG_TIMEOUT - :ref:`duration ` - - If the ``stonith-watchdog-timeout`` cluster property is set to a negative or invalid value, use double this value as the default if positive, or use 0 as the default otherwise. This value must be greater than the value of ``stonith-watchdog-timeout`` if both are set. * - .. _valgrind_opts: .. index:: pair: node option; VALGRIND_OPTS VALGRIND_OPTS - :ref:`text ` - - *Advanced Use Only:* Pass these options to valgrind, when enabled (see ``valgrind(1)``). ``"--vgdb=no"`` should usually be specified because ``pacemaker-execd`` can lower privileges when executing commands, which would otherwise leave a bunch of unremovable files in ``/tmp``. diff --git a/etc/sysconfig/pacemaker.in b/etc/sysconfig/pacemaker.in index 412ee7a99a..c3d374ef96 100644 --- a/etc/sysconfig/pacemaker.in +++ b/etc/sysconfig/pacemaker.in @@ -1,436 +1,428 @@ # # Pacemaker start-up configuration # # This file contains environment variables that affect Pacemaker behavior. # They are not options stored in the Cluster Information Base (CIB) because # they may be needed before the CIB is available. # ## Logging # PCMK_logfacility # # Enable logging via the system log or journal, using the specified log # facility. Messages sent here are of value to all Pacemaker administrators. # This can be disabled using "none", but that is not recommended. Allowed # values: # # none # daemon # user # local0 # local1 # local2 # local3 # local4 # local5 # local6 # local7 # # Default: PCMK_logfacility="daemon" # PCMK_logpriority # # Unless system logging is disabled using PCMK_logfacility=none, messages of # the specified log severity and higher will be sent to the system log. The # default is appropriate for most installations. Allowed values: # # emerg # alert # crit # error # warning # notice # info # debug # # Default: PCMK_logpriority="notice" # Warning: Debug logs may show sensitive configuration values. # PCMK_logfile # # Unless set to "none", more detailed log messages will be sent to the # specified file (in addition to the system log, if enabled). These messages # may have extended information, and will include messages of info severity. # This log is of more use to developers and advanced system administrators, and # when reporting problems. # # Default: PCMK_logfile="@CRM_LOG_DIR@/pacemaker.log" # PCMK_logfile_mode # # Pacemaker will set the permissions on the detail log to this value (see # chmod(1)). # # Default: PCMK_logfile_mode="0660" # PCMK_debug (Advanced Use Only) # # Whether to send debug severity messages to the detail log. # This may be set for all subsystems (yes or no) or for specific # (comma-separated) subsystems. Allowed subsystems are: # # pacemakerd # pacemaker-attrd # pacemaker-based # pacemaker-controld # pacemaker-execd # pacemaker-fenced # pacemaker-schedulerd # # Default: PCMK_debug="no" # Example: PCMK_debug="pacemakerd,pacemaker-execd" # Warning: Debug logs may show sensitive configuration values. # PCMK_stderr (Advanced Use Only) # # Whether to send daemon log messages to stderr. This would be useful only # during troubleshooting, when starting Pacemaker manually on the command line. # # Setting this option in this file is pointless, since this file is not read # when starting Pacemaker manually. However, it can be set directly as an # environment variable on the command line. # # Default: PCMK_stderr="no" # PCMK_trace_functions (Advanced Use Only) # # Send debug and trace severity messages from these (comma-separated) # source code functions to the detail log. # # Default: PCMK_trace_functions="" # Example: PCMK_trace_functions="unpack_colocation_set,pcmk__cmp_instance" # Warning: Trace logs may show sensitive configuration values. # PCMK_trace_files (Advanced Use Only) # # Send debug and trace severity messages from all functions in these # (comma-separated) source file names to the detail log. # # Default: PCMK_trace_files="" # Example: PCMK_trace_files="remote.c,watchdog.c" # Warning: Trace logs may show sensitive configuration values. # PCMK_trace_formats (Advanced Use Only) # # Send trace severity messages that are generated by these (comma-separated) # format strings in the source code to the detail log. # # Default: PCMK_trace_formats="" # Example: PCMK_trace_formats="TLS handshake failed: %s (%d)" # Warning: Trace logs may show sensitive configuration values. # PCMK_trace_tags (Advanced Use Only) # # Send debug and trace severity messages related to these (comma-separated) # resource IDs to the detail log. # # Default: PCMK_trace_tags="" # Example: PCMK_trace_tags="client-ip,dbfs" # Warning: Trace logs may show sensitive configuration values. # PCMK_blackbox (Advanced Use Only) # # Enable blackbox logging globally (yes or no) or by subsystem. A blackbox # contains a rolling buffer of all logs (of all severities). Blackboxes are # stored under @CRM_BLACKBOX_DIR@ by default, and their contents can # be viewed using the qb-blackbox(8) command. # # The blackbox recorder can be enabled at start using this variable, or at # runtime by sending a Pacemaker subsystem daemon process a SIGUSR1 or SIGTRAP # signal, and disabled by sending SIGUSR2 (see kill(1)). The blackbox will be # written after a crash, assertion failure, or SIGTRAP signal. # # Default: PCMK_blackbox="no" # Example: PCMK_blackbox="pacemaker-controld,pacemaker-fenced" # Warning: Blackboxes may contain sensitive configuration values. # PCMK_trace_blackbox (Advanced Use Only) # # Write a blackbox whenever the message at the specified function and line is # logged. Multiple entries may be comma-separated. # # Default: PCMK_trace_blackbox="" # Example: PCMK_trace_blackbox="remote.c:144,remote.c:149" # Warning: Blackboxes may contain sensitive configuration values. ## Option overrides # PCMK_node_start_state # # By default, the local host will join the cluster in an online or standby # state when Pacemaker first starts depending on whether it was previously put # into standby mode. If this variable is set to "standby" or "online", it will # force the local host to join in the specified state. # # Default: PCMK_node_start_state="default" # PCMK_node_action_limit # # If set, this overrides the node-action-limit cluster option for this node to # specify the maximum number of jobs that can be scheduled on this node (or 0 # to use twice the number of CPU cores). # # Default: unset # Example: PCMK_node_action_limit="1" ## Crash Handling # PCMK_fail_fast # # By default, if a Pacemaker subsystem crashes, the main pacemakerd process # will attempt to restart it. If this variable is set to "yes", pacemakerd # will panic the local host instead. # # Default: PCMK_fail_fast="no" # PCMK_panic_action # # Pacemaker panics the local node under certain conditions (for example, losing # quorum when no-quorum-policy is "suicide", or being notified of the local # node's own fencing when fence-reaction is "panic"). This variable determines # the panic behavior. Allowed values: # # reboot Immediately reboot the host (not a clean reboot) # off Immediately kill power to the host (not a clean shutdown) # crash Trigger a kernel crash if possible, otherwise like reboot # sync-reboot, sync-off, sync-crash # "sync-" can be put in front of any of the above values to synchronize # filesystems before panicking (making log messages more likely to be # preserved, but with the risk that the host may be left active if the # synchronization hangs) # # Default: PCMK_panic_action="reboot" ## Pacemaker Remote and remote CIB administration # PCMK_remote_address # # By default, if the Pacemaker Remote service is run on the local node, it will # listen for connections on all IP addresses. This may be set to one address to # listen on instead, as a resolvable hostname or as a numeric IPv4 or IPv6 # address. When resolving names or listening on all addresses, IPv6 will be # preferred if available. When listening on an IPv6 address, IPv4 clients will # be supported via IPv4-mapped IPv6 addresses. # # Default: PCMK_remote_address="" # Example: PCMK_remote_address="192.0.2.1" # PCMK_remote_port # # Use this TCP port number for Pacemaker Remote node connections. This value # must be the same on all nodes. # # Default: PCMK_remote_port="3121" # PCMK_ca_file # # The location of a file containing trusted Certificate Authorities, used to # verify client or server certificates. This file must be in PEM format and # must be readable by Pacemaker daemons (that is, it must allow read permissions # to either the @CRM_DAEMON_USER@ user or the @CRM_DAEMON_GROUP@ group). # If set, along with PCMK_key_file and PCMK_cert_file, X509 authentication # will be enabled for Pacemaker Remote and remote CIB connections. # # Default: PCMK_ca_file="" # PCMK_cert_file # # The location of a file containing the signed certificate for the server # side of the connection. This file must be in PEM format and must be # readable by Pacemaker daemons (that is, it must allow read permissions # to either the @CRM_DAEMON_USER@ user or the @CRM_DAEMON_GROUP@ group). # If set, along with PCMK_ca_file and PCMK_key_file, X509 authentication # will be enabled for Pacemaker Remote and remote CIB connections. # # Default: PCMK_cert_file="" # PCMK_crl_file # # The location of a Certificate Revocation List file, in PEM format. This # setting is optional for X509 authentication. # # Default: PCMK_crl_file="" # PCMK_key_file # # The location of a file containing the private key for the matching PCMK_cert_file, # in PEM format. This file must be readble by Pacemaker daemons (that is, it # must allow read permissions to either the @CRM_DAEMON_USER@ user or the # @CRM_DAEMON_GROUP@ group). If set, along with PCMK_ca_file and PCMK_cert_file, # X509 authentication will be enabled for Pacemaker Remote and remote CIB # connections. # # Default: PCMK_key_file="" # PCMK_authkey_location # # As an alternative to using X509 authentication for Pacemaker Remote # connections, use the contents of this file as the authorization key. This # file must be readable by Pacemaker daemons (that is, it must allow read # permissions to either the @CRM_DAEMON_USER@ user or the @CRM_DAEMON_GROUP@ # group), and its contents must be identical on all nodes. # # This is an alternative to using X509 certificates. # # Default: PCMK_authkey_location="@PACEMAKER_CONFIG_DIR@/authkey" # PCMK_remote_pid1 (Advanced Use Only) # # When a bundle resource's "run-command" option is left to default, Pacemaker # Remote runs as PID 1 in the bundle's containers. When it does so, it loads # environment variables from the container's # @PACEMAKER_CONFIG_DIR@/pcmk-init.env and performs the PID 1 responsibility of # reaping dead subprocesses. # # This option controls whether those actions are performed when Pacemaker # Remote is not running as PID 1. It is intended primarily for developer testing # but can be useful when "run-command" is set to a separate, custom PID 1 # process that launches Pacemaker Remote. # # * If set to "full", Pacemaker Remote loads environment variables from # @PACEMAKER_CONFIG_DIR@/pcmk-init.env and reaps dead subprocesses. # * If set to "vars", Pacemaker Remote loads environment variables from # @PACEMAKER_CONFIG_DIR@/pcmk-init.env but does not reap dead subprocesses. # * If set to "default", Pacemaker Remote performs neither action. # # If Pacemaker Remote is running as PID 1, this option is ignored, and the # behavior is the same as for "full". # # Default: PCMK_remote_pid1="default" # PCMK_tls_priorities (Advanced Use Only) # # These GnuTLS cipher priorities will be used for TLS connections (whether for # Pacemaker Remote connections or remote CIB access, when enabled). See: # # https://gnutls.org/manual/html_node/Priority-Strings.html # # Pacemaker will append ":+ANON-DH" for remote CIB access and ":+DHE-PSK:+PSK" # for Pacemaker Remote connections, as they are required for the respective # functionality. # # Default: PCMK_tls_priorities="@PCMK__GNUTLS_PRIORITIES@" # Example: PCMK_tls_priorities="SECURE128:+SECURE192:-VERS-ALL:+VERS-TLS1.2" # PCMK_dh_max_bits (Advanced Use Only) # # Set an upper bound on the bit length of the prime number generated for # Diffie-Hellman parameters needed by TLS connections. The default is no # maximum. # # The server (Pacemaker Remote daemon, or CIB manager configured to accept # remote clients) will use this value to provide a ceiling for the value # recommended by the GnuTLS library. The library will only accept a limited # number of specific values, which vary by library version, so setting these is # recommended only when required for compatibility with specific client # versions. # # Clients do not use PCMK_dh_max_bits. # # Default: PCMK_dh_max_bits="0" (no maximum) ## Inter-process Communication # PCMK_ipc_type (Advanced Use Only) # # Force use of a particular IPC method. Allowed values: # # shared-mem # socket # posix # sysv # # Default: PCMK_ipc_type="shared-mem" -# PCMK_ipc_buffer (Advanced Use Only) -# -# Specify an IPC buffer size in bytes. This can be useful when connecting to -# large clusters that result in messages exceeding the default size (which will -# also result in log messages referencing this variable). -# -# Default: PCMK_ipc_buffer="131072" - ## Cluster type # PCMK_cluster_type (Advanced Use Only) # # Specify the cluster layer to be used. If unset, Pacemaker will detect and use # a supported cluster layer, if available. Currently, "corosync" is the only # supported cluster layer. If multiple layers are supported in the future, this # will allow overriding Pacemaker's automatic detection to select a specific # one. # # Default: PCMK_cluster_type="" ## Developer Options # PCMK_schema_directory (Advanced Use Only) # # Specify an alternate location for RNG schemas and XSL transforms. # # Default: PCMK_schema_directory="@PCMK_SCHEMA_DIR@" # PCMK_remote_schema_directory (Advanced Use Only) # # Specify an alternate location on Pacemaker Remote nodes for storing newer # RNG schemas and XSL transforms fetched from the cluster. # # Default: PCMK_remote_schema_directory="@PCMK__REMOTE_SCHEMA_DIR@" # G_SLICE (Advanced Use Only) # # Affect the behavior of glib's memory allocator. Setting to "always-malloc" # when running under valgrind will help valgrind track malloc/free better; # setting to "debug-blocks" when not running under valgrind will perform # (somewhat expensive) memory checks. # # Default: G_SLICE="" # Example: G_SLICE="always-malloc" # MALLOC_PERTURB_ (Advanced Use Only) # # Setting this to a decimal byte value will make malloc() initialize newly # allocated memory and free() wipe it, to help catch uninitialized-memory and # use-after-free bugs. # # Default: MALLOC_PERTURB_="" # Example: MALLOC_PERTURB_="221" # MALLOC_CHECK_ (Advanced Use Only) # # Setting this to 3 will make malloc() and friends print to stderr and abort # for some (inexpensive) memory checks. # # Default: MALLOC_CHECK_="" # Example: MALLOC_CHECK_="3" # PCMK_valgrind_enabled (Advanced Use Only) # # Whether subsystem daemons should be run under valgrind. Allowed values are # the same as for PCMK_debug. # # Default: PCMK_valgrind_enabled="no" # PCMK_callgrind_enabled # # Whether subsystem daemons should be run under valgrind with the callgrind # tool enabled. Allowed values are the same as for PCMK_debug. # # Default: PCMK_callgrind_enabled="no" # VALGRIND_OPTS # # Pass these options to valgrind, when enabled (see valgrind(1)). "--vgdb=no" # is specified because pacemaker-execd can lower privileges when executing # commands, which would otherwise leave a bunch of unremovable files in /tmp. # # Default: VALGRIND_OPTS="" VALGRIND_OPTS="--leak-check=full --trace-children=no --vgdb=no --num-callers=25 --log-file=@PCMK__PERSISTENT_DATA_DIR@/valgrind-%p --suppressions=@datadir@/pacemaker/tests/valgrind-pcmk.suppressions --gen-suppressions=all" diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h index 27be73c900..3890919d81 100644 --- a/include/crm/common/ipc.h +++ b/include/crm/common/ipc.h @@ -1,216 +1,216 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_IPC__H #define PCMK__CRM_COMMON_IPC__H #include #include #include #include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief IPC interface to Pacemaker daemons * * \ingroup core */ /* * The library supports two methods of creating IPC connections. The older code * allows connecting to any arbitrary IPC name. The newer code only allows * connecting to one of the Pacemaker daemons. * * As daemons are converted to use the new model, the old functions should be * considered deprecated for use with those daemons. Once all daemons are * converted, the old functions should be officially deprecated as public API * and eventually made internal API. */ /* * Pacemaker daemon IPC */ /* @COMPAT This is also used internally for cluster message types, but it's not * worth the hassle of redefining this public API just to change the name. */ //! Available IPC interfaces enum pcmk_ipc_server { pcmk_ipc_unknown, //!< Unknown or invalid pcmk_ipc_attrd, //!< Attribute manager pcmk_ipc_based, //!< CIB manager pcmk_ipc_controld, //!< Controller pcmk_ipc_execd, //!< Executor pcmk_ipc_fenced, //!< Fencer pcmk_ipc_pacemakerd, //!< Launcher pcmk_ipc_schedulerd, //!< Scheduler }; // NOTE: sbd (as of at least 1.5.2) uses this enum //! Possible event types that an IPC event callback can be called for enum pcmk_ipc_event { pcmk_ipc_event_connect, //!< Result of asynchronous connection attempt // NOTE: sbd (as of at least 1.5.2) uses this value pcmk_ipc_event_disconnect, //!< Termination of IPC connection // NOTE: sbd (as of at least 1.5.2) uses this value pcmk_ipc_event_reply, //!< Daemon's reply to client IPC request pcmk_ipc_event_notify, //!< Notification from daemon }; //! How IPC replies should be dispatched enum pcmk_ipc_dispatch { pcmk_ipc_dispatch_main, //!< Attach IPC to GMainLoop for dispatch pcmk_ipc_dispatch_poll, //!< Caller will poll and dispatch IPC pcmk_ipc_dispatch_sync, //!< Sending a command will wait for any reply }; // NOTE: sbd (as of at least 1.5.2) uses this //! Client connection to Pacemaker IPC typedef struct pcmk_ipc_api_s pcmk_ipc_api_t; /*! * \brief Callback function type for Pacemaker daemon IPC APIs * * \param[in,out] api IPC API connection * \param[in] event_type The type of event that occurred * \param[in] status Event status * \param[in,out] event_data Event-specific data * \param[in,out] user_data Caller data provided when callback was registered * * \note For connection and disconnection events, event_data may be NULL (for * local IPC) or the name of the connected node (for remote IPC, for * daemons that support that). For reply and notify events, event_data is * defined by the specific daemon API. */ typedef void (*pcmk_ipc_callback_t)(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data); // NOTE: sbd (as of at least 1.5.2) uses this int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server); // NOTE: sbd (as of at least 1.5.2) uses this void pcmk_free_ipc_api(pcmk_ipc_api_t *api); // NOTE: sbd (as of at least 1.5.2) uses this int pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type); void pcmk_disconnect_ipc(pcmk_ipc_api_t *api); int pcmk_poll_ipc(const pcmk_ipc_api_t *api, int timeout_ms); void pcmk_dispatch_ipc(pcmk_ipc_api_t *api); // NOTE: sbd (as of at least 1.5.2) uses this void pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, void *user_data); const char *pcmk_ipc_name(const pcmk_ipc_api_t *api, bool for_log); bool pcmk_ipc_is_connected(pcmk_ipc_api_t *api); int pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid); /* * Generic IPC API (to eventually be deprecated as public API and made internal) */ enum crm_ipc_flags { crm_ipc_flags_none = UINT32_C(0), - //! Message has been compressed + //! \deprecated Since 3.0.1 crm_ipc_compressed = (UINT32_C(1) << 0), //! _ALL_ replies to proxied connections need to be sent as events crm_ipc_proxied = (UINT32_C(1) << 8), //! A response is expected in reply crm_ipc_client_response = (UINT32_C(1) << 9), // These are options for Pacemaker's internal use only (pcmk__ipc_send_*()) //! Send an Event instead of a Response crm_ipc_server_event = (UINT32_C(1) << 16), //! Free the iovec after sending crm_ipc_server_free = (UINT32_C(1) << 17), //! All replies to proxied connections are sent as events. This flag //! preserves whether the events should be treated as an Event or a Response crm_ipc_proxied_relay_response = (UINT32_C(1) << 18), }; typedef struct crm_ipc_s crm_ipc_t; crm_ipc_t *crm_ipc_new(const char *name, size_t max_size); void crm_ipc_close(crm_ipc_t * client); void crm_ipc_destroy(crm_ipc_t * client); void pcmk_free_ipc_event(struct iovec *event); int crm_ipc_send(crm_ipc_t *client, const xmlNode *message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode **reply); int crm_ipc_get_fd(crm_ipc_t * client); bool crm_ipc_connected(crm_ipc_t * client); int crm_ipc_ready(crm_ipc_t * client); long crm_ipc_read(crm_ipc_t * client); const char *crm_ipc_buffer(crm_ipc_t * client); uint32_t crm_ipc_buffer_flags(crm_ipc_t * client); const char *crm_ipc_name(crm_ipc_t * client); unsigned int crm_ipc_default_buffer_size(void); /*! * \brief Check the authenticity of the IPC socket peer process (legacy) * * If everything goes well, peer's authenticity is verified by the means * of comparing against provided referential UID and GID (either satisfies), * and the result of this check can be deduced from the return value. * As an exception, detected UID of 0 ("root") satisfies arbitrary * provided referential daemon's credentials. * * \param[in] sock IPC related, connected Unix socket to check peer of * \param[in] refuid referential UID to check against * \param[in] refgid referential GID to check against * \param[out] gotpid to optionally store obtained PID of the peer * (not available on FreeBSD, special value of 1 * used instead, and the caller is required to * special case this value respectively) * \param[out] gotuid to optionally store obtained UID of the peer * \param[out] gotgid to optionally store obtained GID of the peer * * \return 0 if IPC related socket's peer is not authentic given the * referential credentials (see above), 1 if it is, * negative value on error (generally expressing -errno unless * it was zero even on nonhappy path, -pcmk_err_generic is * returned then; no message is directly emitted) * * \note While this function is tolerant on what constitutes authorized * IPC daemon process (its effective user matches UID=0 or \p refuid, * or at least its group matches \p refgid), either or both (in case * of UID=0) mismatches on the expected credentials of such peer * process \e shall be investigated at the caller when value of 1 * gets returned there, since higher-than-expected privileges in * respect to the expected/intended credentials possibly violate * the least privilege principle and may pose an additional risk * (i.e. such accidental inconsistency shall be eventually fixed). */ int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); #ifdef __cplusplus } #endif #endif diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h index cfa06541b0..72b6f7f189 100644 --- a/include/crm/common/ipc_internal.h +++ b/include/crm/common/ipc_internal.h @@ -1,287 +1,286 @@ /* * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_IPC_INTERNAL__H #define PCMK__CRM_COMMON_IPC_INTERNAL__H #include // bool #include // uint32_t, uint64_t, UINT64_C() #include // struct iovec #include // uid_t, gid_t, pid_t, size_t #include // gnutls_session_t #include // guint, gpointer, GQueue, ... #include // xmlNode #include // qb_ipcs_connection_t, ... #include // HAVE_GETPEEREID #include // crm_system_name #include #include // pcmk_controld_api_reply #include // pcmk_pacemakerd_{api_reply,state} #include // mainloop_io_t #ifdef __cplusplus extern "C" { #endif /* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that require a delicate handling anyway (socket-based activation with systemd); we can be reasonably sure that this PID is never possessed by the actual child daemon, as it gets taken either by the proper init, or by pacemakerd itself (i.e. this precludes anything else); note that value of zero is meant to carry "unset" meaning, and better not to bet on/conditionalize over signedness of pid_t */ #define PCMK__SPECIAL_PID 1 // Timeout (in seconds) to use for IPC client sends, reply waits, etc. #define PCMK__IPC_TIMEOUT 120 #if defined(HAVE_GETPEEREID) /* on FreeBSD, we don't want to expose "non-yieldable PID" (leading to "IPC liveness check only") as its nominal representation, which could cause confusion -- this is unambiguous as long as there's no socket-based activation like with systemd (very improbable) */ #define PCMK__SPECIAL_PID_AS_0(p) (((p) == PCMK__SPECIAL_PID) ? 0 : (p)) #else #define PCMK__SPECIAL_PID_AS_0(p) (p) #endif /*! * \internal * \brief Check the authenticity and liveness of the process via IPC end-point * * When IPC daemon under given IPC end-point (name) detected, its authenticity * is verified by the means of comparing against provided referential UID and * GID, and the result of this check can be deduced from the return value. * As an exception, referential UID of 0 (~ root) satisfies arbitrary * detected daemon's credentials. * * \param[in] name IPC name to base the search on * \param[in] refuid referential UID to check against * \param[in] refgid referential GID to check against * \param[out] gotpid to optionally store obtained PID of the found process * upon returning 1 or -2 * (not available on FreeBSD, special value of 1, * see PCMK__SPECIAL_PID, used instead, and the caller * is required to special case this value respectively) * * \return Standard Pacemaker return code * * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive * indicating that no trace of IPC liveness was detected, and * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by * an unauthorized process. * \note This function emits a log message for return codes other than * pcmk_rc_ok and pcmk_rc_ipc_unresponsive, and when there isn't a perfect * match in respect to \p reguid and/or \p refgid, for a possible * least privilege principle violation. * * \see crm_ipc_is_authentic_process */ int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid); int pcmk__connect_generic_ipc(crm_ipc_t *ipc); int pcmk__ipc_fd(crm_ipc_t *ipc, int *fd); int pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type, int attempts); /* * Server-related */ typedef struct pcmk__client_s pcmk__client_t; struct pcmk__remote_s { /* Shared */ char *buffer; size_t buffer_size; size_t buffer_offset; int auth_timeout; int tcp_socket; mainloop_io_t *source; time_t uptime; char *start_state; /* CIB-only */ char *token; /* TLS only */ // Must be created by pcmk__new_tls_session() gnutls_session_t tls_session; }; enum pcmk__client_flags { // Lower 32 bits are reserved for server (not library) use // Next 8 bits are reserved for client type (sort of a cheap enum) //! Client uses plain IPC pcmk__client_ipc = (UINT64_C(1) << 32), //! Client uses TCP connection pcmk__client_tcp = (UINT64_C(1) << 33), //! Client uses TCP with TLS pcmk__client_tls = (UINT64_C(1) << 34), // The rest are client attributes //! Client IPC is proxied pcmk__client_proxied = (UINT64_C(1) << 40), //! Client is run by root or cluster user pcmk__client_privileged = (UINT64_C(1) << 41), //! Local client to be proxied pcmk__client_to_proxy = (UINT64_C(1) << 42), /*! * \brief Client IPC connection accepted * * Used only for remote CIB connections via \c PCMK_XA_REMOTE_TLS_PORT. */ pcmk__client_authenticated = (UINT64_C(1) << 43), //! Client TLS handshake is complete pcmk__client_tls_handshake_complete = (UINT64_C(1) << 44), }; #define PCMK__CLIENT_TYPE(client) ((client)->flags & UINT64_C(0xff00000000)) struct pcmk__client_s { unsigned int pid; char *id; char *name; char *user; uint64_t flags; // Group of pcmk__client_flags int request_id; void *userdata; int event_timer; GQueue *event_queue; /* Depending on the client type, only some of the following will be * populated/valid. @TODO Maybe convert to a union. */ qb_ipcs_connection_t *ipcs; /* IPC */ struct pcmk__remote_s *remote; /* TCP/TLS */ unsigned int queue_backlog; /* IPC queue length after last flush */ unsigned int queue_max; /* Evict client whose queue grows this big */ }; #define pcmk__set_client_flags(client, flags_to_set) do { \ (client)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Client", pcmk__client_name(client), \ (client)->flags, (flags_to_set), #flags_to_set); \ } while (0) #define pcmk__clear_client_flags(client, flags_to_clear) do { \ (client)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Client", pcmk__client_name(client), \ (client)->flags, (flags_to_clear), #flags_to_clear); \ } while (0) #define pcmk__set_ipc_flags(ipc_flags, ipc_name, flags_to_set) do { \ ipc_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "IPC", (ipc_name), \ (ipc_flags), (flags_to_set), \ #flags_to_set); \ } while (0) #define pcmk__clear_ipc_flags(ipc_flags, ipc_name, flags_to_clear) do { \ ipc_flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "IPC", (ipc_name), \ (ipc_flags), (flags_to_clear), \ #flags_to_clear); \ } while (0) guint pcmk__ipc_client_count(void); void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data); void pcmk__client_cleanup(void); pcmk__client_t *pcmk__find_client(const qb_ipcs_connection_t *c); pcmk__client_t *pcmk__find_client_by_id(const char *id); const char *pcmk__client_name(const pcmk__client_t *c); const char *pcmk__client_type_str(uint64_t client_type); pcmk__client_t *pcmk__new_unauth_client(void *key); pcmk__client_t *pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid); void pcmk__free_client(pcmk__client_t *c); void pcmk__drop_all_clients(qb_ipcs_service_t *s); void pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax); xmlNode *pcmk__ipc_create_ack_as(const char *function, int line, uint32_t flags, const char *tag, const char *ver, crm_exit_t status); #define pcmk__ipc_create_ack(flags, tag, ver, st) \ pcmk__ipc_create_ack_as(__func__, __LINE__, (flags), (tag), (ver), (st)) int pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, uint32_t request, uint32_t flags, const char *tag, const char *ver, crm_exit_t status); #define pcmk__ipc_send_ack(c, req, flags, tag, ver, st) \ pcmk__ipc_send_ack_as(__func__, __LINE__, (c), (req), (flags), (tag), (ver), (st)) int pcmk__ipc_prepare_iov(uint32_t request, const xmlNode *message, - uint32_t max_send_size, struct iovec **result, ssize_t *bytes); int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, const xmlNode *message, uint32_t flags); int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags); xmlNode *pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags); int pcmk__client_pid(qb_ipcs_connection_t *c); void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); qb_ipcs_service_t *pcmk__serve_schedulerd_ipc(struct qb_ipcs_service_handlers *cb); qb_ipcs_service_t *pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb); void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb); void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm); static inline const char * pcmk__ipc_sys_name(const char *ipc_name, const char *fallback) { return ipc_name ? ipc_name : ((crm_system_name ? crm_system_name : fallback)); } const char *pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state); const char *pcmk__controld_api_reply2str(enum pcmk_controld_api_reply reply); const char *pcmk__pcmkd_api_reply2str(enum pcmk_pacemakerd_api_reply reply); #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_IPC_INTERNAL__H diff --git a/include/crm/common/mainloop.h b/include/crm/common/mainloop.h index ebdc6b092a..30bde33111 100644 --- a/include/crm/common/mainloop.h +++ b/include/crm/common/mainloop.h @@ -1,200 +1,201 @@ /* - * Copyright 2009-2024 the Pacemaker project contributors + * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_MAINLOOP__H #define PCMK__CRM_COMMON_MAINLOOP__H #include // bool #include // sighandler_t #include // pid_t, ssize_t #include // gpointer, gboolean, guint, GSourceFunc, GMainLoop #include // qb_ipcs_service_t, etc. #include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Wrappers for and extensions to glib mainloop * \ingroup core */ enum mainloop_child_flags { /* don't kill pid group on timeout, only kill the pid */ mainloop_leave_pid_group = 0x01, }; // NOTE: sbd (as of at least 1.5.2) uses this typedef struct trigger_s crm_trigger_t; typedef struct mainloop_io_s mainloop_io_t; typedef struct mainloop_child_s mainloop_child_t; // NOTE: sbd (as of at least 1.5.2) uses this typedef struct mainloop_timer_s mainloop_timer_t; void mainloop_cleanup(void); // NOTE: sbd (as of at least 1.5.2) uses this crm_trigger_t *mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata); // NOTE: sbd (as of at least 1.5.2) uses this void mainloop_set_trigger(crm_trigger_t * source); void mainloop_trigger_complete(crm_trigger_t * trig); gboolean mainloop_destroy_trigger(crm_trigger_t * source); #ifndef HAVE_SIGHANDLER_T typedef void (*sighandler_t)(int); #endif sighandler_t crm_signal_handler(int sig, sighandler_t dispatch); // NOTE: sbd (as of at least 1.5.2) uses this gboolean mainloop_add_signal(int sig, void (*dispatch) (int sig)); gboolean mainloop_destroy_signal(int sig); bool mainloop_timer_running(mainloop_timer_t *t); // NOTE: sbd (as of at least 1.5.2) uses this void mainloop_timer_start(mainloop_timer_t *t); // NOTE: sbd (as of at least 1.5.2) uses this void mainloop_timer_stop(mainloop_timer_t *t); guint mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms); // NOTE: sbd (as of at least 1.5.2) uses this mainloop_timer_t *mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata); void mainloop_timer_del(mainloop_timer_t *t); struct ipc_client_callbacks { /*! * \brief Dispatch function for an IPC connection used as mainloop source * * \param[in] buffer Message read from IPC connection * \param[in] length Number of bytes in \p buffer * \param[in] userdata User data passed when creating mainloop source * * \return Negative value to remove source, anything else to keep it */ int (*dispatch) (const char *buffer, ssize_t length, gpointer userdata); /*! * \brief Destroy function for mainloop IPC connection client data * * \param[in,out] userdata User data passed when creating mainloop source */ void (*destroy) (gpointer userdata); }; qb_ipcs_service_t *mainloop_add_ipc_server(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers *callbacks); /*! * \brief Start server-side API end-point, hooked into the internal event loop * * \param[in] name name of the IPC end-point ("address" for the client) * \param[in] type selects libqb's IPC back-end (or use #QB_IPC_NATIVE) * \param[in] callbacks defines libqb's IPC service-level handlers * \param[in] priority priority relative to other events handled in the * abstract handling loop, use #QB_LOOP_MED when unsure * * \return libqb's opaque handle to the created service abstraction * * \note For portability concerns, do not use this function if you keep * \p priority as #QB_LOOP_MED, stick with #mainloop_add_ipc_server * (with exactly such semantics) instead (once you link with this new * symbol employed, you can't downgrade the library freely anymore). * * \note The intended effect will only get fully reflected when run-time * linked to patched libqb: https://github.com/ClusterLabs/libqb/pull/352 */ qb_ipcs_service_t *mainloop_add_ipc_server_with_prio(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers *callbacks, enum qb_loop_priority prio); void mainloop_del_ipc_server(qb_ipcs_service_t * server); +// @COMPAT max_size parameter is deprecated and unused since 3.0.1 mainloop_io_t *mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata, struct ipc_client_callbacks *callbacks); void mainloop_del_ipc_client(mainloop_io_t * client); crm_ipc_t *mainloop_get_ipc_client(mainloop_io_t * client); struct mainloop_fd_callbacks { /*! * \brief Dispatch function for mainloop file descriptor with data ready * * \param[in,out] userdata User data passed when creating mainloop source * * \return Negative value to remove source, anything else to keep it */ int (*dispatch) (gpointer userdata); /*! * \brief Destroy function for mainloop file descriptor client data * * \param[in,out] userdata User data passed when creating mainloop source */ void (*destroy) (gpointer userdata); }; mainloop_io_t *mainloop_add_fd(const char *name, int priority, int fd, void *userdata, struct mainloop_fd_callbacks *callbacks); void mainloop_del_fd(mainloop_io_t * client); /* * Create a new tracked process * To track a process group, use -pid */ void mainloop_child_add(pid_t pid, int timeout, const char *desc, void *userdata, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)); void mainloop_child_add_with_flags(pid_t pid, int timeout, const char *desc, void *userdata, enum mainloop_child_flags, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)); void *mainloop_child_userdata(mainloop_child_t * child); int mainloop_child_timeout(mainloop_child_t * child); const char *mainloop_child_name(mainloop_child_t * child); pid_t mainloop_child_pid(mainloop_child_t * child); void mainloop_clear_child_userdata(mainloop_child_t * child); gboolean mainloop_child_kill(pid_t pid); void pcmk_quit_main_loop(GMainLoop *mloop, unsigned int n); void pcmk_drain_main_loop(GMainLoop *mloop, guint timer_ms, bool (*check)(guint)); #define G_PRIORITY_MEDIUM (G_PRIORITY_HIGH/2) #ifdef __cplusplus } #endif #endif diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h index 2acf037a95..95813a73d5 100644 --- a/include/crm/common/options_internal.h +++ b/include/crm/common/options_internal.h @@ -1,250 +1,249 @@ /* * Copyright 2006-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_OPTIONS_INTERNAL__H #define PCMK__CRM_COMMON_OPTIONS_INTERNAL__H #ifndef PCMK__CONFIG_H #define PCMK__CONFIG_H #include // _Noreturn #endif #include // GHashTable #include // bool #include // pcmk_parse_interval_spec() #include // pcmk__output_t #ifdef __cplusplus extern "C" { #endif _Noreturn void pcmk__cli_help(char cmd); /* * Environment variable option handling */ const char *pcmk__env_option(const char *option); void pcmk__set_env_option(const char *option, const char *value, bool compat); bool pcmk__env_option_enabled(const char *daemon, const char *option); /* * Cluster option handling */ /*! * \internal * \brief Option flags */ enum pcmk__opt_flags { pcmk__opt_none = 0U, //!< No additional information /*! * \brief In CIB manager metadata * * \deprecated This flag will be removed with CIB manager metadata */ pcmk__opt_based = (1U << 0), /*! * \brief In controller metadata * * \deprecated This flag will be removed with controller metadata */ pcmk__opt_controld = (1U << 1), /*! * \brief In scheduler metadata * * \deprecated This flag will be removed with scheduler metadata */ pcmk__opt_schedulerd = (1U << 2), pcmk__opt_advanced = (1U << 3), //!< Advanced use only pcmk__opt_generated = (1U << 4), //!< Generated by Pacemaker pcmk__opt_deprecated = (1U << 5), //!< Option is deprecated pcmk__opt_fencing = (1U << 6), //!< Common fencing resource parameter pcmk__opt_primitive = (1U << 7), //!< Primitive resource meta-attribute }; typedef struct pcmk__cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; bool (*is_valid)(const char *); uint32_t flags; //!< Group of enum pcmk__opt_flags const char *description_short; const char *description_long; } pcmk__cluster_option_t; const char *pcmk__cluster_option(GHashTable *options, const char *name); int pcmk__output_cluster_options(pcmk__output_t *out, const char *name, const char *desc_short, const char *desc_long, uint32_t filter, bool all); int pcmk__output_fencing_params(pcmk__output_t *out, const char *name, const char *desc_short, const char *desc_long, bool all); int pcmk__output_primitive_meta(pcmk__output_t *out, const char *name, const char *desc_short, const char *desc_long, bool all); int pcmk__daemon_metadata(pcmk__output_t *out, const char *name, const char *short_desc, const char *long_desc, enum pcmk__opt_flags filter); void pcmk__validate_cluster_options(GHashTable *options); bool pcmk__valid_interval_spec(const char *value); bool pcmk__valid_boolean(const char *value); bool pcmk__valid_int(const char *value); bool pcmk__valid_positive_int(const char *value); bool pcmk__valid_no_quorum_policy(const char *value); bool pcmk__valid_percentage(const char *value); bool pcmk__valid_placement_strategy(const char *value); // from watchdog.c long pcmk__get_sbd_watchdog_timeout(void); bool pcmk__get_sbd_sync_resource_startup(void); long pcmk__auto_stonith_watchdog_timeout(void); bool pcmk__valid_stonith_watchdog_timeout(const char *value); // Constants for environment variable names #define PCMK__ENV_AUTHKEY_LOCATION "authkey_location" #define PCMK__ENV_BLACKBOX "blackbox" #define PCMK__ENV_CA_FILE "ca_file" #define PCMK__ENV_CALLGRIND_ENABLED "callgrind_enabled" #define PCMK__ENV_CERT_FILE "cert_file" #define PCMK__ENV_CLUSTER_TYPE "cluster_type" #define PCMK__ENV_CRL_FILE "crl_file" #define PCMK__ENV_DEBUG "debug" #define PCMK__ENV_DH_MAX_BITS "dh_max_bits" #define PCMK__ENV_FAIL_FAST "fail_fast" -#define PCMK__ENV_IPC_BUFFER "ipc_buffer" #define PCMK__ENV_IPC_TYPE "ipc_type" #define PCMK__ENV_KEY_FILE "key_file" #define PCMK__ENV_LOGFACILITY "logfacility" #define PCMK__ENV_LOGFILE "logfile" #define PCMK__ENV_LOGFILE_MODE "logfile_mode" #define PCMK__ENV_LOGPRIORITY "logpriority" #define PCMK__ENV_NODE_ACTION_LIMIT "node_action_limit" #define PCMK__ENV_NODE_START_STATE "node_start_state" #define PCMK__ENV_PANIC_ACTION "panic_action" #define PCMK__ENV_REMOTE_ADDRESS "remote_address" #define PCMK__ENV_REMOTE_SCHEMA_DIRECTORY "remote_schema_directory" #define PCMK__ENV_REMOTE_PID1 "remote_pid1" #define PCMK__ENV_REMOTE_PORT "remote_port" #define PCMK__ENV_RESPAWNED "respawned" #define PCMK__ENV_SCHEMA_DIRECTORY "schema_directory" #define PCMK__ENV_SERVICE "service" #define PCMK__ENV_STDERR "stderr" #define PCMK__ENV_TLS_PRIORITIES "tls_priorities" #define PCMK__ENV_TRACE_BLACKBOX "trace_blackbox" #define PCMK__ENV_TRACE_FILES "trace_files" #define PCMK__ENV_TRACE_FORMATS "trace_formats" #define PCMK__ENV_TRACE_FUNCTIONS "trace_functions" #define PCMK__ENV_TRACE_TAGS "trace_tags" #define PCMK__ENV_VALGRIND_ENABLED "valgrind_enabled" // Constants for meta-attribute names #define PCMK__META_CLONE "clone" #define PCMK__META_CONTAINER "container" #define PCMK__META_DIGESTS_ALL "digests-all" #define PCMK__META_DIGESTS_SECURE "digests-secure" #define PCMK__META_INTERNAL_RSC "internal_rsc" #define PCMK__META_MIGRATE_SOURCE "migrate_source" #define PCMK__META_MIGRATE_TARGET "migrate_target" #define PCMK__META_ON_NODE "on_node" #define PCMK__META_ON_NODE_UUID "on_node_uuid" #define PCMK__META_OP_NO_WAIT "op_no_wait" #define PCMK__META_OP_TARGET_RC "op_target_rc" #define PCMK__META_PHYSICAL_HOST "physical-host" #define PCMK__META_STONITH_ACTION "stonith_action" /* @TODO Plug these in. Currently, they're never set. These are op attrs for use * with https://projects.clusterlabs.org/T382. */ #define PCMK__META_CLEAR_FAILURE_OP "clear_failure_op" #define PCMK__META_CLEAR_FAILURE_INTERVAL "clear_failure_interval" // @COMPAT Deprecated alias for PCMK__META_PROMOTED_MAX since 2.0.0 #define PCMK__META_PROMOTED_MAX_LEGACY "master-max" // @COMPAT Deprecated alias for PCMK__META_PROMOTED_NODE_MAX since 2.0.0 #define PCMK__META_PROMOTED_NODE_MAX_LEGACY "master-node-max" // Constants for enumerated values #define PCMK__VALUE_ATTRD "attrd" #define PCMK__VALUE_BOLD "bold" #define PCMK__VALUE_BROADCAST "broadcast" #define PCMK__VALUE_CIB "cib" #define PCMK__VALUE_CIB_DIFF_NOTIFY "cib_diff_notify" #define PCMK__VALUE_CIB_NOTIFY "cib_notify" #define PCMK__VALUE_CIB_POST_NOTIFY "cib_post_notify" #define PCMK__VALUE_CIB_PRE_NOTIFY "cib_pre_notify" #define PCMK__VALUE_CIB_UPDATE_CONFIRMATION "cib_update_confirmation" #define PCMK__VALUE_CLUSTER "cluster" #define PCMK__VALUE_CRMD "crmd" #define PCMK__VALUE_EN "en" #define PCMK__VALUE_EPOCH "epoch" #define PCMK__VALUE_HEALTH_RED "health_red" #define PCMK__VALUE_HEALTH_YELLOW "health_yellow" #define PCMK__VALUE_INIT "init" #define PCMK__VALUE_LOCAL "local" #define PCMK__VALUE_LOST "lost" #define PCMK__VALUE_LRMD "lrmd" #define PCMK__VALUE_MAINT "maint" #define PCMK__VALUE_OUTPUT "output" #define PCMK__VALUE_PASSWORD "password" #define PCMK__VALUE_PRIMITIVE "primitive" #define PCMK__VALUE_REFRESH "refresh" #define PCMK__VALUE_REQUEST "request" #define PCMK__VALUE_RESPONSE "response" #define PCMK__VALUE_RSC_FAILED "rsc-failed" #define PCMK__VALUE_RSC_FAILURE_IGNORED "rsc-failure-ignored" #define PCMK__VALUE_RSC_MANAGED "rsc-managed" #define PCMK__VALUE_RSC_MULTIPLE "rsc-multiple" #define PCMK__VALUE_RSC_OK "rsc-ok" #define PCMK__VALUE_RUNNING "running" #define PCMK__VALUE_SCHEDULER "scheduler" #define PCMK__VALUE_SHUTDOWN_COMPLETE "shutdown_complete" #define PCMK__VALUE_SHUTTING_DOWN "shutting_down" #define PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE "st-async-timeout-value" #define PCMK__VALUE_ST_NOTIFY "st_notify" #define PCMK__VALUE_ST_NOTIFY_DISCONNECT "st_notify_disconnect" #define PCMK__VALUE_ST_NOTIFY_FENCE "st_notify_fence" #define PCMK__VALUE_ST_NOTIFY_HISTORY "st_notify_history" #define PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED "st_notify_history_synced" #define PCMK__VALUE_STARTING_DAEMONS "starting_daemons" #define PCMK__VALUE_STONITH_NG "stonith-ng" #define PCMK__VALUE_WAIT_FOR_PING "wait_for_ping" #define PCMK__VALUE_WARNING "warning" /* @COMPAT Deprecated since 2.1.7 (used with PCMK__XA_ORDERING attribute of * resource sets) */ #define PCMK__VALUE_GROUP "group" // @COMPAT Drop when daemon metadata commands are dropped #define PCMK__VALUE_TIME "time" #ifdef __cplusplus } #endif #endif // PCMK__OPTIONS_INTERNAL__H diff --git a/lib/cib/cib_native.c b/lib/cib/cib_native.c index 274c43b9d1..0182be4002 100644 --- a/lib/cib/cib_native.c +++ b/lib/cib/cib_native.c @@ -1,486 +1,486 @@ /* * Copyright 2004 International Business Machines - * Later changes copyright 2004-2024 the Pacemaker project contributors + * Later changes copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include typedef struct cib_native_opaque_s { char *token; crm_ipc_t *ipc; void (*dnotify_fn) (gpointer user_data); mainloop_io_t *source; } cib_native_opaque_t; static int cib_native_perform_op_delegate(cib_t *cib, const char *op, const char *host, const char *section, xmlNode *data, xmlNode **output_data, int call_options, const char *user_name) { int rc = pcmk_ok; int reply_id = 0; enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; cib_native_opaque_t *native = cib->variant_opaque; if (cib->state == cib_disconnected) { return -ENOTCONN; } if (output_data != NULL) { *output_data = NULL; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } if (call_options & cib_sync_call) { pcmk__set_ipc_flags(ipc_flags, "client", crm_ipc_client_response); } rc = cib__create_op(cib, op, host, section, data, call_options, user_name, NULL, &op_msg); if (rc != pcmk_ok) { return rc; } if (pcmk_is_set(call_options, cib_transaction)) { rc = cib__extend_transaction(cib, op_msg); goto done; } crm_trace("Sending %s message to the CIB manager (timeout=%ds)", op, cib->call_timeout); rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, cib->call_timeout * 1000, &op_reply); if (rc < 0) { crm_err("Couldn't perform %s operation (timeout=%ds): %s (%d)", op, cib->call_timeout, pcmk_strerror(rc), rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & cib_sync_call)) { crm_trace("Async call, returning %d", cib->call_id); CRM_CHECK(cib->call_id != 0, rc = -ENOMSG; goto done); rc = cib->call_id; goto done; } rc = pcmk_ok; crm_element_value_int(op_reply, PCMK__XA_CIB_CALLID, &reply_id); if (reply_id == cib->call_id) { xmlNode *wrapper = pcmk__xe_first_child(op_reply, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *tmp = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); crm_trace("Synchronous reply %d received", reply_id); if (crm_element_value_int(op_reply, PCMK__XA_CIB_RC, &rc) != 0) { rc = -EPROTO; } if (output_data == NULL || (call_options & cib_discard_reply)) { crm_trace("Discarding reply"); } else { *output_data = pcmk__xml_copy(NULL, tmp); } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); rc = -ENOMSG; goto done; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, cib->call_id); crm_log_xml_err(op_reply, "Old reply"); rc = -ENOMSG; goto done; } if (op_reply == NULL && cib->state == cib_disconnected) { rc = -ENOTCONN; } else if (rc == pcmk_ok && op_reply == NULL) { rc = -ETIME; } switch (rc) { case pcmk_ok: case -EPERM: break; /* This is an internal value that clients do not and should not care about */ case -pcmk_err_diff_resync: rc = pcmk_ok; break; /* These indicate internal problems */ case -EPROTO: case -ENOMSG: crm_err("Call failed: %s", pcmk_strerror(rc)); if (op_reply) { crm_log_xml_err(op_reply, "Invalid reply"); } break; default: if (!pcmk__str_eq(op, PCMK__CIB_REQUEST_QUERY, pcmk__str_none)) { crm_warn("Call failed: %s", pcmk_strerror(rc)); } } done: if (!crm_ipc_connected(native->ipc)) { crm_err("The CIB manager disconnected"); cib->state = cib_disconnected; } pcmk__xml_free(op_msg); pcmk__xml_free(op_reply); return rc; } static int cib_native_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; xmlNode *msg = NULL; cib_t *cib = userdata; crm_trace("dispatching %p", userdata); if (cib == NULL) { crm_err("No CIB!"); return 0; } msg = pcmk__xml_parse(buffer); if (msg == NULL) { crm_warn("Received a NULL message from the CIB manager"); return 0; } /* do callbacks */ type = crm_element_value(msg, PCMK__XA_T); crm_trace("Activating %s callbacks...", type); crm_log_xml_explicit(msg, "cib-reply"); if (pcmk__str_eq(type, PCMK__VALUE_CIB, pcmk__str_none)) { cib_native_callback(cib, msg, 0, 0); } else if (pcmk__str_eq(type, PCMK__VALUE_CIB_NOTIFY, pcmk__str_none)) { g_list_foreach(cib->notify_list, cib_native_notify, msg); } else { crm_err("Unknown message type: %s", type); } pcmk__xml_free(msg); return 0; } static void cib_native_destroy(void *userdata) { cib_t *cib = userdata; cib_native_opaque_t *native = cib->variant_opaque; crm_trace("destroying %p", userdata); cib->state = cib_disconnected; native->source = NULL; native->ipc = NULL; if (native->dnotify_fn) { native->dnotify_fn(userdata); } } static int cib_native_signoff(cib_t *cib) { cib_native_opaque_t *native = cib->variant_opaque; crm_debug("Disconnecting from the CIB manager"); cib_free_notify(cib); remove_cib_op_callback(0, TRUE); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } cib->cmds->end_transaction(cib, false, cib_none); cib->state = cib_disconnected; cib->type = cib_no_connection; return pcmk_ok; } static int cib_native_signon(cib_t *cib, const char *name, enum cib_conn_type type) { int rc = pcmk_ok; const char *channel = NULL; cib_native_opaque_t *native = cib->variant_opaque; xmlNode *hello = NULL; struct ipc_client_callbacks cib_callbacks = { .dispatch = cib_native_dispatch_internal, .destroy = cib_native_destroy }; if (name == NULL) { name = pcmk__s(crm_system_name, "client"); } cib->call_timeout = PCMK__IPC_TIMEOUT; if (type == cib_command) { cib->state = cib_connected_command; channel = PCMK__SERVER_BASED_RW; } else if (type == cib_command_nonblocking) { cib->state = cib_connected_command; channel = PCMK__SERVER_BASED_SHM; } else if (type == cib_query) { cib->state = cib_connected_query; channel = PCMK__SERVER_BASED_RO; } else { return -ENOTCONN; } crm_trace("Connecting %s channel", channel); - native->source = mainloop_add_ipc_client(channel, G_PRIORITY_HIGH, - 512 * 1024, cib, &cib_callbacks); + native->source = mainloop_add_ipc_client(channel, G_PRIORITY_HIGH, 0, cib, + &cib_callbacks); native->ipc = mainloop_get_ipc_client(native->source); if (rc != pcmk_ok || native->ipc == NULL || !crm_ipc_connected(native->ipc)) { crm_info("Could not connect to CIB manager for %s", name); rc = -ENOTCONN; } if (rc == pcmk_ok) { rc = cib__create_op(cib, CRM_OP_REGISTER, NULL, NULL, NULL, cib_sync_call, NULL, name, &hello); } if (rc == pcmk_ok) { xmlNode *reply = NULL; if (crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply) > 0) { const char *msg_type = crm_element_value(reply, PCMK__XA_CIB_OP); crm_log_xml_trace(reply, "reg-reply"); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_info("Reply to CIB registration message has unknown type " "'%s'", msg_type); rc = -EPROTO; } else { native->token = crm_element_value_copy(reply, PCMK__XA_CIB_CLIENTID); if (native->token == NULL) { rc = -EPROTO; } } pcmk__xml_free(reply); } else { rc = -ECOMM; } pcmk__xml_free(hello); } if (rc == pcmk_ok) { crm_info("Successfully connected to CIB manager for %s", name); return pcmk_ok; } crm_info("Connection to CIB manager for %s failed: %s", name, pcmk_strerror(rc)); cib_native_signoff(cib); return rc; } static int cib_native_free(cib_t *cib) { int rc = pcmk_ok; if (cib->state != cib_disconnected) { rc = cib_native_signoff(cib); } if (cib->state == cib_disconnected) { cib_native_opaque_t *native = cib->variant_opaque; free(native->token); free(cib->variant_opaque); free(cib->cmds); free(cib->user); free(cib); } return rc; } static int cib_native_register_notification(cib_t *cib, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = pcmk__xe_create(NULL, PCMK__XE_CIB_CALLBACK); cib_native_opaque_t *native = cib->variant_opaque; if (cib->state != cib_disconnected) { crm_xml_add(notify_msg, PCMK__XA_CIB_OP, PCMK__VALUE_CIB_NOTIFY); crm_xml_add(notify_msg, PCMK__XA_CIB_NOTIFY_TYPE, callback); crm_xml_add_int(notify_msg, PCMK__XA_CIB_NOTIFY_ACTIVATE, enabled); rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, 1000 * cib->call_timeout, NULL); if (rc <= 0) { crm_trace("Notification not registered: %d", rc); rc = -ECOMM; } } pcmk__xml_free(notify_msg); return rc; } static int cib_native_set_connection_dnotify(cib_t *cib, void (*dnotify) (gpointer user_data)) { cib_native_opaque_t *native = NULL; if (cib == NULL) { crm_err("No CIB!"); return FALSE; } native = cib->variant_opaque; native->dnotify_fn = dnotify; return pcmk_ok; } /*! * \internal * \brief Get the given CIB connection's unique client identifier * * These can be used to check whether this client requested the action that * triggered a CIB notification. * * \param[in] cib CIB connection * \param[out] async_id If not \p NULL, where to store asynchronous client ID * \param[out] sync_id If not \p NULL, where to store synchronous client ID * * \return Legacy Pacemaker return code (specifically, \p pcmk_ok) * * \note This is the \p cib_native variant implementation of * \p cib_api_operations_t:client_id(). * \note For \p cib_native objects, \p async_id and \p sync_id are the same. * \note The client ID is assigned during CIB sign-on. */ static int cib_native_client_id(const cib_t *cib, const char **async_id, const char **sync_id) { cib_native_opaque_t *native = cib->variant_opaque; if (async_id != NULL) { *async_id = native->token; } if (sync_id != NULL) { *sync_id = native->token; } return pcmk_ok; } cib_t * cib_native_new(void) { cib_native_opaque_t *native = NULL; cib_t *cib = cib_new_variant(); if (cib == NULL) { return NULL; } native = calloc(1, sizeof(cib_native_opaque_t)); if (native == NULL) { free(cib); return NULL; } cib->variant = cib_native; cib->variant_opaque = native; native->ipc = NULL; native->source = NULL; native->dnotify_fn = NULL; /* assign variant specific ops */ cib->delegate_fn = cib_native_perform_op_delegate; cib->cmds->signon = cib_native_signon; cib->cmds->signoff = cib_native_signoff; cib->cmds->free = cib_native_free; cib->cmds->register_notification = cib_native_register_notification; cib->cmds->set_connection_dnotify = cib_native_set_connection_dnotify; cib->cmds->client_id = cib_native_client_id; return cib; } diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h index 8e1b453eb6..03a516f475 100644 --- a/lib/common/crmcommon_private.h +++ b/lib/common/crmcommon_private.h @@ -1,488 +1,483 @@ /* * Copyright 2018-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__COMMON_CRMCOMMON_PRIVATE__H #define PCMK__COMMON_CRMCOMMON_PRIVATE__H /* This header is for the sole use of libcrmcommon, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // uint8_t, uint32_t #include // bool #include // size_t #include // G_GNUC_INTERNAL, G_GNUC_PRINTF, gchar, etc. #include // xmlNode, xmlAttr #include // xmlChar #include // struct qb_ipc_response_header #include // pcmk_ipc_api_t, crm_ipc_t, etc. #include // crm_time_t #include // LOG_NEVER #include // mainloop_io_t #include // pcmk__output_t #include // crm_exit_t #include // pcmk_rule_input_t #include // enum xml_private_flags #ifdef __cplusplus extern "C" { #endif // Decent chunk size for processing large amounts of data #define PCMK__BUFFER_SIZE 4096 #if defined(PCMK__UNIT_TESTING) #undef G_GNUC_INTERNAL #define G_GNUC_INTERNAL #endif /*! * \internal * \brief Information about an XML node that was deleted * * When change tracking is enabled and we delete an XML node using * \c pcmk__xml_free(), we free it and add its path and position to a list in * its document's private data. This allows us to display changes, generate * patchsets, etc. * * Note that this does not happen when deleting an XML attribute using * \c pcmk__xa_remove(). In that case: * * If \c force is \c true, we remove the attribute without any tracking. * * If \c force is \c false, we mark the attribute as deleted but leave it in * place until we commit changes. */ typedef struct pcmk__deleted_xml_s { gchar *path; //!< XPath expression identifying the deleted node int position; //!< Position of the deleted node among its siblings } pcmk__deleted_xml_t; /*! * \internal * \brief Private data for an XML node */ typedef struct xml_node_private_s { uint32_t check; //!< Magic number for checking integrity uint32_t flags; //!< Group of enum xml_private_flags } xml_node_private_t; /*! * \internal * \brief Private data for an XML document */ typedef struct xml_doc_private_s { uint32_t check; //!< Magic number for checking integrity uint32_t flags; //!< Group of enum xml_private_flags char *acl_user; //!< User affected by \c acls (for logging) //! ACLs to check requested changes against (list of \c xml_acl_t) GList *acls; //! XML nodes marked as deleted (list of \c pcmk__deleted_xml_t) GList *deleted_objs; } xml_doc_private_t; // XML private data magic numbers #define PCMK__XML_DOC_PRIVATE_MAGIC 0x81726354UL #define PCMK__XML_NODE_PRIVATE_MAGIC 0x54637281UL // XML entity references #define PCMK__XML_ENTITY_AMP "&" #define PCMK__XML_ENTITY_GT ">" #define PCMK__XML_ENTITY_LT "<" #define PCMK__XML_ENTITY_QUOT """ #define pcmk__set_xml_flags(xml_priv, flags_to_set) do { \ (xml_priv)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_NEVER, "XML", "XML node", (xml_priv)->flags, \ (flags_to_set), #flags_to_set); \ } while (0) #define pcmk__clear_xml_flags(xml_priv, flags_to_clear) do { \ (xml_priv)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_NEVER, "XML", "XML node", (xml_priv)->flags, \ (flags_to_clear), #flags_to_clear); \ } while (0) G_GNUC_INTERNAL const char *pcmk__xml_element_type_text(xmlElementType type); G_GNUC_INTERNAL bool pcmk__xml_reset_node_flags(xmlNode *xml, void *user_data); G_GNUC_INTERNAL void pcmk__xml_set_parent_flags(xmlNode *xml, uint64_t flags); G_GNUC_INTERNAL void pcmk__xml_new_private_data(xmlNode *xml); G_GNUC_INTERNAL void pcmk__xml_free_private_data(xmlNode *xml); G_GNUC_INTERNAL void pcmk__xml_free_node(xmlNode *xml); G_GNUC_INTERNAL xmlDoc *pcmk__xml_new_doc(void); G_GNUC_INTERNAL int pcmk__xml_position(const xmlNode *xml, enum xml_private_flags ignore_if_set); G_GNUC_INTERNAL xmlNode *pcmk__xml_match(const xmlNode *haystack, const xmlNode *needle, bool exact); G_GNUC_INTERNAL xmlNode *pcmk__xc_match(const xmlNode *root, const xmlNode *search_comment, bool exact); G_GNUC_INTERNAL void pcmk__xc_update(xmlNode *parent, xmlNode *target, xmlNode *update); G_GNUC_INTERNAL void pcmk__free_acls(GList *acls); G_GNUC_INTERNAL void pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user); G_GNUC_INTERNAL bool pcmk__is_user_in_group(const char *user, const char *group); G_GNUC_INTERNAL void pcmk__apply_acl(xmlNode *xml); G_GNUC_INTERNAL void pcmk__apply_creation_acl(xmlNode *xml, bool check_top); G_GNUC_INTERNAL int pcmk__xa_remove(xmlAttr *attr, bool force); G_GNUC_INTERNAL void pcmk__mark_xml_attr_dirty(xmlAttr *a); G_GNUC_INTERNAL bool pcmk__xa_filterable(const char *name); G_GNUC_INTERNAL void pcmk__log_xmllib_err(void *ctx, const char *fmt, ...) G_GNUC_PRINTF(2, 3); G_GNUC_INTERNAL void pcmk__mark_xml_node_dirty(xmlNode *xml); G_GNUC_INTERNAL bool pcmk__marked_as_deleted(xmlAttrPtr a, void *user_data); G_GNUC_INTERNAL void pcmk__dump_xml_attr(const xmlAttr *attr, GString *buffer); G_GNUC_INTERNAL int pcmk__xe_set_score(xmlNode *target, const char *name, const char *value); G_GNUC_INTERNAL bool pcmk__xml_is_name_start_char(const char *utf8, int *len); G_GNUC_INTERNAL bool pcmk__xml_is_name_char(const char *utf8, int *len); /* * Date/times */ // For use with pcmk__add_time_from_xml() enum pcmk__time_component { pcmk__time_unknown, pcmk__time_years, pcmk__time_months, pcmk__time_weeks, pcmk__time_days, pcmk__time_hours, pcmk__time_minutes, pcmk__time_seconds, }; G_GNUC_INTERNAL const char *pcmk__time_component_attr(enum pcmk__time_component component); G_GNUC_INTERNAL int pcmk__add_time_from_xml(crm_time_t *t, enum pcmk__time_component component, const xmlNode *xml); G_GNUC_INTERNAL void pcmk__set_time_if_earlier(crm_time_t *target, const crm_time_t *source); /* * IPC */ #define PCMK__IPC_VERSION 1 #define PCMK__CONTROLD_API_MAJOR "1" #define PCMK__CONTROLD_API_MINOR "0" // IPC behavior that varies by daemon typedef struct pcmk__ipc_methods_s { /*! * \internal * \brief Allocate any private data needed by daemon IPC * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code */ int (*new_data)(pcmk_ipc_api_t *api); /*! * \internal * \brief Free any private data used by daemon IPC * * \param[in,out] api_data Data allocated by new_data() method */ void (*free_data)(void *api_data); /*! * \internal * \brief Perform daemon-specific handling after successful connection * * Some daemons require clients to register before sending any other * commands. The controller requires a CRM_OP_HELLO (with no reply), and * the CIB manager, executor, and fencer require a CRM_OP_REGISTER (with a * reply). Ideally this would be consistent across all daemons, but for now * this allows each to do its own authorization. * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code */ int (*post_connect)(pcmk_ipc_api_t *api); /*! * \internal * \brief Check whether an IPC request results in a reply * * \param[in,out] api IPC API connection * \param[in] request IPC request XML * * \return true if request would result in an IPC reply, false otherwise */ bool (*reply_expected)(pcmk_ipc_api_t *api, const xmlNode *request); /*! * \internal * \brief Perform daemon-specific handling of an IPC message * * \param[in,out] api IPC API connection * \param[in,out] msg Message read from IPC connection * * \return true if more IPC reply messages should be expected */ bool (*dispatch)(pcmk_ipc_api_t *api, xmlNode *msg); /*! * \internal * \brief Perform daemon-specific handling of an IPC disconnect * * \param[in,out] api IPC API connection */ void (*post_disconnect)(pcmk_ipc_api_t *api); } pcmk__ipc_methods_t; // Implementation of pcmk_ipc_api_t struct pcmk_ipc_api_s { enum pcmk_ipc_server server; // Daemon this IPC API instance is for enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched - size_t ipc_size_max; // maximum IPC buffer size crm_ipc_t *ipc; // IPC connection mainloop_io_t *mainloop_io; // If using mainloop, I/O source for IPC bool free_on_disconnect; // Whether disconnect should free object pcmk_ipc_callback_t cb; // Caller-registered callback (if any) void *user_data; // Caller-registered data (if any) void *api_data; // For daemon-specific use pcmk__ipc_methods_t *cmds; // Behavior that varies by daemon }; typedef struct pcmk__ipc_header_s { struct qb_ipc_response_header qb; - uint32_t size_uncompressed; - uint32_t size_compressed; + uint32_t size; uint32_t flags; uint8_t version; } pcmk__ipc_header_t; G_GNUC_INTERNAL int pcmk__send_ipc_request(pcmk_ipc_api_t *api, const xmlNode *request); G_GNUC_INTERNAL void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data); -G_GNUC_INTERNAL -unsigned int pcmk__ipc_buffer_size(unsigned int max); - G_GNUC_INTERNAL bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header); G_GNUC_INTERNAL pcmk__ipc_methods_t *pcmk__attrd_api_methods(void); G_GNUC_INTERNAL pcmk__ipc_methods_t *pcmk__controld_api_methods(void); G_GNUC_INTERNAL pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void); G_GNUC_INTERNAL pcmk__ipc_methods_t *pcmk__schedulerd_api_methods(void); /* * Logging */ //! XML is newly created #define PCMK__XML_PREFIX_CREATED "++" //! XML has been deleted #define PCMK__XML_PREFIX_DELETED "--" //! XML has been modified #define PCMK__XML_PREFIX_MODIFIED "+ " //! XML has been moved #define PCMK__XML_PREFIX_MOVED "+~" /* * Output */ G_GNUC_INTERNAL int pcmk__bare_output_new(pcmk__output_t **out, const char *fmt_name, const char *filename, char **argv); G_GNUC_INTERNAL void pcmk__register_option_messages(pcmk__output_t *out); G_GNUC_INTERNAL void pcmk__register_patchset_messages(pcmk__output_t *out); G_GNUC_INTERNAL bool pcmk__output_text_get_fancy(pcmk__output_t *out); /* * Rules */ // How node attribute values may be compared in rules enum pcmk__comparison { pcmk__comparison_unknown, pcmk__comparison_defined, pcmk__comparison_undefined, pcmk__comparison_eq, pcmk__comparison_ne, pcmk__comparison_lt, pcmk__comparison_lte, pcmk__comparison_gt, pcmk__comparison_gte, }; // How node attribute values may be parsed in rules enum pcmk__type { pcmk__type_unknown, pcmk__type_string, pcmk__type_integer, pcmk__type_number, pcmk__type_version, }; // Where to obtain reference value for a node attribute comparison enum pcmk__reference_source { pcmk__source_unknown, pcmk__source_literal, pcmk__source_instance_attrs, pcmk__source_meta_attrs, }; G_GNUC_INTERNAL enum pcmk__comparison pcmk__parse_comparison(const char *op); G_GNUC_INTERNAL enum pcmk__type pcmk__parse_type(const char *type, enum pcmk__comparison op, const char *value1, const char *value2); G_GNUC_INTERNAL enum pcmk__reference_source pcmk__parse_source(const char *source); G_GNUC_INTERNAL int pcmk__cmp_by_type(const char *value1, const char *value2, enum pcmk__type type); G_GNUC_INTERNAL int pcmk__unpack_duration(const xmlNode *duration, const crm_time_t *start, crm_time_t **end); G_GNUC_INTERNAL int pcmk__evaluate_date_spec(const xmlNode *date_spec, const crm_time_t *now); G_GNUC_INTERNAL int pcmk__evaluate_attr_expression(const xmlNode *expression, const pcmk_rule_input_t *rule_input); G_GNUC_INTERNAL int pcmk__evaluate_rsc_expression(const xmlNode *expr, const pcmk_rule_input_t *rule_input); G_GNUC_INTERNAL int pcmk__evaluate_op_expression(const xmlNode *expr, const pcmk_rule_input_t *rule_input); /* * Utils */ #define PCMK__PW_BUFFER_LEN 500 /* * Schemas */ typedef struct { unsigned char v[2]; } pcmk__schema_version_t; enum pcmk__schema_validator { pcmk__schema_validator_none, pcmk__schema_validator_rng }; typedef struct { int schema_index; char *name; /*! * List of XSLT stylesheets for upgrading from this schema version to the * next one. Sorted by the order in which they should be applied to the CIB. */ GList *transforms; void *cache; enum pcmk__schema_validator validator; pcmk__schema_version_t version; } pcmk__schema_t; G_GNUC_INTERNAL GList *pcmk__find_x_0_schema(void); #ifdef __cplusplus } #endif #endif // PCMK__COMMON_CRMCOMMON_PRIVATE__H diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index 63483e7b2d..1f65ace114 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -1,1694 +1,1610 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #if defined(HAVE_UCRED) || defined(HAVE_SOCKPEERCRED) #include #elif defined(HAVE_GETPEERUCRED) #include #endif #include #include #include #include #include /* indirectly: pcmk_err_generic */ #include #include #include #include "crmcommon_private.h" static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); /*! * \brief Create a new object for using Pacemaker daemon IPC * * \param[out] api Where to store new IPC object * \param[in] server Which Pacemaker daemon the object is for * * \return Standard Pacemaker result code * * \note The caller is responsible for freeing *api using pcmk_free_ipc_api(). * \note This is intended to supersede crm_ipc_new() but currently only * supports the controller, pacemakerd, and schedulerd IPC API. */ int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) { if (api == NULL) { return EINVAL; } *api = calloc(1, sizeof(pcmk_ipc_api_t)); if (*api == NULL) { return errno; } (*api)->server = server; if (pcmk_ipc_name(*api, false) == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return EOPNOTSUPP; } - (*api)->ipc_size_max = 0; - - // Set server methods and max_size (if not default) + // Set server methods switch (server) { case pcmk_ipc_attrd: (*api)->cmds = pcmk__attrd_api_methods(); break; case pcmk_ipc_based: - (*api)->ipc_size_max = 512 * 1024; // 512KB break; case pcmk_ipc_controld: (*api)->cmds = pcmk__controld_api_methods(); break; case pcmk_ipc_execd: break; case pcmk_ipc_fenced: break; case pcmk_ipc_pacemakerd: (*api)->cmds = pcmk__pacemakerd_api_methods(); break; case pcmk_ipc_schedulerd: (*api)->cmds = pcmk__schedulerd_api_methods(); - // @TODO max_size could vary by client, maybe take as argument? - (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB break; default: // pcmk_ipc_unknown pcmk_free_ipc_api(*api); *api = NULL; return EINVAL; } if ((*api)->cmds == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } - (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), - (*api)->ipc_size_max); + (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), 0); if ((*api)->ipc == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } // If daemon API has its own data to track, allocate it if ((*api)->cmds->new_data != NULL) { if ((*api)->cmds->new_data(*api) != pcmk_rc_ok) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } } crm_trace("Created %s API IPC object", pcmk_ipc_name(*api, true)); return pcmk_rc_ok; } static void free_daemon_specific_data(pcmk_ipc_api_t *api) { if ((api != NULL) && (api->cmds != NULL)) { if ((api->cmds->free_data != NULL) && (api->api_data != NULL)) { api->cmds->free_data(api->api_data); api->api_data = NULL; } free(api->cmds); api->cmds = NULL; } } /*! * \internal * \brief Call an IPC API event callback, if one is registed * * \param[in,out] api IPC API connection * \param[in] event_type The type of event that occurred * \param[in] status Event status * \param[in,out] event_data Event-specific data */ void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data) { if ((api != NULL) && (api->cb != NULL)) { api->cb(api, event_type, status, event_data, api->user_data); } } /*! * \internal * \brief Clean up after an IPC disconnect * * \param[in,out] user_data IPC API connection that disconnected * * \note This function can be used as a main loop IPC destroy callback. */ static void ipc_post_disconnect(gpointer user_data) { pcmk_ipc_api_t *api = user_data; crm_info("Disconnected from %s", pcmk_ipc_name(api, true)); // Perform any daemon-specific handling needed if ((api->cmds != NULL) && (api->cmds->post_disconnect != NULL)) { api->cmds->post_disconnect(api); } // Call client's registered event callback pcmk__call_ipc_callback(api, pcmk_ipc_event_disconnect, CRM_EX_DISCONNECT, NULL); /* If this is being called from a running main loop, mainloop_gio_destroy() * will free ipc and mainloop_io immediately after calling this function. * If this is called from a stopped main loop, these will leak, so the best * practice is to close the connection before stopping the main loop. */ api->ipc = NULL; api->mainloop_io = NULL; if (api->free_on_disconnect) { /* pcmk_free_ipc_api() has already been called, but did not free api * or api->cmds because this function needed them. Do that now. */ free_daemon_specific_data(api); crm_trace("Freeing IPC API object after disconnect"); free(api); } } /*! * \brief Free the contents of an IPC API object * * \param[in,out] api IPC API object to free */ void pcmk_free_ipc_api(pcmk_ipc_api_t *api) { bool free_on_disconnect = false; if (api == NULL) { return; } crm_debug("Releasing %s IPC API", pcmk_ipc_name(api, true)); if (api->ipc != NULL) { if (api->mainloop_io != NULL) { /* We need to keep the api pointer itself around, because it is the * user data for the IPC client destroy callback. That will be * triggered by the pcmk_disconnect_ipc() call below, but it might * happen later in the main loop (if still running). * * This flag tells the destroy callback to free the object. It can't * do that unconditionally, because the application might call this * function after a disconnect that happened by other means. */ free_on_disconnect = api->free_on_disconnect = true; } pcmk_disconnect_ipc(api); // Frees api if free_on_disconnect is true } if (!free_on_disconnect) { free_daemon_specific_data(api); crm_trace("Freeing IPC API object"); free(api); } } /*! * \brief Get the IPC name used with an IPC API connection * * \param[in] api IPC API connection * \param[in] for_log If true, return human-friendly name instead of IPC name * * \return IPC API's human-friendly or connection name, or if none is available, * "Pacemaker" if for_log is true and NULL if for_log is false */ const char * pcmk_ipc_name(const pcmk_ipc_api_t *api, bool for_log) { if (api == NULL) { return for_log? "Pacemaker" : NULL; } if (for_log) { const char *name = pcmk__server_log_name(api->server); return pcmk__s(name, "Pacemaker"); } switch (api->server) { // These servers do not have pcmk_ipc_api_t implementations yet case pcmk_ipc_based: case pcmk_ipc_execd: case pcmk_ipc_fenced: return NULL; default: return pcmk__server_ipc_name(api->server); } } /*! * \brief Check whether an IPC API connection is active * * \param[in,out] api IPC API connection * * \return true if IPC is connected, false otherwise */ bool pcmk_ipc_is_connected(pcmk_ipc_api_t *api) { return (api != NULL) && crm_ipc_connected(api->ipc); } /*! * \internal * \brief Call the daemon-specific API's dispatch function * * Perform daemon-specific handling of IPC reply dispatch. It is the daemon * method's responsibility to call the client's registered event callback, as * well as allocate and free any event data. * * \param[in,out] api IPC API connection * \param[in,out] message IPC reply XML to dispatch */ static bool call_api_dispatch(pcmk_ipc_api_t *api, xmlNode *message) { crm_log_xml_trace(message, "ipc-received"); if ((api->cmds != NULL) && (api->cmds->dispatch != NULL)) { return api->cmds->dispatch(api, message); } return false; } /*! * \internal * \brief Dispatch previously read IPC data * * \param[in] buffer Data read from IPC * \param[in,out] api IPC object * * \return Standard Pacemaker return code. In particular: * * pcmk_rc_ok: There are no more messages expected from the server. Quit * reading. * EINPROGRESS: There are more messages expected from the server. Keep reading. * * All other values indicate an error. */ static int dispatch_ipc_data(const char *buffer, pcmk_ipc_api_t *api) { bool more = false; xmlNode *msg; if (buffer == NULL) { crm_warn("Empty message received from %s IPC", pcmk_ipc_name(api, true)); return ENOMSG; } msg = pcmk__xml_parse(buffer); if (msg == NULL) { crm_warn("Malformed message received from %s IPC", pcmk_ipc_name(api, true)); return EPROTO; } more = call_api_dispatch(api, msg); pcmk__xml_free(msg); if (more) { return EINPROGRESS; } else { return pcmk_rc_ok; } } /*! * \internal * \brief Dispatch data read from IPC source * * \param[in] buffer Data read from IPC * \param[in] length Number of bytes of data in buffer (ignored) * \param[in,out] user_data IPC object * * \return Always 0 (meaning connection is still required) * * \note This function can be used as a main loop IPC dispatch callback. */ static int dispatch_ipc_source_data(const char *buffer, ssize_t length, gpointer user_data) { pcmk_ipc_api_t *api = user_data; CRM_CHECK(api != NULL, return 0); dispatch_ipc_data(buffer, api); return 0; } /*! * \brief Check whether an IPC connection has data available (without main loop) * * \param[in] api IPC API connection * \param[in] timeout_ms If less than 0, poll indefinitely; if 0, poll once * and return immediately; otherwise, poll for up to * this many milliseconds * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function to check whether IPC data is available. Return values of * interest include pcmk_rc_ok meaning data is available, and EAGAIN * meaning no data is available; all other values indicate errors. * \todo This does not allow the caller to poll multiple file descriptors at * once. If there is demand for that, we could add a wrapper for * pcmk__ipc_fd(api->ipc), so the caller can call poll() themselves. */ int pcmk_poll_ipc(const pcmk_ipc_api_t *api, int timeout_ms) { int rc; struct pollfd pollfd = { 0, }; if ((api == NULL) || (api->dispatch_type != pcmk_ipc_dispatch_poll)) { return EINVAL; } rc = pcmk__ipc_fd(api->ipc, &(pollfd.fd)); if (rc != pcmk_rc_ok) { crm_debug("Could not obtain file descriptor for %s IPC: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); return rc; } pollfd.events = POLLIN; rc = poll(&pollfd, 1, timeout_ms); if (rc < 0) { /* Some UNIX systems return negative and set EAGAIN for failure to * allocate memory; standardize the return code in that case */ return (errno == EAGAIN)? ENOMEM : errno; } else if (rc == 0) { return EAGAIN; } return pcmk_rc_ok; } /*! * \brief Dispatch available messages on an IPC connection (without main loop) * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function when IPC data is available. */ void pcmk_dispatch_ipc(pcmk_ipc_api_t *api) { if (api == NULL) { return; } while (crm_ipc_ready(api->ipc) > 0) { if (crm_ipc_read(api->ipc) > 0) { dispatch_ipc_data(crm_ipc_buffer(api->ipc), api); } } } // \return Standard Pacemaker return code static int connect_with_main_loop(pcmk_ipc_api_t *api) { int rc; struct ipc_client_callbacks callbacks = { .dispatch = dispatch_ipc_source_data, .destroy = ipc_post_disconnect, }; rc = pcmk__add_mainloop_ipc(api->ipc, G_PRIORITY_DEFAULT, api, &callbacks, &(api->mainloop_io)); if (rc != pcmk_rc_ok) { return rc; } crm_debug("Connected to %s IPC (attached to main loop)", pcmk_ipc_name(api, true)); /* After this point, api->mainloop_io owns api->ipc, so api->ipc * should not be explicitly freed. */ return pcmk_rc_ok; } // \return Standard Pacemaker return code static int connect_without_main_loop(pcmk_ipc_api_t *api) { int rc = pcmk__connect_generic_ipc(api->ipc); if (rc != pcmk_rc_ok) { crm_ipc_close(api->ipc); } else { crm_debug("Connected to %s IPC (without main loop)", pcmk_ipc_name(api, true)); } return rc; } /*! * \internal * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors) * * \param[in,out] api IPC API instance * \param[in] dispatch_type How IPC replies should be dispatched * \param[in] attempts How many times to try (in case of soft error) * * \return Standard Pacemaker return code */ int pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type, int attempts) { int rc = pcmk_rc_ok; if ((api == NULL) || (attempts < 1)) { return EINVAL; } if (api->ipc == NULL) { - api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), api->ipc_size_max); + api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), 0); if (api->ipc == NULL) { return ENOMEM; } } if (crm_ipc_connected(api->ipc)) { crm_trace("Already connected to %s", pcmk_ipc_name(api, true)); return pcmk_rc_ok; } api->dispatch_type = dispatch_type; crm_debug("Attempting connection to %s (up to %d time%s)", pcmk_ipc_name(api, true), attempts, pcmk__plural_s(attempts)); for (int remaining = attempts - 1; remaining >= 0; --remaining) { switch (dispatch_type) { case pcmk_ipc_dispatch_main: rc = connect_with_main_loop(api); break; case pcmk_ipc_dispatch_sync: case pcmk_ipc_dispatch_poll: rc = connect_without_main_loop(api); break; } if ((remaining == 0) || ((rc != EAGAIN) && (rc != EALREADY))) { break; // Result is final } // Retry after soft error (interrupted by signal, etc.) pcmk__sleep_ms((attempts - remaining) * 500); crm_debug("Re-attempting connection to %s (%d attempt%s remaining)", pcmk_ipc_name(api, true), remaining, pcmk__plural_s(remaining)); } if (rc != pcmk_rc_ok) { return rc; } if ((api->cmds != NULL) && (api->cmds->post_connect != NULL)) { rc = api->cmds->post_connect(api); if (rc != pcmk_rc_ok) { crm_ipc_close(api->ipc); } } return rc; } /*! * \brief Connect to a Pacemaker daemon via IPC * * \param[in,out] api IPC API instance * \param[in] dispatch_type How IPC replies should be dispatched * * \return Standard Pacemaker return code */ int pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type) { int rc = pcmk__connect_ipc(api, dispatch_type, 2); if (rc != pcmk_rc_ok) { crm_err("Connection to %s failed: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); } return rc; } /*! * \brief Disconnect an IPC API instance * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code * * \note If the connection is attached to a main loop, this function should be * called before quitting the main loop, to ensure that all memory is * freed. */ void pcmk_disconnect_ipc(pcmk_ipc_api_t *api) { if ((api == NULL) || (api->ipc == NULL)) { return; } switch (api->dispatch_type) { case pcmk_ipc_dispatch_main: { mainloop_io_t *mainloop_io = api->mainloop_io; // Make sure no code with access to api can use these again api->mainloop_io = NULL; api->ipc = NULL; mainloop_del_ipc_client(mainloop_io); // After this point api might have already been freed } break; case pcmk_ipc_dispatch_poll: case pcmk_ipc_dispatch_sync: { crm_ipc_t *ipc = api->ipc; // Make sure no code with access to api can use ipc again api->ipc = NULL; // This should always be the case already, but to be safe api->free_on_disconnect = false; crm_ipc_close(ipc); crm_ipc_destroy(ipc); ipc_post_disconnect(api); } break; } } /*! * \brief Register a callback for IPC API events * * \param[in,out] api IPC API connection * \param[in] callback Callback to register * \param[in] userdata Caller data to pass to callback * * \note This function may be called multiple times to update the callback * and/or user data. The caller remains responsible for freeing * userdata in any case (after the IPC is disconnected, if the * user data is still registered with the IPC). */ void pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, void *user_data) { if (api == NULL) { return; } api->cb = cb; api->user_data = user_data; } /*! * \internal * \brief Send an XML request across an IPC API connection * * \param[in,out] api IPC API connection * \param[in] request XML request to send * * \return Standard Pacemaker return code * * \note Daemon-specific IPC API functions should call this function to send * requests, because it handles different dispatch types appropriately. */ int pcmk__send_ipc_request(pcmk_ipc_api_t *api, const xmlNode *request) { int rc; xmlNode *reply = NULL; enum crm_ipc_flags flags = crm_ipc_flags_none; if ((api == NULL) || (api->ipc == NULL) || (request == NULL)) { return EINVAL; } crm_log_xml_trace(request, "ipc-sent"); // Synchronous dispatch requires waiting for a reply if ((api->dispatch_type == pcmk_ipc_dispatch_sync) && (api->cmds != NULL) && (api->cmds->reply_expected != NULL) && (api->cmds->reply_expected(api, request))) { flags = crm_ipc_client_response; } /* The 0 here means a default timeout of 5 seconds * * @TODO Maybe add a timeout_ms member to pcmk_ipc_api_t and a * pcmk_set_ipc_timeout() setter for it, then use it here. */ rc = crm_ipc_send(api->ipc, request, flags, 0, &reply); if (rc < 0) { return pcmk_legacy2rc(rc); } else if (rc == 0) { return ENODATA; } // With synchronous dispatch, we dispatch any reply now if (reply != NULL) { bool more = call_api_dispatch(api, reply); pcmk__xml_free(reply); while (more) { rc = crm_ipc_read(api->ipc); if (rc == -EAGAIN) { continue; } else if (rc == -ENOMSG || rc == pcmk_ok) { return pcmk_rc_ok; } else if (rc < 0) { return -rc; } rc = dispatch_ipc_data(crm_ipc_buffer(api->ipc), api); if (rc == pcmk_rc_ok) { more = false; } else if (rc == EINPROGRESS) { more = true; } else { continue; } } } return pcmk_rc_ok; } /*! * \internal * \brief Create the XML for an IPC request to purge a node from the peer cache * * \param[in] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Newly allocated IPC request XML * * \note The controller, fencer, and pacemakerd use the same request syntax, but * the attribute manager uses a different one. The CIB manager doesn't * have any syntax for it. The executor and scheduler don't connect to the * cluster layer and thus don't have or need any syntax for it. * * \todo Modify the attribute manager to accept the common syntax (as well * as its current one, for compatibility with older clients). Modify * the CIB manager to accept and honor the common syntax. Modify the * executor and scheduler to accept the syntax (immediately returning * success), just for consistency. Modify this function to use the * common syntax with all daemons if their version supports it. */ static xmlNode * create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { xmlNode *request = NULL; const char *client = crm_system_name? crm_system_name : "client"; switch (api->server) { case pcmk_ipc_attrd: request = pcmk__xe_create(NULL, __func__); crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(request, PCMK__XA_SRC, crm_system_name); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, true); crm_xml_add(request, PCMK__XA_ATTR_HOST, node_name); if (nodeid > 0) { crm_xml_add_int(request, PCMK__XA_ATTR_HOST_ID, nodeid); } break; case pcmk_ipc_controld: case pcmk_ipc_fenced: case pcmk_ipc_pacemakerd: request = pcmk__new_request(api->server, client, NULL, pcmk_ipc_name(api, false), CRM_OP_RM_NODE_CACHE, NULL); if (nodeid > 0) { crm_xml_add_ll(request, PCMK_XA_ID, (long long) nodeid); } crm_xml_add(request, PCMK_XA_UNAME, node_name); break; case pcmk_ipc_based: case pcmk_ipc_execd: case pcmk_ipc_schedulerd: break; default: // pcmk_ipc_unknown (shouldn't be possible) return NULL; } return request; } /*! * \brief Ask a Pacemaker daemon to purge a node from its peer cache * * \param[in,out] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Standard Pacemaker return code * * \note At least one of node_name or nodeid must be specified. */ int pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { int rc = 0; xmlNode *request = NULL; if (api == NULL) { return EINVAL; } if ((node_name == NULL) && (nodeid == 0)) { return EINVAL; } request = create_purge_node_request(api, node_name, nodeid); if (request == NULL) { return EOPNOTSUPP; } rc = pcmk__send_ipc_request(api, request); pcmk__xml_free(request); crm_debug("%s peer cache purge of node %s[%lu]: rc=%d", pcmk_ipc_name(api, true), node_name, (unsigned long) nodeid, rc); return rc; } /* * Generic IPC API (to eventually be deprecated as public API and made internal) */ struct crm_ipc_s { struct pollfd pfd; - unsigned int max_buf_size; // maximum bytes we can send or receive over IPC unsigned int buf_size; // size of allocated buffer int msg_size; int need_reply; char *buffer; char *server_name; // server IPC name being connected to qb_ipcc_connection_t *ipc; }; /*! * \brief Create a new (legacy) object for using Pacemaker daemon IPC * * \param[in] name IPC system name to connect to * \param[in] max_size Use a maximum IPC buffer size of at least this size * * \return Newly allocated IPC object on success, NULL otherwise * * \note The caller is responsible for freeing the result using * crm_ipc_destroy(). * \note This should be considered deprecated for use with daemons supported by * pcmk_new_ipc_api(). + * \note @COMPAT Since 3.0.1, \p max_size is ignored and the default given by + * \c crm_ipc_default_buffer_size() will be used instead. */ crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); if (client == NULL) { crm_err("Could not create IPC connection: %s", strerror(errno)); return NULL; } client->server_name = strdup(name); if (client->server_name == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client); return NULL; } - client->buf_size = pcmk__ipc_buffer_size(max_size); + client->buf_size = crm_ipc_default_buffer_size(); client->buffer = malloc(client->buf_size); if (client->buffer == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client->server_name); free(client); return NULL; } - /* Clients initiating connection pick the max buf size */ - client->max_buf_size = client->buf_size; - client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } /*! * \internal * \brief Connect a generic (not daemon-specific) IPC object * * \param[in,out] ipc Generic IPC object to connect * * \return Standard Pacemaker return code */ int pcmk__connect_generic_ipc(crm_ipc_t *ipc) { uid_t cl_uid = 0; gid_t cl_gid = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; int rc = pcmk_rc_ok; if (ipc == NULL) { return EINVAL; } ipc->need_reply = FALSE; ipc->ipc = qb_ipcc_connect(ipc->server_name, ipc->buf_size); if (ipc->ipc == NULL) { return errno; } rc = qb_ipcc_fd_get(ipc->ipc, &ipc->pfd.fd); if (rc < 0) { // -errno crm_ipc_close(ipc); return -rc; } rc = pcmk_daemon_user(&cl_uid, &cl_gid); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { crm_ipc_close(ipc); return rc; } rc = is_ipc_provider_expected(ipc->ipc, ipc->pfd.fd, cl_uid, cl_gid, &found_pid, &found_uid, &found_gid); if (rc != pcmk_rc_ok) { if (rc == pcmk_rc_ipc_unauthorized) { crm_info("%s IPC provider authentication failed: process %lld has " "uid %lld (expected %lld) and gid %lld (expected %lld)", ipc->server_name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) cl_uid, (long long) found_gid, (long long) cl_gid); } crm_ipc_close(ipc); return rc; } - ipc->max_buf_size = qb_ipcc_get_buffer_size(ipc->ipc); - if (ipc->max_buf_size > ipc->buf_size) { - free(ipc->buffer); - ipc->buffer = calloc(ipc->max_buf_size, sizeof(char)); - if (ipc->buffer == NULL) { - rc = errno; - crm_ipc_close(ipc); - return rc; - } - ipc->buf_size = ipc->max_buf_size; - } - return pcmk_rc_ok; } void crm_ipc_close(crm_ipc_t * client) { if (client) { if (client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } } void crm_ipc_destroy(crm_ipc_t * client) { if (client) { if (client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying active %s IPC connection", client->server_name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } else { crm_trace("Destroying inactive %s IPC connection", client->server_name); } free(client->buffer); free(client->server_name); free(client); } } /*! * \internal * \brief Get the file descriptor for a generic IPC object * * \param[in,out] ipc Generic IPC object to get file descriptor for * \param[out] fd Where to store file descriptor * * \return Standard Pacemaker return code */ int pcmk__ipc_fd(crm_ipc_t *ipc, int *fd) { if ((ipc == NULL) || (fd == NULL)) { return EINVAL; } if ((ipc->ipc == NULL) || (ipc->pfd.fd < 0)) { return ENOTCONN; } *fd = ipc->pfd.fd; return pcmk_rc_ok; } int crm_ipc_get_fd(crm_ipc_t * client) { int fd = -1; if (pcmk__ipc_fd(client, &fd) != pcmk_rc_ok) { crm_err("Could not obtain file descriptor for %s IPC", ((client == NULL)? "unspecified" : client->server_name)); errno = EINVAL; return -EINVAL; } return fd; } bool crm_ipc_connected(crm_ipc_t * client) { bool rc = FALSE; if (client == NULL) { crm_trace("No client"); return FALSE; } else if (client->ipc == NULL) { crm_trace("No connection"); return FALSE; } else if (client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if (rc == FALSE) { client->pfd.fd = -EINVAL; } return rc; } /*! * \brief Check whether an IPC connection is ready to be read * * \param[in,out] client Connection to check * * \return Positive value if ready to be read, 0 if not ready, -errno on error */ int crm_ipc_ready(crm_ipc_t *client) { int rc; pcmk__assert(client != NULL); if (!crm_ipc_connected(client)) { return -ENOTCONN; } client->pfd.revents = 0; rc = poll(&(client->pfd), 1, 0); return (rc < 0)? -errno : rc; } -// \return Standard Pacemaker return code -static int -crm_ipc_decompress(crm_ipc_t * client) -{ - pcmk__ipc_header_t *header = (pcmk__ipc_header_t *)(void*)client->buffer; - - if (header->size_compressed) { - int rc = 0; - unsigned int size_u = 1 + header->size_uncompressed; - /* never let buf size fall below our max size required for ipc reads. */ - unsigned int new_buf_size = QB_MAX((sizeof(pcmk__ipc_header_t) + size_u), client->max_buf_size); - char *uncompressed = pcmk__assert_alloc(1, new_buf_size); - - crm_trace("Decompressing message data %u bytes into %u bytes", - header->size_compressed, size_u); - - rc = BZ2_bzBuffToBuffDecompress(uncompressed + sizeof(pcmk__ipc_header_t), &size_u, - client->buffer + sizeof(pcmk__ipc_header_t), header->size_compressed, 1, 0); - rc = pcmk__bzlib2rc(rc); - - if (rc != pcmk_rc_ok) { - crm_err("Decompression failed: %s " QB_XS " rc=%d", - pcmk_rc_str(rc), rc); - free(uncompressed); - return rc; - } - - pcmk__assert(size_u == header->size_uncompressed); - - memcpy(uncompressed, client->buffer, sizeof(pcmk__ipc_header_t)); /* Preserve the header */ - header = (pcmk__ipc_header_t *)(void*)uncompressed; - - free(client->buffer); - client->buf_size = new_buf_size; - client->buffer = uncompressed; - } - - pcmk__assert(client->buffer[sizeof(pcmk__ipc_header_t) - + header->size_uncompressed - 1] == 0); - return pcmk_rc_ok; -} - long crm_ipc_read(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; pcmk__assert((client != NULL) && (client->ipc != NULL) && (client->buffer != NULL)); client->buffer[0] = 0; client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size, 0); if (client->msg_size >= 0) { - int rc = crm_ipc_decompress(client); - - if (rc != pcmk_rc_ok) { - return pcmk_rc2legacy(rc); - } - header = (pcmk__ipc_header_t *)(void*)client->buffer; if (!pcmk__valid_ipc_header(header)) { return -EBADMSG; } crm_trace("Received %s IPC event %d size=%u rc=%d text='%.100s'", client->server_name, header->qb.id, header->qb.size, client->msg_size, client->buffer + sizeof(pcmk__ipc_header_t)); } else { crm_trace("No message received from %s IPC: %s", client->server_name, pcmk_strerror(client->msg_size)); if (client->msg_size == -EAGAIN) { return -EAGAIN; } } if (!crm_ipc_connected(client) || client->msg_size == -ENOTCONN) { crm_err("Connection to %s IPC failed", client->server_name); } if (header) { /* Data excluding the header */ - return header->size_uncompressed; + return header->size; } return -ENOMSG; } const char * crm_ipc_buffer(crm_ipc_t * client) { pcmk__assert(client != NULL); return client->buffer + sizeof(pcmk__ipc_header_t); } uint32_t crm_ipc_buffer_flags(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; pcmk__assert(client != NULL); if (client->buffer == NULL) { return 0; } header = (pcmk__ipc_header_t *)(void*)client->buffer; return header->flags; } const char * crm_ipc_name(crm_ipc_t * client) { pcmk__assert(client != NULL); return client->server_name; } // \return Standard Pacemaker return code static int internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, ssize_t *bytes, xmlNode **reply) { pcmk__ipc_header_t *hdr = NULL; time_t timeout = 0; int32_t qb_timeout = -1; int rc = pcmk_rc_ok; if (ms_timeout > 0) { timeout = time(NULL) + 1 + pcmk__timeout_ms2s(ms_timeout); qb_timeout = 1000; } /* get the reply */ crm_trace("Expecting reply to %s IPC message %d", client->server_name, request_id); do { xmlNode *xml = NULL; *bytes = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, qb_timeout); if (*bytes <= 0) { if (!crm_ipc_connected(client)) { crm_err("%s IPC provider disconnected while waiting for message %d", client->server_name, request_id); break; } continue; } - rc = crm_ipc_decompress(client); - if (rc != pcmk_rc_ok) { - return rc; - } - hdr = (pcmk__ipc_header_t *)(void*) client->buffer; if (hdr->qb.id == request_id) { /* Got the reply we were expecting. */ break; } xml = pcmk__xml_parse(crm_ipc_buffer(client)); if (hdr->qb.id < request_id) { crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(xml, "OldIpcReply"); } else if (hdr->qb.id > request_id) { crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(xml, "ImpossibleReply"); pcmk__assert(hdr->qb.id <= request_id); } } while (time(NULL) < timeout || (timeout == 0 && *bytes == -EAGAIN)); if (*bytes > 0) { crm_trace("Received %zd-byte reply %" PRId32 " to %s IPC %d: %.100s", *bytes, hdr->qb.id, client->server_name, request_id, crm_ipc_buffer(client)); if (reply != NULL) { *reply = pcmk__xml_parse(crm_ipc_buffer(client)); } } else if (*bytes < 0) { rc = (int) -*bytes; // System errno crm_trace("No reply to %s IPC %d: %s " QB_XS " rc=%d", client->server_name, request_id, pcmk_rc_str(rc), rc); } /* If bytes == 0, we'll return that to crm_ipc_send which will interpret * that as pcmk_rc_ok, log that the IPC request failed (since we did not * give it a valid reply), and return that 0 to its callers. It's up to * the callers to take appropriate action after that. */ return rc; } /*! * \brief Send an IPC XML message * * \param[in,out] client Connection to IPC server * \param[in] message XML message to send * \param[in] flags Bitmask of crm_ipc_flags * \param[in] ms_timeout Give up if not sent within this much time * (5 seconds if 0, or no timeout if negative) * \param[out] reply Reply from server (or NULL if none) * * \return Negative errno on error, otherwise size of reply received in bytes * if reply was needed, otherwise number of bytes sent */ int crm_ipc_send(crm_ipc_t *client, const xmlNode *message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode **reply) { int rc = 0; time_t timeout = 0; ssize_t qb_rc = 0; ssize_t bytes = 0; struct iovec *iov; static uint32_t id = 0; - static int factor = 8; pcmk__ipc_header_t *header; if (client == NULL) { crm_notice("Can't send IPC request without connection (bug?): %.100s", message); return -ENOTCONN; } else if (!crm_ipc_connected(client)) { /* Don't even bother */ crm_notice("Can't send %s IPC requests: Connection closed", client->server_name); return -ENOTCONN; } if (ms_timeout == 0) { ms_timeout = 5000; } if (client->need_reply) { qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); if (qb_rc < 0) { crm_warn("Sending %s IPC disabled until pending reply received", client->server_name); return -EALREADY; } else { crm_notice("Sending %s IPC re-enabled after pending reply received", client->server_name); client->need_reply = FALSE; } } id++; CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ - rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes); + rc = pcmk__ipc_prepare_iov(id, message, &iov, &bytes); if (rc != pcmk_rc_ok) { crm_warn("Couldn't prepare %s IPC request: %s " QB_XS " rc=%d", client->server_name, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } header = iov[0].iov_base; pcmk__set_ipc_flags(header->flags, client->server_name, flags); if (pcmk_is_set(flags, crm_ipc_proxied)) { /* Don't look for a synchronous response */ pcmk__clear_ipc_flags(flags, "client", crm_ipc_client_response); } - if(header->size_compressed) { - if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) { - crm_notice("Compressed message exceeds %d0%% of configured IPC " - "limit (%u bytes); consider setting PCMK_ipc_buffer to " - "%u or higher", - factor, client->max_buf_size, 2 * client->max_buf_size); - factor++; - } - } - crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", client->server_name, header->qb.id, header->qb.size, ms_timeout); /* Send the IPC request, respecting any timeout we were passed */ if (ms_timeout > 0) { timeout = time(NULL) + 1 + pcmk__timeout_ms2s(ms_timeout); } do { qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); } while ((qb_rc == -EAGAIN) && ((timeout == 0) || (time(NULL) < timeout))); rc = (int) qb_rc; // Negative of system errno, or bytes sent if (qb_rc <= 0) { goto send_cleanup; } /* If we should not wait for a response, bail now */ if (!pcmk_is_set(flags, crm_ipc_client_response)) { crm_trace("Not waiting for reply to %s IPC request %d", client->server_name, header->qb.id); goto send_cleanup; } rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes, reply); if (rc == pcmk_rc_ok) { rc = (int) bytes; // Size of reply received } else { /* rc is either a positive system errno or a negative standard Pacemaker * return code. If it's an errno, we need to convert it back to a * negative number for comparison and return at the end of this function. */ rc = pcmk_rc2legacy(rc); if (ms_timeout > 0) { /* We didn't get the reply in time, so disable future sends for now. * The only alternative would be to close the connection since we * don't know how to detect and discard out-of-sequence replies. * * @TODO Implement out-of-sequence detection */ client->need_reply = TRUE; } } send_cleanup: if (!crm_ipc_connected(client)) { crm_notice("Couldn't send %s IPC request %d: Connection closed " QB_XS " rc=%d", client->server_name, header->qb.id, rc); } else if (rc == -ETIMEDOUT) { crm_warn("%s IPC request %d failed: %s after %dms " QB_XS " rc=%d", client->server_name, header->qb.id, pcmk_strerror(rc), ms_timeout, rc); crm_write_blackbox(0, NULL); } else if (rc <= 0) { crm_warn("%s IPC request %d failed: %s " QB_XS " rc=%d", client->server_name, header->qb.id, ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc); } pcmk_free_ipc_event(iov); return rc; } /*! * \brief Ensure an IPC provider has expected user or group * * \param[in] qb_ipc libqb client connection if available * \param[in] sock Connected Unix socket for IPC * \param[in] refuid Expected user ID * \param[in] refgid Expected group ID * \param[out] gotpid If not NULL, where to store provider's actual process ID * (or 1 on platforms where ID is not available) * \param[out] gotuid If not NULL, where to store provider's actual user ID * \param[out] gotgid If not NULL, where to store provider's actual group ID * * \return Standard Pacemaker return code * \note An actual user ID of 0 (root) will always be considered authorized, * regardless of the expected values provided. The caller can use the * output arguments to be stricter than this function. */ static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int rc = EOPNOTSUPP; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; #ifdef HAVE_QB_IPCC_AUTH_GET if (qb_ipc != NULL) { rc = qb_ipcc_auth_get(qb_ipc, &found_pid, &found_uid, &found_gid); rc = -rc; // libqb returns 0 or -errno if (rc == pcmk_rc_ok) { goto found; } } #endif #ifdef HAVE_UCRED { struct ucred ucred; socklen_t ucred_len = sizeof(ucred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) < 0) { rc = errno; } else if (ucred_len != sizeof(ucred)) { rc = EOPNOTSUPP; } else { found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; goto found; } } #endif #ifdef HAVE_SOCKPEERCRED { struct sockpeercred sockpeercred; socklen_t sockpeercred_len = sizeof(sockpeercred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &sockpeercred, &sockpeercred_len) < 0) { rc = errno; } else if (sockpeercred_len != sizeof(sockpeercred)) { rc = EOPNOTSUPP; } else { found_pid = sockpeercred.pid; found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; goto found; } } #endif #ifdef HAVE_GETPEEREID // For example, FreeBSD if (getpeereid(sock, &found_uid, &found_gid) < 0) { rc = errno; } else { found_pid = PCMK__SPECIAL_PID; goto found; } #endif #ifdef HAVE_GETPEERUCRED { ucred_t *ucred = NULL; if (getpeerucred(sock, &ucred) < 0) { rc = errno; } else { found_pid = ucred_getpid(ucred); found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ucred_free(ucred); goto found; } } #endif return rc; // If we get here, nothing succeeded found: if (gotpid != NULL) { *gotpid = found_pid; } if (gotuid != NULL) { *gotuid = found_uid; } if (gotgid != NULL) { *gotgid = found_gid; } if ((found_uid != 0) && (found_uid != refuid) && (found_gid != refgid)) { return pcmk_rc_ipc_unauthorized; } return pcmk_rc_ok; } int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int ret = is_ipc_provider_expected(NULL, sock, refuid, refgid, gotpid, gotuid, gotgid); /* The old function had some very odd return codes*/ if (ret == 0) { return 1; } else if (ret == pcmk_rc_ipc_unauthorized) { return 0; } else { return pcmk_rc2legacy(ret); } } int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid) { static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ int fd; int rc = pcmk_rc_ipc_unresponsive; int auth_rc = 0; int32_t qb_rc; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; qb_ipcc_connection_t *c; #ifdef HAVE_QB_IPCC_CONNECT_ASYNC struct pollfd pollfd = { 0, }; int poll_rc; c = qb_ipcc_connect_async(name, 0, &(pollfd.fd)); #else c = qb_ipcc_connect(name, 0); #endif if (c == NULL) { crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); rc = pcmk_rc_ipc_unresponsive; goto bail; } #ifdef HAVE_QB_IPCC_CONNECT_ASYNC pollfd.events = POLLIN; do { poll_rc = poll(&pollfd, 1, 2000); } while ((poll_rc == -1) && (errno == EINTR)); /* If poll() failed, given that disconnect function is not registered yet, * qb_ipcc_disconnect() won't clean up the socket. In any case, call * qb_ipcc_connect_continue() here so that it may fail and do the cleanup * for us. */ if (qb_ipcc_connect_continue(c) != 0) { crm_info("Could not connect to %s IPC: %s", name, (poll_rc == 0)?"timeout":strerror(errno)); rc = pcmk_rc_ipc_unresponsive; c = NULL; // qb_ipcc_connect_continue cleaned up for us goto bail; } #endif qb_rc = qb_ipcc_fd_get(c, &fd); if (qb_rc != 0) { rc = (int) -qb_rc; // System errno crm_err("Could not get fd from %s IPC: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } auth_rc = is_ipc_provider_expected(c, fd, refuid, refgid, &found_pid, &found_uid, &found_gid); if (auth_rc == pcmk_rc_ipc_unauthorized) { crm_err("Daemon (IPC %s) effectively blocked with unauthorized" " process %lld (uid: %lld, gid: %lld)", name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = pcmk_rc_ipc_unauthorized; goto bail; } if (auth_rc != pcmk_rc_ok) { rc = auth_rc; crm_err("Could not get peer credentials from %s IPC: %s " QB_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } if (gotpid != NULL) { *gotpid = found_pid; } rc = pcmk_rc_ok; if ((found_uid != refuid || found_gid != refgid) && strncmp(last_asked_name, name, sizeof(last_asked_name))) { if ((found_uid == 0) && (refuid != 0)) { crm_warn("Daemon (IPC %s) runs as root, whereas the expected" " credentials are %lld:%lld, hazard of violating" " the least privilege principle", name, (long long) refuid, (long long) refgid); } else { crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" " expected credentials are %lld:%lld, which may" " mean a different set of privileges than expected", name, (long long) found_uid, (long long) found_gid, (long long) refuid, (long long) refgid); } memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); } bail: if (c != NULL) { qb_ipcc_disconnect(c); } return rc; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include bool crm_ipc_connect(crm_ipc_t *client) { int rc = pcmk__connect_generic_ipc(client); if (rc == pcmk_rc_ok) { return true; } if ((client != NULL) && (client->ipc == NULL)) { errno = (rc > 0)? rc : ENOTCONN; crm_debug("Could not establish %s IPC connection: %s (%d)", client->server_name, pcmk_rc_str(errno), errno); } else if (rc == pcmk_rc_ipc_unauthorized) { crm_err("%s IPC provider authentication failed", (client == NULL)? "Pacemaker" : client->server_name); errno = ECONNABORTED; } else { crm_err("Could not verify authenticity of %s IPC provider", (client == NULL)? "Pacemaker" : client->server_name); errno = ENOTCONN; } return false; } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/common/ipc_common.c b/lib/common/ipc_common.c index df932f670f..4996ec6d28 100644 --- a/lib/common/ipc_common.c +++ b/lib/common/ipc_common.c @@ -1,134 +1,72 @@ /* - * Copyright 2004-2024 the Pacemaker project contributors + * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include // uint64_t #include #include #include "crmcommon_private.h" -#define MIN_MSG_SIZE 12336 // sizeof(struct qb_ipc_connection_response) -#define MAX_MSG_SIZE 128*1024 // 128k default - -/*! - * \internal - * \brief Choose an IPC buffer size in bytes - * - * \param[in] max Use this value if environment/default is lower - * - * \return Maximum of max and value of PCMK_ipc_buffer (default 128KB) +/* The IPC buffer is always 128k. If we are asked to send a message larger + * than that size, it will be split into multiple messages that must be + * reassembled on the other end. */ -unsigned int -pcmk__ipc_buffer_size(unsigned int max) -{ - static long long env_value = 0LL; // Will be bounded to unsigned int - - if (env_value == 0LL) { - const char *env_value_s = pcmk__env_option(PCMK__ENV_IPC_BUFFER); - int rc = pcmk__scan_ll(env_value_s, &env_value, MAX_MSG_SIZE); - - if (rc != pcmk_rc_ok) { - env_value = MAX_MSG_SIZE; - max = QB_MAX(max, env_value); - crm_warn("Using %u as IPC buffer size because '%s' is not " - "a valid value for PCMK_" PCMK__ENV_IPC_BUFFER ": %s", - max, env_value_s, pcmk_rc_str(rc)); - - } else if (env_value <= 0LL) { - env_value = MAX_MSG_SIZE; - max = QB_MAX(max, env_value); - crm_warn("Using %u as IPC buffer size because PCMK_" - PCMK__ENV_IPC_BUFFER " (%s) is not a positive integer", - max, env_value_s); - - } else if (env_value < MIN_MSG_SIZE) { - env_value = MIN_MSG_SIZE; - max = QB_MAX(max, env_value); - crm_debug("Using %u as IPC buffer size because PCMK_" - PCMK__ENV_IPC_BUFFER " (%s) is too small", - max, env_value_s); - - } else if (env_value > UINT_MAX) { - env_value = UINT_MAX; - max = UINT_MAX; - crm_debug("Using %u as IPC buffer size because PCMK_" - PCMK__ENV_IPC_BUFFER " (%s) is too big", - max, env_value_s); - } - } - - if (env_value > max) { - const char *source = "PCMK_" PCMK__ENV_IPC_BUFFER; - - if (env_value == MAX_MSG_SIZE) { - source = "default"; - } - crm_debug("Using IPC buffer size %lld from %s (not %u)", - env_value, source, max); - max = env_value; - } - return max; -} +#define BUFFER_SIZE (128*1024) // 128k /*! - * \brief Return pacemaker's default IPC buffer size + * \brief Return pacemaker's IPC buffer size * * \return IPC buffer size in bytes */ unsigned int crm_ipc_default_buffer_size(void) { - static unsigned int default_size = 0; - - if (default_size == 0) { - default_size = pcmk__ipc_buffer_size(0); - } - return default_size; + return BUFFER_SIZE; } /*! * \internal * \brief Check whether an IPC header is valid * * \param[in] header IPC header to check * * \return true if IPC header has a supported version, false otherwise */ bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header) { if (header == NULL) { crm_err("IPC message without header"); return false; } else if (header->version > PCMK__IPC_VERSION) { crm_err("Filtering incompatible v%d IPC message (only versions <= %d supported)", header->version, PCMK__IPC_VERSION); return false; } return true; } const char * pcmk__client_type_str(uint64_t client_type) { switch (client_type) { case pcmk__client_ipc: return "IPC"; case pcmk__client_tcp: return "TCP"; case pcmk__client_tls: return "TLS"; default: return "unknown"; } } diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c index 2a2d791053..f6bb0bddcc 100644 --- a/lib/common/ipc_server.c +++ b/lib/common/ipc_server.c @@ -1,1009 +1,947 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "crmcommon_private.h" /* Evict clients whose event queue grows this large (by default) */ #define PCMK_IPC_DEFAULT_QUEUE_MAX 500 static GHashTable *client_connections = NULL; /*! * \internal * \brief Count IPC clients * * \return Number of active IPC client connections */ guint pcmk__ipc_client_count(void) { return client_connections? g_hash_table_size(client_connections) : 0; } /*! * \internal * \brief Execute a function for each active IPC client connection * * \param[in] func Function to call * \param[in,out] user_data Pointer to pass to function * * \note The parameters are the same as for g_hash_table_foreach(). */ void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data) { if ((func != NULL) && (client_connections != NULL)) { g_hash_table_foreach(client_connections, func, user_data); } } pcmk__client_t * pcmk__find_client(const qb_ipcs_connection_t *c) { if (client_connections) { return g_hash_table_lookup(client_connections, c); } crm_trace("No client found for %p", c); return NULL; } pcmk__client_t * pcmk__find_client_by_id(const char *id) { if ((client_connections != NULL) && (id != NULL)) { gpointer key; pcmk__client_t *client = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { if (strcmp(client->id, id) == 0) { return client; } } } crm_trace("No client found with id='%s'", pcmk__s(id, "")); return NULL; } /*! * \internal * \brief Get a client identifier for use in log messages * * \param[in] c Client * * \return Client's name, client's ID, or a string literal, as available * \note This is intended to be used in format strings like "client %s". */ const char * pcmk__client_name(const pcmk__client_t *c) { if (c == NULL) { return "(unspecified)"; } else if (c->name != NULL) { return c->name; } else if (c->id != NULL) { return c->id; } else { return "(unidentified)"; } } void pcmk__client_cleanup(void) { if (client_connections != NULL) { int active = g_hash_table_size(client_connections); if (active > 0) { crm_warn("Exiting with %d active IPC client%s", active, pcmk__plural_s(active)); } g_hash_table_destroy(client_connections); client_connections = NULL; } } void pcmk__drop_all_clients(qb_ipcs_service_t *service) { qb_ipcs_connection_t *c = NULL; if (service == NULL) { return; } c = qb_ipcs_connection_first_get(service); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(service, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, pcmk__client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } } /*! * \internal * \brief Allocate a new pcmk__client_t object based on an IPC connection * * \param[in] c IPC connection (NULL to allocate generic client) * \param[in] key Connection table key (NULL to use sane default) * \param[in] uid_client UID corresponding to c (ignored if c is NULL) * * \return Pointer to new pcmk__client_t (guaranteed not to be \c NULL) */ static pcmk__client_t * client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client) { pcmk__client_t *client = pcmk__assert_alloc(1, sizeof(pcmk__client_t)); if (c) { client->user = pcmk__uid2username(uid_client); if (client->user == NULL) { client->user = pcmk__str_copy("#unprivileged"); crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged", uid_client); } client->ipcs = c; pcmk__set_client_flags(client, pcmk__client_ipc); client->pid = pcmk__client_pid(c); if (key == NULL) { key = c; } } client->id = crm_generate_uuid(); if (key == NULL) { key = client->id; } if (client_connections == NULL) { crm_trace("Creating IPC client table"); client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); } g_hash_table_insert(client_connections, key, client); return client; } /*! * \brief Allocate a new pcmk__client_t object and generate its ID * * \param[in] key What to use as connections hash table key (NULL to use ID) * * \return Pointer to new pcmk__client_t (asserts on failure) */ pcmk__client_t * pcmk__new_unauth_client(void *key) { return client_from_connection(NULL, key, 0); } pcmk__client_t * pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) { gid_t uid_cluster = 0; gid_t gid_cluster = 0; pcmk__client_t *client = NULL; CRM_CHECK(c != NULL, return NULL); if (pcmk_daemon_user(&uid_cluster, &gid_cluster) < 0) { static bool need_log = TRUE; if (need_log) { crm_warn("Could not find user and group IDs for user %s", CRM_DAEMON_USER); need_log = FALSE; } } if (uid_client != 0) { crm_trace("Giving group %u access to new IPC connection", gid_cluster); /* Passing -1 to chown(2) means don't change */ qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } /* TODO: Do our own auth checking, return NULL if unauthorized */ client = client_from_connection(c, NULL, uid_client); if ((uid_client == 0) || (uid_client == uid_cluster)) { /* Remember when a connection came from root or hacluster */ pcmk__set_client_flags(client, pcmk__client_privileged); } crm_debug("New IPC client %s for PID %u with uid %d and gid %d", client->id, client->pid, uid_client, gid_client); return client; } static struct iovec * pcmk__new_ipc_event(void) { return (struct iovec *) pcmk__assert_alloc(2, sizeof(struct iovec)); } /*! * \brief Free an I/O vector created by pcmk__ipc_prepare_iov() * * \param[in,out] event I/O vector to free */ void pcmk_free_ipc_event(struct iovec *event) { if (event != NULL) { free(event[0].iov_base); free(event[1].iov_base); free(event); } } static void free_event(gpointer data) { pcmk_free_ipc_event((struct iovec *) data); } static void add_event(pcmk__client_t *c, struct iovec *iov) { if (c->event_queue == NULL) { c->event_queue = g_queue_new(); } g_queue_push_tail(c->event_queue, iov); } void pcmk__free_client(pcmk__client_t *c) { if (c == NULL) { return; } if (client_connections) { if (c->ipcs) { crm_trace("Destroying %p/%p (%d remaining)", c, c->ipcs, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->ipcs); } else { crm_trace("Destroying remote connection %p (%d remaining)", c, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->id); } } if (c->event_timer) { g_source_remove(c->event_timer); } if (c->event_queue) { crm_debug("Destroying %d events", g_queue_get_length(c->event_queue)); g_queue_free_full(c->event_queue, free_event); } free(c->id); free(c->name); free(c->user); if (c->remote) { if (c->remote->auth_timeout) { g_source_remove(c->remote->auth_timeout); } if (c->remote->tls_session != NULL) { /* @TODO Reduce duplication at callers. Put here everything * necessary to tear down and free tls_session. */ gnutls_deinit(c->remote->tls_session); } free(c->remote->buffer); free(c->remote); } free(c); } /*! * \internal * \brief Raise IPC eviction threshold for a client, if allowed * * \param[in,out] client Client to modify * \param[in] qmax New threshold */ void pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax) { int rc = pcmk_rc_ok; long long qmax_ll = 0LL; unsigned int orig_value = 0U; CRM_CHECK(client != NULL, return); orig_value = client->queue_max; if (pcmk_is_set(client->flags, pcmk__client_privileged)) { rc = pcmk__scan_ll(qmax, &qmax_ll, 0LL); if (rc == pcmk_rc_ok) { if ((qmax_ll <= 0LL) || (qmax_ll > UINT_MAX)) { rc = ERANGE; } else { client->queue_max = (unsigned int) qmax_ll; } } } else { rc = EACCES; } if (rc != pcmk_rc_ok) { crm_info("Could not set IPC threshold for client %s[%u] to %s: %s", pcmk__client_name(client), client->pid, pcmk__s(qmax, "default"), pcmk_rc_str(rc)); } else if (client->queue_max != orig_value) { crm_debug("IPC threshold for client %s[%u] is now %u (was %u)", pcmk__client_name(client), client->pid, client->queue_max, orig_value); } } int pcmk__client_pid(qb_ipcs_connection_t *c) { struct qb_ipcs_connection_stats stats; stats.client_pid = 0; qb_ipcs_connection_stats_get(c, &stats, 0); return stats.client_pid; } /*! * \internal * \brief Retrieve message XML from data read from client IPC * * \param[in,out] c IPC client connection * \param[in] data Data read from client connection * \param[out] id Where to store message ID from libqb header * \param[out] flags Where to store flags from libqb header * * \return Message XML on success, NULL otherwise */ xmlNode * pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags) { xmlNode *xml = NULL; - char *uncompressed = NULL; char *text = ((char *)data) + sizeof(pcmk__ipc_header_t); pcmk__ipc_header_t *header = data; if (!pcmk__valid_ipc_header(header)) { return NULL; } if (id) { *id = ((struct qb_ipc_response_header *)data)->id; } if (flags) { *flags = header->flags; } if (pcmk_is_set(header->flags, crm_ipc_proxied)) { /* Mark this client as being the endpoint of a proxy connection. * Proxy connections responses are sent on the event channel, to avoid * blocking the controller serving as proxy. */ pcmk__set_client_flags(c, pcmk__client_proxied); } - if (header->size_compressed) { - int rc = 0; - unsigned int size_u = 1 + header->size_uncompressed; - uncompressed = pcmk__assert_alloc(1, size_u); - - crm_trace("Decompressing message data %u bytes into %u bytes", - header->size_compressed, size_u); - - rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); - text = uncompressed; - - rc = pcmk__bzlib2rc(rc); - - if (rc != pcmk_rc_ok) { - crm_err("Decompression failed: %s " QB_XS " rc=%d", - pcmk_rc_str(rc), rc); - free(uncompressed); - return NULL; - } - } - - pcmk__assert(text[header->size_uncompressed - 1] == 0); + pcmk__assert(text[header->size - 1] == 0); xml = pcmk__xml_parse(text); crm_log_xml_trace(xml, "[IPC received]"); - - free(uncompressed); return xml; } static int crm_ipcs_flush_events(pcmk__client_t *c); static gboolean crm_ipcs_flush_events_cb(gpointer data) { pcmk__client_t *c = data; c->event_timer = 0; crm_ipcs_flush_events(c); return FALSE; } /*! * \internal * \brief Add progressive delay before next event queue flush * * \param[in,out] c Client connection to add delay to * \param[in] queue_len Current event queue length */ static inline void delay_next_flush(pcmk__client_t *c, unsigned int queue_len) { /* Delay a maximum of 1.5 seconds */ guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500; c->event_timer = pcmk__create_timer(delay, crm_ipcs_flush_events_cb, c); } /*! * \internal * \brief Send client any messages in its queue * * \param[in,out] c Client to flush * * \return Standard Pacemaker return value */ static int crm_ipcs_flush_events(pcmk__client_t *c) { int rc = pcmk_rc_ok; ssize_t qb_rc = 0; unsigned int sent = 0; unsigned int queue_len = 0; if (c == NULL) { return rc; } else if (c->event_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); return rc; } if (c->event_queue) { queue_len = g_queue_get_length(c->event_queue); } while (sent < 100) { pcmk__ipc_header_t *header = NULL; struct iovec *event = NULL; if (c->event_queue) { // We don't pop unless send is successful event = g_queue_peek_head(c->event_queue); } if (event == NULL) { // Queue is empty break; } qb_rc = qb_ipcs_event_sendv(c->ipcs, event, 2); if (qb_rc < 0) { rc = (int) -qb_rc; break; } event = g_queue_pop_head(c->event_queue); sent++; header = event[0].iov_base; - if (header->size_compressed) { - crm_trace("Event %" PRId32 " to %p[%u] (%zd compressed bytes) sent", - header->qb.id, c->ipcs, c->pid, qb_rc); - } else { - crm_trace("Event %" PRId32 " to %p[%u] (%zd bytes) sent: %.120s", - header->qb.id, c->ipcs, c->pid, qb_rc, - (char *) (event[1].iov_base)); - } + crm_trace("Event %" PRId32 " to %p[%u] (%zd bytes) sent: %.120s", + header->qb.id, c->ipcs, c->pid, qb_rc, + (char *) (event[1].iov_base)); pcmk_free_ipc_event(event); } queue_len -= sent; if (sent > 0 || queue_len) { crm_trace("Sent %u events (%u remaining) for %p[%d]: %s (%zd)", sent, queue_len, c->ipcs, c->pid, pcmk_rc_str(rc), qb_rc); } if (queue_len) { /* Allow clients to briefly fall behind on processing incoming messages, * but drop completely unresponsive clients so the connection doesn't * consume resources indefinitely. */ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { /* Don't evict for a new or shrinking backlog */ crm_warn("Client with process ID %u has a backlog of %u messages " QB_XS " %p", c->pid, queue_len, c->ipcs); } else { crm_err("Evicting client with process ID %u due to backlog of %u messages " QB_XS " %p", c->pid, queue_len, c->ipcs); c->queue_backlog = 0; qb_ipcs_disconnect(c->ipcs); return rc; } } c->queue_backlog = queue_len; delay_next_flush(c, queue_len); } else { /* Event queue is empty, there is no backlog */ c->queue_backlog = 0; } return rc; } /*! * \internal * \brief Create an I/O vector for sending an IPC XML message * * \param[in] request Identifier for libqb response header * \param[in] message XML message to send - * \param[in] max_send_size If 0, default IPC buffer size is used - * \param[out] result Where to store prepared I/O vector + * \param[out] result Where to store prepared I/O vector - NULL + * on error * \param[out] bytes Size of prepared data in bytes * * \return Standard Pacemaker return code */ int pcmk__ipc_prepare_iov(uint32_t request, const xmlNode *message, - uint32_t max_send_size, struct iovec **result, - ssize_t *bytes) + struct iovec **result, ssize_t *bytes) { struct iovec *iov; unsigned int total = 0; + unsigned int max_send_size = crm_ipc_default_buffer_size(); GString *buffer = NULL; pcmk__ipc_header_t *header = NULL; int rc = pcmk_rc_ok; if ((message == NULL) || (result == NULL)) { rc = EINVAL; goto done; } header = calloc(1, sizeof(pcmk__ipc_header_t)); if (header == NULL) { rc = ENOMEM; goto done; } buffer = g_string_sized_new(1024); pcmk__xml_string(message, 0, buffer, 0); - if (max_send_size == 0) { - max_send_size = crm_ipc_default_buffer_size(); - } - CRM_LOG_ASSERT(max_send_size != 0); - *result = NULL; iov = pcmk__new_ipc_event(); iov[0].iov_len = sizeof(pcmk__ipc_header_t); iov[0].iov_base = header; header->version = PCMK__IPC_VERSION; - header->size_uncompressed = 1 + buffer->len; - total = iov[0].iov_len + header->size_uncompressed; - - if (total < max_send_size) { - iov[1].iov_base = pcmk__str_copy(buffer->str); - iov[1].iov_len = header->size_uncompressed; - - } else { - static unsigned int biggest = 0; - - char *compressed = NULL; - unsigned int new_size = 0; - - if (pcmk__compress(buffer->str, - (unsigned int) header->size_uncompressed, - (unsigned int) max_send_size, &compressed, - &new_size) == pcmk_rc_ok) { - - pcmk__set_ipc_flags(header->flags, "send data", crm_ipc_compressed); - header->size_compressed = new_size; - - iov[1].iov_len = header->size_compressed; - iov[1].iov_base = compressed; - - biggest = QB_MAX(header->size_compressed, biggest); - - } else { - crm_log_xml_trace(message, "EMSGSIZE"); - biggest = QB_MAX(header->size_uncompressed, biggest); - - crm_err("Could not compress %u-byte message into less than IPC " - "limit of %u bytes; set PCMK_ipc_buffer to higher value " - "(%u bytes suggested)", - header->size_uncompressed, max_send_size, 4 * biggest); - - free(compressed); - pcmk_free_ipc_event(iov); - rc = EMSGSIZE; - goto done; - } + header->size = buffer->len + 1; + total = iov[0].iov_len + header->size; + + if (total >= max_send_size) { + crm_log_xml_trace(message, "EMSGSIZE"); + crm_err("Could not transmit message; message size %" PRIu32" bytes is " + "larger than the maximum of %" PRIu32, header->size, + max_send_size); + rc = EMSGSIZE; + pcmk_free_ipc_event(iov); + goto done; } + iov[1].iov_base = pcmk__str_copy(buffer->str); + iov[1].iov_len = header->size; + header->qb.size = iov[0].iov_len + iov[1].iov_len; header->qb.id = (int32_t)request; /* Replying to a specific request */ *result = iov; pcmk__assert(header->qb.size > 0); if (bytes != NULL) { *bytes = header->qb.size; } done: if (buffer != NULL) { g_string_free(buffer, TRUE); } return rc; } int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags) { int rc = pcmk_rc_ok; static uint32_t id = 1; pcmk__ipc_header_t *header = iov[0].iov_base; if (c->flags & pcmk__client_proxied) { /* _ALL_ replies to proxied connections need to be sent as events */ if (!pcmk_is_set(flags, crm_ipc_server_event)) { /* The proxied flag lets us know this was originally meant to be a * response, even though we're sending it over the event channel. */ pcmk__set_ipc_flags(flags, "server event", crm_ipc_server_event |crm_ipc_proxied_relay_response); } } pcmk__set_ipc_flags(header->flags, "server event", flags); if (flags & crm_ipc_server_event) { header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ if (flags & crm_ipc_server_free) { crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); add_event(c, iov); } else { struct iovec *iov_copy = pcmk__new_ipc_event(); crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); iov_copy[0].iov_len = iov[0].iov_len; iov_copy[0].iov_base = malloc(iov[0].iov_len); memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); iov_copy[1].iov_len = iov[1].iov_len; iov_copy[1].iov_base = malloc(iov[1].iov_len); memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); add_event(c, iov_copy); } } else { ssize_t qb_rc; CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ qb_rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); if (qb_rc < header->qb.size) { if (qb_rc < 0) { rc = (int) -qb_rc; } crm_notice("Response %" PRId32 " to pid %u failed: %s " QB_XS " bytes=%" PRId32 " rc=%zd ipcs=%p", header->qb.id, c->pid, pcmk_rc_str(rc), header->qb.size, qb_rc, c->ipcs); } else { crm_trace("Response %" PRId32 " sent, %zd bytes to %p[%u]", header->qb.id, qb_rc, c->ipcs, c->pid); } if (flags & crm_ipc_server_free) { pcmk_free_ipc_event(iov); } } if (flags & crm_ipc_server_event) { rc = crm_ipcs_flush_events(c); } else { crm_ipcs_flush_events(c); } if ((rc == EPIPE) || (rc == ENOTCONN)) { crm_trace("Client %p disconnected", c->ipcs); } return rc; } int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, const xmlNode *message, uint32_t flags) { struct iovec *iov = NULL; int rc = pcmk_rc_ok; if (c == NULL) { return EINVAL; } - rc = pcmk__ipc_prepare_iov(request, message, crm_ipc_default_buffer_size(), - &iov, NULL); + rc = pcmk__ipc_prepare_iov(request, message, &iov, NULL); if (rc == pcmk_rc_ok) { pcmk__set_ipc_flags(flags, "send data", crm_ipc_server_free); rc = pcmk__ipc_send_iov(c, iov, flags); } else { - pcmk_free_ipc_event(iov); crm_notice("IPC message to pid %d failed: %s " QB_XS " rc=%d", c->pid, pcmk_rc_str(rc), rc); } return rc; } /*! * \internal * \brief Create an acknowledgement with a status code to send to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Exit status code to add to ack * * \return Newly created XML for ack * * \note The caller is responsible for freeing the return value with * \c pcmk__xml_free(). */ xmlNode * pcmk__ipc_create_ack_as(const char *function, int line, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { xmlNode *ack = NULL; if (pcmk_is_set(flags, crm_ipc_client_response)) { ack = pcmk__xe_create(NULL, tag); crm_xml_add(ack, PCMK_XA_FUNCTION, function); crm_xml_add_int(ack, PCMK__XA_LINE, line); crm_xml_add_int(ack, PCMK_XA_STATUS, (int) status); crm_xml_add(ack, PCMK__XA_IPC_PROTO_VERSION, ver); } return ack; } /*! * \internal * \brief Send an acknowledgement with a status code to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] c Client to send ack to * \param[in] request Request ID being replied to * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Status code to send with acknowledgement * * \return Standard Pacemaker return code */ int pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, uint32_t request, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { int rc = pcmk_rc_ok; xmlNode *ack = pcmk__ipc_create_ack_as(function, line, flags, tag, ver, status); if (ack != NULL) { crm_trace("Ack'ing IPC message from client %s as <%s status=%d>", pcmk__client_name(c), tag, status); crm_log_xml_trace(ack, "sent-ack"); c->request_id = 0; rc = pcmk__ipc_send_xml(c, request, ack, flags); pcmk__xml_free(ack); } return rc; } /*! * \internal * \brief Add an IPC server to the main loop for the CIB manager API * * \param[out] ipcs_ro New IPC server for read-only CIB manager API * \param[out] ipcs_rw New IPC server for read/write CIB manager API * \param[out] ipcs_shm New IPC server for shared-memory CIB manager API * \param[in] ro_cb IPC callbacks for read-only API * \param[in] rw_cb IPC callbacks for read/write and shared-memory APIs * * \note This function exits fatally if unable to create the servers. * \note There is no actual difference between the three IPC endpoints other * than their names. */ void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(PCMK__SERVER_BASED_RO, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(PCMK__SERVER_BASED_RW, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(PCMK__SERVER_BASED_SHM, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create the CIB manager: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Destroy IPC servers for the CIB manager API * * \param[out] ipcs_ro IPC server for read-only the CIB manager API * \param[out] ipcs_rw IPC server for read/write the CIB manager API * \param[out] ipcs_shm IPC server for shared-memory the CIB manager API * * \note This is a convenience function for calling qb_ipcs_destroy() for each * argument. */ void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } /*! * \internal * \brief Add an IPC server to the main loop for the controller API * * \param[in] cb IPC callbacks * * \return Newly created IPC server */ qb_ipcs_service_t * pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } /*! * \internal * \brief Add an IPC server to the main loop for the attribute manager API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(PCMK__VALUE_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_crit("Exiting fatally because unable to serve " PCMK__SERVER_ATTRD " IPC (verify pacemaker and pacemaker_remote are not both " "enabled)"); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the fencer API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb, QB_LOOP_HIGH); if (*ipcs == NULL) { crm_err("Failed to create fencer: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemakerd API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits with CRM_EX_OSERR if unable to create the servers. */ void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Couldn't start pacemakerd IPC server"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); /* sub-daemons are observed by pacemakerd. Thus we exit CRM_EX_FATAL * if we want to prevent pacemakerd from restarting them. * With pacemakerd we leave the exit-code shown to e.g. systemd * to what it was prior to moving the code here from pacemakerd.c */ crm_exit(CRM_EX_OSERR); } } /*! * \internal * \brief Add an IPC server to the main loop for the scheduler API * * \param[in] cb IPC callbacks * * \return Newly created IPC server * \note This function exits fatally if unable to create the servers. */ qb_ipcs_service_t * pcmk__serve_schedulerd_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_NATIVE, cb); } diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c index 9529d523af..b3a7a7c602 100644 --- a/lib/common/mainloop.c +++ b/lib/common/mainloop.c @@ -1,1465 +1,1465 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include struct mainloop_child_s { pid_t pid; char *desc; unsigned timerid; gboolean timeout; void *privatedata; enum mainloop_child_flags flags; /* Called when a process dies */ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); }; struct trigger_s { GSource source; gboolean running; gboolean trigger; void *user_data; guint id; }; struct mainloop_timer_s { guint id; guint period_ms; bool repeat; char *name; GSourceFunc cb; void *userdata; }; static gboolean crm_trigger_prepare(GSource * source, gint * timeout) { crm_trigger_t *trig = (crm_trigger_t *) source; /* cluster-glue's FD and IPC related sources make use of * g_source_add_poll() but do not set a timeout in their prepare * functions * * This means mainloop's poll() will block until an event for one * of these sources occurs - any /other/ type of source, such as * this one or g_idle_*, that doesn't use g_source_add_poll() is * S-O-L and won't be processed until there is something fd-based * happens. * * Luckily the timeout we can set here affects all sources and * puts an upper limit on how long poll() can take. * * So unconditionally set a small-ish timeout, not too small that * we're in constant motion, which will act as an upper bound on * how long the signal handling might be delayed for. */ *timeout = 500; /* Timeout in ms */ return trig->trigger; } static gboolean crm_trigger_check(GSource * source) { crm_trigger_t *trig = (crm_trigger_t *) source; return trig->trigger; } /*! * \internal * \brief GSource dispatch function for crm_trigger_t * * \param[in] source crm_trigger_t being dispatched * \param[in] callback Callback passed at source creation * \param[in,out] userdata User data passed at source creation * * \return G_SOURCE_REMOVE to remove source, G_SOURCE_CONTINUE to keep it */ static gboolean crm_trigger_dispatch(GSource *source, GSourceFunc callback, gpointer userdata) { gboolean rc = G_SOURCE_CONTINUE; crm_trigger_t *trig = (crm_trigger_t *) source; if (trig->running) { /* Wait until the existing job is complete before starting the next one */ return G_SOURCE_CONTINUE; } trig->trigger = FALSE; if (callback) { int callback_rc = callback(trig->user_data); if (callback_rc < 0) { crm_trace("Trigger handler %p not yet complete", trig); trig->running = TRUE; } else if (callback_rc == 0) { rc = G_SOURCE_REMOVE; } } return rc; } static void crm_trigger_finalize(GSource * source) { crm_trace("Trigger %p destroyed", source); } static GSourceFuncs crm_trigger_funcs = { crm_trigger_prepare, crm_trigger_check, crm_trigger_dispatch, crm_trigger_finalize, }; static crm_trigger_t * mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { crm_trigger_t *trigger = NULL; trigger = (crm_trigger_t *) source; trigger->id = 0; trigger->trigger = FALSE; trigger->user_data = userdata; if (dispatch) { g_source_set_callback(source, dispatch, trigger, NULL); } g_source_set_priority(source, priority); g_source_set_can_recurse(source, FALSE); trigger->id = g_source_attach(source, NULL); return trigger; } void mainloop_trigger_complete(crm_trigger_t * trig) { crm_trace("Trigger handler %p complete", trig); trig->running = FALSE; } /*! * \brief Create a trigger to be used as a mainloop source * * \param[in] priority Relative priority of source (lower number is higher priority) * \param[in] dispatch Trigger dispatch function (should return 0 to remove the * trigger from the mainloop, -1 if the trigger should be * kept but the job is still running and not complete, and * 1 if the trigger should be kept and the job is complete) * \param[in] userdata Pointer to pass to \p dispatch * * \return Newly allocated mainloop source for trigger */ crm_trigger_t * mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { GSource *source = NULL; pcmk__assert(sizeof(crm_trigger_t) > sizeof(GSource)); source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t)); return mainloop_setup_trigger(source, priority, dispatch, userdata); } void mainloop_set_trigger(crm_trigger_t * source) { if(source) { source->trigger = TRUE; } } gboolean mainloop_destroy_trigger(crm_trigger_t * source) { GSource *gs = NULL; if(source == NULL) { return TRUE; } gs = (GSource *)source; g_source_destroy(gs); /* Remove from mainloop, ref_count-- */ g_source_unref(gs); /* The caller no longer carries a reference to source * * At this point the source should be free'd, * unless we're currently processing said * source, in which case mainloop holds an * additional reference and it will be free'd * once our processing completes */ return TRUE; } // Define a custom glib source for signal handling // Data structure for custom glib source typedef struct signal_s { crm_trigger_t trigger; // trigger that invoked source (must be first) void (*handler) (int sig); // signal handler int signal; // signal that was received } crm_signal_t; // Table to associate signal handlers with signal numbers static crm_signal_t *crm_signals[NSIG]; /*! * \internal * \brief Dispatch an event from custom glib source for signals * * Given an signal event, clear the event trigger and call any registered * signal handler. * * \param[in] source glib source that triggered this dispatch * \param[in] callback (ignored) * \param[in] userdata (ignored) */ static gboolean crm_signal_dispatch(GSource *source, GSourceFunc callback, gpointer userdata) { crm_signal_t *sig = (crm_signal_t *) source; if(sig->signal != SIGCHLD) { crm_notice("Caught '%s' signal " QB_XS " %d (%s handler)", strsignal(sig->signal), sig->signal, (sig->handler? "invoking" : "no")); } sig->trigger.trigger = FALSE; if (sig->handler) { sig->handler(sig->signal); } return TRUE; } /*! * \internal * \brief Handle a signal by setting a trigger for signal source * * \param[in] sig Signal number that was received * * \note This is the true signal handler for the mainloop signal source, and * must be async-safe. */ static void mainloop_signal_handler(int sig) { if (sig > 0 && sig < NSIG && crm_signals[sig] != NULL) { mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]); } } // Functions implementing our custom glib source for signal handling static GSourceFuncs crm_signal_funcs = { crm_trigger_prepare, crm_trigger_check, crm_signal_dispatch, crm_trigger_finalize, }; /*! * \internal * \brief Set a true signal handler * * signal()-like interface to sigaction() * * \param[in] sig Signal number to register handler for * \param[in] dispatch Signal handler * * \return The previous value of the signal handler, or SIG_ERR on error * \note The dispatch function must be async-safe. */ sighandler_t crm_signal_handler(int sig, sighandler_t dispatch) { sigset_t mask; struct sigaction sa; struct sigaction old; if (sigemptyset(&mask) < 0) { crm_err("Could not set handler for signal %d: %s", sig, pcmk_rc_str(errno)); return SIG_ERR; } memset(&sa, 0, sizeof(struct sigaction)); sa.sa_handler = dispatch; sa.sa_flags = SA_RESTART; sa.sa_mask = mask; if (sigaction(sig, &sa, &old) < 0) { crm_err("Could not set handler for signal %d: %s", sig, pcmk_rc_str(errno)); return SIG_ERR; } return old.sa_handler; } static void mainloop_destroy_signal_entry(int sig) { crm_signal_t *tmp = crm_signals[sig]; if (tmp != NULL) { crm_signals[sig] = NULL; crm_trace("Unregistering mainloop handler for signal %d", sig); mainloop_destroy_trigger((crm_trigger_t *) tmp); } } /*! * \internal * \brief Add a signal handler to a mainloop * * \param[in] sig Signal number to handle * \param[in] dispatch Signal handler function * * \note The true signal handler merely sets a mainloop trigger to call this * dispatch function via the mainloop. Therefore, the dispatch function * does not need to be async-safe. */ gboolean mainloop_add_signal(int sig, void (*dispatch) (int sig)) { GSource *source = NULL; int priority = G_PRIORITY_HIGH - 1; if (sig == SIGTERM) { /* TERM is higher priority than other signals, * signals are higher priority than other ipc. * Yes, minus: smaller is "higher" */ priority--; } if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) { crm_trace("Signal handler for %d is already installed", sig); return TRUE; } else if (crm_signals[sig] != NULL) { crm_err("Different signal handler for %d is already installed", sig); return FALSE; } pcmk__assert(sizeof(crm_signal_t) > sizeof(GSource)); source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t)); crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL); pcmk__assert(crm_signals[sig] != NULL); crm_signals[sig]->handler = dispatch; crm_signals[sig]->signal = sig; if (crm_signal_handler(sig, mainloop_signal_handler) == SIG_ERR) { mainloop_destroy_signal_entry(sig); return FALSE; } return TRUE; } gboolean mainloop_destroy_signal(int sig) { if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signal_handler(sig, NULL) == SIG_ERR) { crm_perror(LOG_ERR, "Could not uninstall signal handler for signal %d", sig); return FALSE; } else if (crm_signals[sig] == NULL) { return TRUE; } mainloop_destroy_signal_entry(sig); return TRUE; } static qb_array_t *gio_map = NULL; void mainloop_cleanup(void) { if (gio_map != NULL) { qb_array_free(gio_map); gio_map = NULL; } for (int sig = 0; sig < NSIG; ++sig) { mainloop_destroy_signal_entry(sig); } } /* * libqb... */ struct gio_to_qb_poll { int32_t is_used; guint source; int32_t events; void *data; qb_ipcs_dispatch_fn_t fn; enum qb_loop_priority p; }; static gboolean gio_read_socket(GIOChannel * gio, GIOCondition condition, gpointer data) { struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; gint fd = g_io_channel_unix_get_fd(gio); crm_trace("%p.%d %d", data, fd, condition); /* if this assert get's hit, then there is a race condition between * when we destroy a fd and when mainloop actually gives it up */ pcmk__assert(adaptor->is_used > 0); return (adaptor->fn(fd, condition, adaptor->data) == 0); } static void gio_poll_destroy(gpointer data) { struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; adaptor->is_used--; pcmk__assert(adaptor->is_used >= 0); if (adaptor->is_used == 0) { crm_trace("Marking adaptor %p unused", adaptor); adaptor->source = 0; } } /*! * \internal * \brief Convert libqb's poll priority into GLib's one * * \param[in] prio libqb's poll priority (#QB_LOOP_MED assumed as fallback) * * \return best matching GLib's priority */ static gint conv_prio_libqb2glib(enum qb_loop_priority prio) { switch (prio) { case QB_LOOP_LOW: return G_PRIORITY_LOW; case QB_LOOP_HIGH: return G_PRIORITY_HIGH; default: return G_PRIORITY_DEFAULT; // QB_LOOP_MED } } /*! * \internal * \brief Convert libqb's poll priority to rate limiting spec * * \param[in] prio libqb's poll priority (#QB_LOOP_MED assumed as fallback) * * \return best matching rate limiting spec * \note This is the inverse of libqb's qb_ipcs_request_rate_limit(). */ static enum qb_ipcs_rate_limit conv_libqb_prio2ratelimit(enum qb_loop_priority prio) { switch (prio) { case QB_LOOP_LOW: return QB_IPCS_RATE_SLOW; case QB_LOOP_HIGH: return QB_IPCS_RATE_FAST; default: return QB_IPCS_RATE_NORMAL; // QB_LOOP_MED } } static int32_t gio_poll_dispatch_update(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn, int32_t add) { struct gio_to_qb_poll *adaptor; GIOChannel *channel; int32_t res = 0; res = qb_array_index(gio_map, fd, (void **)&adaptor); if (res < 0) { crm_err("Array lookup failed for fd=%d: %d", fd, res); return res; } crm_trace("Adding fd=%d to mainloop as adaptor %p", fd, adaptor); if (add && adaptor->source) { crm_err("Adaptor for descriptor %d is still in-use", fd); return -EEXIST; } if (!add && !adaptor->is_used) { crm_err("Adaptor for descriptor %d is not in-use", fd); return -ENOENT; } /* channel is created with ref_count = 1 */ channel = g_io_channel_unix_new(fd); if (!channel) { crm_err("No memory left to add fd=%d", fd); return -ENOMEM; } if (adaptor->source) { g_source_remove(adaptor->source); adaptor->source = 0; } /* Because unlike the poll() API, glib doesn't tell us about HUPs by default */ evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR); adaptor->fn = fn; adaptor->events = evts; adaptor->data = data; adaptor->p = p; adaptor->is_used++; adaptor->source = g_io_add_watch_full(channel, conv_prio_libqb2glib(p), evts, gio_read_socket, adaptor, gio_poll_destroy); /* Now that mainloop now holds a reference to channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new(). * * This means that channel will be free'd by: * g_main_context_dispatch() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after gio_poll_destroy() completes */ g_io_channel_unref(channel); crm_trace("Added to mainloop with gsource id=%d", adaptor->source); if (adaptor->source > 0) { return 0; } return -EINVAL; } static int32_t gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_TRUE); } static int32_t gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_FALSE); } static int32_t gio_poll_dispatch_del(int32_t fd) { struct gio_to_qb_poll *adaptor; crm_trace("Looking for fd=%d", fd); if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) { if (adaptor->source) { g_source_remove(adaptor->source); adaptor->source = 0; } } return 0; } struct qb_ipcs_poll_handlers gio_poll_funcs = { .job_add = NULL, .dispatch_add = gio_poll_dispatch_add, .dispatch_mod = gio_poll_dispatch_mod, .dispatch_del = gio_poll_dispatch_del, }; static enum qb_ipc_type pick_ipc_type(enum qb_ipc_type requested) { const char *env = pcmk__env_option(PCMK__ENV_IPC_TYPE); if (env && strcmp("shared-mem", env) == 0) { return QB_IPC_SHM; } else if (env && strcmp("socket", env) == 0) { return QB_IPC_SOCKET; } else if (env && strcmp("posix", env) == 0) { return QB_IPC_POSIX_MQ; } else if (env && strcmp("sysv", env) == 0) { return QB_IPC_SYSV_MQ; } else if (requested == QB_IPC_NATIVE) { /* We prefer shared memory because the server never blocks on * send. If part of a message fits into the socket, libqb * needs to block until the remainder can be sent also. * Otherwise the client will wait forever for the remaining * bytes. */ return QB_IPC_SHM; } return requested; } qb_ipcs_service_t * mainloop_add_ipc_server(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers *callbacks) { return mainloop_add_ipc_server_with_prio(name, type, callbacks, QB_LOOP_MED); } qb_ipcs_service_t * mainloop_add_ipc_server_with_prio(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers *callbacks, enum qb_loop_priority prio) { int rc = 0; qb_ipcs_service_t *server = NULL; if (gio_map == NULL) { gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1); } server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks); if (server == NULL) { crm_err("Could not create %s IPC server: %s (%d)", name, pcmk_rc_str(errno), errno); return NULL; } if (prio != QB_LOOP_MED) { qb_ipcs_request_rate_limit(server, conv_libqb_prio2ratelimit(prio)); } - // All clients should use at least PCMK_ipc_buffer as their buffer size + // Enforce a minimum IPC buffer size on all clients qb_ipcs_enforce_buffer_size(server, crm_ipc_default_buffer_size()); qb_ipcs_poll_handlers_set(server, &gio_poll_funcs); rc = qb_ipcs_run(server); if (rc < 0) { crm_err("Could not start %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc); return NULL; // qb_ipcs_run() destroys server on failure } return server; } void mainloop_del_ipc_server(qb_ipcs_service_t * server) { if (server) { qb_ipcs_destroy(server); } } struct mainloop_io_s { char *name; void *userdata; int fd; guint source; crm_ipc_t *ipc; GIOChannel *channel; int (*dispatch_fn_ipc) (const char *buffer, ssize_t length, gpointer userdata); int (*dispatch_fn_io) (gpointer userdata); void (*destroy_fn) (gpointer userdata); }; /*! * \internal * \brief I/O watch callback function (GIOFunc) * * \param[in] gio I/O channel being watched * \param[in] condition I/O condition satisfied * \param[in] data User data passed when source was created * * \return G_SOURCE_REMOVE to remove source, G_SOURCE_CONTINUE to keep it */ static gboolean mainloop_gio_callback(GIOChannel *gio, GIOCondition condition, gpointer data) { gboolean rc = G_SOURCE_CONTINUE; mainloop_io_t *client = data; pcmk__assert(client->fd == g_io_channel_unix_get_fd(gio)); if (condition & G_IO_IN) { if (client->ipc) { long read_rc = 0L; int max = 10; do { read_rc = crm_ipc_read(client->ipc); if (read_rc <= 0) { crm_trace("Could not read IPC message from %s: %s (%ld)", client->name, pcmk_strerror(read_rc), read_rc); } else if (client->dispatch_fn_ipc) { const char *buffer = crm_ipc_buffer(client->ipc); crm_trace("New %ld-byte IPC message from %s " "after I/O condition %d", read_rc, client->name, (int) condition); if (client->dispatch_fn_ipc(buffer, read_rc, client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); rc = G_SOURCE_REMOVE; } } } while ((rc == G_SOURCE_CONTINUE) && (read_rc > 0) && --max > 0); } else { crm_trace("New I/O event for %s after I/O condition %d", client->name, (int) condition); if (client->dispatch_fn_io) { if (client->dispatch_fn_io(client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); rc = G_SOURCE_REMOVE; } } } } if (client->ipc && !crm_ipc_connected(client->ipc)) { crm_err("Connection to %s closed " QB_XS " client=%p condition=%d", client->name, client, condition); rc = G_SOURCE_REMOVE; } else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) { crm_trace("The connection %s[%p] has been closed (I/O condition=%d)", client->name, client, condition); rc = G_SOURCE_REMOVE; } else if ((condition & G_IO_IN) == 0) { /* #define GLIB_SYSDEF_POLLIN =1 #define GLIB_SYSDEF_POLLPRI =2 #define GLIB_SYSDEF_POLLOUT =4 #define GLIB_SYSDEF_POLLERR =8 #define GLIB_SYSDEF_POLLHUP =16 #define GLIB_SYSDEF_POLLNVAL =32 typedef enum { G_IO_IN GLIB_SYSDEF_POLLIN, G_IO_OUT GLIB_SYSDEF_POLLOUT, G_IO_PRI GLIB_SYSDEF_POLLPRI, G_IO_ERR GLIB_SYSDEF_POLLERR, G_IO_HUP GLIB_SYSDEF_POLLHUP, G_IO_NVAL GLIB_SYSDEF_POLLNVAL } GIOCondition; A bitwise combination representing a condition to watch for on an event source. G_IO_IN There is data to read. G_IO_OUT Data can be written (without blocking). G_IO_PRI There is urgent data to read. G_IO_ERR Error condition. G_IO_HUP Hung up (the connection has been broken, usually for pipes and sockets). G_IO_NVAL Invalid request. The file descriptor is not open. */ crm_err("Strange condition: %d", condition); } /* G_SOURCE_REMOVE results in mainloop_gio_destroy() being called * just before the source is removed from mainloop */ return rc; } static void mainloop_gio_destroy(gpointer c) { mainloop_io_t *client = c; char *c_name = strdup(client->name); /* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c * client->channel will still have ref_count > 0... should be == 1 */ crm_trace("Destroying client %s[%p]", c_name, c); if (client->ipc) { crm_ipc_close(client->ipc); } if (client->destroy_fn) { void (*destroy_fn) (gpointer userdata) = client->destroy_fn; client->destroy_fn = NULL; destroy_fn(client->userdata); } if (client->ipc) { crm_ipc_t *ipc = client->ipc; client->ipc = NULL; crm_ipc_destroy(ipc); } crm_trace("Destroyed client %s[%p]", c_name, c); free(client->name); client->name = NULL; free(client); free(c_name); } /*! * \brief Connect to IPC and add it as a main loop source * * \param[in,out] ipc IPC connection to add * \param[in] priority Event source priority to use for connection * \param[in] userdata Data to register with callbacks * \param[in] callbacks Dispatch and destroy callbacks for connection * \param[out] source Newly allocated event source * * \return Standard Pacemaker return code * * \note On failure, the caller is still responsible for ipc. On success, the * caller should call mainloop_del_ipc_client() when source is no longer * needed, which will lead to the disconnection of the IPC later in the * main loop if it is connected. However the IPC disconnects, * mainloop_gio_destroy() will free ipc and source after calling the * destroy callback. */ int pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata, const struct ipc_client_callbacks *callbacks, mainloop_io_t **source) { int rc = pcmk_rc_ok; int fd = -1; const char *ipc_name = NULL; CRM_CHECK((ipc != NULL) && (callbacks != NULL), return EINVAL); ipc_name = pcmk__s(crm_ipc_name(ipc), "Pacemaker"); rc = pcmk__connect_generic_ipc(ipc); if (rc != pcmk_rc_ok) { crm_debug("Connection to %s failed: %s", ipc_name, pcmk_rc_str(rc)); return rc; } rc = pcmk__ipc_fd(ipc, &fd); if (rc != pcmk_rc_ok) { crm_debug("Could not obtain file descriptor for %s IPC: %s", ipc_name, pcmk_rc_str(rc)); crm_ipc_close(ipc); return rc; } *source = mainloop_add_fd(ipc_name, priority, fd, userdata, NULL); if (*source == NULL) { rc = errno; crm_ipc_close(ipc); return rc; } (*source)->ipc = ipc; (*source)->destroy_fn = callbacks->destroy; (*source)->dispatch_fn_ipc = callbacks->dispatch; return pcmk_rc_ok; } /*! * \brief Get period for mainloop timer * * \param[in] timer Timer * * \return Period in ms */ guint pcmk__mainloop_timer_get_period(const mainloop_timer_t *timer) { if (timer) { return timer->period_ms; } return 0; } mainloop_io_t * mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata, struct ipc_client_callbacks *callbacks) { - crm_ipc_t *ipc = crm_ipc_new(name, max_size); + crm_ipc_t *ipc = crm_ipc_new(name, 0); mainloop_io_t *source = NULL; int rc = pcmk__add_mainloop_ipc(ipc, priority, userdata, callbacks, &source); if (rc != pcmk_rc_ok) { if (crm_log_level == LOG_STDOUT) { fprintf(stderr, "Connection to %s failed: %s", name, pcmk_rc_str(rc)); } crm_ipc_destroy(ipc); if (rc > 0) { errno = rc; } else { errno = ENOTCONN; } return NULL; } return source; } void mainloop_del_ipc_client(mainloop_io_t * client) { mainloop_del_fd(client); } crm_ipc_t * mainloop_get_ipc_client(mainloop_io_t * client) { if (client) { return client->ipc; } return NULL; } mainloop_io_t * mainloop_add_fd(const char *name, int priority, int fd, void *userdata, struct mainloop_fd_callbacks * callbacks) { mainloop_io_t *client = NULL; if (fd >= 0) { client = calloc(1, sizeof(mainloop_io_t)); if (client == NULL) { return NULL; } client->name = strdup(name); client->userdata = userdata; if (callbacks) { client->destroy_fn = callbacks->destroy; client->dispatch_fn_io = callbacks->dispatch; } client->fd = fd; client->channel = g_io_channel_unix_new(fd); client->source = g_io_add_watch_full(client->channel, priority, (G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR), mainloop_gio_callback, client, mainloop_gio_destroy); /* Now that mainloop now holds a reference to channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new(). * * This means that channel will be free'd by: * g_main_context_dispatch() or g_source_remove() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after mainloop_gio_destroy() completes */ g_io_channel_unref(client->channel); crm_trace("Added connection %d for %s[%p].%d", client->source, client->name, client, fd); } else { errno = EINVAL; } return client; } void mainloop_del_fd(mainloop_io_t * client) { if (client != NULL) { crm_trace("Removing client %s[%p]", client->name, client); if (client->source) { /* Results in mainloop_gio_destroy() being called just * before the source is removed from mainloop */ g_source_remove(client->source); } } } static GList *child_list = NULL; pid_t mainloop_child_pid(mainloop_child_t * child) { return child->pid; } const char * mainloop_child_name(mainloop_child_t * child) { return child->desc; } int mainloop_child_timeout(mainloop_child_t * child) { return child->timeout; } void * mainloop_child_userdata(mainloop_child_t * child) { return child->privatedata; } void mainloop_clear_child_userdata(mainloop_child_t * child) { child->privatedata = NULL; } /* good function name */ static void child_free(mainloop_child_t *child) { if (child->timerid != 0) { crm_trace("Removing timer %d", child->timerid); g_source_remove(child->timerid); child->timerid = 0; } free(child->desc); free(child); } /* terrible function name */ static int child_kill_helper(mainloop_child_t *child) { int rc; if (child->flags & mainloop_leave_pid_group) { crm_debug("Kill pid %d only. leave group intact.", child->pid); rc = kill(child->pid, SIGKILL); } else { crm_debug("Kill pid %d's group", child->pid); rc = kill(-child->pid, SIGKILL); } if (rc < 0) { if (errno != ESRCH) { crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid); } return -errno; } return 0; } static gboolean child_timeout_callback(gpointer p) { mainloop_child_t *child = p; int rc = 0; child->timerid = 0; if (child->timeout) { crm_warn("%s process (PID %d) will not die!", child->desc, (int)child->pid); return FALSE; } rc = child_kill_helper(child); if (rc == -ESRCH) { /* Nothing left to do. pid doesn't exist */ return FALSE; } child->timeout = TRUE; crm_debug("%s process (PID %d) timed out", child->desc, (int)child->pid); child->timerid = pcmk__create_timer(5000, child_timeout_callback, child); return FALSE; } static bool child_waitpid(mainloop_child_t *child, int flags) { int rc = 0; int core = 0; int signo = 0; int status = 0; int exitcode = 0; bool callback_needed = true; rc = waitpid(child->pid, &status, flags); if (rc == 0) { // WNOHANG in flags, and child status is not available crm_trace("Child process %d (%s) still active", child->pid, child->desc); callback_needed = false; } else if (rc != child->pid) { /* According to POSIX, possible conditions: * - child->pid was non-positive (process group or any child), * and rc is specific child * - errno ECHILD (pid does not exist or is not child) * - errno EINVAL (invalid flags) * - errno EINTR (caller interrupted by signal) * * @TODO Handle these cases more specifically. */ signo = SIGCHLD; exitcode = 1; crm_notice("Wait for child process %d (%s) interrupted: %s", child->pid, child->desc, pcmk_rc_str(errno)); } else if (WIFEXITED(status)) { exitcode = WEXITSTATUS(status); crm_trace("Child process %d (%s) exited with status %d", child->pid, child->desc, exitcode); } else if (WIFSIGNALED(status)) { signo = WTERMSIG(status); crm_trace("Child process %d (%s) exited with signal %d (%s)", child->pid, child->desc, signo, strsignal(signo)); #ifdef WCOREDUMP // AIX, SunOS, maybe others } else if (WCOREDUMP(status)) { core = 1; crm_err("Child process %d (%s) dumped core", child->pid, child->desc); #endif } else { // flags must contain WUNTRACED and/or WCONTINUED to reach this crm_trace("Child process %d (%s) stopped or continued", child->pid, child->desc); callback_needed = false; } if (callback_needed && child->callback) { child->callback(child, child->pid, core, signo, exitcode); } return callback_needed; } static void child_death_dispatch(int signal) { for (GList *iter = child_list; iter; ) { GList *saved = iter; mainloop_child_t *child = iter->data; iter = iter->next; if (child_waitpid(child, WNOHANG)) { crm_trace("Removing completed process %d from child list", child->pid); child_list = g_list_remove_link(child_list, saved); g_list_free(saved); child_free(child); } } } static gboolean child_signal_init(gpointer p) { crm_trace("Installed SIGCHLD handler"); /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */ mainloop_add_signal(SIGCHLD, child_death_dispatch); /* In case they terminated before the signal handler was installed */ child_death_dispatch(SIGCHLD); return FALSE; } gboolean mainloop_child_kill(pid_t pid) { GList *iter; mainloop_child_t *child = NULL; mainloop_child_t *match = NULL; /* It is impossible to block SIGKILL, this allows us to * call waitpid without WNOHANG flag.*/ int waitflags = 0, rc = 0; for (iter = child_list; iter != NULL && match == NULL; iter = iter->next) { child = iter->data; if (pid == child->pid) { match = child; } } if (match == NULL) { return FALSE; } rc = child_kill_helper(match); if(rc == -ESRCH) { /* It's gone, but hasn't shown up in waitpid() yet. Wait until we get * SIGCHLD and let handler clean it up as normal (so we get the correct * return code/status). The blocking alternative would be to call * child_waitpid(match, 0). */ crm_trace("Waiting for signal that child process %d completed", match->pid); return TRUE; } else if(rc != 0) { /* If KILL for some other reason set the WNOHANG flag since we * can't be certain what happened. */ waitflags = WNOHANG; } if (!child_waitpid(match, waitflags)) { /* not much we can do if this occurs */ return FALSE; } child_list = g_list_remove(child_list, match); child_free(match); return TRUE; } /* Create/Log a new tracked process * To track a process group, use -pid * * @TODO Using a non-positive pid (i.e. any child, or process group) would * likely not be useful since we will free the child after the first * completed process. */ void mainloop_child_add_with_flags(pid_t pid, int timeout, const char *desc, void *privatedata, enum mainloop_child_flags flags, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) { static bool need_init = TRUE; mainloop_child_t *child = pcmk__assert_alloc(1, sizeof(mainloop_child_t)); child->pid = pid; child->timerid = 0; child->timeout = FALSE; child->privatedata = privatedata; child->callback = callback; child->flags = flags; child->desc = pcmk__str_copy(desc); if (timeout) { child->timerid = pcmk__create_timer(timeout, child_timeout_callback, child); } child_list = g_list_append(child_list, child); if(need_init) { need_init = FALSE; /* SIGCHLD processing has to be invoked from mainloop. * We do not want it to be possible to both add a child pid * to mainloop, and have the pid's exit callback invoked within * the same callstack. */ pcmk__create_timer(1, child_signal_init, NULL); } } void mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) { mainloop_child_add_with_flags(pid, timeout, desc, privatedata, 0, callback); } static gboolean mainloop_timer_cb(gpointer user_data) { int id = 0; bool repeat = FALSE; struct mainloop_timer_s *t = user_data; pcmk__assert(t != NULL); id = t->id; t->id = 0; /* Ensure it's unset during callbacks so that * mainloop_timer_running() works as expected */ if(t->cb) { crm_trace("Invoking callbacks for timer %s", t->name); repeat = t->repeat; if(t->cb(t->userdata) == FALSE) { crm_trace("Timer %s complete", t->name); repeat = FALSE; } } if(repeat) { /* Restore if repeating */ t->id = id; } return repeat; } bool mainloop_timer_running(mainloop_timer_t *t) { if(t && t->id != 0) { return TRUE; } return FALSE; } void mainloop_timer_start(mainloop_timer_t *t) { mainloop_timer_stop(t); if(t && t->period_ms > 0) { crm_trace("Starting timer %s", t->name); t->id = pcmk__create_timer(t->period_ms, mainloop_timer_cb, t); } } void mainloop_timer_stop(mainloop_timer_t *t) { if(t && t->id != 0) { crm_trace("Stopping timer %s", t->name); g_source_remove(t->id); t->id = 0; } } guint mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms) { guint last = 0; if(t) { last = t->period_ms; t->period_ms = period_ms; } if(t && t->id != 0 && last != t->period_ms) { mainloop_timer_start(t); } return last; } mainloop_timer_t * mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata) { mainloop_timer_t *t = pcmk__assert_alloc(1, sizeof(mainloop_timer_t)); if (name != NULL) { t->name = crm_strdup_printf("%s-%u-%d", name, period_ms, repeat); } else { t->name = crm_strdup_printf("%p-%u-%d", t, period_ms, repeat); } t->id = 0; t->period_ms = period_ms; t->repeat = repeat; t->cb = cb; t->userdata = userdata; crm_trace("Created timer %s with %p %p", t->name, userdata, t->userdata); return t; } void mainloop_timer_del(mainloop_timer_t *t) { if(t) { crm_trace("Destroying timer %s", t->name); mainloop_timer_stop(t); free(t->name); free(t); } } /* * Helpers to make sure certain events aren't lost at shutdown */ static gboolean drain_timeout_cb(gpointer user_data) { bool *timeout_popped = (bool*) user_data; *timeout_popped = TRUE; return FALSE; } /*! * \brief Drain some remaining main loop events then quit it * * \param[in,out] mloop Main loop to drain and quit * \param[in] n Drain up to this many pending events */ void pcmk_quit_main_loop(GMainLoop *mloop, unsigned int n) { if ((mloop != NULL) && g_main_loop_is_running(mloop)) { GMainContext *ctx = g_main_loop_get_context(mloop); /* Drain up to n events in case some memory clean-up is pending * (helpful to reduce noise in valgrind output). */ for (int i = 0; (i < n) && g_main_context_pending(ctx); ++i) { g_main_context_dispatch(ctx); } g_main_loop_quit(mloop); } } /*! * \brief Process main loop events while a certain condition is met * * \param[in,out] mloop Main loop to process * \param[in] timer_ms Don't process longer than this amount of time * \param[in] check Function that returns true if events should be * processed * * \note This function is intended to be called at shutdown if certain important * events should not be missed. The caller would likely quit the main loop * or exit after calling this function. The check() function will be * passed the remaining timeout in milliseconds. */ void pcmk_drain_main_loop(GMainLoop *mloop, guint timer_ms, bool (*check)(guint)) { bool timeout_popped = FALSE; guint timer = 0; GMainContext *ctx = NULL; CRM_CHECK(mloop && check, return); ctx = g_main_loop_get_context(mloop); if (ctx) { time_t start_time = time(NULL); timer = pcmk__create_timer(timer_ms, drain_timeout_cb, &timeout_popped); while (!timeout_popped && check(timer_ms - (time(NULL) - start_time) * 1000)) { g_main_context_iteration(ctx, TRUE); } } if (!timeout_popped && (timer > 0)) { g_source_remove(timer); } }