diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index 95c9e1366f..3f71691117 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -1,685 +1,685 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // PRIu32 #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static int last_cib_op_done = 0; static void write_attribute(attribute_t *a, bool ignore_delay); static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *cib = user_data; cib->cmds->signoff(cib); if (attrd_shutting_down(false)) { crm_info("Disconnected from the CIB manager"); } else { // @TODO This should trigger a reconnect, not a shutdown crm_crit("Lost connection to the CIB manager, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } static void attrd_cib_updated_cb(const char *event, xmlNode *msg) { const xmlNode *patchset = NULL; const char *client_name = NULL; if (attrd_shutting_down(true)) { return; } if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { return; } if (cib__element_in_patchset(patchset, XML_CIB_TAG_ALERTS)) { mainloop_set_trigger(attrd_config_read); } if (!attrd_election_won()) { // Don't write attributes if we're not the writer return; } client_name = crm_element_value(msg, F_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { // The CIB is still accurate return; } if (cib__element_in_patchset(patchset, XML_CIB_TAG_NODES) || cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS)) { /* An unsafe client modified the nodes or status section. Write * transient attributes to ensure they're up-to-date in the CIB. 
*/ if (client_name == NULL) { client_name = crm_element_value(msg, F_CIB_CLIENTID); } crm_notice("Updating all attributes after %s event triggered by %s", event, pcmk__s(client_name, "(unidentified client)")); attrd_write_attributes(attrd_write_all); } } int attrd_cib_connect(int max_retry) { static int attempts = 0; int rc = -ENOTCONN; the_cib = cib_new(); if (the_cib == NULL) { return -ENOTCONN; } do { if (attempts > 0) { sleep(attempts); } attempts++; crm_debug("Connection attempt %d to the CIB manager", attempts); rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command); } while ((rc != pcmk_ok) && (attempts < max_retry)); if (rc != pcmk_ok) { crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); goto cleanup; } crm_debug("Connected to the CIB manager after %d attempts", attempts); rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); if (rc != pcmk_ok) { crm_err("Could not set disconnection callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); goto cleanup; } return pcmk_ok; cleanup: cib__clean_up_connection(&the_cib); return -ENOTCONN; } void attrd_cib_disconnect(void) { CRM_CHECK(the_cib != NULL, return); the_cib->cmds->del_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); cib__clean_up_connection(&the_cib); } static void attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { do_crm_log_unlikely(((rc != pcmk_ok)? LOG_NOTICE : LOG_DEBUG), "Cleared transient attributes: %s " CRM_XS " xpath=%s rc=%d", pcmk_strerror(rc), (char *) user_data, rc); } #define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS /*! * \internal * \brief Wipe all transient attributes for this node from the CIB * * Clear any previous transient node attributes from the CIB. 
This is * normally done by the DC's controller when this node leaves the cluster, but * this handles the case where the node restarted so quickly that the * cluster layer didn't notice. * - * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned), + * \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED), * ideally we'd skip this and sync our attributes from the writer. * However, currently we reject any values for us that the writer has, in * attrd_peer_update(). */ static void attrd_erase_attrs(void) { int call_id = 0; char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", xpath); call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, "attrd_erase_cb", attrd_erase_cb, free); } /*! * \internal * \brief Prepare the CIB after cluster is connected */ void attrd_cib_init(void) { // We have no attribute values in memory, wipe the CIB to match attrd_erase_attrs(); // Set a trigger for reading the CIB (for the alerts section) attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); // Always read the CIB at start-up mainloop_set_trigger(attrd_config_read); } static gboolean attribute_timer_cb(gpointer data) { attribute_t *a = data; crm_trace("Dampen interval expired for %s", a->id); attrd_write_or_elect_attribute(a); return FALSE; } static void attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { int level = LOG_ERR; GHashTableIter iter; const char *peer = NULL; attribute_value_t *v = NULL; char *name = user_data; attribute_t *a = g_hash_table_lookup(attributes, name); if(a == NULL) { crm_info("Attribute %s no longer exists", name); return; } a->update = 0; if (rc == pcmk_ok && call_id < 0) { rc = call_id; } switch (rc) { case pcmk_ok: level = LOG_INFO; last_cib_op_done = call_id; if (a->timer && 
!a->timeout_ms) { // Remove temporary dampening for failed writes mainloop_timer_del(a->timer); a->timer = NULL; } break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ level = LOG_WARNING; break; } do_crm_log(level, "CIB update %d result for %s: %s " CRM_XS " rc=%d", call_id, a->id, pcmk_strerror(rc), rc); g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { do_crm_log(level, "* %s[%s]=%s", a->id, peer, v->requested); free(v->requested); v->requested = NULL; if (rc != pcmk_ok) { a->changed = true; /* Attempt write out again */ } } if (a->changed && attrd_election_won()) { if (rc == pcmk_ok) { /* We deferred a write of a new update because this update was in * progress. Write out the new value without additional delay. */ write_attribute(a, false); /* We're re-attempting a write because the original failed; delay * the next attempt so we don't potentially flood the CIB manager * and logs with a zillion attempts per second. * * @TODO We could elect a new writer instead. However, we'd have to * somehow downgrade our vote, and we'd still need something like this * if all peers similarly fail to write this attribute (which may * indicate a corrupted attribute entry rather than a CIB issue). */ } else if (a->timer) { // Attribute has a dampening value, so use that as delay if (!mainloop_timer_running(a->timer)) { crm_trace("Delayed re-attempted write for %s by %s", name, pcmk__readable_interval(a->timeout_ms)); mainloop_timer_start(a->timer); } } else { /* Set a temporary dampening of 2 seconds (timer will continue * to exist until the attribute's dampening gets set or the * write succeeds). */ a->timer = attrd_add_timer(a->id, 2000, a); mainloop_timer_start(a->timer); } } } /*! 
* \internal * \brief Add a set-attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] attr_id ID of attribute to update * \param[in] node_id ID of node for which to update attribute value * \param[in] set_id ID of attribute set * \param[in] value New value for attribute * * \return Standard Pacemaker return code */ static int add_set_attr_update(const attribute_t *attr, const char *attr_id, const char *node_id, const char *set_id, const char *value) { xmlNode *update = create_xml_node(NULL, XML_CIB_TAG_STATE); xmlNode *child = update; int rc = ENOMEM; if (child == NULL) { goto done; } crm_xml_add(child, XML_ATTR_ID, node_id); child = create_xml_node(child, XML_TAG_TRANSIENT_NODEATTRS); if (child == NULL) { goto done; } crm_xml_add(child, XML_ATTR_ID, node_id); child = create_xml_node(child, attr->set_type); if (child == NULL) { goto done; } crm_xml_add(child, XML_ATTR_ID, set_id); child = create_xml_node(child, XML_CIB_TAG_NVPAIR); if (child == NULL) { goto done; } crm_xml_add(child, XML_ATTR_ID, attr_id); crm_xml_add(child, XML_NVPAIR_ATTR_NAME, attr->id); crm_xml_add(child, XML_NVPAIR_ATTR_VALUE, value); rc = the_cib->cmds->modify(the_cib, XML_CIB_TAG_STATUS, update, cib_can_create|cib_transaction); rc = pcmk_legacy2rc(rc); done: free_xml(update); return rc; } /*! 
* \internal * \brief Add an unset-attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] attr_id ID of attribute to update * \param[in] node_id ID of node for which to update attribute value * \param[in] set_id ID of attribute set * * \return Standard Pacemaker return code */ static int add_unset_attr_update(const attribute_t *attr, const char *attr_id, const char *node_id, const char *set_id) { char *xpath = crm_strdup_printf("/" XML_CIB_TAG_STATUS "/" XML_CIB_TAG_STATE "[@" XML_ATTR_ID "='%s']" "/" XML_TAG_TRANSIENT_NODEATTRS "[@" XML_ATTR_ID "='%s']" "/%s[@" XML_ATTR_ID "='%s']" "/" XML_CIB_TAG_NVPAIR "[@" XML_ATTR_ID "='%s' " "and @" XML_NVPAIR_ATTR_NAME "='%s']", node_id, node_id, attr->set_type, set_id, attr_id, attr->id); int rc = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath|cib_transaction); free(xpath); return pcmk_legacy2rc(rc); } /*! * \internal * \brief Add an attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] value New value for attribute * \param[in] node_id ID of node for which to update attribute value * * \return Standard Pacemaker return code */ static int add_attr_update(const attribute_t *attr, const char *value, const char *node_id) { char *set_id = NULL; char *attr_id = NULL; int rc = pcmk_rc_ok; if (attr->set_id != NULL) { pcmk__str_update(&set_id, attr->set_id); } else { set_id = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, node_id); } crm_xml_sanitize_id(set_id); if (attr->uuid != NULL) { pcmk__str_update(&attr_id, attr->uuid); } else { attr_id = crm_strdup_printf("%s-%s", set_id, attr->id); } crm_xml_sanitize_id(attr_id); if (value != NULL) { rc = add_set_attr_update(attr, attr_id, node_id, set_id, value); } else { rc = add_unset_attr_update(attr, attr_id, node_id, set_id); } free(set_id); free(attr_id); return rc; } static void send_alert_attributes_value(attribute_t *a, GHashTable *t) { int rc = 0; attribute_value_t 
*at = NULL; GHashTableIter vIter; g_hash_table_iter_init(&vIter, t); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { rc = attrd_send_attribute_alert(at->nodename, at->nodeid, a->id, at->current); crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d", a->id, at->nodename, at->current, at->nodeid, rc); } } static void set_alert_attribute_value(GHashTable *t, attribute_value_t *v) { attribute_value_t *a_v = NULL; a_v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(a_v != NULL); a_v->nodeid = v->nodeid; a_v->nodename = strdup(v->nodename); pcmk__str_update(&a_v->current, v->current); g_hash_table_replace(t, a_v->nodename, a_v); } mainloop_timer_t * attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr) { return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr); } /*! * \internal * \brief Write an attribute's values to the CIB if appropriate * * \param[in,out] a Attribute to write * \param[in] ignore_delay If true, write attribute now regardless of any * configured delay */ static void write_attribute(attribute_t *a, bool ignore_delay) { int private_updates = 0, cib_updates = 0; attribute_value_t *v = NULL; GHashTableIter iter; GHashTable *alert_attribute_value = NULL; int rc = pcmk_ok; if (a == NULL) { return; } /* If this attribute will be written to the CIB ... 
*/ if (!stand_alone && !a->is_private) { /* Defer the write if now's not a good time */ if (a->update && (a->update < last_cib_op_done)) { crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); a->update = 0; // Don't log this message again } else if (a->update) { crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); goto done; } else if (mainloop_timer_running(a->timer)) { if (ignore_delay) { mainloop_timer_stop(a->timer); crm_debug("Overriding '%s' write delay", a->id); } else { crm_info("Delaying write of '%s'", a->id); goto done; } } // Initiate a transaction for all the peer value updates CRM_CHECK(the_cib != NULL, goto done); the_cib->cmds->set_user(the_cib, a->user); rc = the_cib->cmds->init_transaction(the_cib); if (rc != pcmk_ok) { crm_err("Failed to write %s (id %s, set %s): Could not initiate " "CIB transaction", a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); goto done; } } /* Attribute will be written shortly, so clear changed flag */ a->changed = false; /* We will check all peers' uuids shortly, so initialize this to false */ a->unknown_peer_uuids = false; /* Attribute will be written shortly, so clear forced write flag */ a->force_write = FALSE; /* Make the table for the attribute trap */ alert_attribute_value = pcmk__strikey_table(NULL, attrd_free_attribute_value); /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY); /* If the value's peer info does not correspond to a peer, ignore it */ if (peer == NULL) { crm_notice("Cannot update %s[%s]=%s because peer not known", a->id, v->nodename, v->current); continue; } /* If we're just learning the peer's node id, remember it */ if (peer->id && (v->nodeid == 0)) { crm_trace("Learned ID %u for node %s", peer->id, v->nodename); v->nodeid = peer->id; } /* If 
this is a private attribute, no update needs to be sent */ if (stand_alone || a->is_private) { private_updates++; continue; } /* If the peer is found, but its uuid is unknown, defer write */ if (peer->uuid == NULL) { a->unknown_peer_uuids = true; crm_notice("Cannot update %s[%s]=%s because peer UUID not known " "(will retry if learned)", a->id, v->nodename, v->current); continue; } // Update this value as part of the CIB transaction we're building rc = add_attr_update(a, v->current, peer->uuid); if (rc != pcmk_rc_ok) { crm_err("Failed to update %s[%s]=%s (peer known as %s, UUID %s, " "ID %" PRIu32 "/%" PRIu32 "): %s", a->id, v->nodename, v->current, peer->uname, peer->uuid, peer->id, v->nodeid, pcmk_rc_str(rc)); continue; } crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID " "%" PRIu32 "/%" PRIu32 ")", a->id, v->nodename, v->current, peer->uname, peer->uuid, peer->id, v->nodeid); cib_updates++; /* Preservation of the attribute to transmit alert */ set_alert_attribute_value(alert_attribute_value, v); free(v->requested); v->requested = NULL; if (v->current) { v->requested = strdup(v->current); } } if (private_updates) { crm_info("Processed %d private change%s for %s, id=%s, set=%s", private_updates, pcmk__plural_s(private_updates), a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); } if (cib_updates > 0) { char *id = NULL; // Commit transaction a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none); crm_info("Sent CIB request %d with %d change%s for %s (id %s, set %s)", a->update, cib_updates, pcmk__plural_s(cib_updates), a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); pcmk__str_update(&id, a->id); if (the_cib->cmds->register_callback_full(the_cib, a->update, CIB_OP_TIMEOUT_S, FALSE, id, "attrd_cib_callback", attrd_cib_callback, free)) { // Transmit alert of the attribute send_alert_attributes_value(a, alert_attribute_value); } } done: // Discard transaction (if any) if (the_cib != NULL) { 
the_cib->cmds->end_transaction(the_cib, false, cib_none); the_cib->cmds->set_user(the_cib, NULL); } if (alert_attribute_value != NULL) { g_hash_table_destroy(alert_attribute_value); } } /*! * \internal * \brief Write out attributes * * \param[in] options Group of enum attrd_write_options */ void attrd_write_attributes(uint32_t options) { GHashTableIter iter; attribute_t *a = NULL; crm_debug("Writing out %s attributes", pcmk_is_set(options, attrd_write_all)? "all" : "changed"); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) { // Try writing this attribute again, in case peer ID was learned a->changed = true; } else if (a->force_write) { /* If the force_write flag is set, write the attribute. */ a->changed = true; } if (pcmk_is_set(options, attrd_write_all) || a->changed) { bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay); if (a->force_write) { // Always ignore delay when forced write flag is set ignore_delay = true; } write_attribute(a, ignore_delay); } else { crm_trace("Skipping unchanged attribute %s", a->id); } } } void attrd_write_or_elect_attribute(attribute_t *a) { if (attrd_election_won()) { write_attribute(a, false); } else { attrd_start_election_if_needed(); } } diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c index 822c491977..865e41f082 100644 --- a/daemons/controld/controld_cib.c +++ b/daemons/controld/controld_cib.c @@ -1,1069 +1,1070 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include /* sleep */ #include #include #include #include #include #include // Call ID of the most recent in-progress CIB resource update (or 0 if none) static int pending_rsc_update = 0; /*! * \internal * \brief Respond to a dropped CIB connection * * \param[in] user_data CIB connection that dropped */ static void handle_cib_disconnect(gpointer user_data) { CRM_LOG_ASSERT(user_data == controld_globals.cib_conn); controld_trigger_fsa(); controld_globals.cib_conn->state = cib_disconnected; if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { // @TODO This should trigger a reconnect, not a shutdown crm_crit("Lost connection to the CIB manager, shutting down"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_CIB_CONNECTED); } else { // Expected crm_info("Disconnected from the CIB manager"); } } static void do_cib_updated(const char *event, xmlNode * msg) { const xmlNode *patchset = NULL; const char *client_name = NULL; crm_debug("Received CIB diff notification: DC=%s", pcmk__btoa(AM_I_DC)); if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { return; } if (cib__element_in_patchset(patchset, XML_CIB_TAG_ALERTS) || cib__element_in_patchset(patchset, XML_CIB_TAG_CRMCONFIG)) { controld_trigger_config(); } if (!AM_I_DC) { // We're not in control of the join sequence return; } client_name = crm_element_value(msg, F_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { // The CIB is still accurate return; } if (cib__element_in_patchset(patchset, XML_CIB_TAG_NODES) || cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS)) { /* An unsafe client modified the nodes or status section. Ensure the * node list is up-to-date, and start the join process again so we get * everyone's current resource history. 
*/ if (client_name == NULL) { client_name = crm_element_value(msg, F_CIB_CLIENTID); } crm_notice("Populating nodes and starting an election after %s event " "triggered by %s", event, pcmk__s(client_name, "(unidentified client)")); populate_cib_nodes(node_update_quick|node_update_all, __func__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } void controld_disconnect_cib_manager(void) { cib_t *cib_conn = controld_globals.cib_conn; CRM_ASSERT(cib_conn != NULL); crm_debug("Disconnecting from the CIB manager"); controld_clear_fsa_input_flags(R_CIB_CONNECTED); cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated); cib_free_callbacks(cib_conn); if (cib_conn->state != cib_disconnected) { cib_conn->cmds->set_secondary(cib_conn, cib_scope_local|cib_discard_reply); cib_conn->cmds->signoff(cib_conn); } } /* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static int cib_retries = 0; cib_t *cib_conn = controld_globals.cib_conn; void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; int rc = pcmk_ok; CRM_ASSERT(cib_conn != NULL); if (pcmk_is_set(action, A_CIB_STOP)) { if ((cib_conn->state != cib_disconnected) && (pending_rsc_update != 0)) { crm_info("Waiting for resource update %d to complete", pending_rsc_update); crmd_fsa_stall(FALSE); return; } controld_disconnect_cib_manager(); } if (!pcmk_is_set(action, A_CIB_START)) { return; } if (cur_state == S_STOPPING) { crm_err("Ignoring request to connect to the CIB manager after " "shutdown"); return; } rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); if (rc != pcmk_ok) { // A short wait that usually avoids stalling the FSA sleep(1); rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); } if (rc != pcmk_ok) { 
crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); } else if (cib_conn->cmds->set_connection_dnotify(cib_conn, dnotify_fn) != pcmk_ok) { crm_err("Could not set dnotify callback"); } else if (cib_conn->cmds->add_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, update_cb) != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); } else { controld_set_fsa_input_flags(R_CIB_CONNECTED); cib_retries = 0; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { cib_retries++; if (cib_retries < 30) { crm_warn("Couldn't complete CIB registration %d times... " "pause and retry", cib_retries); controld_start_wait_timer(); crmd_fsa_stall(FALSE); } else { crm_err("Could not complete CIB registration %d times... " "hard error", cib_retries); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } } #define MIN_CIB_OP_TIMEOUT (30) /*! * \internal * \brief Get the timeout (in seconds) that should be used with CIB operations * * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout * environment variable, or 10 seconds times one more than the number of * nodes in the cluster. */ unsigned int cib_op_timeout(void) { + // @COMPAT: Drop env_timeout at 3.0.0 static int env_timeout = -1; unsigned int calculated_timeout = 0; if (env_timeout == -1) { const char *env = pcmk__env_option(PCMK__ENV_CIB_TIMEOUT); pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT); crm_trace("Minimum CIB op timeout: %ds (environment: %s)", env_timeout, (env? env : "none")); } calculated_timeout = 1 + crm_active_peers(); if (crm_remote_peer_cache) { calculated_timeout += g_hash_table_size(crm_remote_peer_cache); } calculated_timeout *= 10; calculated_timeout = QB_MAX(calculated_timeout, env_timeout); crm_trace("Calculated timeout: %us", calculated_timeout); if (controld_globals.cib_conn) { controld_globals.cib_conn->call_timeout = calculated_timeout; } return calculated_timeout; } /*! 
* \internal * \brief Get CIB call options to use local scope if primary is unavailable * * \return CIB call options */ int crmd_cib_smart_opt(void) { int call_opt = cib_none; if ((controld_globals.fsa_state == S_ELECTION) || (controld_globals.fsa_state == S_PENDING)) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(controld_globals.fsa_state)); cib__set_call_options(call_opt, "update", cib_scope_local); } return call_opt; } static void cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *desc = user_data; if (rc == 0) { crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); } else { crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", desc, call_id, pcmk_strerror(rc), rc); } } // Searches for various portions of node_state to delete // Match a particular node's node_state (takes node name 1x) #define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" // Node's lrm section (name 1x) #define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM /* Node's lrm_rsc_op entries and lrm_resource entries without unexpired lock * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x) */ #define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \ "|" XPATH_NODE_STATE \ "//" XML_LRM_TAG_RESOURCE \ "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ") " \ "or " XML_CONFIG_ATTR_SHUTDOWN_LOCK "<%lld]" // Node's transient_attributes section (name 1x) #define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS // Everything under node_state (name 1x) #define XPATH_NODE_ALL XPATH_NODE_STATE "/*" /* Unlocked history + transient attributes * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x, * name 1x) */ #define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS /*! 
* \internal * \brief Get the XPath and description of a node state section to be deleted * * \param[in] uname Desired node * \param[in] section Subsection of node_state to be deleted * \param[out] xpath Where to store XPath of \p section * \param[out] desc If not \c NULL, where to store description of \p section */ void controld_node_state_deletion_strings(const char *uname, enum controld_section_e section, char **xpath, char **desc) { const char *desc_pre = NULL; // Shutdown locks that started before this time are expired long long expire = (long long) time(NULL) - controld_globals.shutdown_lock_limit; switch (section) { case controld_section_lrm: *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); desc_pre = "resource history"; break; case controld_section_lrm_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, uname, uname, expire); desc_pre = "resource history (other than shutdown locks)"; break; case controld_section_attrs: *xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); desc_pre = "transient attributes"; break; case controld_section_all: *xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); desc_pre = "all state"; break; case controld_section_all_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, uname, uname, expire, uname); desc_pre = "all state (other than shutdown locks)"; break; default: // We called this function incorrectly CRM_ASSERT(false); break; } if (desc != NULL) { *desc = crm_strdup_printf("%s for node %s", desc_pre, uname); } } /*! 
* \internal * \brief Delete subsection of a node's CIB node_state * * \param[in] uname Desired node * \param[in] section Subsection of node_state to delete * \param[in] options CIB call options to use */ void controld_delete_node_state(const char *uname, enum controld_section_e section, int options) { cib_t *cib = controld_globals.cib_conn; char *xpath = NULL; char *desc = NULL; int cib_rc = pcmk_ok; CRM_ASSERT((uname != NULL) && (cib != NULL)); controld_node_state_deletion_strings(uname, section, &xpath, &desc); cib__set_call_options(options, "node state deletion", cib_xpath|cib_multiple); cib_rc = cib->cmds->remove(cib, xpath, NULL, options); fsa_register_cib_callback(cib_rc, desc, cib_delete_callback); crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", desc, cib_rc, xpath); // CIB library handles freeing desc free(xpath); } // Takes node name and resource ID #define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \ "[@" XML_ATTR_UNAME "='%s']/" \ XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE \ "[@" XML_ATTR_ID "='%s']" // @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks /*! 
* \internal * \brief Clear resource history from CIB for a given resource and node * * \param[in] rsc_id ID of resource to be cleared * \param[in] node Node whose resource history should be cleared * \param[in] user_name ACL user name to use * \param[in] call_options CIB call options * * \return Standard Pacemaker return code */ int controld_delete_resource_history(const char *rsc_id, const char *node, const char *user_name, int call_options) { char *desc = NULL; char *xpath = NULL; int rc = pcmk_rc_ok; cib_t *cib = controld_globals.cib_conn; CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); if (cib == NULL) { crm_err("Unable to clear %s: no CIB connection", desc); free(desc); return ENOTCONN; } // Ask CIB to delete the entry xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); cib->cmds->set_user(cib, user_name); rc = cib->cmds->remove(cib, xpath, NULL, call_options|cib_xpath); cib->cmds->set_user(cib, NULL); if (rc < 0) { rc = pcmk_legacy2rc(rc); crm_err("Could not delete resource status of %s on %s%s%s: %s " CRM_XS " rc=%d", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : ""), pcmk_rc_str(rc), rc); free(desc); free(xpath); return rc; } if (pcmk_is_set(call_options, cib_sync_call)) { if (pcmk_is_set(call_options, cib_dryrun)) { crm_debug("Deletion of %s would succeed", desc); } else { crm_debug("Deletion of %s succeeded", desc); } free(desc); } else { crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s", desc, rc, xpath); fsa_register_cib_callback(rc, desc, cib_delete_callback); // CIB library handles freeing desc } free(xpath); return pcmk_rc_ok; } /*! 
* \internal * \brief Build XML and string of parameters meeting some criteria, for digest * * \param[in] op Executor event with parameter table to use * \param[in] metadata Parsed meta-data for executed resource agent * \param[in] param_type Flag used for selection criteria * \param[out] result Will be set to newly created XML with selected * parameters as attributes * * \return Newly allocated space-separated string of parameter names * \note Selection criteria varies by param_type: for the restart digest, we * want parameters that are *not* marked reloadable (OCF 1.1) or that * *are* marked unique (pre-1.1), for both string and XML results; for the * secure digest, we want parameters that *are* marked private for the * string, but parameters that are *not* marked private for the XML. * \note It is the caller's responsibility to free the string return value with * \p g_string_free() and the XML result with \p free_xml(). */ static GString * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, enum ra_param_flags_e param_type, xmlNode **result) { GString *list = NULL; *result = create_xml_node(NULL, XML_TAG_PARAMS); /* Consider all parameters only except private ones to be consistent with * what scheduler does with calculate_secure_digest(). 
*/ if (param_type == ra_param_private && compare_version(controld_globals.dc_version, "3.16.0") >= 0) { g_hash_table_foreach(op->params, hash2field, *result); pcmk__filter_op_for_digest(*result); } for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept_for_list = false; bool accept_for_xml = false; switch (param_type) { case ra_param_reloadable: accept_for_list = !pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_unique: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_private: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = !accept_for_list; break; } if (accept_for_list) { crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); if (list == NULL) { // We will later search for " WORD ", so start list with a space pcmk__add_word(&list, 256, " "); } pcmk__add_word(&list, 0, param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (accept_for_xml) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(*result, param->rap_name, v); } } else { crm_trace("Removing attr %s from the xml result", param->rap_name); xml_remove_prop(*result, param->rap_name); } } if (list != NULL) { // We will later search for " WORD ", so end list with a space pcmk__add_word(&list, 0, " "); } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) { // 
Add parameters not marked reloadable to the "op-force-restart" list list = build_parameter_list(op, metadata, ra_param_reloadable, &restart); } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) { /* @COMPAT pre-OCF-1.1 resource agents * * Before OCF 1.1, Pacemaker abused "unique=0" to indicate * reloadability. Add any parameters with unique="1" to the * "op-force-restart" list. */ list = build_parameter_list(op, metadata, ra_param_unique, &restart); } else { // Resource does not support agent reloads return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, (list == NULL)? "" : (const char *) list->str); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); if ((list != NULL) && (list->len > 0)) { crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); } else { crm_trace("%s: %s", op->rsc_id, digest); } if (list != NULL) { g_string_free(list, TRUE); } free_xml(restart); free(digest); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ list = build_parameter_list(op, metadata, ra_param_private, &secure); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, (const char *) list->str); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); g_string_free(list, TRUE); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); 
free(digest); } /*! * \internal * \brief Create XML for a resource history entry * * \param[in] func Function name of caller * \param[in,out] parent XML to add entry to * \param[in] rsc Affected resource * \param[in,out] op Action to add an entry for (or NULL to do nothing) * \param[in] node_name Node where action occurred */ void controld_add_resource_history_xml_as(const char *func, xmlNode *parent, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *node_name) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return; } target_rc = rsc_op_expected_rc(op); caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, controld_globals.our_nodename, func); if (xml_op == NULL) { return; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return; } /* Ideally the metadata is cached, and the agent is just a fallback. * * @TODO Go through all callers and ensure they get metadata asynchronously * first. */ metadata = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_agent |controld_metadata_from_cache); if (metadata == NULL) { return; } crm_trace("Including additional digests for %s:%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return; } /*! 
* \internal * \brief Record an action as pending in the CIB, if appropriate * * \param[in] node_name Node where the action is pending * \param[in] rsc Resource that action is for * \param[in,out] op Pending action * * \return true if action was recorded in CIB, otherwise false */ bool controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL), return false); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return false; } // Check action's record-pending meta-attribute (defaults to true) record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if ((record_pending != NULL) && !crm_is_true(record_pending)) { return false; } op->call_id = -1; op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); crm_debug("Recording pending %s-interval %s for %s on %s in the CIB", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, node_name); controld_update_resource_history(node_name, rsc, op, 0); return true; } static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource history update completed (call=%d rc=%d)", call_id, rc); break; default: if (call_id > 0) { crm_warn("Resource history update %d failed: %s " CRM_XS " rc=%d", call_id, pcmk_strerror(rc), rc); } else { crm_warn("Resource history update failed: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } if (call_id == pending_rsc_update) { pending_rsc_update = 0; controld_trigger_fsa(); } } /* Only successful stops, and probes that found the resource inactive, get locks * recorded in the history. 
/*!
 * \internal
 * \brief Request a CIB update
 *
 * \param[in] section   Section of CIB to update
 * \param[in] data      New XML of CIB section to update
 * \param[in] options   CIB call options
 * \param[in] callback  If not \c NULL, set this as the operation callback
 *
 * \return Standard Pacemaker return code
 *
 * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is
 *       stored in \p pending_rsc_update on success.
 * \note Without a CIB connection the call is treated as failed with
 *       -ENOTCONN; a non-NULL \p callback is still registered in that case.
 */
int
controld_update_cib(const char *section, xmlNode *data, int options,
                    void (*callback)(xmlNode *, int, int, xmlNode *, void *))
{
    cib_t *conn = controld_globals.cib_conn;
    int call_id = -ENOTCONN;

    CRM_ASSERT(data != NULL);

    if (conn != NULL) {
        call_id = conn->cmds->modify(conn, section, data, options);
        if (call_id >= 0) {
            crm_debug("Submitted CIB update %d for %s section",
                      call_id, section);
        }
    }

    if (callback != NULL) {
        /* Checking for a particular callback is a little hacky, but it
         * didn't seem worth adding an output argument for the call ID for
         * just one use case.
         */
        if ((callback == cib_rsc_callback) && (call_id >= 0)) {
            pending_rsc_update = call_id;
        }
        fsa_register_cib_callback(call_id, NULL, callback);

    } else if (call_id < 0) {
        crm_err("Failed to update CIB %s section: %s",
                section, pcmk_rc_str(pcmk_legacy2rc(call_id)));
    }

    if (call_id < 0) {
        return pcmk_legacy2rc(call_id);
    }
    return pcmk_rc_ok;
}
* \internal * \brief Update resource history entry in CIB * * \param[in] node_name Node where action occurred * \param[in] rsc Resource that action is for * \param[in,out] op Action to record * \param[in] lock_time If nonzero, when resource was locked to node * * \note On success, the CIB update's call ID will be stored in * pending_rsc_update. */ void controld_update_resource_history(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time) { xmlNode *update = NULL; xmlNode *xml = NULL; int call_opt = crmd_cib_smart_opt(); const char *node_id = NULL; const char *container = NULL; CRM_CHECK((node_name != NULL) && (op != NULL), return); if (rsc == NULL) { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); return; } // update = create_xml_node(NULL, XML_CIB_TAG_STATUS); // xml = create_xml_node(update, XML_CIB_TAG_STATE); if (pcmk__str_eq(node_name, controld_globals.our_nodename, pcmk__str_casei)) { node_id = controld_globals.our_uuid; } else { node_id = node_name; pcmk__xe_set_bool_attr(xml, XML_NODE_IS_REMOTE, true); } crm_xml_add(xml, XML_ATTR_ID, node_id); crm_xml_add(xml, XML_ATTR_UNAME, node_name); crm_xml_add(xml, XML_ATTR_ORIGIN, __func__); // xml = create_xml_node(xml, XML_CIB_TAG_LRM); crm_xml_add(xml, XML_ATTR_ID, node_id); // xml = create_xml_node(xml, XML_LRM_TAG_RESOURCES); // xml = create_xml_node(xml, XML_LRM_TAG_RESOURCE); crm_xml_add(xml, XML_ATTR_ID, op->rsc_id); crm_xml_add(xml, XML_AGENT_ATTR_CLASS, rsc->standard); crm_xml_add(xml, XML_AGENT_ATTR_PROVIDER, rsc->provider); crm_xml_add(xml, XML_ATTR_TYPE, rsc->type); if (lock_time != 0) { /* Actions on a locked resource should either preserve the lock by * recording it with the action result, or clear it. 
*/ if (!should_preserve_lock(op)) { lock_time = 0; } crm_xml_add_ll(xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, (long long) lock_time); } if (op->params != NULL) { container = g_hash_table_lookup(op->params, CRM_META "_" XML_RSC_ATTR_CONTAINER); if (container != NULL) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(xml, XML_RSC_ATTR_CONTAINER, container); } } // (possibly more than one) controld_add_resource_history_xml(xml, rsc, op, node_name); /* Update CIB asynchronously. Even if it fails, the resource state should be * discovered during the next election. Worst case, the node is wrongly * fenced for running a resource it isn't. */ crm_log_xml_trace(update, __func__); controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, cib_rsc_callback); free_xml(update); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] op Operation whose history should be deleted */ void controld_delete_action_history(const lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_none); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME 
"='%s']" \ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" /*! * \internal * \brief Delete a last_failure resource history entry from the CIB * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] action If specified, delete only if this was failed action * \param[in] interval_ms If \p action is specified, it has this interval */ void controld_cib_delete_last_failure(const char *rsc_id, const char *node, const char *action, guint interval_ms) { char *xpath = NULL; char *last_failure_key = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL), return); // Generate XPath to match desired entry last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0); if (action == NULL) { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, last_failure_key); } else { char *action_key = pcmk__op_key(rsc_id, action, interval_ms); xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id, last_failure_key, action_key); free(action_key); } free(last_failure_key); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } /*! 
* \internal * \brief Delete resource history entry from the CIB, given operation key * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] call_id If specified, delete entry only if it has this call ID */ void controld_delete_action_history_by_key(const char *rsc_id, const char *node, const char *key, int call_id) { char *xpath = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return); if (call_id > 0) { xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key, call_id); } else { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key); } controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } diff --git a/daemons/execd/remoted_pidone.c b/daemons/execd/remoted_pidone.c index 96fee90c07..08271bfb2a 100644 --- a/daemons/execd/remoted_pidone.c +++ b/daemons/execd/remoted_pidone.c @@ -1,301 +1,302 @@ /* * Copyright 2017-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
/*!
 * \internal
 * \brief Check a line of text for a valid environment variable name
 *
 * \param[in]  line   Text to check
 * \param[out] first  First character of valid name if found, NULL otherwise
 * \param[out] last   Last character of valid name if found, NULL otherwise
 *
 * \return true if valid name found, false otherwise
 * \note It's reasonable to impose limitations on environment variable names
 *       beyond what C or setenv() does: We only allow names that contain only
 *       [a-zA-Z0-9_] characters and do not start with a digit.
 * \note Characters are converted to unsigned char before being passed to the
 *       <ctype.h> classifiers, because passing a negative char value to them
 *       is undefined behavior.
 */
static bool
find_env_var_name(char *line, char **first, char **last)
{
    // Skip leading whitespace
    *first = line;
    while (isspace((unsigned char) **first)) {
        ++*first;
    }

    if (isalpha((unsigned char) **first) || (**first == '_')) {
        // Valid first character; extend over following name characters
        *last = *first;
        while (isalnum((unsigned char) *(*last + 1))
               || (*(*last + 1) == '_')) {
            ++*last;
        }
        return true;
    }

    *first = *last = NULL;
    return false;
}
*/ FILE *fp = fopen(filename, "r"); if (fp != NULL) { char line[LINE_MAX] = { '\0', }; while (fgets(line, LINE_MAX, fp) != NULL) { char *name = NULL; char *end = NULL; char *value = NULL; char *quote = NULL; // Look for valid name immediately followed by equals sign if (find_env_var_name(line, &name, &end) && (*++end == '=')) { // Null-terminate name, and advance beyond equals sign *end++ = '\0'; // Check whether value is quoted if ((*end == '\'') || (*end == '"')) { quote = end++; } value = end; if (quote) { /* Value is remaining characters up to next non-backslashed * matching quote character. */ while (((*end != *quote) || (*(end - 1) == '\\')) && (*end != '\0')) { end++; } if (*end == *quote) { // Null-terminate value, and advance beyond close quote *end++ = '\0'; } else { // Matching closing quote wasn't found value = NULL; } } else { /* Value is remaining characters up to next non-backslashed * whitespace. */ while ((!isspace(*end) || (*(end - 1) == '\\')) && (*end != '\0')) { ++end; } if (end == (line + LINE_MAX - 1)) { // Line was too long value = NULL; } // Do NOT null-terminate value (yet) } /* We have a valid name and value, and end is now the character * after the closing quote or the first whitespace after the * unquoted value. Make sure the rest of the line is just * whitespace or a comment. 
*/ if (value) { char *value_end = end; while (isspace(*end) && (*end != '\n')) { ++end; } if ((*end == '\n') || (*end == '#')) { if (quote == NULL) { // Now we can null-terminate an unquoted value *value_end = '\0'; } // Don't overwrite (bundle options take precedence) setenv(name, value, 0); } else { value = NULL; } } } if ((value == NULL) && (strchr(line, '\n') == NULL)) { // Eat remainder of line beyond LINE_MAX if (fscanf(fp, "%*[^\n]\n") == EOF) { value = NULL; // Don't care, make compiler happy } } } fclose(fp); } } void remoted_spawn_pidone(int argc, char **argv, char **envp) { sigset_t set; /* This environment variable exists for two purposes: * - For testing, setting it to "full" enables full PID 1 behavior even * when PID is not 1 * - Setting to "vars" enables just the loading of environment variables * from /etc/pacemaker/pcmk-init.env, which could be useful for testing or * containers with a custom PID 1 script that launches pacemaker-remoted. */ - const char *pid1 = "full"; + const char *pid1 = "default"; if (getpid() != 1) { pid1 = pcmk__env_option(PCMK__ENV_REMOTE_PID1); - if (pid1 == NULL) { + if (!pcmk__str_any_of(pid1, "full", "vars", NULL)) { + // Default, unset, or invalid return; } } /* When a container is launched, it may be given specific environment * variables, which for Pacemaker bundles are given in the bundle * configuration. However, that does not allow for host-specific values. * To allow for that, look for a special file containing a shell-like syntax * of name/value pairs, and export those into the environment. */ load_env_vars("/etc/pacemaker/pcmk-init.env"); - if (strcmp(pid1, "full")) { + if (strcmp(pid1, "vars") == 0) { return; } /* Containers can be expected to have /var/log, but they may not have * /var/log/pacemaker, so use a different default if no value has been * explicitly configured in the container's environment. 
*/ if (pcmk__env_option(PCMK__ENV_LOGFILE) == NULL) { pcmk__set_env_option(PCMK__ENV_LOGFILE, "/var/log/pcmk-init.log", true); } sigfillset(&set); sigprocmask(SIG_BLOCK, &set, 0); main_pid = fork(); switch (main_pid) { case 0: sigprocmask(SIG_UNBLOCK, &set, NULL); setsid(); setpgid(0, 0); // Child remains as pacemaker-remoted return; case -1: crm_err("fork failed: %s", pcmk_rc_str(errno)); } /* Parent becomes the reaper of zombie processes */ /* Safe to initialize logging now if needed */ # ifdef HAVE_PROGNAME /* Differentiate ourselves in the 'ps' output */ { char *p; int i, maxlen; char *LastArgv = NULL; const char *name = "pcmk-init"; for (i = 0; i < argc; i++) { if (!i || (LastArgv + 1 == argv[i])) LastArgv = argv[i] + strlen(argv[i]); } for (i = 0; envp[i] != NULL; i++) { if ((LastArgv + 1) == envp[i]) { LastArgv = envp[i] + strlen(envp[i]); } } maxlen = (LastArgv - argv[0]) - 2; i = strlen(name); /* We can overwrite individual argv[] arguments */ snprintf(argv[0], maxlen, "%s", name); /* Now zero out everything else */ p = &argv[0][i]; while (p < LastArgv) { *p++ = '\0'; } argv[1] = NULL; } # endif // HAVE_PROGNAME while (1) { int sig; size_t i; sigwait(&set, &sig); for (i = 0; i < PCMK__NELEM(sigmap); i++) { if (sigmap[i].sig == sig) { sigmap[i].handler(); break; } } } } diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c index e06b8eb9c8..21e432eeeb 100644 --- a/daemons/pacemakerd/pcmkd_subdaemons.c +++ b/daemons/pacemakerd/pcmkd_subdaemons.c @@ -1,890 +1,891 @@ /* * Copyright 2010-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
/* Bookkeeping for one subdaemon managed by pacemakerd (one entry per element
 * of the pcmk_children[] table)
 */
typedef struct pcmk_child_s {
    pid_t pid;              /* PID of the subdaemon; 0 when none is tracked,
                             * or PCMK__SPECIAL_PID
                             */
    int respawn_count;      /* incremented on each exit; respawning is disabled
                             * once this exceeds MAX_RESPAWN
                             */
    bool respawn;           /* whether to restart the subdaemon when it exits */
    const char *name;       /* subdaemon name used in log messages */
    const char *uid;        /* user name to run as, or NULL to skip the lookup */
    const char *command;    /* path of the executable to launch */
    const char *endpoint;  /* IPC server name */
    bool needs_cluster;     /* if true, not respawned while the cluster layer
                             * is disconnected
                             */
    int check_count;        /* consecutive liveness checks that found the PID
                             * but no IPC response
                             */

    /* Anything below here will be dynamically initialized */
    bool needs_retry;       /* set when a respawn was deferred until the
                             * cluster returns
                             */
    bool active_before_startup; /* cleared when the child is started or exits;
                                 * NOTE(review): presumably set for daemons
                                 * found already running -- setter not shown
                                 */
} pcmk_child_t;
* Thus when everything is shutdown properly pacemakerd * waits till it has reported the graceful completion of * shutdown to sbd and just when sbd-client closes the * connection we can assume that the report has arrived * properly so that pacemakerd can finally exit. * Following two variables are used to track that handshake. */ unsigned int shutdown_complete_state_reported_to = 0; gboolean shutdown_complete_state_reported_client_closed = FALSE; /* state we report when asked via pacemakerd-api status-ping */ const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT; gboolean running_with_sbd = FALSE; /* local copy */ GMainLoop *mainloop = NULL; static gboolean fatal_error = FALSE; static int child_liveness(pcmk_child_t *child); static gboolean escalate_shutdown(gpointer data); static int start_child(pcmk_child_t * child); static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); static void pcmk_process_exit(pcmk_child_t * child); static gboolean pcmk_shutdown_worker(gpointer user_data); static gboolean stop_child(pcmk_child_t * child, int signal); static bool pcmkd_cluster_connected(void) { #if SUPPORT_COROSYNC return pcmkd_corosync_connected(); #else return true; #endif } static gboolean check_next_subdaemon(gpointer user_data) { static int next_child = 0; int rc = child_liveness(&pcmk_children[next_child]); crm_trace("Checked %s[%lld]: %s (%d)", pcmk_children[next_child].name, (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[next_child].pid), pcmk_rc_str(rc), rc); switch (rc) { case pcmk_rc_ok: pcmk_children[next_child].check_count = 0; subdaemon_check_progress = time(NULL); break; case pcmk_rc_ipc_pid_only: // This case: it was previously OK pcmk_children[next_child].check_count++; if (pcmk_children[next_child].check_count >= PCMK_PROCESS_CHECK_RETRIES) { crm_err("%s[%lld] is unresponsive to ipc after %d tries but " "we found the pid so have it killed that we can restart", pcmk_children[next_child].name, (long 
long) PCMK__SPECIAL_PID_AS_0( pcmk_children[next_child].pid), pcmk_children[next_child].check_count); stop_child(&pcmk_children[next_child], SIGKILL); if (pcmk_children[next_child].respawn) { /* as long as the respawn-limit isn't reached give it another round of check retries */ pcmk_children[next_child].check_count = 0; } } else { crm_notice("%s[%lld] is unresponsive to ipc after %d tries", pcmk_children[next_child].name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[next_child].pid), pcmk_children[next_child].check_count); if (pcmk_children[next_child].respawn) { /* as long as the respawn-limit isn't reached and we haven't run out of connect retries we account this as progress we are willing to tell to sbd */ subdaemon_check_progress = time(NULL); } } /* go to the next child and see if we can make progress there */ break; case pcmk_rc_ipc_unresponsive: if (!pcmk_children[next_child].respawn) { /* if a subdaemon is down and we don't want it to be restarted this is a success during shutdown. if it isn't restarted anymore due to MAX_RESPAWN it is rather no success. 
*/ if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) { subdaemon_check_progress = time(NULL); } } if (!pcmk_children[next_child].active_before_startup) { crm_trace("found %s[%lld] missing - signal-handler " "will take care of it", pcmk_children[next_child].name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[next_child].pid)); break; } if (pcmk_children[next_child].respawn) { crm_err("%s[%lld] terminated", pcmk_children[next_child].name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[next_child].pid)); } else { /* orderly shutdown */ crm_notice("%s[%lld] terminated", pcmk_children[next_child].name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[next_child].pid)); } pcmk_process_exit(&(pcmk_children[next_child])); break; default: crm_exit(CRM_EX_FATAL); break; /* static analysis/noreturn */ } next_child++; if (next_child >= PCMK__NELEM(pcmk_children)) { next_child = 0; } return G_SOURCE_CONTINUE; } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if (child->pid == PCMK__SPECIAL_PID) { pcmk_process_exit(child); } else if (child->pid != 0) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ crm_err("Child %s not terminating in a timely manner, forcing", child->name); stop_child(child, SIGSEGV); } return FALSE; } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo) { do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR), "%s[%d] terminated with signal %d (%s)%s", name, pid, signo, strsignal(signo), (core? 
" and dumped core" : "")); } else { switch(exitcode) { case CRM_EX_OK: crm_info("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; case CRM_EX_FATAL: crm_warn("Shutting cluster down because %s[%d] had fatal failure", name, pid); child->respawn = false; fatal_error = TRUE; pcmk_shutdown(SIGTERM); break; case CRM_EX_PANIC: crm_emerg("%s[%d] instructed the machine to reset", name, pid); child->respawn = false; fatal_error = TRUE; pcmk__panic(__func__); pcmk_shutdown(SIGTERM); break; default: crm_err("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; } } pcmk_process_exit(child); } static void pcmk_process_exit(pcmk_child_t * child) { child->pid = 0; child->active_before_startup = false; child->check_count = 0; child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Child respawn count exceeded by %s", child->name); child->respawn = false; } if (shutdown_trigger) { /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ mainloop_set_trigger(shutdown_trigger); } else if (!child->respawn) { /* nothing to do */ } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { crm_err("Rebooting system because of %s", child->name); pcmk__panic(__func__); } else if (child_liveness(child) == pcmk_rc_ok) { crm_warn("One-off suppressing strict respawning of a child process %s," " appears alright per %s IPC end-point", child->name, child->endpoint); } else if (child->needs_cluster && !pcmkd_cluster_connected()) { crm_notice("Not respawning %s subdaemon until cluster returns", child->name); child->needs_retry = true; } else { crm_notice("Respawning %s subdaemon after unexpected exit", child->name); start_child(child); } } static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = PCMK__NELEM(pcmk_children) - 1; static time_t next_log = 0; if (phase == PCMK__NELEM(pcmk_children) - 1) { crm_notice("Shutting down Pacemaker"); pacemakerd_state = 
XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN; } for (; phase >= 0; phase--) { pcmk_child_t *child = &(pcmk_children[phase]); if (child->pid != 0) { time_t now = time(NULL); if (child->respawn) { if (child->pid == PCMK__SPECIAL_PID) { crm_warn("The process behind %s IPC cannot be" " terminated, so either wait the graceful" " period of %ld s for its native termination" " if it vitally depends on some other daemons" " going down in a controlled way already," " or locate and kill the correct %s process" " on your own; set PCMK_" PCMK__ENV_FAIL_FAST "=1" " to avoid this altogether next time around", child->name, (long) SHUTDOWN_ESCALATION_PERIOD, child->command); } next_log = now + 30; child->respawn = false; stop_child(child, SIGTERM); if (phase < PCMK_CHILD_CONTROLD) { g_timeout_add(SHUTDOWN_ESCALATION_PERIOD, escalate_shutdown, child); } } else if (now >= next_log) { next_log = now + 30; crm_notice("Still waiting for %s to terminate " CRM_XS " pid=%lld", child->name, (long long) child->pid); } return TRUE; } /* cleanup */ crm_debug("%s confirmed stopped", child->name); child->pid = 0; } crm_notice("Shutdown complete"); pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE; if (!fatal_error && running_with_sbd && pcmk__get_sbd_sync_resource_startup() && !shutdown_complete_state_reported_client_closed) { crm_notice("Waiting for SBD to pick up shutdown-complete-state."); return TRUE; } + // @COMPAT Drop shutdown delay at 3.0.0 { const char *delay = pcmk__env_option(PCMK__ENV_SHUTDOWN_DELAY); if(delay) { sync(); pcmk__sleep_ms(crm_get_msec(delay)); } } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Shutting down and staying down after fatal error"); #ifdef SUPPORT_COROSYNC pcmkd_shutdown_corosync(); #endif crm_exit(CRM_EX_FATAL); } return TRUE; } /* TODO once libqb is taught to juggle with IPC end-points carried over as bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) it shall hand over these descriptors here if/once they are 
successfully pre-opened in (presumably) child_liveness(), to avoid any remaining room for races */ // \return Standard Pacemaker return code static int start_child(pcmk_child_t * child) { uid_t uid = 0; gid_t gid = 0; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED); child->active_before_startup = false; child->check_count = 0; if (child->command == NULL) { crm_info("Nothing to do for child \"%s\"", child->name); return pcmk_rc_ok; } if (env_callgrind != NULL && crm_is_true(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { use_valgrind = TRUE; } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { crm_warn("Cannot enable valgrind for %s:" " The location of the valgrind binary is unknown", child->name); use_valgrind = FALSE; } if (child->uid) { if (crm_user_lookup(child->uid, &uid, &gid) < 0) { crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); return EACCES; } crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); } child->pid = fork(); CRM_ASSERT(child->pid != -1); if (child->pid > 0) { /* parent */ mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); crm_info("Forked child %lld for process %s%s", (long long) child->pid, child->name, use_valgrind ? 
" (valgrind enabled: " VALGRIND_BIN ")" : ""); return pcmk_rc_ok; } else { /* Start a new session */ (void)setsid(); /* Setup the two alternate arg arrays */ opts_vgrind[0] = strdup(VALGRIND_BIN); if (use_callgrind) { opts_vgrind[1] = strdup("--tool=callgrind"); opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); opts_vgrind[3] = strdup(child->command); opts_vgrind[4] = NULL; } else { opts_vgrind[1] = strdup(child->command); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } opts_default[0] = strdup(child->command); if(gid) { // Drop root group access if not needed if (!need_root_group && (setgid(gid) < 0)) { crm_warn("Could not set group to %d: %s", gid, strerror(errno)); } /* Initialize supplementary groups to only those always granted to * the user, plus haclient (so we can access IPC). */ if (initgroups(child->uid, gid) < 0) { crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_rc_str(errno), errno); } } if (uid && setuid(uid) < 0) { crm_warn("Could not set user to %s (id %d): %s", child->uid, uid, strerror(errno)); } pcmk__close_fds_in_child(true); pcmk__open_devnull(O_RDONLY); // stdin (fd 0) pcmk__open_devnull(O_WRONLY); // stdout (fd 1) pcmk__open_devnull(O_WRONLY); // stderr (fd 2) if (use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { (void)execvp(child->command, opts_default); } crm_crit("Could not execute %s: %s", child->command, strerror(errno)); crm_exit(CRM_EX_FATAL); } return pcmk_rc_ok; /* never reached */ } /*! 
* \internal * \brief Check the liveness of the child based on IPC name and PID if tracked * * \param[in,out] child Child tracked data * * \return Standard Pacemaker return code * * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive * indicating that no trace of IPC liveness was detected, * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by * an unauthorized process, and pcmk_rc_ipc_pid_only indicating that * the child is up by PID but not IPC end-point (possibly starting). * \note This function doesn't modify any of \p child members but \c pid, * and is not actively toying with processes as such but invoking * \c stop_child in one particular case (there's for some reason * a different authentic holder of the IPC end-point). */ static int child_liveness(pcmk_child_t *child) { uid_t cl_uid = 0; gid_t cl_gid = 0; const uid_t root_uid = 0; const gid_t root_gid = 0; const uid_t *ref_uid; const gid_t *ref_gid; int rc = pcmk_rc_ipc_unresponsive; pid_t ipc_pid = 0; if (child->endpoint == NULL && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) { crm_err("Cannot track child %s for missing both API end-point and PID", child->name); rc = EINVAL; // Misuse of function when child is not trackable } else if (child->endpoint != NULL) { int legacy_rc = pcmk_ok; if (child->uid == NULL) { ref_uid = &root_uid; ref_gid = &root_gid; } else { ref_uid = &cl_uid; ref_gid = &cl_gid; legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid); } if (legacy_rc < 0) { rc = pcmk_legacy2rc(legacy_rc); crm_err("Could not find user and group IDs for user %s: %s " CRM_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc); } else { rc = pcmk__ipc_is_authentic_process_active(child->endpoint, *ref_uid, *ref_gid, &ipc_pid); if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) { if (child->pid <= 0) { /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this * initializes a new child. 
If rc is * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will * investigate further. */ child->pid = ipc_pid; } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) { /* An unexpected (but authorized) process is responding to * IPC. Investigate further. */ rc = pcmk_rc_ipc_unresponsive; } } } } if (rc == pcmk_rc_ipc_unresponsive) { /* If we get here, a child without IPC is being tracked, no IPC liveness * has been detected, or IPC liveness has been detected with an * unexpected (but authorized) process. This is safe on FreeBSD since * the only change possible from a proper child's PID into "special" PID * of 1 behind more loosely related process. */ int ret = pcmk__pid_active(child->pid, child->name); if (ipc_pid && ((ret != pcmk_rc_ok) || ipc_pid == PCMK__SPECIAL_PID || (pcmk__pid_active(ipc_pid, child->name) == pcmk_rc_ok))) { /* An unexpected (but authorized) process was detected at the IPC * endpoint, and either it is active, or the child we're tracking is * not. */ if (ret == pcmk_rc_ok) { /* The child we're tracking is active. Kill it, and adopt the * detected process. This assumes that our children don't fork * (thus getting a different PID owning the IPC), but rather the * tracking got out of sync because of some means external to * Pacemaker, and adopting the detected process is better than * killing it and possibly having to spawn a new child. */ /* not possessing IPC, afterall (what about corosync CPG?) */ stop_child(child, SIGKILL); } rc = pcmk_rc_ok; child->pid = ipc_pid; } else if (ret == pcmk_rc_ok) { // Our tracked child's PID was found active, but not its IPC rc = pcmk_rc_ipc_pid_only; } else if ((child->pid == 0) && (ret == EINVAL)) { // FreeBSD can return EINVAL rc = pcmk_rc_ipc_unresponsive; } else { switch (ret) { case EACCES: rc = pcmk_rc_ipc_unauthorized; break; case ESRCH: rc = pcmk_rc_ipc_unresponsive; break; default: rc = ret; break; } } } return rc; } /*! 
* \internal * \brief Initial one-off check of the pre-existing "child" processes * * With "child" process, we mean the subdaemon that defines an API end-point * (all of them do as of the comment) -- the possible complement is skipped * as it is deemed it has no such shared resources to cause conflicts about, * hence it can presumably be started anew without hesitation. * If that won't hold true in the future, the concept of a shared resource * will have to be generalized beyond the API end-point. * * For boundary cases that the "child" is still starting (IPC end-point is yet * to be witnessed), or more rarely (practically FreeBSD only), when there's * a pre-existing "untrackable" authentic process, we give the situation some * time to possibly unfold in the right direction, meaning that said socket * will appear or the unattainable process will disappear per the observable * IPC, respectively. * * \return Standard Pacemaker return code * * \note Since this gets run at the very start, \c respawn_count fields * for particular children get temporarily overloaded with "rounds * of waiting" tracking, restored once we are about to finish with * success (i.e. returning value >=0) and will remain unrestored * otherwise. One way to suppress liveness detection logic for * particular child is to set the said value to a negative number. 
*/ #define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ int find_and_track_existing_processes(void) { bool wait_in_progress; int rc; size_t i, rounds; for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { wait_in_progress = false; for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { if ((pcmk_children[i].endpoint == NULL) || (pcmk_children[i].respawn_count < 0)) { continue; } rc = child_liveness(&pcmk_children[i]); if (rc == pcmk_rc_ipc_unresponsive) { /* As a speculation, don't give up if there are more rounds to * come for other reasons, but don't artificially wait just * because of this, since we would preferably start ASAP. */ continue; } // @TODO Functionize more of this to reduce nesting pcmk_children[i].respawn_count = rounds; switch (rc) { case pcmk_rc_ok: if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { crm_crit("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" " platform and PCMK_" PCMK__ENV_FAIL_FAST " requested", pcmk_children[i].endpoint); return EOPNOTSUPP; } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { crm_notice("Assuming pre-existing authentic, though" " on this platform untrackable, process" " behind %s IPC is stable (was in %d" " previous samples) so rather than" " bailing out (PCMK_" PCMK__ENV_FAIL_FAST " not requested), we just switch to a" " less optimal IPC liveness monitoring" " (not very suitable for heavy load)", pcmk_children[i].name, WAIT_TRIES - 1); crm_warn("The process behind %s IPC cannot be" " terminated, so the overall shutdown" " will get delayed implicitly (%ld s)," " which serves as a graceful period for" " its native termination if it vitally" " depends on some other daemons going" " down in a controlled way already", pcmk_children[i].name, (long) SHUTDOWN_ESCALATION_PERIOD); } else { wait_in_progress = true; crm_warn("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" " platform, 
can still disappear in %d" " attempt(s)", pcmk_children[i].endpoint, WAIT_TRIES - pcmk_children[i].respawn_count); continue; } } crm_notice("Tracking existing %s process (pid=%lld)", pcmk_children[i].name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid)); pcmk_children[i].respawn_count = -1; /* 0~keep watching */ pcmk_children[i].active_before_startup = true; break; case pcmk_rc_ipc_pid_only: if (pcmk_children[i].respawn_count == WAIT_TRIES) { crm_crit("%s IPC end-point for existing authentic" " process %lld did not (re)appear", pcmk_children[i].endpoint, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid)); return rc; } wait_in_progress = true; crm_warn("Cannot find %s IPC end-point for existing" " authentic process %lld, can still (re)appear" " in %d attempts (?)", pcmk_children[i].endpoint, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid), WAIT_TRIES - pcmk_children[i].respawn_count); continue; default: crm_crit("Checked liveness of %s: %s " CRM_XS " rc=%d", pcmk_children[i].name, pcmk_rc_str(rc), rc); return rc; } } if (!wait_in_progress) { break; } pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen } for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { pcmk_children[i].respawn_count = 0; /* restore pristine state */ } g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon, NULL); return pcmk_rc_ok; } gboolean init_children_processes(void *user_data) { if (is_corosync_cluster()) { /* Corosync clusters can drop root group access, because we set * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect * to corosync. */ need_root_group = false; } /* start any children that have not been detected */ for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { if (pcmk_children[i].pid != 0) { /* we are already tracking it */ continue; } start_child(&(pcmk_children[i])); } /* From this point on, any daemons being started will be due to * respawning rather than node start. 
* * This may be useful for the daemons to know */ pcmk__set_env_option(PCMK__ENV_RESPAWNED, "true", false); pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING; return TRUE; } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } void restart_cluster_subdaemons(void) { for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { if (!pcmk_children[i].needs_retry || pcmk_children[i].pid != 0) { continue; } crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name); if (start_child(&pcmk_children[i])) { pcmk_children[i].needs_retry = false; } } } static gboolean stop_child(pcmk_child_t * child, int signal) { if (signal == 0) { signal = SIGTERM; } /* why to skip PID of 1? - FreeBSD ~ how untrackable process behind IPC is masqueraded as - elsewhere: how "init" task is designated; in particular, in systemd arrangement of socket-based activation, this is pretty real */ if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) { crm_debug("Nothing to do for child \"%s\" (process %lld)", child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); return TRUE; } if (child->pid <= 0) { crm_trace("Client %s not running", child->name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { crm_notice("Stopping %s "CRM_XS" sent signal %d to process %lld", child->name, signal, (long long) child->pid); } else { crm_err("Could not stop %s (process %lld) with signal %d: %s", child->name, (long long) child->pid, signal, strerror(errno)); } return TRUE; } diff --git a/doc/sphinx/Makefile.am b/doc/sphinx/Makefile.am index dc7ded2058..e48e19a780 100644 --- a/doc/sphinx/Makefile.am +++ b/doc/sphinx/Makefile.am @@ -1,214 +1,222 @@ # # Copyright 2003-2023 the Pacemaker project contributors # # The version control history for this file may have further details. 
# # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk # Define release-related variables include $(top_srcdir)/mk/release.mk # Things you might want to override on the command line # Books to generate BOOKS ?= Clusters_from_Scratch \ Pacemaker_Administration \ Pacemaker_Development \ Pacemaker_Explained \ Pacemaker_Python_API \ Pacemaker_Remote # Output formats to generate. Possible values: # html (multiple HTML files) # dirhtml (HTML files named index.html in multiple directories) # singlehtml (a single large HTML file) # text # pdf # epub # latex # linkcheck (not actually a format; check validity of external links) # # The results will end up in /_build/ BOOK_FORMATS ?= singlehtml # Set to "a4paper" or "letterpaper" if building latex format PAPER ?= letterpaper # Additional options for sphinx-build SPHINXFLAGS ?= # toplevel rsync destination for www targets (without trailing slash) RSYNC_DEST ?= root@www.clusterlabs.org:/var/www/html # End of useful overrides # Example scheduler transition graphs # @TODO The original CIB XML for these is long lost. Ideally, we would recreate # something similar and keep those here instead of the DOTs (or use a couple of # scheduler regression test inputs instead), then regenerate the SVG # equivalents using crm_simulate and dot when making a release. DOTS = $(wildcard shared/images/*.dot) # Vector sources for generated PNGs (including SVG equivalents of DOTS, created # manually using dot) SVGS = $(wildcard shared/images/pcmk-*.svg) \ $(DOTS:%.dot=%.svg) # PNG images generated from SVGS # # These will not be accessible in a VPATH build, which will generate warnings # when building the documentation, but the make will still succeed. It is # nontrivial to get them working for VPATH builds and not worth the effort. 
PNGS_GENERATED = $(SVGS:%.svg=%.png) # Original PNG image sources PNGS_Clusters_from_Scratch = $(wildcard Clusters_from_Scratch/images/*.png) PNGS_Pacemaker_Explained = $(wildcard Pacemaker_Explained/images/*.png) PNGS_Pacemaker_Remote = $(wildcard Pacemaker_Remote/images/*.png) STATIC_FILES = $(wildcard _static/*.css) EXTRA_DIST = $(wildcard */*.rst) $(DOTS) $(SVGS) \ $(PNGS_Clusters_from_Scratch) \ $(PNGS_Pacemaker_Explained) \ $(PNGS_Pacemaker_Remote) \ $(wildcard Pacemaker_Python_API/_templates/*rst) \ $(STATIC_FILES) \ conf.py.in # recursive, preserve symlinks/permissions/times, verbose, compress, # don't cross filesystems, sparse, show progress RSYNC_OPTS = -rlptvzxS --progress -PACKAGE_SERIES=$(shell echo "$VERSION" | awk -F. '{ print $1"."$2 }'`) +PACKAGE_SERIES=$(shell echo "$(VERSION)" | awk -F. '{ print $$1"."$$2 }') BOOK_RSYNC_DEST = $(RSYNC_DEST)/$(PACKAGE)/doc/$(PACKAGE_SERIES) BOOK = none DEPS_intro = shared/pacemaker-intro.rst \ $(PNGS_GENERATED) DEPS_Clusters_from_Scratch = $(DEPS_intro) \ $(PNGS_Clusters_from_Scratch) DEPS_Pacemaker_Administration = $(DEPS_intro) DEPS_Pacemaker_Development = DEPS_Pacemaker_Explained = $(DEPS_intro) \ $(PNGS_Pacemaker_Explained) DEPS_Pacemaker_Python_API = ../../python DEPS_Pacemaker_Remote = $(PNGS_Pacemaker_Remote) if BUILD_SPHINX_DOCS INKSCAPE_CMD = $(INKSCAPE) --export-dpi=90 -C # Pattern rule to generate PNGs from SVGs # (--export-png works with Inkscape <1.0, --export-filename with >=1.0; # create the destination directory in case this is a VPATH build) %.png: %.svg $(AM_V_at)-$(MKDIR_P) "$(shell dirname "$@")" $(AM_V_GEN) { \ $(INKSCAPE_CMD) --export-png="$@" "$<" 2>/dev/null \ || $(INKSCAPE_CMD) --export-filename="$@" "$<"; \ } $(PCMK_quiet) # Create a book's Sphinx configuration. # Create the book directory in case this is a VPATH build.
$(BOOKS:%=%/conf.py): conf.py.in $(AM_V_at)-$(MKDIR_P) "$(@:%/conf.py=%)" $(AM_V_GEN)sed \ -e 's/%VERSION%/$(VERSION)/g' \ -e 's/%BOOK_ID%/$(@:%/conf.py=%)/g' \ -e 's/%BOOK_TITLE%/$(subst _, ,$(@:%/conf.py=%))/g' \ -e 's#%SRC_DIR%#$(abs_srcdir)#g' \ -e 's#%ABS_TOP_SRCDIR%#$(abs_top_srcdir)#g' \ + -e 's#%CONFIGDIR%#@CONFIGDIR@#g' \ + -e 's#%CRM_BLACKBOX_DIR%#@CRM_BLACKBOX_DIR@#g' \ + -e 's#%CRM_DAEMON_GROUP%#@CRM_DAEMON_GROUP@#g' \ + -e 's#%CRM_DAEMON_USER%#@CRM_DAEMON_USER@#g' \ + -e 's#%CRM_LOG_DIR%#@CRM_LOG_DIR@#g' \ + -e 's#%CRM_SCHEMA_DIRECTORY%#@CRM_SCHEMA_DIRECTORY@#g' \ + -e 's#%PACEMAKER_CONFIG_DIR%#@PACEMAKER_CONFIG_DIR@#g' \ + -e 's#%PCMK_GNUTLS_PRIORITIES%#@PCMK_GNUTLS_PRIORITIES@#g' \ $(<) > "$@" $(BOOK)/_build: $(STATIC_FILES) $(BOOK)/conf.py $(DEPS_$(BOOK)) $(wildcard $(srcdir)/$(BOOK)/*.rst) @echo 'Building "$(subst _, ,$(BOOK))" because of $?' $(PCMK_quiet) $(AM_V_at)rm -rf "$@" $(AM_V_BOOK)for format in $(BOOK_FORMATS); do \ echo -e "\n * Building $$format" $(PCMK_quiet); \ doctrees="doctrees"; \ real_format="$$format"; \ case "$$format" in \ pdf) real_format="latex" ;; \ gettext) doctrees="gettext-doctrees" ;; \ esac; \ $(SPHINX) -b "$$real_format" -d "$@/$$doctrees" \ -c "$(builddir)/$(BOOK)" \ -D latex_elements.papersize=$(PAPER) \ $(SPHINXFLAGS) \ "$(srcdir)/$(BOOK)" "$@/$$format" \ $(PCMK_quiet); \ if [ "$$format" = "pdf" ]; then \ $(MAKE) $(AM_MAKEFLAGS) -C "$@/$$format" \ all-pdf; \ fi; \ done endif build-$(PACKAGE_SERIES).txt: all $(AM_V_GEN)echo "Generated on `date --utc` from version $(TAG)" > "$@" .PHONY: books-upload books-upload: all build-$(PACKAGE_SERIES).txt if BUILD_SPHINX_DOCS @echo "Uploading $(PACKAGE_SERIES) documentation set" @for book in $(BOOKS); do \ echo " * $$book"; \ rsync $(RSYNC_OPTS) $(BOOK_FORMATS:%=$$book/_build/%) \ "$(BOOK_RSYNC_DEST)/$$book/"; \ done @rsync $(RSYNC_OPTS) "$(builddir)/build-$(PACKAGE_SERIES).txt" \ "$(RSYNC_DEST)/$(PACKAGE)/doc" endif .PHONY: all-local all-local: if BUILD_SPHINX_DOCS @for book in 
$(BOOKS); do \ $(MAKE) $(AM_MAKEFLAGS) BOOK=$$book \ PAPER="$(PAPER)" SPHINXFLAGS="$(SPHINXFLAGS)" \ BOOK_FORMATS="$(BOOK_FORMATS)" $$book/_build; \ done endif .PHONY: install-data-local install-data-local: all-local if BUILD_SPHINX_DOCS $(AM_V_at)for book in $(BOOKS); do \ for format in $(BOOK_FORMATS); do \ formatdir="$$book/_build/$$format"; \ for f in `find "$$formatdir" -print`; do \ dname="`echo $$f | sed s:_build/::`"; \ dloc="$(DESTDIR)/$(docdir)/$$dname"; \ if [ -d "$$f" ]; then \ $(INSTALL) -d -m 755 "$$dloc"; \ else \ $(INSTALL_DATA) "$$f" "$$dloc"; \ fi \ done; \ done; \ done endif .PHONY: uninstall-local uninstall-local: if BUILD_SPHINX_DOCS $(AM_V_at)for book in $(BOOKS); do \ rm -rf "$(DESTDIR)/$(docdir)/$$book"; \ done endif .PHONY: clean-local clean-local: $(AM_V_at)-rm -rf \ $(BOOKS:%="$(builddir)/%/_build") \ $(BOOKS:%="$(builddir)/%/conf.py") \ $(BOOKS:%="$(builddir)/%/generated") \ $(PNGS_GENERATED) diff --git a/doc/sphinx/Pacemaker_Explained/options.rst b/doc/sphinx/Pacemaker_Explained/cluster-options.rst similarity index 100% rename from doc/sphinx/Pacemaker_Explained/options.rst rename to doc/sphinx/Pacemaker_Explained/cluster-options.rst diff --git a/doc/sphinx/Pacemaker_Explained/index.rst b/doc/sphinx/Pacemaker_Explained/index.rst index e3b7e9e55e..63387f3421 100644 --- a/doc/sphinx/Pacemaker_Explained/index.rst +++ b/doc/sphinx/Pacemaker_Explained/index.rst @@ -1,41 +1,42 @@ Pacemaker Explained =================== *Configuring Pacemaker Clusters* Abstract -------- This document definitively explains Pacemaker's features and capabilities, particularly the XML syntax used in Pacemaker's Cluster Information Base (CIB). Table of Contents ----------------- .. 
toctree:: :maxdepth: 3 :numbered: intro - options + local-options + cluster-options nodes resources operations constraints fencing alerts rules collective reusing-configuration utilization acls status multi-site-clusters ap-samples Index ----- * :ref:`genindex` * :ref:`search` diff --git a/doc/sphinx/Pacemaker_Explained/local-options.rst b/doc/sphinx/Pacemaker_Explained/local-options.rst new file mode 100644 index 0000000000..91eda6632d --- /dev/null +++ b/doc/sphinx/Pacemaker_Explained/local-options.rst @@ -0,0 +1,515 @@ +Host-Local Configuration +------------------------ + +.. index:: + pair: XML element; configuration + +.. note:: Directory and file paths below may differ on your system depending on + your Pacemaker build settings. Check your Pacemaker configuration + file to find the correct paths. + +Pacemaker supports several host-local configuration options. These options can +be configured on each node in the main Pacemaker configuration file +(|PCMK_CONFIG_FILE|) in the format ``<NAME>="<VALUE>"``. They work by setting +environment variables when Pacemaker daemons start up. + +.. list-table:: **Local Options** + :class: longtable + :widths: 2 2 2 5 + :header-rows: 1 + + * - Name + - Type + - Default + - Description + * - .. _pcmk_logfacility: + + .. index:: + pair: node option; PCMK_logfacility + + PCMK_logfacility + - :ref:`enumeration ` + - daemon + - Enable logging via the system log or journal, using the specified log + facility. Messages sent here are of value to all Pacemaker + administrators. This can be disabled using ``none``, but that is not + recommended. Allowed values: + + * ``none`` + * ``daemon`` + * ``user`` + * ``local0`` + * ``local1`` + * ``local2`` + * ``local3`` + * ``local4`` + * ``local5`` + * ``local6`` + * ``local7`` + + * - .. _pcmk_logpriority: + + .. 
index:: + pair:: node option; PCMK_logpriority + + PCMK_logpriority + - :ref:`enumeration ` + - notice + - Unless system logging is disabled using ``PCMK_logfacility=none``, + messages of the specified log severity and higher will be sent to the + system log. The default is appropriate for most installations. Allowed + values: + + * ``emerg`` + * ``alert`` + * ``crit`` + * ``error`` + * ``warning`` + * ``notice`` + * ``info`` + * ``debug`` + + * - .. _pcmk_logfile: + + .. index:: + pair:: node option; PCMK_logfile + + PCMK_logfile + - :ref:`text ` + - |PCMK_LOG_FILE| + - Unless set to ``none``, more detailed log messages will be sent to the + specified file (in addition to the system log, if enabled). These + messages may have extended information, and will include messages of info + severity. This log is of more use to developers and advanced system + administrators, and when reporting problems. + + * - .. _pcmk_logfile_mode: + + .. index:: + pair:: node option; PCMK_logfile_mode + + PCMK_logfile_mode + - :ref:`text ` + - 0660 + - Pacemaker will set the permissions on the detail log to this value (see + ``chmod(1)``). + + * - .. _pcmk_debug: + + .. index:: + pair:: node option; PCMK_debug + + PCMK_debug + - :ref:`enumeration ` + - no + - Whether to send debug severity messages to the detail log. This may be + set for all subsystems (``yes`` or ``no``) or for specific (comma- + separated) subsystems. Allowed subsystems are: + + * ``pacemakerd`` + * ``pacemaker-attrd`` + * ``pacemaker-based`` + * ``pacemaker-controld`` + * ``pacemaker-execd`` + * ``pacemaker-fenced`` + * ``pacemaker-schedulerd`` + + Example: ``PCMK_debug="pacemakerd,pacemaker-execd"`` + + * - .. _pcmk_stderr: + + .. index:: + pair:: node option; PCMK_stderr + + PCMK_stderr + - :ref:`boolean ` + - no + - *Advanced Use Only:* Whether to send daemon log messages to stderr. This + would be useful only during troubleshooting, when starting Pacemaker + manually on the command line. 
+ + Setting this option in the configuration file is pointless, since the + file is not read when starting Pacemaker manually. However, it can be set + directly as an environment variable on the command line. + + * - .. _pcmk_trace_functions: + + .. index:: + pair: node option; PCMK_trace_functions + + PCMK_trace_functions + - :ref:`text ` + - + - *Advanced Use Only:* Send debug and trace severity messages from these + (comma-separated) source code functions to the detail log. + + Example: + ``PCMK_trace_functions="func1,func2"`` + + * - .. _pcmk_trace_files: + + .. index:: + pair: node option; PCMK_trace_files + + PCMK_trace_files + - :ref:`text ` + - + - *Advanced Use Only:* Send debug and trace severity messages from all + functions in these (comma-separated) source file names to the detail log. + + Example: ``PCMK_trace_files="file1.c,file2.c"`` + + * - .. _pcmk_trace_formats: + + .. index:: + pair: node option; PCMK_trace_formats + + PCMK_trace_formats + - :ref:`text ` + - + - *Advanced Use Only:* Send trace severity messages that are generated by + these (comma-separated) format strings in the source code to the detail + log. + + Example: ``PCMK_trace_formats="Error: %s (%d)"`` + + * - .. _pcmk_trace_tags: + + .. index:: + pair: node option; PCMK_trace_tags + + PCMK_trace_tags + - :ref:`text ` + - + - *Advanced Use Only:* Send debug and trace severity messages related to + these (comma-separated) resource IDs to the detail log. + + Example: ``PCMK_trace_tags="client-ip,dbfs"`` + + * - .. _pcmk_blackbox: + + .. index:: + pair: node option; PCMK_blackbox + + PCMK_blackbox + - :ref:`enumeration ` + - no + - *Advanced Use Only:* Enable blackbox logging globally (``yes`` or ``no``) + or by subsystem. A blackbox contains a rolling buffer of all logs (of all + severities). Blackboxes are stored under |CRM_BLACKBOX_DIR| by default, + and their contents can be viewed using the ``qb-blackbox(8)`` + command. 
+ + The blackbox recorder can be enabled at start using this variable, or at + runtime by sending a Pacemaker subsystem daemon process a ``SIGUSR1`` or + ``SIGTRAP`` signal, and disabled by sending ``SIGUSR2`` (see + ``kill(1)``). The blackbox will be written after a crash, assertion + failure, or ``SIGTRAP`` signal. + + See :ref:`PCMK_debug <pcmk_debug>` for allowed subsystems. + + Example: + ``PCMK_blackbox="pacemakerd,pacemaker-execd"`` + + * - .. _pcmk_trace_blackbox: + + .. index:: + pair: node option; PCMK_trace_blackbox + + PCMK_trace_blackbox + - :ref:`enumeration ` + - + - *Advanced Use Only:* Write a blackbox whenever the message at the + specified function and line is logged. Multiple entries may be comma- + separated. + + Example: ``PCMK_trace_blackbox="remote.c:144,remote.c:149"`` + + * - .. _pcmk_node_start_state: + + .. index:: + pair: node option; PCMK_node_start_state + + PCMK_node_start_state + - :ref:`enumeration ` + - default + - By default, the local host will join the cluster in an online or standby + state when Pacemaker first starts depending on whether it was previously + put into standby mode. If this variable is set to ``standby`` or + ``online``, it will force the local host to join in the specified state. + + * - .. _pcmk_node_action_limit: + + .. index:: + pair: node option; PCMK_node_action_limit + + PCMK_node_action_limit + - :ref:`nonnegative integer ` + - + - Specify the maximum number of jobs that can be scheduled on this node. If + set, this overrides the ``node-action-limit`` cluster property for this + node. + + * - .. _pcmk_shutdown_delay: + + .. index:: + pair: node option; PCMK_shutdown_delay + + PCMK_shutdown_delay + - :ref:`timeout ` + - + - Specify a delay before shutting down ``pacemakerd`` after shutting down + all other Pacemaker daemons. + + * - .. _pcmk_fail_fast: + + .. 
index:: + pair:: node option; PCMK_fail_fast + + PCMK_fail_fast + - :ref:`boolean ` + - no + - By default, if a Pacemaker subsystem crashes, the main ``pacemakerd`` + process will attempt to restart it. If this variable is set to ``yes``, + ``pacemakerd`` will panic the local host instead. + + * - .. _pcmk_panic_action: + + .. index:: + pair:: node option; PCMK_panic_action + + PCMK_panic_action + - :ref:`enumeration ` + - reboot + - Pacemaker will panic the local host under certain conditions. By default, + this means rebooting the host. This variable can change that behavior: if + ``crash``, trigger a kernel crash (useful if you want a kernel dump to + investigate); if ``sync-reboot`` or ``sync-crash``, synchronize + filesystems before rebooting the host or triggering a kernel crash. The + sync values are more likely to preserve log messages, but with the risk + that the host may be left active if the synchronization hangs. + + * - .. _pcmk_authkey_location: + + .. index:: + pair:: node option; PCMK_authkey_location + + PCMK_authkey_location + - :ref:`text ` + - |PCMK_AUTHKEY_FILE| + - Use the contents of this file as the authorization key to use with + Pacemaker Remote connections. This file must be readable by Pacemaker + daemons (that is, it must allow read permissions to either the + |CRM_DAEMON_USER| user or the |CRM_DAEMON_GROUP| group), and its contents + must be identical on all nodes. + + * - .. _pcmk_remote_address: + + .. index:: + pair:: node option; PCMK_remote_address + + PCMK_remote_address + - :ref:`text ` + - + - By default, if the Pacemaker Remote service is run on the local node, it + will listen for connections on all IP addresses. This may be set to one + address to listen on instead, as a resolvable hostname or as a numeric + IPv4 or IPv6 address. When resolving names or listening on all addresses, + IPv6 will be preferred if available. When listening on an IPv6 address, + IPv4 clients will be supported via IPv4-mapped IPv6 addresses. 
+ + Example: ``PCMK_remote_address="192.0.2.1"`` + + * - .. _pcmk_remote_port: + + .. index:: + pair:: node option; PCMK_remote_port + + PCMK_remote_port + - :ref:`port ` + - 3121 + - Use this TCP port number for Pacemaker Remote node connections. This + value must be the same on all nodes. + + * - .. _pcmk_remote_pid1: + + .. index:: + pair:: node option; PCMK_remote_pid1 + + PCMK_remote_pid1 + - :ref:`enumeration ` + - default + - *Advanced Use Only:* When a bundle resource's ``run-command`` option is + left to default, Pacemaker Remote runs as PID 1 in the bundle's + containers. When it does so, it loads environment variables from the + container's |PCMK_INIT_ENV_FILE| and performs the PID 1 responsibility of + reaping dead subprocesses. + + This option controls whether those actions are performed when Pacemaker + Remote is not running as PID 1. It is intended primarily for developer + testing but can be useful when ``run-command`` is set to a separate, + custom PID 1 process that launches Pacemaker Remote. + + * ``full``: Pacemaker Remote loads environment variables from + |PCMK_INIT_ENV_FILE| and reaps dead subprocesses. + * ``vars``: Pacemaker Remote loads environment variables from + |PCMK_INIT_ENV_FILE| but does not reap dead subprocesses. + * ``default``: Pacemaker Remote performs neither action. + + If Pacemaker Remote is running as PID 1, this option is ignored, and the + behavior is the same as for ``full``. + + * - .. _pcmk_tls_priorities: + + .. index:: + pair:: node option; PCMK_tls_priorities + + PCMK_tls_priorities + - :ref:`text ` + - |PCMK_GNUTLS_PRIORITIES| + - *Advanced Use Only:* These GnuTLS cipher priorities will be used for TLS + connections (whether for Pacemaker Remote connections or remote CIB + access, when enabled). 
See: + + https://gnutls.org/manual/html_node/Priority-Strings.html + + Pacemaker will append ``":+ANON-DH"`` for remote CIB access and + ``":+DHE-PSK:+PSK"`` for Pacemaker Remote connections, as they are + required for the respective functionality. + + Example: + ``PCMK_tls_priorities="SECURE128:+SECURE192"`` + + * - .. _pcmk_dh_min_bits: + + .. index:: + pair:: node option; PCMK_dh_min_bits + + PCMK_dh_min_bits + - :ref:`nonnegative integer ` + - 0 (no minimum) + - *Advanced Use Only:* Set a lower bound on the bit length of the prime + number generated for Diffie-Hellman parameters needed by TLS connections. + The default is no minimum. + + The server (Pacemaker Remote daemon, or CIB manager configured to accept + remote clients) will use this value to provide a floor for the value + recommended by the GnuTLS library. The library will only accept a limited + number of specific values, which vary by library version, so setting + these is recommended only when required for compatibility with specific + client versions. + + Clients (connecting cluster nodes or remote CIB commands) will require + that the server use a prime of at least this size. This is recommended + only when the value must be lowered in order for the client's GnuTLS + library to accept a connection to an older server. + + * - .. _pcmk_dh_max_bits: + + .. index:: + pair:: node option; PCMK_dh_max_bits + + PCMK_dh_max_bits + - :ref:`nonnegative integer ` + - 0 (no maximum) + - *Advanced Use Only:* Set an upper bound on the bit length of the prime + number generated for Diffie-Hellman parameters needed by TLS connections. + The default is no maximum. + + The server (Pacemaker Remote daemon, or CIB manager configured to accept + remote clients) will use this value to provide a ceiling for the value + recommended by the GnuTLS library. 
The library will only accept a limited + number of specific values, which vary by library version, so setting + these is recommended only when required for compatibility with specific + client versions. + + Clients do not use ``PCMK_dh_max_bits``. + + * - .. _pcmk_ipc_type: + + .. index:: + pair:: node option; PCMK_ipc_type + + PCMK_ipc_type + - :ref:`enumeration ` + - shared-mem + - *Advanced Use Only:* Force use of a particular IPC method. Allowed values: + + * ``shared-mem`` + * ``socket`` + * ``posix`` + * ``sysv`` + + * - .. _pcmk_ipc_buffer: + + .. index:: + pair:: node option; PCMK_ipc_buffer + + PCMK_ipc_buffer + - :ref:`nonnegative integer ` + - 131072 + - *Advanced Use Only:* Specify an IPC buffer size in bytes. This can be + useful when connecting to large clusters that result in messages + exceeding the default size (which will also result in log messages + referencing this variable). + + * - .. _pcmk_cluster_type: + + .. index:: + pair:: node option; PCMK_cluster_type + + PCMK_cluster_type + - :ref:`enumeration ` + - corosync + - *Advanced Use Only:* Specify the cluster layer to be used. If unset, + Pacemaker will detect and use a supported cluster layer, if available. + Currently, ``"corosync"`` is the only supported cluster layer. If + multiple layers are supported in the future, this will allow overriding + Pacemaker's automatic detection to select a specific one. + + * - .. _pcmk_schema_directory: + + .. index:: + pair:: node option; PCMK_schema_directory + + PCMK_schema_directory + - :ref:`text ` + - |CRM_SCHEMA_DIRECTORY| + - *Advanced Use Only:* Specify an alternate location for RNG schemas and + XSL transforms. + + * - .. _pcmk_valgrind_enabled: + + .. index:: + pair:: node option; PCMK_valgrind_enabled + + PCMK_valgrind_enabled + - :ref:`enumeration ` + - no + - *Advanced Use Only:* Whether subsystem daemons should be run under + ``valgrind``. Allowed values are the same as for ``PCMK_debug``. + + * - .. _pcmk_callgrind_enabled: + + .. 
index:: + pair:: node option; PCMK_callgrind_enabled + + PCMK_callgrind_enabled + - :ref:`enumeration ` + - no + - *Advanced Use Only:* Whether subsystem daemons should be run under + ``valgrind`` with the ``callgrind`` tool enabled. Allowed values are the + same as for ``PCMK_debug``. + + * - .. _valgrind_opts: + + .. index:: + pair:: node option; VALGRIND_OPTS + + VALGRIND_OPTS + - :ref:`text ` + - + - *Advanced Use Only:* Pass these options to valgrind, when enabled (see + ``valgrind(1)``). ``"--vgdb=no"`` should usually be specified because + ``pacemaker-execd`` can lower privileges when executing commands, which + would otherwise leave a bunch of unremovable files in ``/tmp``. diff --git a/doc/sphinx/conf.py.in b/doc/sphinx/conf.py.in index 7d843d82b7..7d74cbf05b 100644 --- a/doc/sphinx/conf.py.in +++ b/doc/sphinx/conf.py.in @@ -1,319 +1,328 @@ """ Sphinx configuration for Pacemaker documentation """ __copyright__ = "Copyright 2020-2023 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import datetime import os import sys # Variables that can be used later in this file authors = "the Pacemaker project contributors" year = datetime.datetime.now().year doc_license = "Creative Commons Attribution-ShareAlike International Public License" doc_license += " version 4.0 or later (CC-BY-SA v4.0+)" # rST markup to insert at beginning of every document; mainly used for # # .. || replace:: # # where occurrences of || in the rST will be substituted with rst_prolog=""" .. |CFS_DISTRO| replace:: AlmaLinux .. |CFS_DISTRO_VER| replace:: 9 +.. |CRM_BLACKBOX_DIR| replace:: ``%CRM_BLACKBOX_DIR%`` +.. 
|CRM_DAEMON_GROUP| replace:: ``%CRM_DAEMON_GROUP%`` +.. |CRM_DAEMON_USER| replace:: ``%CRM_DAEMON_USER%`` +.. |CRM_SCHEMA_DIRECTORY| replace:: %CRM_SCHEMA_DIRECTORY% +.. |PCMK_AUTHKEY_FILE| replace:: %PACEMAKER_CONFIG_DIR%/authkey +.. |PCMK_CONFIG_FILE| replace:: ``%CONFIGDIR%/pacemaker`` +.. |PCMK_INIT_ENV_FILE| replace:: ``%PACEMAKER_CONFIG_DIR%/pcmk-init.env`` +.. |PCMK_LOG_FILE| replace:: %CRM_LOG_DIR%/pacemaker.log +.. |PCMK_GNUTLS_PRIORITIES| replace:: %PCMK_GNUTLS_PRIORITIES% .. |REMOTE_DISTRO| replace:: AlmaLinux .. |REMOTE_DISTRO_VER| replace:: 9 """ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath('%ABS_TOP_SRCDIR%/python')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = '%BOOK_ID%' copyright = "2009-%s %s. Released under the terms of the %s" % (year, authors, doc_license) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version, including alpha/beta/rc tags. release = '%VERSION%' # The short X.Y version. 
version = release.rsplit('.', 1)[0] # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'vs' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'pyramid' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] html_style = 'pacemaker.css' # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". html_title = "%BOOK_TITLE%" # A shorter title for the navigation bar. Default is the same as html_title. 
#html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = [ '%SRC_DIR%/_static' ] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Output file base name for HTML help builder. 
htmlhelp_basename = 'Pacemakerdoc' # -- Options for LaTeX output -------------------------------------------------- latex_engine = "xelatex" latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', '%BOOK_ID%.tex', '%BOOK_TITLE%', authors, 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', '%BOOK_ID%', 'Part of the Pacemaker documentation set', [authors], 8) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ('index', '%BOOK_ID%', '%BOOK_TITLE%', authors, '%BOOK_TITLE%', 'Pacemaker is an advanced, scalable high-availability cluster resource manager.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. 
#texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. epub_title = '%BOOK_TITLE%' epub_author = authors epub_publisher = 'ClusterLabs.org' epub_copyright = copyright # The language of the text. It defaults to the language option # or en if the language is not set. #epub_language = '' # The scheme of the identifier. Typical schemes are ISBN or URL. epub_scheme = 'URL' # The unique identifier of the text. This can be a ISBN number # or the project homepage. epub_identifier = 'https://www.clusterlabs.org/pacemaker/doc/2.1/%BOOK_ID%/epub/%BOOK_ID%.epub' # A unique identification for the text. epub_uid = 'ClusterLabs.org-Pacemaker-%BOOK_ID%' # A tuple containing the cover image and cover page html template filenames. #epub_cover = () # HTML files that should be inserted before the pages created by sphinx. # The format is a list of tuples containing the path and title. #epub_pre_files = [] # HTML files that should be inserted after the pages created by sphinx. # The format is a list of tuples containing the path and title. #epub_post_files = [] # A list of files that should not be packed into the epub file. epub_exclude_files = [ '_static/doctools.js', '_static/jquery.js', '_static/searchtools.js', '_static/underscore.js', '_static/basic.css', '_static/websupport.js', 'search.html', ] # The depth of the table of contents in toc.ncx. #epub_tocdepth = 3 # Allow duplicate toc entries. 
#epub_tocdup = True autosummary_generate = True diff --git a/etc/sysconfig/pacemaker.in b/etc/sysconfig/pacemaker.in index 041da7195f..0c3609d8e7 100644 --- a/etc/sysconfig/pacemaker.in +++ b/etc/sysconfig/pacemaker.in @@ -1,338 +1,393 @@ # # Pacemaker start-up configuration # # This file contains environment variables that affect Pacemaker behavior. # They are not options stored in the Cluster Information Base (CIB) because # they may be needed before the CIB is available. # ## Logging # PCMK_logfacility # # Enable logging via the system log or journal, using the specified log # facility. Messages sent here are of value to all Pacemaker administrators. # This can be disabled using "none", but that is not recommended. Allowed # values: # # none # daemon # user # local0 # local1 # local2 # local3 # local4 # local5 # local6 # local7 # # Default: PCMK_logfacility="daemon" # PCMK_logpriority # # Unless system logging is disabled using PCMK_logfacility=none, messages of # the specified log severity and higher will be sent to the system log. The # default is appropriate for most installations. Allowed values: # # emerg # alert # crit # error # warning # notice # info # debug # # Default: PCMK_logpriority="notice" # PCMK_logfile # # Unless set to "none", more detailed log messages will be sent to the # specified file (in addition to the system log, if enabled). These messages # may have extended information, and will include messages of info severity. # This log is of more use to developers and advanced system administrators, and # when reporting problems. # # Default: PCMK_logfile="@CRM_LOG_DIR@/pacemaker.log" # PCMK_logfile_mode # # Pacemaker will set the permissions on the detail log to this value (see # chmod(1)). # # Default: PCMK_logfile_mode="0660" # PCMK_debug (Advanced Use Only) # # Whether to send debug severity messages to the detail log. # This may be set for all subsystems (yes or no) or for specific # (comma-separated) subsystems. 
Allowed subsystems are: # # pacemakerd # pacemaker-attrd # pacemaker-based # pacemaker-controld # pacemaker-execd # pacemaker-fenced # pacemaker-schedulerd # # Default: PCMK_debug="no" # Example: PCMK_debug="pacemakerd,pacemaker-execd" +# PCMK_stderr (Advanced Use Only) +# +# Whether to send daemon log messages to stderr. This would be useful only +# during troubleshooting, when starting Pacemaker manually on the command line. +# +# Setting this option in this file is pointless, since this file is not read +# when starting Pacemaker manually. However, it can be set directly as an +# environment variable on the command line. +# +# Default: PCMK_stderr="no" + # PCMK_trace_functions (Advanced Use Only) # # Send debug and trace severity messages from these (comma-separated) # source code functions to the detail log. # # Default: PCMK_trace_functions="" # Example: PCMK_trace_functions="unpack_colocation_set,pcmk__cmp_instance" # PCMK_trace_files (Advanced Use Only) # # Send debug and trace severity messages from all functions in these # (comma-separated) source file names to the detail log. # # Default: PCMK_trace_files="" # Example: PCMK_trace_files="remote.c,watchdog.c" # PCMK_trace_formats (Advanced Use Only) # # Send trace severity messages that are generated by these (comma-separated) # format strings in the source code to the detail log. # # Default: PCMK_trace_formats="" # Example: PCMK_trace_formats="TLS handshake failed: %s (%d)" # PCMK_trace_tags (Advanced Use Only) # # Send debug and trace severity messages related to these (comma-separated) # resource IDs to the detail log. # # Default: PCMK_trace_tags="" # Example: PCMK_trace_tags="client-ip,dbfs" # PCMK_blackbox (Advanced Use Only) # # Enable blackbox logging globally (yes or no) or by subsystem. A blackbox # contains a rolling buffer of all logs (of all severities). Blackboxes are # stored under @CRM_BLACKBOX_DIR@ by default, and their contents can # be viewed using the qb-blackbox(8) command. 
# # The blackbox recorder can be enabled at start using this variable, or at # runtime by sending a Pacemaker subsystem daemon process a SIGUSR1 or SIGTRAP # signal, and disabled by sending SIGUSR2 (see kill(1)). The blackbox will be # written after a crash, assertion failure, or SIGTRAP signal. # # Default: PCMK_blackbox="no" # Example: PCMK_blackbox="pacemaker-controld,pacemaker-fenced" # PCMK_trace_blackbox (Advanced Use Only) # # Write a blackbox whenever the message at the specified function and line is # logged. Multiple entries may be comma-separated. # # Default: PCMK_trace_blackbox="" # Example: PCMK_trace_blackbox="remote.c:144,remote.c:149" -## Node start state +## Option overrides # PCMK_node_start_state # # By default, the local host will join the cluster in an online or standby # state when Pacemaker first starts depending on whether it was previously put # into standby mode. If this variable is set to "standby" or "online", it will # force the local host to join in the specified state. # # Default: PCMK_node_start_state="default" +# PCMK_node_action_limit +# +# Specify the maximum number of jobs that can be scheduled on this node. If set, +# this overrides the node-action-limit cluster property for this node. +# +# Default: PCMK_node_action_limit="" + ## Crash Handling # PCMK_fail_fast # # By default, if a Pacemaker subsystem crashes, the main pacemakerd process # will attempt to restart it. If this variable is set to "yes", pacemakerd # will panic the local host instead. # # Default: PCMK_fail_fast="no" # PCMK_panic_action # # Pacemaker will panic the local host under certain conditions. By default, # this means rebooting the host. This variable can change that behavior: if # "crash", trigger a kernel crash (useful if you want a kernel dump to # investigate); if "sync-reboot" or "sync-crash", synchronize filesystems # before rebooting the host or triggering a kernel crash. 
The sync values are # more likely to preserve log messages, but with the risk that the host may be # left active if the synchronization hangs. # # Default: PCMK_panic_action="reboot" ## Pacemaker Remote # PCMK_authkey_location # # Use the contents of this file as the authorization key to use with Pacemaker # Remote connections. This file must be readable by Pacemaker daemons (that is, -# it must allow read permissions to either the hacluster user or the haclient -# group), and its contents must be identical on all nodes. +# it must allow read permissions to either the @CRM_DAEMON_USER@ user or the +# @CRM_DAEMON_GROUP@ group), and its contents must be identical on all nodes. # # Default: PCMK_authkey_location="@PACEMAKER_CONFIG_DIR@/authkey" # PCMK_remote_address # # By default, if the Pacemaker Remote service is run on the local node, it will # listen for connections on all IP addresses. This may be set to one address to # listen on instead, as a resolvable hostname or as a numeric IPv4 or IPv6 # address. When resolving names or listening on all addresses, IPv6 will be # preferred if available. When listening on an IPv6 address, IPv4 clients will # be supported via IPv4-mapped IPv6 addresses. # # Default: PCMK_remote_address="" # Example: PCMK_remote_address="192.0.2.1" # PCMK_remote_port # # Use this TCP port number for Pacemaker Remote node connections. This value # must be the same on all nodes. # # Default: PCMK_remote_port="3121" +# PCMK_remote_pid1 (Advanced Use Only) +# +# When a bundle resource's "run-command" option is left to default, Pacemaker +# Remote runs as PID 1 in the bundle's containers. When it does so, it loads +# environment variables from the container's +# @PACEMAKER_CONFIG_DIR@/pcmk-init.env and performs the PID 1 responsibility of +# reaping dead subprocesses. +# +# This option controls whether those actions are performed when Pacemaker +# Remote is not running as PID 1. 
It is intended primarily for developer testing +# but can be useful when "run-command" is set to a separate, custom PID 1 +# process that launches Pacemaker Remote. +# +# * If set to "full", Pacemaker Remote loads environment variables from +# @PACEMAKER_CONFIG_DIR@/pcmk-init.env and reaps dead subprocesses. +# * If set to "vars", Pacemaker Remote loads environment variables from +# @PACEMAKER_CONFIG_DIR@/pcmk-init.env but does not reap dead subprocesses. +# * If set to "default", Pacemaker Remote performs neither action. +# +# If Pacemaker Remote is running as PID 1, this option is ignored, and the +# behavior is the same as for "full". +# +# Default: PCMK_remote_pid1="default" + # PCMK_tls_priorities (Advanced Use Only) # # These GnuTLS cipher priorities will be used for TLS connections (whether for # Pacemaker Remote connections or remote CIB access, when enabled). See: # # https://gnutls.org/manual/html_node/Priority-Strings.html # # Pacemaker will append ":+ANON-DH" for remote CIB access and ":+DHE-PSK:+PSK" # for Pacemaker Remote connections, as they are required for the respective # functionality. # # Default: PCMK_tls_priorities="@PCMK_GNUTLS_PRIORITIES@" # Example: PCMK_tls_priorities="SECURE128:+SECURE192:-VERS-ALL:+VERS-TLS1.2" # PCMK_dh_min_bits (Advanced Use Only) # # Set a lower bound on the bit length of the prime number generated for # Diffie-Hellman parameters needed by TLS connections. The default is no # minimum. # # The server (Pacemaker Remote daemon, or CIB manager configured to accept # remote clients) will use this value to provide a floor for the value # recommended by the GnuTLS library. The library will only accept a limited # number of specific values, which vary by library version, so setting these is # recommended only when required for compatibility with specific client # versions. # # Clients (connecting cluster nodes or remote CIB commands) will require that # the server use a prime of at least this size. 
This is recommended only when # the value must be lowered in order for the client's GnuTLS library to accept # a connection to an older server. # -# Default: PCMK_dh_min_bits="1024" +# Default: PCMK_dh_min_bits="0" (no minimum) # PCMK_dh_max_bits (Advanced Use Only) # # Set an upper bound on the bit length of the prime number generated for # Diffie-Hellman parameters needed by TLS connections. The default is no # maximum. # # The server (Pacemaker Remote daemon, or CIB manager configured to accept # remote clients) will use this value to provide a ceiling for the value # recommended by the GnuTLS library. The library will only accept a limited # number of specific values, which vary by library version, so setting these is # recommended only when required for compatibility with specific client # versions. # # Clients do not use PCMK_dh_max_bits. # -# Default: PCMK_dh_max_bits="2048" +# Default: PCMK_dh_max_bits="0" (no maximum) ## Inter-process Communication # PCMK_ipc_type (Advanced Use Only) # # Force use of a particular IPC method. Allowed values: # # shared-mem # socket # posix # sysv # # Default: PCMK_ipc_type="shared-mem" # PCMK_ipc_buffer (Advanced Use Only) # # Specify an IPC buffer size in bytes. This can be useful when connecting to # large clusters that result in messages exceeding the default size (which will # also result in log messages referencing this variable). # # Default: PCMK_ipc_buffer="131072" +## Cluster type + +# PCMK_cluster_type (Advanced Use Only) +# +# Specify the cluster layer to be used. If unset, Pacemaker will detect and use +# a supported cluster layer, if available. Currently, "corosync" is the only +# supported cluster layer. If multiple layers are supported in the future, this +# will allow overriding Pacemaker's automatic detection to select a specific +# one. 
+# +# Default: PCMK_cluster_type="" + + ## Developer Options # PCMK_schema_directory (Advanced Use Only) # # Specify an alternate location for RNG schemas and XSL transforms. # # Default: PCMK_schema_directory="@CRM_SCHEMA_DIRECTORY@" # G_SLICE (Advanced Use Only) # # Affect the behavior of glib's memory allocator. Setting to "always-malloc" # when running under valgrind will help valgrind track malloc/free better; # setting to "debug-blocks" when not running under valgrind will perform # (somewhat expensive) memory checks. # # Default: G_SLICE="" # Example: G_SLICE="always-malloc" # MALLOC_PERTURB_ (Advanced Use Only) # # Setting this to a decimal byte value will make malloc() initialize newly # allocated memory and free() wipe it, to help catch uninitialized-memory and # use-after-free bugs. # # Default: MALLOC_PERTURB_="" # Example: MALLOC_PERTURB_="221" # MALLOC_CHECK_ (Advanced Use Only) # # Setting this to 3 will make malloc() and friends print to stderr and abort # for some (inexpensive) memory checks. # # Default: MALLOC_CHECK_="" # Example: MALLOC_CHECK_="3" # PCMK_valgrind_enabled (Advanced Use Only) # # Whether subsystem daemons should be run under valgrind. Allowed values are # the same as for PCMK_debug. # # Default: PCMK_valgrind_enabled="no" # PCMK_callgrind_enabled (Advanced Use Only) # # Whether subsystem daemons should be run under valgrind with the callgrind # tool enabled. Allowed values are the same as for PCMK_debug. # # Default: PCMK_callgrind_enabled="no" # VALGRIND_OPTS (Advanced Use Only) # # Pass these options to valgrind, when enabled (see valgrind(1)). "--vgdb=no" # is specified because pacemaker-execd can lower privileges when executing # commands, which would otherwise leave a bunch of unremovable files in /tmp.
# # Default: VALGRIND_OPTS="" VALGRIND_OPTS="--leak-check=full --trace-children=no --vgdb=no --num-callers=25" VALGRIND_OPTS="$VALGRIND_OPTS --log-file=@CRM_PACEMAKER_DIR@/valgrind-%p" VALGRIND_OPTS="$VALGRIND_OPTS --suppressions=@datadir@/pacemaker/tests/valgrind-pcmk.suppressions" VALGRIND_OPTS="$VALGRIND_OPTS --gen-suppressions=all" diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h index f80f8fcded..5c561fd1f8 100644 --- a/include/crm/common/options_internal.h +++ b/include/crm/common/options_internal.h @@ -1,146 +1,152 @@ /* * Copyright 2006-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__OPTIONS_INTERNAL__H # define PCMK__OPTIONS_INTERNAL__H # ifndef PCMK__CONFIG_H # define PCMK__CONFIG_H # include // _Noreturn # endif # include // GHashTable # include // bool _Noreturn void pcmk__cli_help(char cmd); /* * Environment variable option handling */ const char *pcmk__env_option(const char *option); void pcmk__set_env_option(const char *option, const char *value, bool compat); bool pcmk__env_option_enabled(const char *daemon, const char *option); /* * Cluster option handling */ typedef struct pcmk__cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; bool (*is_valid)(const char *); const char *description_short; const char *description_long; } pcmk__cluster_option_t; const char *pcmk__cluster_option(GHashTable *options, const pcmk__cluster_option_t *option_list, int len, const char *name); gchar *pcmk__format_option_metadata(const char *name, const char *desc_short, const char *desc_long, pcmk__cluster_option_t *option_list, int len); void pcmk__validate_cluster_options(GHashTable *options, pcmk__cluster_option_t *option_list, int len); bool 
pcmk__valid_interval_spec(const char *value); bool pcmk__valid_boolean(const char *value); bool pcmk__valid_number(const char *value); bool pcmk__valid_positive_number(const char *value); bool pcmk__valid_quorum(const char *value); bool pcmk__valid_script(const char *value); bool pcmk__valid_percentage(const char *value); // from watchdog.c long pcmk__get_sbd_timeout(void); bool pcmk__get_sbd_sync_resource_startup(void); long pcmk__auto_watchdog_timeout(void); bool pcmk__valid_sbd_timeout(const char *value); // Constants for environment variable names #define PCMK__ENV_AUTHKEY_LOCATION "authkey_location" #define PCMK__ENV_BLACKBOX "blackbox" #define PCMK__ENV_CALLGRIND_ENABLED "callgrind_enabled" -#define PCMK__ENV_CIB_TIMEOUT "cib_timeout" #define PCMK__ENV_CLUSTER_TYPE "cluster_type" #define PCMK__ENV_DEBUG "debug" #define PCMK__ENV_DH_MAX_BITS "dh_max_bits" #define PCMK__ENV_DH_MIN_BITS "dh_min_bits" #define PCMK__ENV_FAIL_FAST "fail_fast" #define PCMK__ENV_IPC_BUFFER "ipc_buffer" #define PCMK__ENV_IPC_TYPE "ipc_type" #define PCMK__ENV_LOGFACILITY "logfacility" #define PCMK__ENV_LOGFILE "logfile" #define PCMK__ENV_LOGFILE_MODE "logfile_mode" #define PCMK__ENV_LOGPRIORITY "logpriority" #define PCMK__ENV_NODE_ACTION_LIMIT "node_action_limit" #define PCMK__ENV_NODE_START_STATE "node_start_state" #define PCMK__ENV_PANIC_ACTION "panic_action" #define PCMK__ENV_PHYSICAL_HOST "physical_host" #define PCMK__ENV_REMOTE_ADDRESS "remote_address" #define PCMK__ENV_REMOTE_PID1 "remote_pid1" #define PCMK__ENV_REMOTE_PORT "remote_port" #define PCMK__ENV_RESPAWNED "respawned" #define PCMK__ENV_SCHEMA_DIRECTORY "schema_directory" #define PCMK__ENV_SERVICE "service" -#define PCMK__ENV_SHUTDOWN_DELAY "shutdown_delay" #define PCMK__ENV_STDERR "stderr" #define PCMK__ENV_TLS_PRIORITIES "tls_priorities" #define PCMK__ENV_TRACE_BLACKBOX "trace_blackbox" #define PCMK__ENV_TRACE_FILES "trace_files" #define PCMK__ENV_TRACE_FORMATS "trace_formats" #define PCMK__ENV_TRACE_FUNCTIONS 
"trace_functions" #define PCMK__ENV_TRACE_TAGS "trace_tags" #define PCMK__ENV_VALGRIND_ENABLED "valgrind_enabled" +// @COMPAT Drop at 3.0.0; default is plenty +#define PCMK__ENV_CIB_TIMEOUT "cib_timeout" + // @COMPAT Drop at 3.0.0; likely last used in 1.1.24 #define PCMK__ENV_MCP "mcp" // @COMPAT Drop at 3.0.0; added unused in 1.1.9 #define PCMK__ENV_QUORUM_TYPE "quorum_type" +/* @COMPAT Drop at 3.0.0; added to debug shutdown issues when Pacemaker is + * managed by systemd, but no longer useful. + */ +#define PCMK__ENV_SHUTDOWN_DELAY "shutdown_delay" + // Constants for cluster option names #define PCMK__OPT_NODE_HEALTH_BASE "node-health-base" #define PCMK__OPT_NODE_HEALTH_GREEN "node-health-green" #define PCMK__OPT_NODE_HEALTH_RED "node-health-red" #define PCMK__OPT_NODE_HEALTH_STRATEGY "node-health-strategy" #define PCMK__OPT_NODE_HEALTH_YELLOW "node-health-yellow" // Constants for meta-attribute names #define PCMK__META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes" // Constants for enumerated values for various options #define PCMK__VALUE_CLUSTER "cluster" #define PCMK__VALUE_CUSTOM "custom" #define PCMK__VALUE_FENCING "fencing" #define PCMK__VALUE_GREEN "green" #define PCMK__VALUE_LOCAL "local" #define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red" #define PCMK__VALUE_NONE "none" #define PCMK__VALUE_NOTHING "nothing" #define PCMK__VALUE_ONLY_GREEN "only-green" #define PCMK__VALUE_PROGRESSIVE "progressive" #define PCMK__VALUE_QUORUM "quorum" #define PCMK__VALUE_RED "red" #define PCMK__VALUE_UNFENCING "unfencing" #define PCMK__VALUE_YELLOW "yellow" #endif // PCMK__OPTIONS_INTERNAL__H