diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index 82db59e0ee..4c76e9e696 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -1,685 +1,685 @@ /* * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // PRIu32 #include #include #include #include // cib__* #include #include #include #include #include // pcmk__get_node() #include "pacemaker-attrd.h" static int last_cib_op_done = 0; static void write_attribute(attribute_t *a, bool ignore_delay); static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *cib = user_data; cib->cmds->signoff(cib); if (attrd_shutting_down(false)) { crm_info("Disconnected from the CIB manager"); } else { // @TODO This should trigger a reconnect, not a shutdown crm_crit("Lost connection to the CIB manager, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } static void attrd_cib_updated_cb(const char *event, xmlNode *msg) { const xmlNode *patchset = NULL; const char *client_name = NULL; bool status_changed = false; if (attrd_shutting_down(true)) { crm_debug("Ignoring CIB change during shutdown"); return; } if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { return; } if (cib__element_in_patchset(patchset, PCMK_XE_ALERTS)) { mainloop_set_trigger(attrd_config_read); } status_changed = cib__element_in_patchset(patchset, PCMK_XE_STATUS); client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { /* This change came from a source that ensured the CIB is consistent * with our attributes table, so we don't need to write anything out. */ return; } if (!attrd_election_won()) { // Don't write attributes if we're not the writer return; } if (status_changed || cib__element_in_patchset(patchset, PCMK_XE_NODES)) { /* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS * section. Write transient attributes to ensure they're up-to-date in * the CIB. */ if (client_name == NULL) { client_name = crm_element_value(msg, PCMK__XA_CIB_CLIENTID); } crm_notice("Updating all attributes after %s event triggered by %s", event, pcmk__s(client_name, "(unidentified client)")); attrd_write_attributes(attrd_write_all); } } int attrd_cib_connect(int max_retry) { static int attempts = 0; int rc = -ENOTCONN; the_cib = cib_new(); if (the_cib == NULL) { return -ENOTCONN; } do { if (attempts > 0) { sleep(attempts); } attempts++; crm_debug("Connection attempt %d to the CIB manager", attempts); rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); } while ((rc != pcmk_ok) && (attempts < max_retry)); if (rc != pcmk_ok) { crm_err("Connection to the CIB manager failed: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); goto cleanup; } crm_debug("Connected to the CIB manager after %d attempts", attempts); rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); if (rc != pcmk_ok) { crm_err("Could not set disconnection callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, PCMK__VALUE_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); goto cleanup; } return pcmk_ok; cleanup: cib__clean_up_connection(&the_cib); return -ENOTCONN; } void attrd_cib_disconnect(void) { CRM_CHECK(the_cib != NULL, return); the_cib->cmds->del_notify_callback(the_cib, PCMK__VALUE_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); cib__clean_up_connection(&the_cib); mainloop_destroy_trigger(attrd_config_read); } static void attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *node = pcmk__s((const char *) user_data, "a node"); if (rc == pcmk_ok) { crm_info("Cleared transient node attributes for %s from CIB", node); } else { crm_err("Unable to clear transient node attributes for %s from CIB: %s", node, pcmk_strerror(rc)); } } #define XPATH_TRANSIENT "//" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_UNAME "='%s']" \ "/" PCMK__XE_TRANSIENT_ATTRIBUTES /*! * \internal * \brief Wipe all transient node attributes for a node from the CIB * * \param[in] node Node to clear attributes for */ void attrd_cib_erase_transient_attrs(const char *node) { int call_id = 0; char *xpath = NULL; CRM_CHECK(node != NULL, return); xpath = crm_strdup_printf(XPATH_TRANSIENT, node); crm_debug("Clearing transient node attributes for %s from CIB using %s", node, xpath); call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); free(xpath); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, pcmk__str_copy(node), "attrd_erase_cb", attrd_erase_cb, free); } /*! * \internal * \brief Prepare the CIB after cluster is connected */ void attrd_cib_init(void) { /* We have no attribute values in memory, so wipe the CIB to match. This is * normally done by the DC's controller when this node leaves the cluster, but * this handles the case where the node restarted so quickly that the * cluster layer didn't notice. * - * \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED), - * ideally we'd skip this and sync our attributes from the writer. - * However, currently we reject any values for us that the writer has, in - * attrd_peer_update(). + * \todo If the attribute manager respawns after crashing (see + * PCMK_ENV_RESPAWNED), ideally we'd skip this and sync our attributes + * from the writer. However, currently we reject any values for us + * that the writer has, in attrd_peer_update(). */ attrd_cib_erase_transient_attrs(attrd_cluster->priv->node_name); // Set a trigger for reading the CIB (for the alerts section) attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); // Always read the CIB at start-up mainloop_set_trigger(attrd_config_read); } static gboolean attribute_timer_cb(gpointer data) { attribute_t *a = data; crm_trace("Dampen interval expired for %s", a->id); attrd_write_or_elect_attribute(a); return FALSE; } static void attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { int level = LOG_ERR; GHashTableIter iter; const char *peer = NULL; attribute_value_t *v = NULL; char *name = user_data; attribute_t *a = g_hash_table_lookup(attributes, name); if(a == NULL) { crm_info("Attribute %s no longer exists", name); return; } a->update = 0; if (rc == pcmk_ok && call_id < 0) { rc = call_id; } switch (rc) { case pcmk_ok: level = LOG_INFO; last_cib_op_done = call_id; if (a->timer && !a->timeout_ms) { // Remove temporary dampening for failed writes mainloop_timer_del(a->timer); a->timer = NULL; } break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ level = LOG_WARNING; break; } do_crm_log(level, "CIB update %d result for %s: %s " QB_XS " rc=%d", call_id, a->id, pcmk_strerror(rc), rc); g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { if (rc == pcmk_ok) { crm_info("* Wrote %s[%s]=%s", a->id, peer, pcmk__s(v->requested, "(unset)")); pcmk__str_update(&(v->requested), NULL); } else { do_crm_log(level, "* Could not write %s[%s]=%s", a->id, peer, pcmk__s(v->requested, "(unset)")); /* Reattempt write below if we are still the writer */ attrd_set_attr_flags(a, attrd_attr_changed); } } if (pcmk_is_set(a->flags, attrd_attr_changed) && attrd_election_won()) { if (rc == pcmk_ok) { /* We deferred a write of a new update because this update was in * progress. Write out the new value without additional delay. */ crm_debug("Pending update for %s can be written now", a->id); write_attribute(a, false); /* We're re-attempting a write because the original failed; delay * the next attempt so we don't potentially flood the CIB manager * and logs with a zillion attempts per second. * * @TODO We could elect a new writer instead. However, we'd have to * somehow downgrade our vote, and we'd still need something like this * if all peers similarly fail to write this attribute (which may * indicate a corrupted attribute entry rather than a CIB issue). */ } else if (a->timer) { // Attribute has a dampening value, so use that as delay if (!mainloop_timer_running(a->timer)) { crm_trace("Delayed re-attempted write for %s by %s", name, pcmk__readable_interval(a->timeout_ms)); mainloop_timer_start(a->timer); } } else { /* Set a temporary dampening of 2 seconds (timer will continue * to exist until the attribute's dampening gets set or the * write succeeds). */ a->timer = attrd_add_timer(a->id, 2000, a); mainloop_timer_start(a->timer); } } } /*! * \internal * \brief Add a set-attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] attr_id ID of attribute to update * \param[in] node_id ID of node for which to update attribute value * \param[in] set_id ID of attribute set * \param[in] value New value for attribute * * \return Standard Pacemaker return code */ static int add_set_attr_update(const attribute_t *attr, const char *attr_id, const char *node_id, const char *set_id, const char *value) { xmlNode *update = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE); xmlNode *child = update; int rc = ENOMEM; crm_xml_add(child, PCMK_XA_ID, node_id); child = pcmk__xe_create(child, PCMK__XE_TRANSIENT_ATTRIBUTES); crm_xml_add(child, PCMK_XA_ID, node_id); child = pcmk__xe_create(child, attr->set_type); crm_xml_add(child, PCMK_XA_ID, set_id); child = pcmk__xe_create(child, PCMK_XE_NVPAIR); crm_xml_add(child, PCMK_XA_ID, attr_id); crm_xml_add(child, PCMK_XA_NAME, attr->id); crm_xml_add(child, PCMK_XA_VALUE, value); rc = the_cib->cmds->modify(the_cib, PCMK_XE_STATUS, update, cib_can_create|cib_transaction); rc = pcmk_legacy2rc(rc); pcmk__xml_free(update); return rc; } /*! * \internal * \brief Add an unset-attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] attr_id ID of attribute to update * \param[in] node_id ID of node for which to update attribute value * \param[in] set_id ID of attribute set * * \return Standard Pacemaker return code */ static int add_unset_attr_update(const attribute_t *attr, const char *attr_id, const char *node_id, const char *set_id) { char *xpath = crm_strdup_printf("/" PCMK_XE_CIB "/" PCMK_XE_STATUS "/" PCMK__XE_NODE_STATE "[@" PCMK_XA_ID "='%s']" "/" PCMK__XE_TRANSIENT_ATTRIBUTES "[@" PCMK_XA_ID "='%s']" "/%s[@" PCMK_XA_ID "='%s']" "/" PCMK_XE_NVPAIR "[@" PCMK_XA_ID "='%s' " "and @" PCMK_XA_NAME "='%s']", node_id, node_id, attr->set_type, set_id, attr_id, attr->id); int rc = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath|cib_transaction); free(xpath); return pcmk_legacy2rc(rc); } /*! * \internal * \brief Add an attribute update request to the current CIB transaction * * \param[in] attr Attribute to update * \param[in] value New value for attribute * \param[in] node_id ID of node for which to update attribute value * * \return Standard Pacemaker return code */ static int add_attr_update(const attribute_t *attr, const char *value, const char *node_id) { char *set_id = attrd_set_id(attr, node_id); char *nvpair_id = attrd_nvpair_id(attr, node_id); int rc = pcmk_rc_ok; if (value == NULL) { rc = add_unset_attr_update(attr, nvpair_id, node_id, set_id); } else { rc = add_set_attr_update(attr, nvpair_id, node_id, set_id, value); } free(set_id); free(nvpair_id); return rc; } static void send_alert_attributes_value(attribute_t *a, GHashTable *t) { int rc = 0; attribute_value_t *at = NULL; GHashTableIter vIter; g_hash_table_iter_init(&vIter, t); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { rc = attrd_send_attribute_alert(at->nodename, at->nodeid, a->id, at->current); crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d", a->id, at->nodename, at->current, at->nodeid, rc); } } static void set_alert_attribute_value(GHashTable *t, attribute_value_t *v) { attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t)); a_v->nodeid = v->nodeid; a_v->nodename = pcmk__str_copy(v->nodename); a_v->current = pcmk__str_copy(v->current); g_hash_table_replace(t, a_v->nodename, a_v); } mainloop_timer_t * attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr) { return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr); } /*! * \internal * \brief Write an attribute's values to the CIB if appropriate * * \param[in,out] a Attribute to write * \param[in] ignore_delay If true, write attribute now regardless of any * configured delay */ static void write_attribute(attribute_t *a, bool ignore_delay) { int private_updates = 0, cib_updates = 0; attribute_value_t *v = NULL; GHashTableIter iter; GHashTable *alert_attribute_value = NULL; int rc = pcmk_ok; if (a == NULL) { return; } /* If this attribute will be written to the CIB ... */ if (!stand_alone && !pcmk_is_set(a->flags, attrd_attr_is_private)) { /* Defer the write if now's not a good time */ if (a->update && (a->update < last_cib_op_done)) { crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); a->update = 0; // Don't log this message again } else if (a->update) { crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); goto done; } else if (mainloop_timer_running(a->timer)) { if (ignore_delay) { mainloop_timer_stop(a->timer); crm_debug("Overriding '%s' write delay", a->id); } else { crm_info("Delaying write of '%s'", a->id); goto done; } } // Initiate a transaction for all the peer value updates CRM_CHECK(the_cib != NULL, goto done); the_cib->cmds->set_user(the_cib, a->user); rc = the_cib->cmds->init_transaction(the_cib); if (rc != pcmk_ok) { crm_err("Failed to write %s (set %s): Could not initiate " "CIB transaction", a->id, pcmk__s(a->set_id, "unspecified")); goto done; } } /* Attribute will be written shortly, so clear changed flag and force * write flag, and initialize UUID missing flag to false. */ attrd_clear_attr_flags(a, attrd_attr_changed|attrd_attr_uuid_missing|attrd_attr_force_write); /* Make the table for the attribute trap */ alert_attribute_value = pcmk__strikey_table(NULL, attrd_free_attribute_value); /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { const char *uuid = NULL; if (pcmk_is_set(v->flags, attrd_value_remote)) { /* If this is a Pacemaker Remote node, the node's UUID is the same * as its name, which we already have. */ uuid = v->nodename; } else { // This will create a cluster node cache entry if none exists pcmk__node_status_t *peer = pcmk__get_node(v->nodeid, v->nodename, NULL, pcmk__node_search_any); uuid = peer->xml_id; // Remember peer's node ID if we're just now learning it if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) { crm_trace("Learned ID %" PRIu32 " for node %s", peer->cluster_layer_id, v->nodename); v->nodeid = peer->cluster_layer_id; } } /* If this is a private attribute, no update needs to be sent */ if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) { private_updates++; continue; } // Defer write if this is a cluster node that's never been seen if (uuid == NULL) { attrd_set_attr_flags(a, attrd_attr_uuid_missing); crm_notice("Cannot update %s[%s]='%s' now because node's UUID is " "unknown (will retry if learned)", a->id, v->nodename, v->current); continue; } // Update this value as part of the CIB transaction we're building rc = add_attr_update(a, v->current, uuid); if (rc != pcmk_rc_ok) { crm_err("Failed to update %s[%s]='%s': %s " QB_XS " node uuid=%s id=%" PRIu32, a->id, v->nodename, v->current, pcmk_rc_str(rc), uuid, v->nodeid); continue; } crm_debug("Writing %s[%s]=%s (node-state-id=%s node-id=%" PRIu32 ")", a->id, v->nodename, pcmk__s(v->current, "(unset)"), uuid, v->nodeid); cib_updates++; /* Preservation of the attribute to transmit alert */ set_alert_attribute_value(alert_attribute_value, v); // Save this value so we can log it when write completes pcmk__str_update(&(v->requested), v->current); } if (private_updates) { crm_info("Processed %d private change%s for %s (set %s)", private_updates, pcmk__plural_s(private_updates), a->id, pcmk__s(a->set_id, "unspecified")); } if (cib_updates > 0) { char *id = pcmk__str_copy(a->id); // Commit transaction a->update = the_cib->cmds->end_transaction(the_cib, true, cib_none); crm_info("Sent CIB request %d with %d change%s for %s (set %s)", a->update, cib_updates, pcmk__plural_s(cib_updates), a->id, pcmk__s(a->set_id, "unspecified")); if (the_cib->cmds->register_callback_full(the_cib, a->update, CIB_OP_TIMEOUT_S, FALSE, id, "attrd_cib_callback", attrd_cib_callback, free)) { // Transmit alert of the attribute send_alert_attributes_value(a, alert_attribute_value); } } done: // Discard transaction (if any) if (the_cib != NULL) { the_cib->cmds->end_transaction(the_cib, false, cib_none); the_cib->cmds->set_user(the_cib, NULL); } if (alert_attribute_value != NULL) { g_hash_table_destroy(alert_attribute_value); } } /*! * \internal * \brief Write out attributes * * \param[in] options Group of enum attrd_write_options */ void attrd_write_attributes(uint32_t options) { GHashTableIter iter; attribute_t *a = NULL; crm_debug("Writing out %s attributes", pcmk_is_set(options, attrd_write_all)? "all" : "changed"); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { if (!pcmk_is_set(options, attrd_write_all) && pcmk_is_set(a->flags, attrd_attr_uuid_missing)) { // Try writing this attribute again, in case peer ID was learned attrd_set_attr_flags(a, attrd_attr_changed); } else if (pcmk_is_set(a->flags, attrd_attr_force_write)) { /* If the force_write flag is set, write the attribute. */ attrd_set_attr_flags(a, attrd_attr_changed); } if (pcmk_is_set(options, attrd_write_all) || pcmk_is_set(a->flags, attrd_attr_changed)) { bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay); if (pcmk_is_set(a->flags, attrd_attr_force_write)) { // Always ignore delay when forced write flag is set ignore_delay = true; } write_attribute(a, ignore_delay); } else { crm_trace("Skipping unchanged attribute %s", a->id); } } } void attrd_write_or_elect_attribute(attribute_t *a) { if (attrd_election_won()) { write_attribute(a, false); } else { attrd_start_election_if_needed(); } } diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c index 829e4fe370..c2acf83335 100644 --- a/daemons/attrd/pacemaker-attrd.c +++ b/daemons/attrd/pacemaker-attrd.c @@ -1,225 +1,225 @@ /* * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" #define SUMMARY "daemon for managing Pacemaker node attributes" gboolean stand_alone = FALSE; gchar **log_files = NULL; static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, "(Advanced use only) Run in stand-alone mode", NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &log_files, "Send logs to the additional named logfile", NULL }, { NULL } }; static pcmk__output_t *out = NULL; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; lrmd_t *the_lrmd = NULL; pcmk_cluster_t *attrd_cluster = NULL; crm_trigger_t *attrd_config_read = NULL; crm_exit_t attrd_exit_status = CRM_EX_OK; static bool ipc_already_running(void) { pcmk_ipc_api_t *old_instance = NULL; int rc = pcmk_rc_ok; rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_attrd); if (rc != pcmk_rc_ok) { return false; } rc = pcmk__connect_ipc(old_instance, pcmk_ipc_dispatch_sync, 2); if (rc != pcmk_rc_ok) { crm_debug("No existing %s manager instance found: %s", pcmk_ipc_name(old_instance, true), pcmk_rc_str(rc)); pcmk_free_ipc_api(old_instance); return false; } pcmk_disconnect_ipc(old_instance); pcmk_free_ipc_api(old_instance); return true; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; GError *error = NULL; bool initialized = false; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, &output_group); attrd_init_mainloop(); crm_log_preinit(NULL, argc, argv); mainloop_add_signal(SIGTERM, attrd_shutdown); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { attrd_exit_status = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { attrd_exit_status = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } // Open additional log files pcmk__add_logfiles(log_files, out); crm_log_init(PCMK__VALUE_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker node attribute manager%s", stand_alone ? " in standalone mode" : ""); if (ipc_already_running()) { - const char *msg = "pacemaker-attrd is already active, aborting startup"; - attrd_exit_status = CRM_EX_OK; - g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "%s", msg); - crm_err("%s", msg); + g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, + "Aborting start-up because an attribute manager " + "instance is already active"); + crm_crit("%s", error->message); goto done; } initialized = true; attributes = pcmk__strkey_table(NULL, attrd_free_attribute); /* Connect to the CIB before connecting to the cluster or listening for IPC. * This allows us to assume the CIB is connected whenever we process a * cluster or IPC message (which also avoids start-up race conditions). */ if (!stand_alone) { if (attrd_cib_connect(30) != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "Could not connect to the CIB"); goto done; } crm_info("CIB connection active"); } if (attrd_cluster_connect() != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, attrd_exit_status, "Could not connect to the cluster"); goto done; } crm_info("Cluster connection active"); // Initialization that requires the cluster to be connected attrd_election_init(); if (!stand_alone) { attrd_cib_init(); } /* Set a private attribute for ourselves with the protocol version we * support. This lets all nodes determine the minimum supported version * across all nodes. It also ensures that the writer learns our node name, * so it can send our attributes to the CIB. */ attrd_broadcast_protocol(); attrd_init_ipc(); crm_notice("Pacemaker node attribute manager successfully started and accepting connections"); attrd_run_mainloop(); done: if (initialized) { crm_info("Shutting down attribute manager"); attrd_election_fini(); attrd_ipc_fini(); attrd_lrmd_disconnect(); if (!stand_alone) { attrd_cib_disconnect(); } attrd_free_waitlist(); pcmk_cluster_disconnect(attrd_cluster); pcmk_cluster_free(attrd_cluster); g_hash_table_destroy(attributes); } g_strfreev(processed_args); pcmk__free_arg_context(context); g_strfreev(log_files); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, attrd_exit_status, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(attrd_exit_status); } diff --git a/daemons/based/based_io.c b/daemons/based/based_io.c index bf49af886c..d2215dba3c 100644 --- a/daemons/based/based_io.c +++ b/daemons/based/based_io.c @@ -1,478 +1,479 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include crm_trigger_t *cib_writer = NULL; int write_cib_contents(gpointer p); static void cib_rename(const char *old) { int new_fd; char *new = crm_strdup_printf("%s/cib.auto.XXXXXX", cib_root); umask(S_IWGRP | S_IWOTH | S_IROTH); new_fd = mkstemp(new); if ((new_fd < 0) || (rename(old, new) < 0)) { crm_err("Couldn't archive unusable file %s (disabling disk writes and continuing)", old); cib_writes_enabled = FALSE; } else { crm_err("Archived unusable file %s as %s", old, new); } if (new_fd > 0) { close(new_fd); } free(new); } /* * It is the callers responsibility to free the output of this function */ static xmlNode * retrieveCib(const char *filename, const char *sigfile) { xmlNode *root = NULL; crm_info("Reading cluster configuration file %s (digest: %s)", filename, sigfile); switch (cib_file_read_and_verify(filename, sigfile, &root)) { case -pcmk_err_cib_corrupt: crm_warn("Continuing but %s will NOT be used.", filename); break; case -pcmk_err_cib_modified: /* Archive the original files so the contents are not lost */ crm_warn("Continuing but %s will NOT be used.", filename); cib_rename(filename); cib_rename(sigfile); break; } return root; } /* * for OSs without support for direntry->d_type, like Solaris */ #ifndef DT_UNKNOWN # define DT_UNKNOWN 0 # define DT_FIFO 1 # define DT_CHR 2 # define DT_DIR 4 # define DT_BLK 6 # define DT_REG 8 # define DT_LNK 10 # define DT_SOCK 12 # define DT_WHT 14 #endif /*DT_UNKNOWN*/ static int cib_archive_filter(const struct dirent * a) { int rc = 0; /* Looking for regular files (d_type = 8) starting with 'cib-' and not ending in .sig */ struct stat s; char *a_path = crm_strdup_printf("%s/%s", cib_root, a->d_name); if(stat(a_path, &s) != 0) { rc = errno; crm_trace("%s - stat failed: %s (%d)", a->d_name, pcmk_rc_str(rc), rc); rc = 0; } else if ((s.st_mode & S_IFREG) != S_IFREG) { unsigned char dtype; #ifdef HAVE_STRUCT_DIRENT_D_TYPE dtype = a->d_type; #else switch (s.st_mode & S_IFMT) { case S_IFREG: dtype = DT_REG; break; case S_IFDIR: dtype = DT_DIR; break; case S_IFCHR: dtype = DT_CHR; break; case S_IFBLK: dtype = DT_BLK; break; case S_IFLNK: dtype = DT_LNK; break; case S_IFIFO: dtype = DT_FIFO; break; case S_IFSOCK: dtype = DT_SOCK; break; default: dtype = DT_UNKNOWN; break; } #endif crm_trace("%s - wrong type (%d)", a->d_name, dtype); } else if(strstr(a->d_name, "cib-") != a->d_name) { crm_trace("%s - wrong prefix", a->d_name); } else if (pcmk__ends_with_ext(a->d_name, ".sig")) { crm_trace("%s - wrong suffix", a->d_name); } else { crm_debug("%s - candidate", a->d_name); rc = 1; } free(a_path); return rc; } static int cib_archive_sort(const struct dirent ** a, const struct dirent **b) { /* Order by creation date - most recently created file first */ int rc = 0; struct stat buf; time_t a_age = 0; time_t b_age = 0; char *a_path = crm_strdup_printf("%s/%s", cib_root, a[0]->d_name); char *b_path = crm_strdup_printf("%s/%s", cib_root, b[0]->d_name); if(stat(a_path, &buf) == 0) { a_age = buf.st_ctime; } if(stat(b_path, &buf) == 0) { b_age = buf.st_ctime; } free(a_path); free(b_path); if(a_age > b_age) { rc = 1; } else if(a_age < b_age) { rc = -1; } crm_trace("%s (%lu) vs. %s (%lu) : %d", a[0]->d_name, (unsigned long)a_age, b[0]->d_name, (unsigned long)b_age, rc); return rc; } xmlNode * readCibXmlFile(const char *dir, const char *file, gboolean discard_status) { struct dirent **namelist = NULL; int lpc = 0; char *sigfile = NULL; char *sigfilepath = NULL; char *filename = NULL; const char *name = NULL; const char *value = NULL; const char *validation = NULL; const char *use_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); xmlNode *root = NULL; xmlNode *status = NULL; sigfile = crm_strdup_printf("%s.sig", file); if (pcmk__daemon_can_write(dir, file) == FALSE || pcmk__daemon_can_write(dir, sigfile) == FALSE) { cib_status = -EACCES; return NULL; } filename = crm_strdup_printf("%s/%s", dir, file); sigfilepath = crm_strdup_printf("%s/%s", dir, sigfile); free(sigfile); cib_status = pcmk_ok; root = retrieveCib(filename, sigfilepath); free(filename); free(sigfilepath); if (root == NULL) { crm_warn("Primary configuration corrupt or unusable, trying backups in %s", cib_root); lpc = scandir(cib_root, &namelist, cib_archive_filter, cib_archive_sort); if (lpc < 0) { crm_err("scandir(%s) failed: %s", cib_root, pcmk_rc_str(errno)); } } while (root == NULL && lpc > 1) { crm_debug("Testing %d candidates", lpc); lpc--; filename = crm_strdup_printf("%s/%s", cib_root, namelist[lpc]->d_name); sigfile = crm_strdup_printf("%s.sig", filename); crm_info("Reading cluster configuration file %s (digest: %s)", filename, sigfile); if (cib_file_read_and_verify(filename, sigfile, &root) < 0) { crm_warn("Continuing but %s will NOT be used.", filename); } else { crm_notice("Continuing with last valid configuration archive: %s", filename); } free(namelist[lpc]); free(filename); free(sigfile); } free(namelist); if (root == NULL) { root = createEmptyCib(0); crm_warn("Continuing with an empty configuration."); } - if (cib_writes_enabled && use_valgrind && - (crm_is_true(use_valgrind) || strstr(use_valgrind, "pacemaker-based"))) { + if (cib_writes_enabled && (use_valgrind != NULL) + && (crm_is_true(use_valgrind) + || (strstr(use_valgrind, PCMK__SERVER_BASED) != NULL))) { cib_writes_enabled = FALSE; crm_err("*** Disabling disk writes to avoid confusing Valgrind ***"); } status = pcmk__xe_first_child(root, PCMK_XE_STATUS, NULL, NULL); if (discard_status && status != NULL) { // Strip out the PCMK_XE_STATUS section if there is one pcmk__xml_free(status); status = NULL; } if (status == NULL) { pcmk__xe_create(root, PCMK_XE_STATUS); } /* Do this before schema validation happens */ /* fill in some defaults */ name = PCMK_XA_ADMIN_EPOCH; value = crm_element_value(root, name); if (value == NULL) { crm_warn("No value for %s was specified in the configuration.", name); crm_warn("The recommended course of action is to shutdown," " run crm_verify and fix any errors it reports."); crm_warn("We will default to zero and continue but may get" " confused about which configuration to use if" " multiple nodes are powered up at the same time."); crm_xml_add_int(root, name, 0); } name = PCMK_XA_EPOCH; value = crm_element_value(root, name); if (value == NULL) { crm_xml_add_int(root, name, 0); } name = PCMK_XA_NUM_UPDATES; value = crm_element_value(root, name); if (value == NULL) { crm_xml_add_int(root, name, 0); } // Unset (DC should set appropriate value) pcmk__xe_remove_attr(root, PCMK_XA_DC_UUID); if (discard_status) { crm_log_xml_trace(root, "[on-disk]"); } validation = crm_element_value(root, PCMK_XA_VALIDATE_WITH); if (!pcmk__configured_schema_validates(root)) { crm_err("CIB does not validate with %s", pcmk__s(validation, "no schema specified")); cib_status = -pcmk_err_schema_validation; // @COMPAT Not specifying validate-with is deprecated since 2.1.8 } else if (validation == NULL) { pcmk__update_schema(&root, NULL, false, false); validation = crm_element_value(root, PCMK_XA_VALIDATE_WITH); if (validation != NULL) { crm_notice("Enabling %s validation on" " the existing (sane) configuration", validation); } else { crm_err("CIB does not validate with any known schema"); cib_status = -pcmk_err_schema_validation; } } return root; } gboolean uninitializeCib(void) { xmlNode *tmp_cib = the_cib; if (tmp_cib == NULL) { crm_debug("The CIB has already been deallocated."); return FALSE; } the_cib = NULL; crm_debug("Deallocating the CIB."); pcmk__xml_free(tmp_cib); crm_debug("The CIB has been deallocated."); return TRUE; } /* * This method will free the old CIB pointer on success and the new one * on failure. */ int activateCibXml(xmlNode * new_cib, gboolean to_disk, const char *op) { if (new_cib) { xmlNode *saved_cib = the_cib; CRM_ASSERT(new_cib != saved_cib); the_cib = new_cib; pcmk__xml_free(saved_cib); if (cib_writes_enabled && cib_status == pcmk_ok && to_disk) { crm_debug("Triggering CIB write for %s op", op); mainloop_set_trigger(cib_writer); } return pcmk_ok; } crm_err("Ignoring invalid CIB"); if (the_cib) { crm_warn("Reverting to last known CIB"); } else { crm_crit("Could not write out new CIB and no saved version to revert to"); } return -ENODATA; } static void cib_diskwrite_complete(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { const char *errmsg = "Could not write CIB to disk"; if ((exitcode != 0) && cib_writes_enabled) { cib_writes_enabled = FALSE; errmsg = "Disabling CIB disk writes after failure"; } if ((signo == 0) && (exitcode == 0)) { crm_trace("Disk write [%d] succeeded", (int) pid); } else if (signo == 0) { crm_err("%s: process %d exited %d", errmsg, (int) pid, exitcode); } else { crm_err("%s: process %d terminated with signal %d (%s)%s", errmsg, (int) pid, signo, strsignal(signo), (core? " and dumped core" : "")); } mainloop_trigger_complete(cib_writer); } int write_cib_contents(gpointer p) { int exit_rc = pcmk_ok; xmlNode *cib_local = NULL; /* Make a copy of the CIB to write (possibly in a forked child) */ if (p) { /* Synchronous write out */ cib_local = pcmk__xml_copy(NULL, p); } else { int pid = 0; int bb_state = qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0); /* Turn it off before the fork() to avoid: * - 2 processes writing to the same shared mem * - the child needing to disable it * (which would close it from underneath the parent) * This way, the shared mem files are already closed */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); pid = fork(); if (pid < 0) { crm_err("Disabling disk writes after fork failure: %s", pcmk_rc_str(errno)); cib_writes_enabled = FALSE; return FALSE; } if (pid) { /* Parent */ mainloop_child_add(pid, 0, "disk-writer", NULL, cib_diskwrite_complete); if (bb_state == QB_LOG_STATE_ENABLED) { /* Re-enable now that it it safe */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); } return -1; /* -1 means 'still work to do' */ } /* Asynchronous write-out after a fork() */ /* In theory, we can scribble on the_cib here and not affect the parent, * but let's be safe anyway. */ cib_local = pcmk__xml_copy(NULL, the_cib); } /* Write the CIB */ exit_rc = cib_file_write_with_digest(cib_local, cib_root, "cib.xml"); /* A nonzero exit code will cause further writes to be disabled */ pcmk__xml_free(cib_local); if (p == NULL) { crm_exit_t exit_code = CRM_EX_OK; switch (exit_rc) { case pcmk_ok: exit_code = CRM_EX_OK; break; case pcmk_err_cib_modified: exit_code = CRM_EX_DIGEST; // Existing CIB doesn't match digest break; case pcmk_err_cib_backup: // Existing CIB couldn't be backed up case pcmk_err_cib_save: // New CIB couldn't be saved exit_code = CRM_EX_CANTCREAT; break; default: exit_code = CRM_EX_ERROR; break; } /* Use _exit() because exit() could affect the parent adversely */ _exit(exit_code); } return exit_rc; } diff --git a/daemons/based/based_operation.c b/daemons/based/based_operation.c index 8dd07af93b..f19e35622f 100644 --- a/daemons/based/based_operation.c +++ b/daemons/based/based_operation.c @@ -1,60 +1,60 @@ /* * Copyright 2008-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include static const cib__op_fn_t cib_op_functions[] = { [cib__op_abs_delete] = cib_process_delete_absolute, [cib__op_apply_patch] = cib_server_process_diff, [cib__op_bump] = cib_process_bump, [cib__op_commit_transact] = cib_process_commit_transaction, [cib__op_create] = cib_process_create, [cib__op_delete] = cib_process_delete, [cib__op_erase] = cib_process_erase, [cib__op_is_primary] = cib_process_readwrite, [cib__op_modify] = cib_process_modify, [cib__op_noop] = cib_process_noop, [cib__op_ping] = cib_process_ping, [cib__op_primary] = cib_process_readwrite, [cib__op_query] = cib_process_query, [cib__op_replace] = cib_process_replace_svr, [cib__op_secondary] = cib_process_readwrite, [cib__op_shutdown] = cib_process_shutdown_req, [cib__op_sync_all] = cib_process_sync, [cib__op_sync_one] = cib_process_sync_one, [cib__op_upgrade] = cib_process_upgrade_server, [cib__op_schemas] = cib_process_schemas, }; /*! * \internal * \brief Get the function that performs a given server-side CIB operation * * \param[in] operation Operation whose function to look up * - * \return Function that performs \p operation within \c pacemaker-based + * \return Function that performs \p operation within the CIB manager */ cib__op_fn_t based_get_op_function(const cib__operation_t *operation) { enum cib__op_type type = operation->type; CRM_ASSERT(type >= 0); if (type >= PCMK__NELEM(cib_op_functions)) { return NULL; } return cib_op_functions[type]; } diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c index ab00e4fedc..6fc2d912a4 100644 --- a/daemons/based/pacemaker-based.c +++ b/daemons/based/pacemaker-based.c @@ -1,435 +1,436 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for managing the configuration of a Pacemaker cluster" extern int init_remote_listener(int port, gboolean encrypted); gboolean cib_shutdown_flag = FALSE; int cib_status = pcmk_ok; pcmk_cluster_t *crm_cluster = NULL; GMainLoop *mainloop = NULL; gchar *cib_root = NULL; static gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; gboolean stand_alone = FALSE; int remote_fd = 0; int remote_tls_fd = 0; GHashTable *config_hash = NULL; static void cib_init(void); void cib_shutdown(int nsig); static bool startCib(const char *filename); extern int write_cib_contents(gpointer p); static crm_exit_t exit_code = CRM_EX_OK; static void cib_enable_writes(int nsig) { crm_info("(Re)enabling disk writes"); cib_writes_enabled = TRUE; } /*! * \internal * \brief Set up options, users, and groups for stand-alone mode * * \param[out] error GLib error object * * \return Standard Pacemaker return code */ static int setup_stand_alone(GError **error) { int rc = 0; struct passwd *pwentry = NULL; preserve_status = TRUE; cib_writes_enabled = FALSE; errno = 0; pwentry = getpwnam(CRM_DAEMON_USER); if (pwentry == NULL) { exit_code = CRM_EX_FATAL; if (errno != 0) { g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Error getting password DB entry for %s: %s", CRM_DAEMON_USER, strerror(errno)); return errno; } g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Password DB entry for '%s' not found", CRM_DAEMON_USER); return ENXIO; } rc = setgid(pwentry->pw_gid); if (rc < 0) { exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Could not set group to %d: %s", pwentry->pw_gid, strerror(errno)); return errno; } rc = initgroups(CRM_DAEMON_USER, pwentry->pw_gid); if (rc < 0) { exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Could not setup groups for user %d: %s", pwentry->pw_uid, strerror(errno)); return errno; } rc = setuid(pwentry->pw_uid); if (rc < 0) { exit_code = CRM_EX_FATAL; g_set_error(error, PCMK__EXITC_ERROR, exit_code, "Could not set user to %d: %s", pwentry->pw_uid, strerror(errno)); return errno; } return pcmk_rc_ok; } /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_cluster_options() or * crm_attribute --list-options=cluster instead of querying daemon metadata. * * NOTE: pcs (as of at least 0.11.8) uses this */ static int based_metadata(pcmk__output_t *out) { - return pcmk__daemon_metadata(out, "pacemaker-based", + return pcmk__daemon_metadata(out, PCMK__SERVER_BASED, "Cluster Information Base manager options", "Cluster options used by Pacemaker's Cluster " "Information Base manager", pcmk__opt_based); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, "(Advanced use only) Run in stand-alone mode", NULL }, { "disk-writes", 'w', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &cib_writes_enabled, "(Advanced use only) Enable disk writes (enabled by default unless in " "stand-alone mode)", NULL }, { "cib-root", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME, &cib_root, "(Advanced use only) Directory where the CIB XML file should be located " "(default: " CRM_CONFIG_DIR ")", NULL }, { NULL } }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_ipc_t *old_instance = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "r"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } mainloop_add_signal(SIGTERM, cib_shutdown); mainloop_add_signal(SIGPIPE, cib_enable_writes); cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL); if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = based_metadata(out); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } - pcmk__cli_init_logging("pacemaker-based", args->verbosity); + pcmk__cli_init_logging(PCMK__SERVER_BASED, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker CIB manager"); old_instance = crm_ipc_new(PCMK__SERVER_BASED_RO, 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); - crm_err("pacemaker-based is already active, aborting startup"); + crm_crit("Aborting start-up because another CIB manager instance is " + "already active"); goto done; } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } if (stand_alone) { rc = setup_stand_alone(&error); if (rc != pcmk_rc_ok) { goto done; } } if (cib_root == NULL) { cib_root = g_strdup(CRM_CONFIG_DIR); } else { crm_notice("Using custom config location: %s", cib_root); } if (!pcmk__daemon_can_write(cib_root, NULL)) { exit_code = CRM_EX_FATAL; crm_err("Terminating due to bad permissions on %s", cib_root); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Bad permissions on %s (see logs for details)", cib_root); goto done; } pcmk__cluster_init_node_caches(); // Read initial CIB, connect to cluster, and start IPC servers cib_init(); // Run the main loop mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker CIB manager successfully started and accepting connections"); g_main_loop_run(mainloop); /* If main loop returned, clean up and exit. We disconnect in case * terminate_cib() was called with fast=-1. */ pcmk_cluster_disconnect(crm_cluster); pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); done: g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__cluster_destroy_node_caches(); if (config_hash != NULL) { g_hash_table_destroy(config_hash); } pcmk__client_cleanup(); pcmk_cluster_free(crm_cluster); g_free(cib_root); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } #if SUPPORT_COROSYNC static void cib_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, PCMK__XA_SRC, from); cib_peer_callback(xml, NULL); pcmk__xml_free(xml); free(data); } static void cib_cs_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to cluster layer, shutting down"); terminate_cib(__func__, CRM_EX_DISCONNECT); } } #endif static void cib_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, const void *data) { switch (type) { case pcmk__node_update_name: case pcmk__node_update_state: if (cib_shutdown_flag && (pcmk__cluster_num_active_nodes() < 2) && (pcmk__ipc_client_count() == 0)) { crm_info("No more peers"); terminate_cib(__func__, -1); } break; default: break; } } static void cib_init(void) { crm_cluster = pcmk_cluster_new(); #if SUPPORT_COROSYNC if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { pcmk_cluster_set_destroy_fn(crm_cluster, cib_cs_destroy); pcmk_cpg_set_deliver_fn(crm_cluster, cib_cs_dispatch); pcmk_cpg_set_confchg_fn(crm_cluster, pcmk__cpg_confchg_cb); } #endif // SUPPORT_COROSYNC config_hash = pcmk__strkey_table(free, free); if (startCib("cib.xml") == FALSE) { crm_crit("Cannot start CIB... terminating"); crm_exit(CRM_EX_NOINPUT); } if (!stand_alone) { pcmk__cluster_set_status_callback(&cib_peer_update_callback); if (pcmk_cluster_connect(crm_cluster) != pcmk_rc_ok) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(CRM_EX_FATAL); } } pcmk__serve_based_ipc(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks, &ipc_rw_callbacks); if (stand_alone) { based_is_primary = true; } } static bool startCib(const char *filename) { gboolean active = FALSE; xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status); if (activateCibXml(cib, TRUE, "start") == 0) { int port = 0; active = TRUE; cib_read_config(config_hash, cib); pcmk__scan_port(crm_element_value(cib, PCMK_XA_REMOTE_TLS_PORT), &port); if (port >= 0) { remote_tls_fd = init_remote_listener(port, TRUE); } pcmk__scan_port(crm_element_value(cib, PCMK_XA_REMOTE_CLEAR_PORT), &port); if (port >= 0) { remote_fd = init_remote_listener(port, FALSE); } } return active; } diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c index 66b2667d8b..eff8070818 100644 --- a/daemons/controld/controld_attrd.c +++ b/daemons/controld/controld_attrd.c @@ -1,158 +1,158 @@ /* * Copyright 2006-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include static pcmk_ipc_api_t *attrd_api = NULL; void controld_close_attrd_ipc(void) { if (attrd_api != NULL) { - crm_trace("Closing connection to pacemaker-attrd"); + crm_trace("Closing connection to " PCMK__SERVER_ATTRD); pcmk_disconnect_ipc(attrd_api); pcmk_free_ipc_api(attrd_api); attrd_api = NULL; } } static inline const char * node_type(bool is_remote) { return is_remote? "Pacemaker Remote" : "cluster"; } static inline const char * when(void) { return pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)? " at shutdown" : ""; } static void handle_attr_error(void) { if (AM_I_DC) { /* We are unable to provide accurate information to the * scheduler, so allow another node to take over DC. * @TODO Should we do this unconditionally on any failure? */ crmd_exit(CRM_EX_FATAL); } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { // Fast-track shutdown since unable to request via attribute register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL); } } void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node) { int rc = pcmk_rc_ok; if (attrd_api == NULL) { rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); } if (rc == pcmk_rc_ok) { uint32_t attrd_opts = pcmk__node_attr_value; if (is_remote_node) { pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote); } rc = pcmk__attrd_api_update(attrd_api, host, name, value, NULL, NULL, user_name, attrd_opts); } if (rc != pcmk_rc_ok) { do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR, "Could not update attribute %s=%s for %s node %s%s: %s " QB_XS " rc=%d", name, value, node_type(is_remote_node), host, when(), pcmk_rc_str(rc), rc); handle_attr_error(); } } void update_attrd_list(GList *attrs, uint32_t opts) { int rc = pcmk_rc_ok; if (attrd_api == NULL) { rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); } if (rc == pcmk_rc_ok) { rc = pcmk__attrd_api_update_list(attrd_api, attrs, NULL, NULL, NULL, opts | pcmk__node_attr_value); } if (rc != pcmk_rc_ok) { do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR, "Could not update multiple node attributes: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); handle_attr_error(); } } void update_attrd_remote_node_removed(const char *host, const char *user_name) { int rc = pcmk_rc_ok; if (attrd_api == NULL) { rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); } if (rc == pcmk_rc_ok) { crm_trace("Asking attribute manager to purge Pacemaker Remote node %s", host); rc = pcmk__attrd_api_purge(attrd_api, host, true); } if (rc != pcmk_rc_ok) { crm_err("Could not purge Pacemaker Remote node %s " "in attribute manager%s: %s " QB_XS " rc=%d", host, when(), pcmk_rc_str(rc), rc); } } void update_attrd_clear_failures(const char *host, const char *rsc, const char *op, const char *interval_spec, gboolean is_remote_node) { int rc = pcmk_rc_ok; if (attrd_api == NULL) { rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); } if (rc == pcmk_rc_ok) { uint32_t attrd_opts = pcmk__node_attr_none; if (is_remote_node) { pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote); } rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op, interval_spec, NULL, attrd_opts); } if (rc != pcmk_rc_ok) { const char *interval_desc = "all"; if (op != NULL) { interval_desc = pcmk__s(interval_spec, "nonrecurring"); } crm_err("Could not clear failure of %s %s for %s on %s node %s%s: %s " QB_XS " rc=%d", interval_desc, pcmk__s(op, "operations"), pcmk__s(rsc, "all resources"), node_type(is_remote_node), host, when(), pcmk_rc_str(rc), rc); } } diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h index ad1c4fa9fa..7777145f0e 100644 --- a/daemons/controld/controld_fsa.h +++ b/daemons/controld/controld_fsa.h @@ -1,696 +1,696 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRMD_FSA__H # define CRMD_FSA__H # include # include # include # include # include # include # include /*! States the controller can be in */ enum crmd_fsa_state { S_IDLE = 0, /* Nothing happening */ S_ELECTION, /* Take part in the election algorithm as * described below */ S_INTEGRATION, /* integrate that status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_FINALIZE_JOIN, /* integrate that status of new nodes (which is * all of them if we have just been elected DC) * to form a complete and up-to-date picture of * the CIB */ S_NOT_DC, /* we are in non-DC mode */ S_POLICY_ENGINE, /* Determine next stable state of the cluster */ S_RECOVERY, /* Something bad happened, check everything is ok * before continuing and attempt to recover if * required */ S_RELEASE_DC, /* we were the DC, but now we arent anymore, * possibly by our own request, and we should * release all unnecessary sub-systems, finish * any pending actions, do general cleanup and * unset anything that makes us think we are * special :) */ S_STARTING, /* we are just starting out */ S_PENDING, /* we are not a full/active member yet */ S_STOPPING, /* We are in the final stages of shutting down */ S_TERMINATE, /* We are going to shutdown, this is the equiv of * "Sending TERM signal to all processes" in Linux * and in worst case scenarios could be considered * a self STONITH */ S_TRANSITION_ENGINE, /* Attempt to make the calculated next stable * state of the cluster a reality */ S_HALT, /* Freeze - don't do anything * Something bad happened that needs the admin to fix * Wait for I_ELECTION */ /* ----------- Last input found in table is above ---------- */ S_ILLEGAL /* This is an illegal FSA state */ /* (must be last) */ }; # define MAXSTATE S_ILLEGAL /* Once we start and do some basic sanity checks, we go into the S_NOT_DC state and await instructions from the DC or input from the cluster layer which indicates the election algorithm needs to run. If the election algorithm is triggered, we enter the S_ELECTION state from where we can either go back to the S_NOT_DC state or progress to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC but aren't anymore). See the libcrmcluster API documentation for more information about the election algorithm. Once the election is complete, if we are the DC, we enter the S_INTEGRATION state which is a DC-in-waiting style state. We are the DC, but we shouldn't do anything yet because we may not have an up-to-date picture of the cluster. There may of course be times when this fails, so we should go back to the S_RECOVERY stage and check everything is ok. We may also end up here if a new node came online, since each node is authoritative about itself, and we would want to incorporate its information into the CIB. Once we have the latest CIB, we then enter the S_POLICY_ENGINE state where invoke the scheduler. It is possible that between invoking the scheduler and receiving an answer, that we receive more input. In this case, we would discard the orginal result and invoke it again. Once we are satisfied with the output from the scheduler, we enter S_TRANSITION_ENGINE and feed the scheduler's output to the Transition Engine who attempts to make the scheduler's calculation a reality. If the transition completes successfully, we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the current unstable state and try again. Of course, we may be asked to shutdown at any time, however we must progress to S_NOT_DC before doing so. Once we have handed over DC duties to another node, we can then shut down like everyone else, that is, by asking the DC for permission and waiting for it to take all our resources away. The case where we are the DC and the only node in the cluster is a special case and handled as an escalation which takes us to S_SHUTDOWN. Similarly, if any other point in the shutdown fails or stalls, this is escalated and we end up in S_TERMINATE. At any point, the controller can relay messages for its subsystems, but outbound messages (from subsystems) should probably be blocked until S_INTEGRATION (for the DC) or the join protocol has completed (for non-DC controllers). */ /*====================================== * * Inputs/Events/Stimuli to be given to the finite state machine * * Some of these a true events, and others are synthesised based on * the "register" (see below) and the contents or source of messages. * * The machine keeps processing until receiving I_NULL * *======================================*/ enum crmd_fsa_input { /* 0 */ I_NULL, /* Nothing happened */ /* 1 */ I_CIB_OP, /* An update to the CIB occurred */ I_CIB_UPDATE, /* An update to the CIB occurred */ I_DC_TIMEOUT, /* We have lost communication with the DC */ I_ELECTION, /* Someone started an election */ I_PE_CALC, /* The scheduler needs to be invoked */ I_RELEASE_DC, /* The election completed and we were not * elected, but we were the DC beforehand */ I_ELECTION_DC, /* The election completed and we were (re-)elected * DC */ I_ERROR, /* Something bad happened (more serious than * I_FAIL) and may not have been due to the action * being performed. For example, we may have lost * our connection to the CIB. */ /* 9 */ I_FAIL, /* The action failed to complete successfully */ I_INTEGRATED, I_FINALIZED, I_NODE_JOIN, /* A node has entered the cluster */ I_NOT_DC, /* We are not and were not the DC before or after * the current operation or state */ I_RECOVERED, /* The recovery process completed successfully */ I_RELEASE_FAIL, /* We could not give up DC status for some reason */ I_RELEASE_SUCCESS, /* We are no longer the DC */ I_RESTART, /* The current set of actions needs to be * restarted */ I_TE_SUCCESS, /* Some non-resource, non-cluster-layer action * is required of us, e.g. ping */ /* 20 */ I_ROUTER, /* Do our job as router and forward this to the * right place */ I_SHUTDOWN, /* We are asking to shutdown */ I_STOP, /* We have been told to shutdown */ I_TERMINATE, /* Actually exit */ I_STARTUP, I_PE_SUCCESS, /* The action completed successfully */ I_JOIN_OFFER, /* The DC is offering membership */ I_JOIN_REQUEST, /* The client is requesting membership */ I_JOIN_RESULT, /* If not the DC: The result of a join request * Else: A client is responding with its local state info */ I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen" * and until it does, we can't do anything else */ I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */ I_LRM_EVENT, /* 30 */ I_PENDING, I_HALT, /* ------------ Last input found in table is above ----------- */ I_ILLEGAL /* This is an illegal value for an FSA input */ /* (must be last) */ }; # define MAXINPUT I_ILLEGAL # define I_MESSAGE I_ROUTER /*====================================== * * actions * * Some of the actions below will always occur together for now, but this may * not always be the case, so they are split up so that they can easily be * called independently in the future, if necessary. * * For example, separating A_LRM_CONNECT from A_STARTUP might be useful * if we ever try to recover from a faulty or disconnected executor. * *======================================*/ /* Don't do anything */ # define A_NOTHING 0x0000000000000000ULL /* -- Startup actions -- */ /* Hook to perform any actions (other than connecting to other daemons) * that might be needed as part of the startup. */ # define A_STARTUP 0x0000000000000001ULL /* Hook to perform any actions that might be needed as part * after startup is successful. */ # define A_STARTED 0x0000000000000002ULL /* Connect to cluster layer */ # define A_HA_CONNECT 0x0000000000000004ULL # define A_HA_DISCONNECT 0x0000000000000008ULL # define A_INTEGRATE_TIMER_START 0x0000000000000010ULL # define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL # define A_FINALIZE_TIMER_START 0x0000000000000040ULL # define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL /* -- Election actions -- */ # define A_DC_TIMER_START 0x0000000000000100ULL # define A_DC_TIMER_STOP 0x0000000000000200ULL # define A_ELECTION_COUNT 0x0000000000000400ULL # define A_ELECTION_VOTE 0x0000000000000800ULL # define A_ELECTION_START 0x0000000000001000ULL /* -- Message processing -- */ /* Process the queue of requests */ # define A_MSG_PROCESS 0x0000000000002000ULL /* Send the message to the correct recipient */ # define A_MSG_ROUTE 0x0000000000004000ULL /* Send a welcome message to new node(s) */ # define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL /* -- Server Join protocol actions -- */ /* Send a welcome message to all nodes */ # define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL /* Process the remote node's ack of our join message */ # define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL /* Send out the results of the Join phase */ # define A_DC_JOIN_FINALIZE 0x0000000000040000ULL /* Send out the results of the Join phase */ # define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL /* -- Client Join protocol actions -- */ # define A_CL_JOIN_QUERY 0x0000000000100000ULL # define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL /* Request membership to the DC list */ # define A_CL_JOIN_REQUEST 0x0000000000400000ULL /* Did the DC accept or reject the request */ # define A_CL_JOIN_RESULT 0x0000000000800000ULL /* -- Recovery, DC start/stop -- */ /* Something bad happened, try to recover */ # define A_RECOVER 0x0000000001000000ULL /* Hook to perform any actions (apart from starting, the TE, scheduler, * and gathering the latest CIB) that might be necessary before * giving up the responsibilities of being the DC. */ # define A_DC_RELEASE 0x0000000002000000ULL /* */ # define A_DC_RELEASED 0x0000000004000000ULL /* Hook to perform any actions (apart from starting, the TE, scheduler, * and gathering the latest CIB) that might be necessary before * taking over the responsibilities of being the DC. */ # define A_DC_TAKEOVER 0x0000000008000000ULL /* -- Shutdown actions -- */ # define A_SHUTDOWN 0x0000000010000000ULL # define A_STOP 0x0000000020000000ULL # define A_EXIT_0 0x0000000040000000ULL # define A_EXIT_1 0x0000000080000000ULL # define A_SHUTDOWN_REQ 0x0000000100000000ULL # define A_ELECTION_CHECK 0x0000000200000000ULL # define A_DC_JOIN_FINAL 0x0000000400000000ULL /* -- CIB actions -- */ # define A_CIB_START 0x0000020000000000ULL # define A_CIB_STOP 0x0000040000000000ULL /* -- Transition Engine actions -- */ /* Attempt to reach the newly calculated cluster state. This is * only called once per transition (except if it is asked to * stop the transition or start a new one). * Once given a cluster state to reach, the TE will determine * tasks that can be performed in parallel, execute them, wait * for replies and then determine the next set until the new * state is reached or no further tasks can be taken. */ # define A_TE_INVOKE 0x0000100000000000ULL # define A_TE_START 0x0000200000000000ULL # define A_TE_STOP 0x0000400000000000ULL # define A_TE_CANCEL 0x0000800000000000ULL # define A_TE_HALT 0x0001000000000000ULL /* -- Scheduler actions -- */ /* Calculate the next state for the cluster. This is only * invoked once per needed calculation. */ # define A_PE_INVOKE 0x0002000000000000ULL # define A_PE_START 0x0004000000000000ULL # define A_PE_STOP 0x0008000000000000ULL /* -- Misc actions -- */ /* Add a system generate "block" so that resources arent moved * to or are activly moved away from the affected node. This * way we can return quickly even if busy with other things. */ # define A_NODE_BLOCK 0x0010000000000000ULL /* Update our information in the local CIB */ # define A_UPDATE_NODESTATUS 0x0020000000000000ULL # define A_READCONFIG 0x0080000000000000ULL /* -- LRM Actions -- */ - /* Connect to pacemaker-execd */ + // Connect to the local executor # define A_LRM_CONNECT 0x0100000000000000ULL - /* Disconnect from pacemaker-execd */ + // Disconnect from the local executor # define A_LRM_DISCONNECT 0x0200000000000000ULL # define A_LRM_INVOKE 0x0400000000000000ULL # define A_LRM_EVENT 0x0800000000000000ULL /* -- Logging actions -- */ # define A_LOG 0x1000000000000000ULL # define A_ERROR 0x2000000000000000ULL # define A_WARN 0x4000000000000000ULL # define O_EXIT (A_SHUTDOWN|A_STOP|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP) # define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED) # define O_PE_RESTART (A_PE_START|A_PE_STOP) # define O_TE_RESTART (A_TE_START|A_TE_STOP) # define O_CIB_RESTART (A_CIB_START|A_CIB_STOP) # define O_LRM_RECONNECT (A_LRM_CONNECT|A_LRM_DISCONNECT) # define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START) /*====================================== * * "register" contents * * Things we may want to remember regardless of which state we are in. * * These also count as inputs for synthesizing I_* * *======================================*/ # define R_THE_DC 0x00000001ULL /* Are we the DC? */ # define R_STARTING 0x00000002ULL /* Are we starting up? */ # define R_SHUTDOWN 0x00000004ULL /* Are we trying to shut down? */ # define R_STAYDOWN 0x00000008ULL /* Should we restart? */ # define R_JOIN_OK 0x00000010ULL /* Have we completed the join process */ # define R_READ_CONFIG 0x00000040ULL # define R_INVOKE_PE 0x00000080ULL // Should the scheduler be invoked? # define R_CIB_CONNECTED 0x00000100ULL /* Is the CIB connected? */ # define R_PE_CONNECTED 0x00000200ULL // Is the scheduler connected? # define R_TE_CONNECTED 0x00000400ULL /* Is the Transition Engine connected? */ -# define R_LRM_CONNECTED 0x00000800ULL // Is pacemaker-execd connected? +# define R_LRM_CONNECTED 0x00000800ULL // Is the executor connected? # define R_CIB_REQUIRED 0x00001000ULL /* Is the CIB required? */ # define R_PE_REQUIRED 0x00002000ULL // Is the scheduler required? # define R_TE_REQUIRED 0x00004000ULL /* Is the Transition Engine required? */ # define R_ST_REQUIRED 0x00008000ULL /* Is the Stonith daemon required? */ # define R_CIB_DONE 0x00010000ULL /* Have we calculated the CIB? */ # define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB */ # define R_MEMBERSHIP 0x00100000ULL /* Have we got cluster layer data yet */ // Ever received membership-layer data # define R_PEER_DATA 0x00200000ULL # define R_HA_DISCONNECTED 0x00400000ULL /* did we sign out of our own accord */ # define R_REQ_PEND 0x01000000ULL /* Are there Requests waiting for processing? */ # define R_PE_PEND 0x02000000ULL // Are we awaiting reply from scheduler? # define R_TE_PEND 0x04000000ULL /* Has the TE been invoked and we're awaiting completion? */ # define R_RESP_PEND 0x08000000ULL /* Do we have clients waiting on a response? if so perhaps we shouldn't stop yet */ # define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all * resources in preparation for * shutting down */ # define R_IN_RECOVERY 0x80000000ULL #define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_EXEC_INVALID) enum crmd_fsa_cause { C_UNKNOWN = 0, C_STARTUP, C_IPC_MESSAGE, C_HA_MESSAGE, C_CRMD_STATUS_CALLBACK, C_LRM_OP_CALLBACK, C_TIMER_POPPED, C_SHUTDOWN, C_FSA_INTERNAL, }; enum fsa_data_type { fsa_dt_none, fsa_dt_ha_msg, fsa_dt_xml, fsa_dt_lrm, }; typedef struct fsa_data_s fsa_data_t; struct fsa_data_s { int id; enum crmd_fsa_input fsa_input; enum crmd_fsa_cause fsa_cause; uint64_t actions; const char *origin; void *data; enum fsa_data_type data_type; }; #define controld_set_fsa_input_flags(flags_to_set) do { \ controld_globals.fsa_input_register \ = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "FSA input", "controller", \ controld_globals.fsa_input_register, \ (flags_to_set), #flags_to_set); \ } while (0) #define controld_clear_fsa_input_flags(flags_to_clear) do { \ controld_globals.fsa_input_register \ = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "FSA input", "controller", \ controld_globals.fsa_input_register, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) #define controld_set_fsa_action_flags(flags_to_set) do { \ controld_globals.fsa_actions \ = pcmk__set_flags_as(__func__, __LINE__, LOG_DEBUG, \ "FSA action", "controller", \ controld_globals.fsa_actions, \ (flags_to_set), #flags_to_set); \ } while (0) #define controld_clear_fsa_action_flags(flags_to_clear) do { \ controld_globals.fsa_actions \ = pcmk__clear_flags_as(__func__, __LINE__, LOG_DEBUG, \ "FSA action", "controller", \ controld_globals.fsa_actions, \ (flags_to_clear), #flags_to_clear); \ } while (0) // This should be moved elsewhere xmlNode *controld_query_executor_state(void); const char *fsa_input2string(enum crmd_fsa_input input); const char *fsa_state2string(enum crmd_fsa_state state); const char *fsa_cause2string(enum crmd_fsa_cause cause); const char *fsa_action2string(long long action); enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause); enum crmd_fsa_state controld_fsa_get_next_state(enum crmd_fsa_input input); uint64_t controld_fsa_get_action(enum crmd_fsa_input input); void controld_init_fsa_trigger(void); void controld_destroy_fsa_trigger(void); void free_max_generation(void); # define AM_I_DC pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC) # define controld_trigger_fsa() controld_trigger_fsa_as(__func__, __LINE__) void controld_trigger_fsa_as(const char *fn, int line); /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_LOG */ void do_log(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_CIB_START, STOP, RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_PE_START, STOP, RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_TE_START, STOP, RESTART */ void do_te_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_ELECTION_VOTE */ void do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_ELECTION_COUNT */ void do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_ELECTION_CHECK */ void do_election_check(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_TIMER_STOP */ void do_timer_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_TAKEOVER */ void do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_RELEASE */ void do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_CL_JOIN_QUERY */ /* is there a DC out there? */ void do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_CL_JOIN_ANNOUNCE */ void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_CL_JOIN_REQUEST */ void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_CL_JOIN_RESULT */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_LRM_EVENT */ void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_TE_INVOKE, A_TE_CANCEL */ void do_te_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t *msg_data); /* A_DC_JOIN_FINAL */ void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t *msg_data); #endif diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c index 2863e7eca3..e01e1714ad 100644 --- a/daemons/controld/controld_throttle.c +++ b/daemons/controld/controld_throttle.c @@ -1,575 +1,575 @@ /* * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include /* These values don't need to be bits, but these particular values must be kept * for backward compatibility during rolling upgrades. */ enum throttle_state_e { throttle_none = 0x0000, throttle_low = 0x0001, throttle_med = 0x0010, throttle_high = 0x0100, throttle_extreme = 0x1000, }; struct throttle_record_s { int max; enum throttle_state_e mode; char *node; }; static int throttle_job_max = 0; static float throttle_load_target = 0.0; #define THROTTLE_FACTOR_LOW 1.2 #define THROTTLE_FACTOR_MEDIUM 1.6 #define THROTTLE_FACTOR_HIGH 2.0 static GHashTable *throttle_records = NULL; static mainloop_timer_t *throttle_timer = NULL; static const char * load2str(enum throttle_state_e mode) { switch (mode) { case throttle_extreme: return "extreme"; case throttle_high: return "high"; case throttle_med: return "medium"; case throttle_low: return "low"; case throttle_none: return "negligible"; default: return "undetermined"; } } #if HAVE_LINUX_PROCFS /*! * \internal * \brief Return name of /proc file containing the CIB daemon's load statistics * * \return Newly allocated memory with file name on success, NULL otherwise * * \note It is the caller's responsibility to free the return value. * This will return NULL if the daemon is being run via valgrind. * This should be called only on Linux systems. */ static char * find_cib_loadfile(void) { - pid_t pid = pcmk__procfs_pid_of("pacemaker-based"); + pid_t pid = pcmk__procfs_pid_of(PCMK__SERVER_BASED); return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL; } static bool throttle_cib_load(float *load) { /* /proc/[pid]/stat Status information about the process. This is used by ps(1). It is defined in /usr/src/linux/fs/proc/array.c. The fields, in order, with their proper scanf(3) format specifiers, are: pid %d (1) The process ID. comm %s (2) The filename of the executable, in parentheses. This is visible whether or not the executable is swapped out. state %c (3) One character from the string "RSDZTW" where R is running, S is sleeping in an interruptible wait, D is waiting in uninterruptible disk sleep, Z is zombie, T is traced or stopped (on a signal), and W is paging. ppid %d (4) The PID of the parent. pgrp %d (5) The process group ID of the process. session %d (6) The session ID of the process. tty_nr %d (7) The controlling terminal of the process. (The minor device number is contained in the combination of bits 31 to 20 and 7 to 0; the major device number is in bits 15 to 8.) tpgid %d (8) The ID of the foreground process group of the controlling terminal of the process. flags %u (%lu before Linux 2.6.22) (9) The kernel flags word of the process. For bit meanings, see the PF_* defines in the Linux kernel source file include/linux/sched.h. Details depend on the kernel version. minflt %lu (10) The number of minor faults the process has made which have not required loading a memory page from disk. cminflt %lu (11) The number of minor faults that the process's waited-for children have made. majflt %lu (12) The number of major faults the process has made which have required loading a memory page from disk. cmajflt %lu (13) The number of major faults that the process's waited-for children have made. utime %lu (14) Amount of time that this process has been scheduled in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). This includes guest time, guest_time (time spent running a virtual CPU, see below), so that applications that are not aware of the guest time field do not lose that time from their calculations. stime %lu (15) Amount of time that this process has been scheduled in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). */ static char *loadfile = NULL; static time_t last_call = 0; static long ticks_per_s = 0; static unsigned long last_utime, last_stime; char buffer[64*1024]; FILE *stream = NULL; time_t now = time(NULL); if(load == NULL) { return FALSE; } else { *load = 0.0; } if(loadfile == NULL) { last_call = 0; last_utime = 0; last_stime = 0; loadfile = find_cib_loadfile(); if (loadfile == NULL) { crm_warn("Couldn't find CIB load file"); return FALSE; } ticks_per_s = sysconf(_SC_CLK_TCK); crm_trace("Found %s", loadfile); } stream = fopen(loadfile, "r"); if(stream == NULL) { int rc = errno; crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc); free(loadfile); loadfile = NULL; return FALSE; } if(fgets(buffer, sizeof(buffer), stream)) { char *comm = pcmk__assert_alloc(1, 256); char state = 0; int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0; unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0; rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu", &pid, comm, &state, &ppid, &pgrp, &session, &tty_nr, &tpgid, &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime); free(comm); if(rc != 15) { crm_err("Only %d of 15 fields found in %s", rc, loadfile); fclose(stream); return FALSE; } else if(last_call > 0 && last_call < now && last_utime <= utime && last_stime <= stime) { time_t elapsed = now - last_call; unsigned long delta_utime = utime - last_utime; unsigned long delta_stime = stime - last_stime; *load = (delta_utime + delta_stime); /* Cast to a float before division */ *load /= ticks_per_s; *load /= elapsed; crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed); } else { crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s); } last_call = now; last_utime = utime; last_stime = stime; fclose(stream); return TRUE; } fclose(stream); return FALSE; } static bool throttle_load_avg(float *load) { char buffer[256]; FILE *stream = NULL; const char *loadfile = "/proc/loadavg"; if(load == NULL) { return FALSE; } stream = fopen(loadfile, "r"); if(stream == NULL) { int rc = errno; crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc); return FALSE; } if(fgets(buffer, sizeof(buffer), stream)) { char *nl = strstr(buffer, "\n"); /* Grab the 1-minute average, ignore the rest */ *load = strtof(buffer, NULL); if(nl) { nl[0] = 0; } fclose(stream); return TRUE; } fclose(stream); return FALSE; } /*! * \internal * \brief Check a load value against throttling thresholds * * \param[in] load Load value to check * \param[in] desc Description of metric (for logging) * \param[in] thresholds Low/medium/high/extreme thresholds * * \return Throttle mode corresponding to load value */ static enum throttle_state_e throttle_check_thresholds(float load, const char *desc, const float thresholds[4]) { if (load > thresholds[3]) { crm_notice("Extreme %s detected: %f", desc, load); return throttle_extreme; } else if (load > thresholds[2]) { crm_notice("High %s detected: %f", desc, load); return throttle_high; } else if (load > thresholds[1]) { crm_info("Moderate %s detected: %f", desc, load); return throttle_med; } else if (load > thresholds[0]) { crm_debug("Noticeable %s detected: %f", desc, load); return throttle_low; } crm_trace("Negligible %s detected: %f", desc, load); return throttle_none; } static enum throttle_state_e throttle_handle_load(float load, const char *desc, int cores) { float normalize; float thresholds[4]; if (cores == 1) { /* On a single core machine, a load of 1.0 is already too high */ normalize = 0.6; } else { /* Normalize the load to be per-core */ normalize = cores; } thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW; thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM; thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH; thresholds[3] = load + 1.0; /* never extreme */ return throttle_check_thresholds(load, desc, thresholds); } #endif // HAVE_LINUX_PROCFS static enum throttle_state_e throttle_mode(void) { enum throttle_state_e mode = throttle_none; #if HAVE_LINUX_PROCFS unsigned int cores; float load; float thresholds[4]; cores = pcmk__procfs_num_cores(); if(throttle_cib_load(&load)) { float cib_max_cpu = 0.95; /* The CIB is a single-threaded task and thus cannot consume * more than 100% of a CPU (and 1/cores of the overall system * load). * * On a many-cored system, the CIB might therefore be maxed out * (causing operations to fail or appear to fail) even though * the overall system load is still reasonable. * * Therefore, the 'normal' thresholds can not apply here, and we * need a special case. */ if(cores == 1) { cib_max_cpu = 0.4; } if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) { cib_max_cpu = throttle_load_target; } thresholds[0] = cib_max_cpu * 0.8; thresholds[1] = cib_max_cpu * 0.9; thresholds[2] = cib_max_cpu; /* Can only happen on machines with a low number of cores */ thresholds[3] = cib_max_cpu * 1.5; mode = throttle_check_thresholds(load, "CIB load", thresholds); } if(throttle_load_target <= 0) { /* If we ever make this a valid value, the cluster will at least behave as expected */ return mode; } if(throttle_load_avg(&load)) { enum throttle_state_e cpu_load; cpu_load = throttle_handle_load(load, "CPU load", cores); if (cpu_load > mode) { mode = cpu_load; } crm_debug("Current load is %f across %u core(s)", load, cores); } #endif // HAVE_LINUX_PROCFS return mode; } static void throttle_send_command(enum throttle_state_e mode) { xmlNode *xml = NULL; static enum throttle_state_e last = -1; if(mode != last) { crm_info("New throttle mode: %s load (was %s)", load2str(mode), load2str(last)); last = mode; xml = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL, CRM_SYSTEM_CRMD, CRM_OP_THROTTLE, NULL); crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MODE, mode); crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MAX, throttle_job_max); pcmk__cluster_send_message(NULL, pcmk_ipc_controld, xml); pcmk__xml_free(xml); } } static gboolean throttle_timer_cb(gpointer data) { throttle_send_command(throttle_mode()); return TRUE; } static void throttle_record_free(gpointer p) { struct throttle_record_s *r = p; free(r->node); free(r); } static void throttle_set_load_target(float target) { throttle_load_target = target; } /*! * \internal * \brief Update the maximum number of simultaneous jobs * * \param[in] preference Cluster-wide \c PCMK_OPT_NODE_ACTION_LIMIT from the * CIB */ static void throttle_update_job_max(const char *preference) { long long max = 0LL; const char *env_limit = pcmk__env_option(PCMK__ENV_NODE_ACTION_LIMIT); if (env_limit != NULL) { preference = env_limit; // Per-node override } if (preference != NULL) { pcmk__scan_ll(preference, &max, 0LL); } if (max > 0) { throttle_job_max = (max >= INT_MAX)? INT_MAX : (int) max; } else { // Default is based on the number of cores detected throttle_job_max = 2 * pcmk__procfs_num_cores(); } } void throttle_init(void) { if(throttle_records == NULL) { throttle_records = pcmk__strkey_table(NULL, throttle_record_free); throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL); } throttle_update_job_max(NULL); mainloop_timer_start(throttle_timer); } /*! * \internal * \brief Configure throttle options based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_throttle(GHashTable *options) { const char *value = g_hash_table_lookup(options, PCMK_OPT_LOAD_THRESHOLD); if (value != NULL) { throttle_set_load_target(strtof(value, NULL) / 100.0); } value = g_hash_table_lookup(options, PCMK_OPT_NODE_ACTION_LIMIT); throttle_update_job_max(value); } void throttle_fini(void) { if (throttle_timer != NULL) { mainloop_timer_del(throttle_timer); throttle_timer = NULL; } if (throttle_records != NULL) { g_hash_table_destroy(throttle_records); throttle_records = NULL; } } int throttle_get_total_job_limit(int l) { /* Cluster-wide limit */ GHashTableIter iter; int limit = l; int peers = pcmk__cluster_num_active_nodes(); struct throttle_record_s *r = NULL; g_hash_table_iter_init(&iter, throttle_records); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) { switch(r->mode) { case throttle_extreme: if(limit == 0 || limit > peers/4) { limit = QB_MAX(1, peers/4); } break; case throttle_high: if(limit == 0 || limit > peers/2) { limit = QB_MAX(1, peers/2); } break; default: break; } } if(limit == l) { } else if(l == 0) { crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d", limit); } else { crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d instead of %d", limit, l); } return limit; } int throttle_get_job_limit(const char *node) { int jobs = 1; struct throttle_record_s *r = NULL; r = g_hash_table_lookup(throttle_records, node); if(r == NULL) { r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s)); r->node = pcmk__str_copy(node); r->mode = throttle_low; r->max = throttle_job_max; crm_trace("Defaulting to local values for unknown node %s", node); g_hash_table_insert(throttle_records, r->node, r); } switch(r->mode) { case throttle_extreme: case throttle_high: jobs = 1; /* At least one job must always be allowed */ break; case throttle_med: jobs = QB_MAX(1, r->max / 4); break; case throttle_low: jobs = QB_MAX(1, r->max / 2); break; case throttle_none: jobs = QB_MAX(1, r->max); break; default: crm_err("Unknown throttle mode %.4x on %s", r->mode, node); break; } return jobs; } void throttle_update(xmlNode *xml) { int max = 0; int mode = 0; struct throttle_record_s *r = NULL; const char *from = crm_element_value(xml, PCMK__XA_SRC); crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MODE, &mode); crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MAX, &max); r = g_hash_table_lookup(throttle_records, from); if(r == NULL) { r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s)); r->node = pcmk__str_copy(from); g_hash_table_insert(throttle_records, r->node, r); } r->max = max; r->mode = (enum throttle_state_e) mode; crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d", from, load2str((enum throttle_state_e) mode), max, throttle_get_job_limit(from)); } diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c index b1e5938c07..7a2f26e164 100644 --- a/daemons/controld/pacemaker-controld.c +++ b/daemons/controld/pacemaker-controld.c @@ -1,225 +1,226 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for coordinating a Pacemaker cluster's response " \ "to events" _Noreturn void crmd_init(void); extern void init_dotfile(void); controld_globals_t controld_globals = { // Automatic initialization to 0, false, or NULL is fine for most members .fsa_state = S_STARTING, .fsa_actions = A_NOTHING, }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_cluster_options() or * crm_attribute --list-options=cluster instead of querying daemon metadata. * * NOTE: pcs (as of at least 0.11.8) uses this */ static int controld_metadata(pcmk__output_t *out) { - return pcmk__daemon_metadata(out, "pacemaker-controld", + return pcmk__daemon_metadata(out, PCMK__SERVER_CONTROLD, "Pacemaker controller options", "Cluster options used by Pacemaker's " "controller", pcmk__opt_controld); } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { return pcmk__build_arg_context(args, "text (default), xml", group, NULL); } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; bool initialize = true; crm_ipc_t *old_instance = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); initialize = false; goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { initialize = false; rc = controld_metadata(out); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } - pcmk__cli_init_logging("pacemaker-controld", args->verbosity); + pcmk__cli_init_logging(PCMK__SERVER_CONTROLD, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker controller"); old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0); if (old_instance == NULL) { /* crm_ipc_new will have already printed an error message with crm_err. */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); - crm_err("pacemaker-controld is already active, aborting startup"); + crm_crit("Aborting start-up because another controller instance is " + "already active"); initialize = false; goto done; } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { exit_code = CRM_EX_FATAL; crm_err("Terminating due to bad permissions on " PE_STATE_DIR); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Bad permissions on " PE_STATE_DIR " (see logs for details)"); goto done; } else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) { exit_code = CRM_EX_FATAL; crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Bad permissions on " CRM_CONFIG_DIR " (see logs for details)"); goto done; } if (pcmk__log_output_new(&(controld_globals.logger_out)) != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; goto done; } pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE); done: g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); if ((exit_code == CRM_EX_OK) && initialize) { // Does not return crmd_init(); } crm_exit(exit_code); } void crmd_init(void) { crm_exit_t exit_code = CRM_EX_OK; enum crmd_fsa_state state; init_dotfile(); register_fsa_input(C_STARTUP, I_STARTUP, NULL); pcmk__cluster_init_node_caches(); state = s_crmd_fsa(C_STARTUP); if (state == S_PENDING || state == S_STARTING) { /* Create the mainloop and run it... */ crm_trace("Starting %s's mainloop", crm_system_name); controld_globals.mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(controld_globals.mainloop); if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) { crm_info("Inhibiting automated respawn"); exit_code = CRM_EX_FATAL; } } else { crm_err("Startup of %s failed. Current state: %s", crm_system_name, fsa_state2string(state)); exit_code = CRM_EX_ERROR; } crm_info("%s[%lu] exiting with status %d (%s)", crm_system_name, (unsigned long) getpid(), exit_code, crm_exit_str(exit_code)); crmd_fast_exit(exit_code); } diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index 23227ee85f..8f760fa899 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -1,587 +1,587 @@ /* * Copyright 2012-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" #ifdef PCMK__COMPILE_REMOTE # define EXECD_TYPE "remote" -# define EXECD_NAME "pacemaker-remoted" +# define EXECD_NAME PCMK__SERVER_REMOTED # define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes" #else # define EXECD_TYPE "local" -# define EXECD_NAME "pacemaker-execd" +# define EXECD_NAME PCMK__SERVER_EXECD # define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes" #endif static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static stonith_t *stonith_api = NULL; int lrmd_call_id = 0; time_t start_time; static struct { gchar **log_files; #ifdef PCMK__COMPILE_REMOTE gchar *port; #endif // PCMK__COMPILE_REMOTE } options; #ifdef PCMK__COMPILE_REMOTE /* whether shutdown request has been sent */ static gboolean shutting_down = FALSE; /* timer for waiting for acknowledgment of shutdown request */ static guint shutdown_ack_timer = 0; static gboolean lrmd_exit(gpointer data); #endif static void stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) { stonith_api->state = stonith_disconnected; stonith_connection_failed(); } stonith_t * get_stonith_connection(void) { if (stonith_api && stonith_api->state == stonith_disconnected) { stonith_api_delete(stonith_api); stonith_api = NULL; } if (stonith_api == NULL) { int rc = pcmk_ok; stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return NULL; } rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 10 attempts: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); stonith_api_delete(stonith_api); stonith_api = NULL; } else { stonith_api_operations_t *cmds = stonith_api->cmds; cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_DISCONNECT, stonith_connection_destroy_cb); } } return stonith_api; } static int32_t lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } static void lrmd_ipc_created(qb_ipcs_connection_t * c) { pcmk__client_t *new_client = pcmk__find_client(c); crm_trace("Connection %p", c); CRM_ASSERT(new_client != NULL); /* Now that the connection is offically established, alert * the other clients a new connection exists. */ notify_of_new_client(new_client); } static int32_t lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *request = pcmk__client_data2xml(client, data, &id, &flags); CRM_CHECK(client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); return FALSE); CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); return FALSE); if (!request) { return 0; } if (!client->name) { const char *value = crm_element_value(request, PCMK__XA_LRMD_CLIENTNAME); if (value == NULL) { client->name = pcmk__itoa(pcmk__client_pid(c)); } else { client->name = pcmk__str_copy(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, PCMK__XA_LRMD_CLIENTID, client->id); crm_xml_add(request, PCMK__XA_LRMD_CLIENTNAME, client->name); crm_xml_add_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); pcmk__xml_free(request); return 0; } /*! * \internal * \brief Free a client connection, and exit if appropriate * * \param[in,out] client Client connection to free */ void lrmd_client_destroy(pcmk__client_t *client) { pcmk__free_client(client); #ifdef PCMK__COMPILE_REMOTE /* If we were waiting to shut down, we can now safely do so * if there are no more proxied IPC providers */ if (shutting_down && (ipc_proxy_get_provider() == NULL)) { lrmd_exit(NULL); } #endif } static int32_t lrmd_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); client_disconnect_cleanup(client->id); #ifdef PCMK__COMPILE_REMOTE ipc_proxy_remove_provider(client); #endif lrmd_client_destroy(client); return 0; } static void lrmd_ipc_destroy(qb_ipcs_connection_t * c) { lrmd_ipc_closed(c); crm_trace("Connection %p", c); } static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { .connection_accept = lrmd_ipc_accept, .connection_created = lrmd_ipc_created, .msg_process = lrmd_ipc_dispatch, .connection_closed = lrmd_ipc_closed, .connection_destroyed = lrmd_ipc_destroy }; // \return Standard Pacemaker return code int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply) { crm_trace("Sending reply (%d) to client (%s)", id, client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: return pcmk__ipc_send_xml(client, id, reply, FALSE); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: return lrmd__remote_send_xml(client->remote, reply, id, "reply"); #endif default: crm_err("Could not send reply: unknown type for client %s " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } // \return Standard Pacemaker return code int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg) { crm_trace("Sending notification to client (%s)", client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: if (client->ipcs == NULL) { crm_trace("Could not notify local client: disconnected"); return ENOTCONN; } return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: if (client->remote == NULL) { crm_trace("Could not notify remote client: disconnected"); return ENOTCONN; } else { return lrmd__remote_send_xml(client->remote, msg, 0, "notify"); } #endif default: crm_err("Could not notify client %s with unknown transport " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } /*! * \internal * \brief Clean up and exit immediately * * \param[in] data Ignored * * \return Doesn't return * \note This can be used as a timer callback. */ static gboolean lrmd_exit(gpointer data) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_api_delete(stonith_api); if (ipcs) { mainloop_del_ipc_server(ipcs); } #ifdef PCMK__COMPILE_REMOTE execd_stop_tls_server(); ipc_proxy_cleanup(); #endif pcmk__client_cleanup(); g_hash_table_destroy(rsc_list); if (mainloop) { lrmd_drain_alerts(mainloop); } crm_exit(CRM_EX_OK); return FALSE; } /*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef PCMK__COMPILE_REMOTE pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { crm_crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host (which ensures the proxy host * supports shutdown requests), then wait for all proxy hosts to * disconnect (which ensures that all resources have been stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ execd_stop_tls_server(); /* Older controller versions will never acknowledge our request, so * set a fairly short timeout to exit quickly in that case. If we * get the ack, we'll defuse this timer. */ shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif lrmd_exit(NULL); } /*! * \internal * \brief Defuse short exit timer if shutting down */ void handle_shutdown_ack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("Received shutdown ack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = 0; } return; } #endif crm_debug("Ignoring unexpected shutdown ack"); } /*! * \internal * \brief Make short exit timer fire immediately */ void handle_shutdown_nack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("Received shutdown nack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL); } return; } #endif crm_debug("Ignoring unexpected shutdown nack"); } static GOptionEntry entries[] = { { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, "Send logs to the additional named logfile", NULL }, #ifdef PCMK__COMPILE_REMOTE { "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port, "Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL }, #endif // PCMK__COMPILE_REMOTE { NULL } }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv, char **envp) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; const char *option = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = NULL; gchar **processed_args = NULL; GOptionContext *context = NULL; #ifdef PCMK__COMPILE_REMOTE // If necessary, create PID 1 now before any file descriptors are opened remoted_spawn_pidone(argc, argv, envp); #endif args = pcmk__new_common_args(SUMMARY); #ifdef PCMK__COMPILE_REMOTE processed_args = pcmk__cmdline_preproc(argv, "lp"); #else processed_args = pcmk__cmdline_preproc(argv, "l"); #endif // PCMK__COMPILE_REMOTE context = build_arg_context(args, &output_group); crm_log_preinit(EXECD_NAME, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } // Open additional log files if (options.log_files != NULL) { for (gchar **fname = options.log_files; *fname != NULL; fname++) { rc = pcmk__add_logfile(*fname); if (rc != pcmk_rc_ok) { out->err(out, "Logging to %s is disabled: %s", *fname, pcmk_rc_str(rc)); } } } pcmk__cli_init_logging(EXECD_NAME, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); // ocf_log() (in resource-agents) uses the capitalized env options below option = pcmk__env_option(PCMK__ENV_LOGFACILITY); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches) && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) { pcmk__set_env_option("LOGFACILITY", option, true); } option = pcmk__env_option(PCMK__ENV_LOGFILE); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__set_env_option("LOGFILE", option, true); if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) { pcmk__set_env_option("DEBUGLOG", option, true); } } #ifdef PCMK__COMPILE_REMOTE if (options.port != NULL) { pcmk__set_env_option(PCMK__ENV_REMOTE_PORT, options.port, false); } #endif // PCMK__COMPILE_REMOTE start_time = time(NULL); crm_notice("Starting Pacemaker " EXECD_TYPE " executor"); /* The presence of this variable allegedly controls whether child * processes like httpd will try and use Systemd's sd_notify * API */ unsetenv("NOTIFY_SOCKET"); { // Temporary directory for resource agent use (leave owned by root) int rc = pcmk__build_path(CRM_RSCTMP_DIR, 0755); if (rc != pcmk_rc_ok) { crm_warn("Could not create resource agent temporary directory " CRM_RSCTMP_DIR ": %s", pcmk_rc_str(rc)); } } rsc_list = pcmk__strkey_table(NULL, free_rsc); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); exit_code = CRM_EX_FATAL; goto done; } #ifdef PCMK__COMPILE_REMOTE if (lrmd_init_remote_tls_server() < 0) { crm_err("Failed to create TLS listener: shutting down and staying down"); exit_code = CRM_EX_FATAL; goto done; } ipc_proxy_init(); #endif mainloop_add_signal(SIGTERM, lrmd_shutdown); mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections"); crm_notice("OCF resource agent search path is %s", OCF_RA_PATH); g_main_loop_run(mainloop); /* should never get here */ lrmd_exit(NULL); done: g_strfreev(options.log_files); #ifdef PCMK__COMPILE_REMOTE g_free(options.port); #endif // PCMK__COMPILE_REMOTE g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/execd/remoted_pidone.c b/daemons/execd/remoted_pidone.c index 0a6c251066..187de51d04 100644 --- a/daemons/execd/remoted_pidone.c +++ b/daemons/execd/remoted_pidone.c @@ -1,302 +1,303 @@ /* * Copyright 2017-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" static pid_t main_pid = 0; static void sigdone(void) { crm_exit(CRM_EX_OK); } static void sigreap(void) { pid_t pid = 0; int status; do { /* * Opinions seem to differ as to what to put here: * -1, any child process * 0, any child process whose process group ID is equal to that of the calling process */ pid = waitpid(-1, &status, WNOHANG); if (pid == main_pid) { /* Exit when pacemaker-remote exits and use the same return code */ if (WIFEXITED(status)) { crm_exit(WEXITSTATUS(status)); } crm_exit(CRM_EX_ERROR); } } while (pid > 0); } static struct { int sig; void (*handler)(void); } sigmap[] = { { SIGCHLD, sigreap }, { SIGINT, sigdone }, }; /*! * \internal * \brief Check a line of text for a valid environment variable name * * \param[in] line Text to check * \param[out] first First character of valid name if found, NULL otherwise * \param[out] last Last character of valid name if found, NULL otherwise * * \return TRUE if valid name found, FALSE otherwise * \note It's reasonable to impose limitations on environment variable names * beyond what C or setenv() does: We only allow names that contain only * [a-zA-Z0-9_] characters and do not start with a digit. */ static bool find_env_var_name(char *line, char **first, char **last) { // Skip leading whitespace *first = line; while (isspace(**first)) { ++*first; } if (isalpha(**first) || (**first == '_')) { // Valid first character *last = *first; while (isalnum(*(*last + 1)) || (*(*last + 1) == '_')) { ++*last; } return TRUE; } *first = *last = NULL; return FALSE; } static void load_env_vars(const char *filename) { /* We haven't forked or initialized logging yet, so don't leave any file * descriptors open, and don't log -- silently ignore errors. */ FILE *fp = fopen(filename, "r"); if (fp != NULL) { char line[LINE_MAX] = { '\0', }; while (fgets(line, LINE_MAX, fp) != NULL) { char *name = NULL; char *end = NULL; char *value = NULL; char *quote = NULL; // Look for valid name immediately followed by equals sign if (find_env_var_name(line, &name, &end) && (*++end == '=')) { // Null-terminate name, and advance beyond equals sign *end++ = '\0'; // Check whether value is quoted if ((*end == '\'') || (*end == '"')) { quote = end++; } value = end; if (quote) { /* Value is remaining characters up to next non-backslashed * matching quote character. */ while (((*end != *quote) || (*(end - 1) == '\\')) && (*end != '\0')) { end++; } if (*end == *quote) { // Null-terminate value, and advance beyond close quote *end++ = '\0'; } else { // Matching closing quote wasn't found value = NULL; } } else { /* Value is remaining characters up to next non-backslashed * whitespace. */ while ((!isspace(*end) || (*(end - 1) == '\\')) && (*end != '\0')) { ++end; } if (end == (line + LINE_MAX - 1)) { // Line was too long value = NULL; } // Do NOT null-terminate value (yet) } /* We have a valid name and value, and end is now the character * after the closing quote or the first whitespace after the * unquoted value. Make sure the rest of the line is just * whitespace or a comment. */ if (value) { char *value_end = end; while (isspace(*end) && (*end != '\n')) { ++end; } if ((*end == '\n') || (*end == '#')) { if (quote == NULL) { // Now we can null-terminate an unquoted value *value_end = '\0'; } // Don't overwrite (bundle options take precedence) setenv(name, value, 0); } else { value = NULL; } } } if ((value == NULL) && (strchr(line, '\n') == NULL)) { // Eat remainder of line beyond LINE_MAX if (fscanf(fp, "%*[^\n]\n") == EOF) { value = NULL; // Don't care, make compiler happy } } } fclose(fp); } } void remoted_spawn_pidone(int argc, char **argv, char **envp) { sigset_t set; /* This environment variable exists for two purposes: * - For testing, setting it to "full" enables full PID 1 behavior even * when PID is not 1 * - Setting to "vars" enables just the loading of environment variables * from /etc/pacemaker/pcmk-init.env, which could be useful for testing or - * containers with a custom PID 1 script that launches pacemaker-remoted. + * containers with a custom PID 1 script that launches the remote + * executor. */ const char *pid1 = PCMK_VALUE_DEFAULT; if (getpid() != 1) { pid1 = pcmk__env_option(PCMK__ENV_REMOTE_PID1); if (!pcmk__str_any_of(pid1, "full", "vars", NULL)) { // Default, unset, or invalid return; } } /* When a container is launched, it may be given specific environment * variables, which for Pacemaker bundles are given in the bundle * configuration. However, that does not allow for host-specific values. * To allow for that, look for a special file containing a shell-like syntax * of name/value pairs, and export those into the environment. */ load_env_vars("/etc/pacemaker/pcmk-init.env"); if (strcmp(pid1, "vars") == 0) { return; } /* Containers can be expected to have /var/log, but they may not have * /var/log/pacemaker, so use a different default if no value has been * explicitly configured in the container's environment. */ if (pcmk__env_option(PCMK__ENV_LOGFILE) == NULL) { pcmk__set_env_option(PCMK__ENV_LOGFILE, "/var/log/pcmk-init.log", true); } sigfillset(&set); sigprocmask(SIG_BLOCK, &set, 0); main_pid = fork(); switch (main_pid) { case 0: sigprocmask(SIG_UNBLOCK, &set, NULL); setsid(); setpgid(0, 0); // Child remains as pacemaker-remoted return; case -1: crm_err("fork failed: %s", pcmk_rc_str(errno)); } /* Parent becomes the reaper of zombie processes */ /* Safe to initialize logging now if needed */ # ifdef HAVE_PROGNAME /* Differentiate ourselves in the 'ps' output */ { char *p; int i, maxlen; char *LastArgv = NULL; const char *name = "pcmk-init"; for (i = 0; i < argc; i++) { if (!i || (LastArgv + 1 == argv[i])) LastArgv = argv[i] + strlen(argv[i]); } for (i = 0; envp[i] != NULL; i++) { if ((LastArgv + 1) == envp[i]) { LastArgv = envp[i] + strlen(envp[i]); } } maxlen = (LastArgv - argv[0]) - 2; i = strlen(name); /* We can overwrite individual argv[] arguments */ snprintf(argv[0], maxlen, "%s", name); /* Now zero out everything else */ p = &argv[0][i]; while (p < LastArgv) { *p++ = '\0'; } argv[1] = NULL; } # endif // HAVE_PROGNAME while (1) { int sig; size_t i; sigwait(&set, &sig); for (i = 0; i < PCMK__NELEM(sigmap); i++) { if (sigmap[i].sig == sig) { sigmap[i].handler(); break; } } } } diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 94b1428e8e..9dc36d62ce 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,676 +1,677 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster" long long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static struct { bool no_cib_connect; gchar **log_files; } options; crm_exit_t exit_code = CRM_EX_OK; static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL); return 0; } op = crm_element_value(request, PCMK__XA_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(request, PCMK__XA_ST_OP, op); crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, request); pcmk__xml_free(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid); } crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options); crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); crm_log_xml_trace(request, "ipc-received"); stonith_command(c, id, flags, request, NULL); pcmk__xml_free(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC); const char *op = crm_element_value(msg, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, PCMK__XA_SRC, from); stonith_peer_callback(xml, NULL); pcmk__xml_free(xml); free(data); } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; int rid = 0; uint32_t ipc_flags = crm_ipc_server_event; if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_LOG_ASSERT(client->request_id); rid = client->request_id; client->request_id = 0; ipc_flags = crm_ipc_flags_none; } local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); if (local_rc == pcmk_rc_ok) { crm_trace("Sent response %d to client %s", rid, pcmk__client_name(client)); } else { crm_warn("%synchronous reply to client %s failed: %s", (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"), pcmk__client_name(client), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, pcmk__str_none)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, PCMK__XA_SUBT); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " QB_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id); crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } pcmk__xml_free(notify_data); } /*! * \internal * \brief Notify relevant IPC clients of a fencing operation result * * \param[in] type Notification type * \param[in] result Result of fencing operation (assume success if NULL) * \param[in] data If not NULL, add to notification as call data */ void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); CRM_LOG_ASSERT(type != NULL); crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(update_msg, PCMK__XA_SUBT, type); crm_xml_add(update_msg, PCMK__XA_ST_OP, type); stonith__xe_set_result(update_msg, result); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); pcmk__xml_free(update_msg); crm_trace("Notify complete"); } /*! * \internal * \brief Send notifications for a configuration change to subscribed clients * * \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD, * \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or * \c STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc Description of what changed (either device ID or string * representation of level * ([])) */ void fenced_send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { xmlNode *notify_data = pcmk__xe_create(NULL, op); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, desc); fenced_send_notification(op, result, notify_data); pcmk__xml_free(notify_data); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } } static void stonith_cleanup(void) { fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } pcmk__cluster_destroy_node_caches(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); fenced_unregister_handlers(); } static gboolean stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { stand_alone = FALSE; options.no_cib_connect = true; return TRUE; } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, const void *data) { if ((type != pcmk__node_update_processes) && !pcmk_is_set(node->flags, pcmk__node_status_remote)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE); crm_debug("Broadcasting our uname because of node %" PRIu32, node->cluster_layer_id); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, query); pcmk__xml_free(query); } } /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or * crm_resource --list-options=fencing instead of querying daemon metadata. * * NOTE: pcs (as of at least 0.11.8) uses this */ static int fencer_metadata(void) { - const char *name = "pacemaker-fenced"; + const char *name = PCMK__SERVER_FENCED; const char *desc_short = N_("Instance attributes available for all " "\"stonith\"-class resources"); const char *desc_long = N_("Instance attributes available for all " "\"stonith\"-class resources and used by " "Pacemaker's fence daemon"); return pcmk__daemon_metadata(out, name, desc_short, desc_long, pcmk__opt_fencing); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, N_("Deprecated (will be removed in a future release)"), NULL }, { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; pcmk_cluster_t *cluster = NULL; crm_ipc_t *old_instance = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = fencer_metadata(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } // Open additional log files pcmk__add_logfiles(options.log_files, out); crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE, (args->verbosity > 0), argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); - crm_err("pacemaker-fenced is already active, aborting startup"); + crm_crit("Aborting start-up because another fencer instance is " + "already active"); goto done; } else { // Not up or not authentic, we'll proceed either way crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); pcmk__cluster_init_node_caches(); rc = fenced_scheduler_init(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error initializing scheduler data: %s", pcmk_rc_str(rc)); goto done; } cluster = pcmk_cluster_new(); if (!stand_alone) { #if SUPPORT_COROSYNC if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy); pcmk_cpg_set_deliver_fn(cluster, stonith_peer_ais_callback); pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb); } #endif // SUPPORT_COROSYNC pcmk__cluster_set_status_callback(&st_peer_update_callback); if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; crm_crit("Cannot sign in to the cluster... terminating"); goto done; } fenced_set_local_node(cluster->priv->node_name); if (!options.no_cib_connect) { setup_cib(); } } else { fenced_set_local_node("localhost"); crm_warn("Stand-alone mode is deprecated and will be removed " "in a future release"); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); // Create the mainloop and run it... mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); g_strfreev(options.log_files); stonith_cleanup(); pcmk_cluster_free(cluster); fenced_scheduler_cleanup(); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index 5f28ed5645..1beaf8e117 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -1,486 +1,486 @@ /* * Copyright 2010-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #if SUPPORT_COROSYNC #include "pcmkd_corosync.h" #endif #include #include #include #include #include #include #include #include #include #include /* indirectly: CRM_EX_* */ #include #include #include #include #include #include #include #define SUMMARY "pacemakerd - primary Pacemaker daemon that launches and monitors all subsidiary Pacemaker daemons" struct { gboolean features; gboolean foreground; gboolean shutdown; gboolean standby; } options; static pcmk__output_t *out = NULL; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; PCMK__OUTPUT_ARGS("features") static int pacemakerd_features(pcmk__output_t *out, va_list args) { out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("features") static int pacemakerd_features_xml(pcmk__output_t *out, va_list args) { gchar **feature_list = g_strsplit(CRM_FEATURES, " ", 0); pcmk__output_xml_create_parent(out, PCMK_XE_PACEMAKERD, PCMK_XA_VERSION, PACEMAKER_VERSION, PCMK_XA_BUILD, BUILD_VERSION, PCMK_XA_FEATURE_SET, CRM_FEATURE_SET, NULL); out->begin_list(out, NULL, NULL, PCMK_XE_FEATURES); for (char **s = feature_list; *s != NULL; s++) { pcmk__output_create_xml_text_node(out, PCMK_XE_FEATURE, *s); } out->end_list(out); pcmk__output_xml_pop_parent(out); g_strfreev(feature_list); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "features", "default", pacemakerd_features }, { "features", "xml", pacemakerd_features_xml }, { NULL, NULL, NULL } }; static gboolean pid_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return TRUE; } static gboolean standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.standby = TRUE; pcmk__set_env_option(PCMK__ENV_NODE_START_STATE, PCMK_VALUE_STANDBY, false); return TRUE; } static GOptionEntry entries[] = { { "features", 'F', 0, G_OPTION_ARG_NONE, &options.features, "Display full version and list of features Pacemaker was built with", NULL }, { "foreground", 'f', 0, G_OPTION_ARG_NONE, &options.foreground, "(Ignored) Pacemaker always runs in the foreground", NULL }, { "pid-file", 'p', 0, G_OPTION_ARG_CALLBACK, pid_cb, "(Ignored) Daemon pid file location", "FILE" }, { "shutdown", 'S', 0, G_OPTION_ARG_NONE, &options.shutdown, "Instruct Pacemaker to shutdown on this machine", NULL }, { "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, standby_cb, "Start node in standby state", NULL }, { NULL } }; static void pcmk_ignore(int nsig) { crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig); } static void pcmk_sigquit(int nsig) { pcmk__panic(__func__); } static void pacemakerd_chown(const char *path, uid_t uid, gid_t gid) { int rc = chown(path, uid, gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s", path, CRM_DAEMON_USER, gid, pcmk_rc_str(errno)); } } static void create_pcmk_dirs(void) { uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; const char *dirs[] = { CRM_PACEMAKER_DIR, // core/blackbox/scheduler/CIB files CRM_CORE_DIR, // core files CRM_BLACKBOX_DIR, // blackbox dumps PE_STATE_DIR, // scheduler inputs CRM_CONFIG_DIR, // the Cluster Information Base (CIB) - // Don't build CRM_RSCTMP_DIR, pacemaker-execd will do it + // Don't build CRM_RSCTMP_DIR, the executor will do it NULL }; if (pcmk_daemon_user(&pcmk_uid, &pcmk_gid) < 0) { crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); crm_exit(CRM_EX_NOUSER); } // Used by some resource agents if ((mkdir(CRM_STATE_DIR, 0750) < 0) && (errno != EEXIST)) { crm_warn("Could not create directory " CRM_STATE_DIR ": %s", pcmk_rc_str(errno)); } else { pacemakerd_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); } for (int i = 0; dirs[i] != NULL; ++i) { int rc = pcmk__build_path(dirs[i], 0750); if (rc != pcmk_rc_ok) { crm_warn("Could not create directory %s: %s", dirs[i], pcmk_rc_str(rc)); } else { pacemakerd_chown(dirs[i], pcmk_uid, pcmk_gid); } } } static void remove_core_file_limit(void) { struct rlimit cores; // Get current limits if (getrlimit(RLIMIT_CORE, &cores) < 0) { crm_notice("Unable to check system core file limits " "(consider ensuring the size is unlimited): %s", strerror(errno)); return; } // Check whether core dumps are disabled if (cores.rlim_max == 0) { if (geteuid() != 0) { // Yes, and there's nothing we can do about it crm_notice("Core dumps are disabled (consider enabling them)"); return; } cores.rlim_max = RLIM_INFINITY; // Yes, but we're root, so enable them } // Raise soft limit to hard limit (if not already done) if (cores.rlim_cur != cores.rlim_max) { cores.rlim_cur = cores.rlim_max; if (setrlimit(RLIMIT_CORE, &cores) < 0) { crm_notice("Unable to raise system limit on core file size " "(consider doing so manually): %s", strerror(errno)); return; } } if (cores.rlim_cur == RLIM_INFINITY) { crm_trace("Core file size is unlimited"); } else { crm_trace("Core file size is limited to %llu bytes", (unsigned long long) cores.rlim_cur); } } static void pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { pcmk_pacemakerd_api_reply_t *reply = event_data; switch (event_type) { case pcmk_ipc_event_reply: break; default: return; } if (status != CRM_EX_OK) { out->err(out, "Bad reply from pacemakerd: %s", crm_exit_str(status)); return; } if (reply->reply_type != pcmk_pacemakerd_reply_shutdown) { out->err(out, "Unknown reply type %d from pacemakerd", reply->reply_type); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "p"); GOptionContext *context = build_arg_context(args, &output_group); bool old_instance_connected = false; pcmk_ipc_api_t *old_instance = NULL; qb_ipcs_service_t *ipcs = NULL; subdaemon_check_progress = time(NULL); setenv("LC_ALL", "C", 1); // Ensure logs are in a common language crm_log_preinit(NULL, argc, argv); mainloop_add_signal(SIGHUP, pcmk_ignore); mainloop_add_signal(SIGQUIT, pcmk_sigquit); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } pcmk__register_messages(out, fmt_functions); if (options.features) { out->message(out, "features"); exit_code = CRM_EX_OK; goto done; } if (args->version) { out->version(out, false); goto done; } if (options.shutdown) { - pcmk__cli_init_logging("pacemakerd", args->verbosity); + pcmk__cli_init_logging(PCMK__SERVER_PACEMAKERD, args->verbosity); } else { crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); } crm_debug("Checking for existing Pacemaker instance"); rc = pcmk_new_ipc_api(&old_instance, pcmk_ipc_pacemakerd); if (old_instance == NULL) { out->err(out, "Could not check for existing pacemakerd: %s", pcmk_rc_str(rc)); exit_code = pcmk_rc2exitc(rc); goto done; } pcmk_register_ipc_callback(old_instance, pacemakerd_event_cb, NULL); rc = pcmk__connect_ipc(old_instance, pcmk_ipc_dispatch_sync, 2); if (rc != pcmk_rc_ok) { crm_debug("No existing %s instance found: %s", pcmk_ipc_name(old_instance, true), pcmk_rc_str(rc)); } old_instance_connected = pcmk_ipc_is_connected(old_instance); if (options.shutdown) { if (old_instance_connected) { rc = pcmk_pacemakerd_api_shutdown(old_instance, crm_system_name); pcmk_dispatch_ipc(old_instance); exit_code = pcmk_rc2exitc(rc); if (exit_code != CRM_EX_OK) { pcmk_free_ipc_api(old_instance); goto done; } /* We get the ACK immediately, and the response right after that, * but it might take a while for pacemakerd to get around to * shutting down. Wait for that to happen (with 30-minute timeout). */ for (int i = 0; i < 900; i++) { if (!pcmk_ipc_is_connected(old_instance)) { exit_code = CRM_EX_OK; pcmk_free_ipc_api(old_instance); goto done; } sleep(2); } exit_code = CRM_EX_TIMEOUT; pcmk_free_ipc_api(old_instance); goto done; } else { out->err(out, "Could not request shutdown " "of existing Pacemaker instance: %s", pcmk_rc_str(rc)); pcmk_free_ipc_api(old_instance); exit_code = CRM_EX_DISCONNECT; goto done; } } else if (old_instance_connected) { pcmk_free_ipc_api(old_instance); crm_err("Aborting start-up because active Pacemaker instance found"); exit_code = CRM_EX_FATAL; goto done; } pcmk_free_ipc_api(old_instance); /* Don't allow any accidental output after this point. */ if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); out = NULL; } #ifdef SUPPORT_COROSYNC if (pacemakerd_read_config() == FALSE) { crm_exit(CRM_EX_UNAVAILABLE); } #endif // OCF shell functions and cluster-glue need facility under different name { const char *facility = pcmk__env_option(PCMK__ENV_LOGFACILITY); if (!pcmk__str_eq(facility, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__set_env_option("LOGFACILITY", facility, true); } } crm_notice("Starting Pacemaker %s " QB_XS " build=%s features:%s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); mainloop = g_main_loop_new(NULL, FALSE); remove_core_file_limit(); create_pcmk_dirs(); pcmk__serve_pacemakerd_ipc(&ipcs, &pacemakerd_ipc_callbacks); #ifdef SUPPORT_COROSYNC /* Allows us to block shutdown */ if (!cluster_connect_cfg()) { exit_code = CRM_EX_PROTOCOL; goto done; } #endif if (pcmk__locate_sbd() > 0) { running_with_sbd = TRUE; } switch (find_and_track_existing_processes()) { case pcmk_rc_ok: break; case pcmk_rc_ipc_unauthorized: exit_code = CRM_EX_CANTCREAT; goto done; default: exit_code = CRM_EX_FATAL; goto done; }; mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) { crm_notice("Waiting for startup-trigger from SBD."); pacemakerd_state = PCMK__VALUE_WAIT_FOR_PING; startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); } else { if (running_with_sbd) { crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported " "by your SBD version) improve reliability of " "interworking between SBD & pacemaker."); } pacemakerd_state = PCMK__VALUE_STARTING_DAEMONS; init_children_processes(NULL); } crm_notice("Pacemaker daemon successfully started and accepting connections"); g_main_loop_run(mainloop); if (ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } g_main_loop_unref(mainloop); #ifdef SUPPORT_COROSYNC cluster_disconnect_cfg(); #endif done: g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c index 5ead43f3c1..999946abf9 100644 --- a/daemons/pacemakerd/pcmkd_subdaemons.c +++ b/daemons/pacemakerd/pcmkd_subdaemons.c @@ -1,875 +1,886 @@ /* * Copyright 2010-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #if SUPPORT_COROSYNC #include "pcmkd_corosync.h" #endif #include #include #include #include #include #include #include #include #include #include #include enum child_daemon_flags { child_none = 0, child_respawn = 1 << 0, child_needs_cluster = 1 << 1, child_needs_retry = 1 << 2, child_active_before_startup = 1 << 3, }; typedef struct pcmk_child_s { + enum pcmk_ipc_server server; pid_t pid; int respawn_count; - const char *name; const char *uid; - const char *command; - const char *endpoint; /* IPC server name */ int check_count; uint32_t flags; } pcmk_child_t; #define PCMK_PROCESS_CHECK_INTERVAL 1 #define PCMK_PROCESS_CHECK_RETRIES 5 #define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ /* Index into the array below */ #define PCMK_CHILD_CONTROLD 5 static pcmk_child_t pcmk_children[] = { { - 0, 0, "pacemaker-based", CRM_DAEMON_USER, - CRM_DAEMON_DIR "/pacemaker-based", PCMK__SERVER_BASED_RO, + pcmk_ipc_based, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, { - 0, 0, "pacemaker-fenced", NULL, - CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng", + pcmk_ipc_fenced, 0, 0, NULL, 0, child_respawn | child_needs_cluster }, { - 0, 0, "pacemaker-execd", NULL, - CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD, + pcmk_ipc_execd, 0, 0, NULL, 0, child_respawn }, { - 0, 0, "pacemaker-attrd", CRM_DAEMON_USER, - CRM_DAEMON_DIR "/pacemaker-attrd", PCMK__VALUE_ATTRD, + pcmk_ipc_attrd, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, { - 0, 0, "pacemaker-schedulerd", CRM_DAEMON_USER, - CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE, + pcmk_ipc_schedulerd, 0, 0, CRM_DAEMON_USER, 0, child_respawn }, { - 0, 0, "pacemaker-controld", CRM_DAEMON_USER, - CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD, + pcmk_ipc_controld, 0, 0, CRM_DAEMON_USER, 0, child_respawn | child_needs_cluster }, }; static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; crm_trigger_t *shutdown_trigger = NULL; crm_trigger_t *startup_trigger = NULL; time_t subdaemon_check_progress = 0; // Whether we need root group access to talk to cluster layer static bool need_root_group = true; /* When contacted via pacemakerd-api by a client having sbd in * the name we assume it is sbd-daemon which wants to know * if pacemakerd shutdown gracefully. * Thus when everything is shutdown properly pacemakerd * waits till it has reported the graceful completion of * shutdown to sbd and just when sbd-client closes the * connection we can assume that the report has arrived * properly so that pacemakerd can finally exit. * Following two variables are used to track that handshake. */ unsigned int shutdown_complete_state_reported_to = 0; gboolean shutdown_complete_state_reported_client_closed = FALSE; /* state we report when asked via pacemakerd-api status-ping */ const char *pacemakerd_state = PCMK__VALUE_INIT; gboolean running_with_sbd = FALSE; /* local copy */ GMainLoop *mainloop = NULL; static gboolean fatal_error = FALSE; static int child_liveness(pcmk_child_t *child); static gboolean escalate_shutdown(gpointer data); static int start_child(pcmk_child_t * child); static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); static void pcmk_process_exit(pcmk_child_t * child); static gboolean pcmk_shutdown_worker(gpointer user_data); static gboolean stop_child(pcmk_child_t * child, int signal); +/*! + * \internal + * \brief Get path to subdaemon executable + * + * \param[in] subdaemon Subdaemon to get path for + * + * \return Newly allocated string with path to subdaemon executable + * \note It is the caller's responsibility to free() the return value + */ +static inline char * +subdaemon_path(pcmk_child_t *subdaemon) +{ + return crm_strdup_printf(CRM_DAEMON_DIR "/%s", + pcmk__server_name(subdaemon->server)); +} + static bool pcmkd_cluster_connected(void) { #if SUPPORT_COROSYNC return pcmkd_corosync_connected(); #else return true; #endif } static gboolean check_next_subdaemon(gpointer user_data) { static int next_child = 0; pcmk_child_t *child = &(pcmk_children[next_child]); + const char *name = pcmk__server_name(child->server); const long long pid = PCMK__SPECIAL_PID_AS_0(child->pid); int rc = child_liveness(child); - crm_trace("Checked %s[%lld]: %s (%d)", - child->name, pid, pcmk_rc_str(rc), rc); + crm_trace("Checked subdaemon %s[%lld]: %s (%d)", + name, pid, pcmk_rc_str(rc), rc); switch (rc) { case pcmk_rc_ok: child->check_count = 0; subdaemon_check_progress = time(NULL); break; case pcmk_rc_ipc_pid_only: // Child was previously OK if (++(child->check_count) >= PCMK_PROCESS_CHECK_RETRIES) { - crm_crit("%s[%lld] is unresponsive to IPC after %d attempt%s " - "and will now be killed", - child->name, pid, child->check_count, + // cts-lab looks for this message + crm_crit("Subdaemon %s[%lld] is unresponsive to IPC " + "after %d attempt%s and will now be killed", + name, pid, child->check_count, pcmk__plural_s(child->check_count)); stop_child(child, SIGKILL); if (pcmk_is_set(child->flags, child_respawn)) { // Respawn limit hasn't been reached, so retry another round child->check_count = 0; } } else { - crm_notice("%s[%lld] is unresponsive to IPC after %d attempt%s", - child->name, pid, child->check_count, + crm_notice("Subdaemon %s[%lld] is unresponsive to IPC " + "after %d attempt%s (will recheck later)", + name, pid, child->check_count, pcmk__plural_s(child->check_count)); if (pcmk_is_set(child->flags, child_respawn)) { /* as long as the respawn-limit isn't reached and we haven't run out of connect retries we account this as progress we are willing to tell to sbd */ subdaemon_check_progress = time(NULL); } } /* go to the next child and see if we can make progress there */ break; case pcmk_rc_ipc_unresponsive: if (!pcmk_is_set(child->flags, child_respawn)) { /* if a subdaemon is down and we don't want it to be restarted this is a success during shutdown. if it isn't restarted anymore due to MAX_RESPAWN it is rather no success. */ if (child->respawn_count <= MAX_RESPAWN) { subdaemon_check_progress = time(NULL); } } if (!pcmk_is_set(child->flags, child_active_before_startup)) { - crm_trace("%s[%lld] terminated (relying on SIGCHLD handler)", - child->name, pid); + crm_trace("Subdaemon %s[%lld] terminated", name, pid); break; } if (pcmk_is_set(child->flags, child_respawn)) { - crm_err("%s[%lld] terminated", child->name, pid); + // cts-lab looks for this message + crm_err("Subdaemon %s[%lld] terminated", name, pid); } else { /* orderly shutdown */ - crm_notice("%s[%lld] terminated", child->name, pid); + crm_notice("Subdaemon %s[%lld] terminated", name, pid); } pcmk_process_exit(child); break; default: crm_exit(CRM_EX_FATAL); break; /* static analysis/noreturn */ } if (++next_child >= PCMK__NELEM(pcmk_children)) { next_child = 0; } return G_SOURCE_CONTINUE; } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if (child->pid == PCMK__SPECIAL_PID) { pcmk_process_exit(child); } else if (child->pid != 0) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ - crm_err("Child %s not terminating in a timely manner, forcing", child->name); + crm_err("Subdaemon %s not terminating in a timely manner, forcing", + pcmk__server_name(child->server)); stop_child(child, SIGSEGV); } return FALSE; } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo) { + // cts-lab looks for this message do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR), "%s[%d] terminated with signal %d (%s)%s", name, pid, signo, strsignal(signo), (core? " and dumped core" : "")); } else { switch(exitcode) { case CRM_EX_OK: crm_info("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; case CRM_EX_FATAL: crm_warn("Shutting cluster down because %s[%d] had fatal failure", name, pid); child->flags &= ~child_respawn; fatal_error = TRUE; pcmk_shutdown(SIGTERM); break; case CRM_EX_PANIC: crm_emerg("%s[%d] instructed the machine to reset", name, pid); child->flags &= ~child_respawn; fatal_error = TRUE; pcmk__panic(__func__); pcmk_shutdown(SIGTERM); break; default: + // cts-lab looks for this message crm_err("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; } } pcmk_process_exit(child); } static void pcmk_process_exit(pcmk_child_t * child) { + const char *name = pcmk__server_name(child->server); child->pid = 0; child->flags &= ~child_active_before_startup; child->check_count = 0; child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { - crm_err("Child respawn count exceeded by %s", child->name); + crm_err("Subdaemon %s exceeded maximum respawn count", name); child->flags &= ~child_respawn; } if (shutdown_trigger) { /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ mainloop_set_trigger(shutdown_trigger); } else if (!pcmk_is_set(child->flags, child_respawn)) { /* nothing to do */ } else if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { - crm_err("Rebooting system because of %s", child->name); + crm_err("Rebooting system because of subdaemon %s failure", name); pcmk__panic(__func__); } else if (child_liveness(child) == pcmk_rc_ok) { - crm_warn("One-off suppressing strict respawning of a child process %s," - " appears alright per %s IPC end-point", - child->name, child->endpoint); + crm_warn("Not respawning subdaemon %s because IPC endpoint %s is OK", + name, pcmk__server_ipc_name(child->server)); } else if (pcmk_is_set(child->flags, child_needs_cluster) && !pcmkd_cluster_connected()) { - crm_notice("Not respawning %s subdaemon until cluster returns", - child->name); + crm_notice("Not respawning subdaemon %s until cluster returns", name); child->flags |= child_needs_retry; } else { - crm_notice("Respawning %s subdaemon after unexpected exit", - child->name); + // cts-lab looks for this message + crm_notice("Respawning subdaemon %s after unexpected exit", name); start_child(child); } } static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = PCMK__NELEM(pcmk_children) - 1; static time_t next_log = 0; if (phase == PCMK__NELEM(pcmk_children) - 1) { crm_notice("Shutting down Pacemaker"); pacemakerd_state = PCMK__VALUE_SHUTTING_DOWN; } for (; phase >= 0; phase--) { pcmk_child_t *child = &(pcmk_children[phase]); + const char *name = pcmk__server_name(child->server); if (child->pid != 0) { time_t now = time(NULL); if (pcmk_is_set(child->flags, child_respawn)) { if (child->pid == PCMK__SPECIAL_PID) { - crm_warn("The process behind %s IPC cannot be" - " terminated, so either wait the graceful" - " period of %ld s for its native termination" - " if it vitally depends on some other daemons" - " going down in a controlled way already," - " or locate and kill the correct %s process" - " on your own; set PCMK_" PCMK__ENV_FAIL_FAST "=1" - " to avoid this altogether next time around", - child->name, (long) SHUTDOWN_ESCALATION_PERIOD, - child->command); + crm_warn("Subdaemon %s cannot be terminated (shutdown " + "will be escalated after %ld seconds if it does " + "not terminate on its own; set PCMK_" + PCMK__ENV_FAIL_FAST "=1 to exit immediately " + "instead)", + name, (long) SHUTDOWN_ESCALATION_PERIOD); } next_log = now + 30; child->flags &= ~child_respawn; stop_child(child, SIGTERM); if (phase < PCMK_CHILD_CONTROLD) { g_timeout_add(SHUTDOWN_ESCALATION_PERIOD, escalate_shutdown, child); } } else if (now >= next_log) { next_log = now + 30; - crm_notice("Still waiting for %s to terminate " - QB_XS " pid=%lld", - child->name, (long long) child->pid); + crm_notice("Still waiting for subdaemon %s to terminate " + QB_XS " pid=%lld", name, (long long) child->pid); } return TRUE; } /* cleanup */ - crm_debug("%s confirmed stopped", child->name); + crm_debug("Subdaemon %s confirmed stopped", name); child->pid = 0; } crm_notice("Shutdown complete"); pacemakerd_state = PCMK__VALUE_SHUTDOWN_COMPLETE; if (!fatal_error && running_with_sbd && pcmk__get_sbd_sync_resource_startup() && !shutdown_complete_state_reported_client_closed) { crm_notice("Waiting for SBD to pick up shutdown-complete-state."); return TRUE; } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Shutting down and staying down after fatal error"); #ifdef SUPPORT_COROSYNC pcmkd_shutdown_corosync(); #endif crm_exit(CRM_EX_FATAL); } return TRUE; } /* TODO once libqb is taught to juggle with IPC end-points carried over as bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) it shall hand over these descriptors here if/once they are successfully pre-opened in (presumably) child_liveness(), to avoid any remaining room for races */ // \return Standard Pacemaker return code static int start_child(pcmk_child_t * child) { uid_t uid = 0; gid_t gid = 0; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; + const char *name = pcmk__server_name(child->server); const char *env_valgrind = pcmk__env_option(PCMK__ENV_VALGRIND_ENABLED); const char *env_callgrind = pcmk__env_option(PCMK__ENV_CALLGRIND_ENABLED); child->flags &= ~child_active_before_startup; child->check_count = 0; - if (child->command == NULL) { - crm_info("Nothing to do for child \"%s\"", child->name); - return pcmk_rc_ok; - } - if (env_callgrind != NULL && crm_is_true(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; - } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { + } else if ((env_callgrind != NULL) + && (strstr(env_callgrind, name) != NULL)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { use_valgrind = TRUE; - } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { + } else if ((env_valgrind != NULL) + && (strstr(env_valgrind, name) != NULL)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { - crm_warn("Cannot enable valgrind for %s:" - " The location of the valgrind binary is unknown", child->name); + crm_warn("Cannot enable valgrind for subdaemon %s: valgrind not found", + name); use_valgrind = FALSE; } if (child->uid) { if (crm_user_lookup(child->uid, &uid, &gid) < 0) { - crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); + crm_err("Invalid user (%s) for subdaemon %s: not found", + child->uid, name); return EACCES; } - crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); + crm_info("Using uid %lu and group %lu for subdaemon %s", + (unsigned long) uid, (unsigned long) gid, name); } child->pid = fork(); CRM_ASSERT(child->pid != -1); if (child->pid > 0) { /* parent */ - mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); + mainloop_child_add(child->pid, 0, name, child, pcmk_child_exit); - crm_info("Forked child %lld for process %s%s", - (long long) child->pid, child->name, + crm_info("Forked process %lld for subdaemon %s%s", + (long long) child->pid, name, use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); return pcmk_rc_ok; } else { /* Start a new session */ (void)setsid(); /* Setup the two alternate arg arrays */ opts_vgrind[0] = pcmk__str_copy(VALGRIND_BIN); if (use_callgrind) { opts_vgrind[1] = pcmk__str_copy("--tool=callgrind"); opts_vgrind[2] = pcmk__str_copy("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); - opts_vgrind[3] = pcmk__str_copy(child->command); + opts_vgrind[3] = subdaemon_path(child); opts_vgrind[4] = NULL; } else { - opts_vgrind[1] = pcmk__str_copy(child->command); + opts_vgrind[1] = subdaemon_path(child); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } - opts_default[0] = pcmk__str_copy(child->command); + opts_default[0] = subdaemon_path(child); if(gid) { // Drop root group access if not needed if (!need_root_group && (setgid(gid) < 0)) { - crm_warn("Could not set group to %d: %s", gid, strerror(errno)); + crm_warn("Could not set subdaemon %s group to %lu: %s", + name, (unsigned long) gid, strerror(errno)); } /* Initialize supplementary groups to only those always granted to * the user, plus haclient (so we can access IPC). */ if (initgroups(child->uid, gid) < 0) { - crm_err("Cannot initialize groups for %s: %s (%d)", - child->uid, pcmk_rc_str(errno), errno); + crm_err("Cannot initialize system groups for subdaemon %s: %s " + QB_XS " errno=%d", + name, pcmk_rc_str(errno), errno); } } if (uid && setuid(uid) < 0) { - crm_warn("Could not set user to %s (id %d): %s", - child->uid, uid, strerror(errno)); + crm_warn("Could not set subdaemon %s user to %s: %s " + QB_XS " uid=%lu errno=%d", + name, strerror(errno), child->uid, (unsigned long) uid, + errno); } pcmk__close_fds_in_child(true); pcmk__open_devnull(O_RDONLY); // stdin (fd 0) pcmk__open_devnull(O_WRONLY); // stdout (fd 1) pcmk__open_devnull(O_WRONLY); // stderr (fd 2) if (use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { - (void)execvp(child->command, opts_default); + char *path = subdaemon_path(child); + + (void) execvp(path, opts_default); + free(path); } - crm_crit("Could not execute %s: %s", child->command, strerror(errno)); + crm_crit("Could not execute subdaemon %s: %s", name, strerror(errno)); crm_exit(CRM_EX_FATAL); } return pcmk_rc_ok; /* never reached */ } /*! * \internal * \brief Check the liveness of the child based on IPC name and PID if tracked * * \param[in,out] child Child tracked data * * \return Standard Pacemaker return code * * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive * indicating that no trace of IPC liveness was detected, * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by * an unauthorized process, and pcmk_rc_ipc_pid_only indicating that * the child is up by PID but not IPC end-point (possibly starting). * \note This function doesn't modify any of \p child members but \c pid, * and is not actively toying with processes as such but invoking * \c stop_child in one particular case (there's for some reason * a different authentic holder of the IPC end-point). */ static int child_liveness(pcmk_child_t *child) { uid_t cl_uid = 0; gid_t cl_gid = 0; const uid_t root_uid = 0; const gid_t root_gid = 0; const uid_t *ref_uid; const gid_t *ref_gid; + const char *name = pcmk__server_name(child->server); int rc = pcmk_rc_ipc_unresponsive; + int legacy_rc = pcmk_ok; pid_t ipc_pid = 0; - if (child->endpoint == NULL - && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) { - crm_err("Cannot track child %s for missing both API end-point and PID", - child->name); - rc = EINVAL; // Misuse of function when child is not trackable - - } else if (child->endpoint != NULL) { - int legacy_rc = pcmk_ok; - - if (child->uid == NULL) { - ref_uid = &root_uid; - ref_gid = &root_gid; - } else { - ref_uid = &cl_uid; - ref_gid = &cl_gid; - legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid); - } + if (child->uid == NULL) { + ref_uid = &root_uid; + ref_gid = &root_gid; + } else { + ref_uid = &cl_uid; + ref_gid = &cl_gid; + legacy_rc = pcmk_daemon_user(&cl_uid, &cl_gid); + } - if (legacy_rc < 0) { - rc = pcmk_legacy2rc(legacy_rc); - crm_err("Could not find user and group IDs for user %s: %s " - QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc); - } else { - rc = pcmk__ipc_is_authentic_process_active(child->endpoint, - *ref_uid, *ref_gid, - &ipc_pid); - if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) { - if (child->pid <= 0) { - /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this - * initializes a new child. If rc is - * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will - * investigate further. - */ - child->pid = ipc_pid; - } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) { - /* An unexpected (but authorized) process is responding to - * IPC. Investigate further. - */ - rc = pcmk_rc_ipc_unresponsive; - } + if (legacy_rc < 0) { + rc = pcmk_legacy2rc(legacy_rc); + crm_err("Could not find user and group IDs for user %s: %s " + QB_XS " rc=%d", CRM_DAEMON_USER, pcmk_rc_str(rc), rc); + } else { + const char *ipc_name = pcmk__server_ipc_name(child->server); + + rc = pcmk__ipc_is_authentic_process_active(ipc_name, + *ref_uid, *ref_gid, + &ipc_pid); + if ((rc == pcmk_rc_ok) || (rc == pcmk_rc_ipc_unresponsive)) { + if (child->pid <= 0) { + /* If rc is pcmk_rc_ok, ipc_pid is nonzero and this + * initializes a new child. If rc is + * pcmk_rc_ipc_unresponsive, ipc_pid is zero, and we will + * investigate further. + */ + child->pid = ipc_pid; + } else if ((ipc_pid != 0) && (child->pid != ipc_pid)) { + /* An unexpected (but authorized) process is responding to + * IPC. Investigate further. + */ + rc = pcmk_rc_ipc_unresponsive; } } } if (rc == pcmk_rc_ipc_unresponsive) { /* If we get here, a child without IPC is being tracked, no IPC liveness * has been detected, or IPC liveness has been detected with an * unexpected (but authorized) process. This is safe on FreeBSD since * the only change possible from a proper child's PID into "special" PID * of 1 behind more loosely related process. */ - int ret = pcmk__pid_active(child->pid, child->name); + int ret = pcmk__pid_active(child->pid, name); if (ipc_pid && ((ret != pcmk_rc_ok) || ipc_pid == PCMK__SPECIAL_PID - || (pcmk__pid_active(ipc_pid, - child->name) == pcmk_rc_ok))) { + || (pcmk__pid_active(ipc_pid, name) == pcmk_rc_ok))) { /* An unexpected (but authorized) process was detected at the IPC * endpoint, and either it is active, or the child we're tracking is * not. */ if (ret == pcmk_rc_ok) { /* The child we're tracking is active. Kill it, and adopt the * detected process. This assumes that our children don't fork * (thus getting a different PID owning the IPC), but rather the * tracking got out of sync because of some means external to * Pacemaker, and adopting the detected process is better than * killing it and possibly having to spawn a new child. */ /* not possessing IPC, afterall (what about corosync CPG?) */ stop_child(child, SIGKILL); } rc = pcmk_rc_ok; child->pid = ipc_pid; } else if (ret == pcmk_rc_ok) { // Our tracked child's PID was found active, but not its IPC rc = pcmk_rc_ipc_pid_only; } else if ((child->pid == 0) && (ret == EINVAL)) { // FreeBSD can return EINVAL rc = pcmk_rc_ipc_unresponsive; } else { switch (ret) { case EACCES: rc = pcmk_rc_ipc_unauthorized; break; case ESRCH: rc = pcmk_rc_ipc_unresponsive; break; default: rc = ret; break; } } } return rc; } /*! * \internal * \brief Initial one-off check of the pre-existing "child" processes * * With "child" process, we mean the subdaemon that defines an API end-point * (all of them do as of the comment) -- the possible complement is skipped * as it is deemed it has no such shared resources to cause conflicts about, * hence it can presumably be started anew without hesitation. * If that won't hold true in the future, the concept of a shared resource * will have to be generalized beyond the API end-point. * * For boundary cases that the "child" is still starting (IPC end-point is yet * to be witnessed), or more rarely (practically FreeBSD only), when there's * a pre-existing "untrackable" authentic process, we give the situation some * time to possibly unfold in the right direction, meaning that said socket * will appear or the unattainable process will disappear per the observable * IPC, respectively. * * \return Standard Pacemaker return code * * \note Since this gets run at the very start, \c respawn_count fields * for particular children get temporarily overloaded with "rounds * of waiting" tracking, restored once we are about to finish with * success (i.e. returning value >=0) and will remain unrestored * otherwise. One way to suppress liveness detection logic for * particular child is to set the said value to a negative number. */ #define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ int find_and_track_existing_processes(void) { bool wait_in_progress; int rc; size_t i, rounds; for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { wait_in_progress = false; for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { + const char *name = pcmk__server_name(pcmk_children[i].server); + const char *ipc_name = NULL; - if ((pcmk_children[i].endpoint == NULL) - || (pcmk_children[i].respawn_count < 0)) { + if (pcmk_children[i].respawn_count < 0) { continue; } rc = child_liveness(&pcmk_children[i]); if (rc == pcmk_rc_ipc_unresponsive) { /* As a speculation, don't give up if there are more rounds to * come for other reasons, but don't artificially wait just * because of this, since we would preferably start ASAP. */ continue; } // @TODO Functionize more of this to reduce nesting + ipc_name = pcmk__server_ipc_name(pcmk_children[i].server); pcmk_children[i].respawn_count = rounds; switch (rc) { case pcmk_rc_ok: if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { if (crm_is_true(pcmk__env_option(PCMK__ENV_FAIL_FAST))) { crm_crit("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" " platform and PCMK_" PCMK__ENV_FAIL_FAST - " requested", - pcmk_children[i].endpoint); + " requested", ipc_name); return EOPNOTSUPP; } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { crm_notice("Assuming pre-existing authentic, though" " on this platform untrackable, process" " behind %s IPC is stable (was in %d" " previous samples) so rather than" " bailing out (PCMK_" PCMK__ENV_FAIL_FAST " not requested), we just switch to a" " less optimal IPC liveness monitoring" " (not very suitable for heavy load)", - pcmk_children[i].name, WAIT_TRIES - 1); + name, WAIT_TRIES - 1); crm_warn("The process behind %s IPC cannot be" " terminated, so the overall shutdown" " will get delayed implicitly (%ld s)," " which serves as a graceful period for" " its native termination if it vitally" " depends on some other daemons going" " down in a controlled way already", - pcmk_children[i].name, - (long) SHUTDOWN_ESCALATION_PERIOD); + name, (long) SHUTDOWN_ESCALATION_PERIOD); } else { wait_in_progress = true; crm_warn("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" " platform, can still disappear in %d" - " attempt(s)", pcmk_children[i].endpoint, + " attempt(s)", ipc_name, WAIT_TRIES - pcmk_children[i].respawn_count); continue; } } crm_notice("Tracking existing %s process (pid=%lld)", - pcmk_children[i].name, + name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid)); pcmk_children[i].respawn_count = -1; /* 0~keep watching */ pcmk_children[i].flags |= child_active_before_startup; break; case pcmk_rc_ipc_pid_only: if (pcmk_children[i].respawn_count == WAIT_TRIES) { - crm_crit("%s IPC end-point for existing authentic" + crm_crit("%s IPC endpoint for existing authentic" " process %lld did not (re)appear", - pcmk_children[i].endpoint, + ipc_name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid)); return rc; } wait_in_progress = true; - crm_warn("Cannot find %s IPC end-point for existing" + crm_warn("Cannot find %s IPC endpoint for existing" " authentic process %lld, can still (re)appear" " in %d attempts (?)", - pcmk_children[i].endpoint, + ipc_name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid), WAIT_TRIES - pcmk_children[i].respawn_count); continue; default: crm_crit("Checked liveness of %s: %s " QB_XS " rc=%d", - pcmk_children[i].name, pcmk_rc_str(rc), rc); + name, pcmk_rc_str(rc), rc); return rc; } } if (!wait_in_progress) { break; } pcmk__sleep_ms(250); // Wait a bit for changes to possibly happen } for (i = 0; i < PCMK__NELEM(pcmk_children); i++) { pcmk_children[i].respawn_count = 0; /* restore pristine state */ } g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_next_subdaemon, NULL); return pcmk_rc_ok; } gboolean init_children_processes(void *user_data) { if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { /* Corosync clusters can drop root group access, because we set * uidgid.gid.${gid}=1 via CMAP, which allows these processes to connect * to corosync. */ need_root_group = false; } /* start any children that have not been detected */ for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { if (pcmk_children[i].pid != 0) { /* we are already tracking it */ continue; } start_child(&(pcmk_children[i])); } /* From this point on, any daemons being started will be due to * respawning rather than node start. * * This may be useful for the daemons to know */ pcmk__set_env_option(PCMK__ENV_RESPAWNED, PCMK_VALUE_TRUE, false); pacemakerd_state = PCMK__VALUE_RUNNING; return TRUE; } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } void restart_cluster_subdaemons(void) { for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) { if (!pcmk_is_set(pcmk_children[i].flags, child_needs_retry) || pcmk_children[i].pid != 0) { continue; } - crm_notice("Respawning cluster-based subdaemon: %s", pcmk_children[i].name); + crm_notice("Respawning cluster-based subdaemon %s", + pcmk__server_name(pcmk_children[i].server)); if (start_child(&pcmk_children[i])) { pcmk_children[i].flags &= ~child_needs_retry; } } } static gboolean stop_child(pcmk_child_t * child, int signal) { + const char *name = pcmk__server_name(child->server); + if (signal == 0) { signal = SIGTERM; } /* why to skip PID of 1? - FreeBSD ~ how untrackable process behind IPC is masqueraded as - elsewhere: how "init" task is designated; in particular, in systemd arrangement of socket-based activation, this is pretty real */ - if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) { - crm_debug("Nothing to do for child \"%s\" (process %lld)", - child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); + if (child->pid == PCMK__SPECIAL_PID) { + crm_debug("Nothing to do to stop subdaemon %s[%lld]", + name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); return TRUE; } if (child->pid <= 0) { - crm_trace("Client %s not running", child->name); + crm_trace("Nothing to do to stop subdaemon %s: Not running", name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { - crm_notice("Stopping %s " QB_XS " sent signal %d to process %lld", - child->name, signal, (long long) child->pid); - + crm_notice("Stopping subdaemon %s " + QB_XS " via signal %d to process %lld", + name, signal, (long long) child->pid); } else { - crm_err("Could not stop %s (process %lld) with signal %d: %s", - child->name, (long long) child->pid, signal, strerror(errno)); + crm_err("Could not stop subdaemon %s[%lld] with signal %d: %s", + name, (long long) child->pid, signal, strerror(errno)); } return TRUE; } - diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c index 3dda91cefc..457eb52787 100644 --- a/daemons/schedulerd/pacemaker-schedulerd.c +++ b/daemons/schedulerd/pacemaker-schedulerd.c @@ -1,201 +1,203 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-schedulerd.h" -#define SUMMARY "pacemaker-schedulerd - daemon for calculating a Pacemaker cluster's response to events" +#define SUMMARY PCMK__SERVER_SCHEDULERD " - daemon for calculating a " \ + "Pacemaker cluster's response to events" struct { gchar **remainder; } options; pcmk__output_t *logger_out = NULL; static pcmk__output_t *out = NULL; static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static crm_exit_t exit_code = CRM_EX_OK; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; void pengine_shutdown(int nsig); /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_cluster_options() or * crm_attribute --list-options=cluster instead of querying daemon metadata. * * NOTE: pcs (as of at least 0.11.8) uses this */ static int scheduler_metadata(pcmk__output_t *out) { - return pcmk__daemon_metadata(out, "pacemaker-schedulerd", + return pcmk__daemon_metadata(out, PCMK__SERVER_SCHEDULERD, "Pacemaker scheduler options", "Cluster options used by Pacemaker's " "scheduler", pcmk__opt_schedulerd); } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { G_OPTION_REMAINING, 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING_ARRAY, &options.remainder, NULL, NULL }, { NULL } }; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, extra_prog_entries); return context; } int main(int argc, char **argv) { GError *error = NULL; int rc = pcmk_rc_ok; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); mainloop_add_signal(SIGTERM, pengine_shutdown); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } pe__register_messages(out); pcmk__register_lib_messages(out); if (options.remainder) { if (g_strv_length(options.remainder) == 1 && pcmk__str_eq("metadata", options.remainder[0], pcmk__str_casei)) { rc = scheduler_metadata(out); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } } else { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unsupported extra command line parameters"); } goto done; } if (args->version) { out->version(out, false); goto done; } - pcmk__cli_init_logging("pacemaker-schedulerd", args->verbosity); + pcmk__cli_init_logging(PCMK__SERVER_SCHEDULERD, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker scheduler"); if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { crm_err("Terminating due to bad permissions on " PE_STATE_DIR); exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "ERROR: Bad permissions on %s (see logs for details)", PE_STATE_DIR); goto done; } ipcs = pcmk__serve_schedulerd_ipc(&ipc_callbacks); if (ipcs == NULL) { g_set_error(&error, PCMK__EXITC_ERROR, exit_code, - "Failed to create pacemaker-schedulerd server: exiting and inhibiting respawn"); + "Exiting fatally because unable to serve " + "scheduler server IPC"); exit_code = CRM_EX_FATAL; goto done; } if (pcmk__log_output_new(&logger_out) != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; goto done; } pe__register_messages(logger_out); pcmk__register_lib_messages(logger_out); pcmk__output_set_log_level(logger_out, LOG_TRACE); /* Create the mainloop and run it... */ mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker scheduler successfully started and accepting connections"); g_main_loop_run(mainloop); done: g_strfreev(options.remainder); g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); pengine_shutdown(0); } void pengine_shutdown(int nsig) { if (ipcs != NULL) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } if (logger_out != NULL) { logger_out->finish(logger_out, exit_code, true, NULL); pcmk__output_free(logger_out); logger_out = NULL; } if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); out = NULL; } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/include/crm/cib/cib_types.h b/include/crm/cib/cib_types.h index 7c3a4cfc5b..56ae4f407c 100644 --- a/include/crm/cib/cib_types.h +++ b/include/crm/cib/cib_types.h @@ -1,336 +1,336 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CIB_CIB_TYPES__H # define PCMK__CRM_CIB_CIB_TYPES__H # include // gboolean, GList # include // xmlNode # include # include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Data types for Cluster Information Base access * \ingroup cib */ enum cib_variant { cib_undefined = 0, cib_native = 1, cib_file = 2, cib_remote = 3, }; enum cib_state { // NOTE: sbd (as of at least 1.5.2) uses this value cib_connected_command, // NOTE: sbd (as of at least 1.5.2) uses this value cib_connected_query, cib_disconnected }; enum cib_conn_type { cib_command, // NOTE: sbd (as of at least 1.5.2) uses this value cib_query, cib_no_connection, cib_command_nonblocking, }; enum cib_call_options { cib_none = 0, cib_verbose = (1 << 0), //!< Prefer stderr to logs cib_xpath = (1 << 1), cib_multiple = (1 << 2), cib_can_create = (1 << 3), cib_discard_reply = (1 << 4), cib_no_children = (1 << 5), cib_xpath_address = (1 << 6), #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) // NOTE: sbd (as of at least 1.5.2) uses this value //! \deprecated This value will be removed in a future release cib_scope_local = (1 << 8), #endif // !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) cib_dryrun = (1 << 9), /*! * \brief Process request when the client commits the active transaction * * Add the request to the client's active transaction instead of processing * it immediately. If the client has no active transaction, or if the * request is not supported in transactions, the call will fail. * * The request is added to the transaction synchronously, and the return * value indicates whether it was added successfully. * * Refer to \p cib_api_operations_t:init_transaction() and * \p cib_api_operations_t:end_transaction() for more details on CIB * transactions. */ cib_transaction = (1 << 10), /*! * \brief Treat new attribute values as atomic score updates where possible * * This option takes effect when updating XML attributes. For an attribute * named \c "name", if the new value is \c "name++" or \c "name+=X" for some * score \c X, the new value is set as follows: * * If attribute \c "name" is not already set to some value in the element * being updated, the new value is set as a literal string. * * If the new value is \c "name++", then the attribute is set to its * existing value (parsed as a score) plus 1. * * If the new value is \c "name+=X" for some score \c X, then the * attribute is set to its existing value plus \c X, where the existing * value and \c X are parsed and added as scores. * * Scores are integer values capped at \c INFINITY and \c -INFINITY. Refer * to Pacemaker Explained and to the \c char2score() function for more * details on scores, including how they're parsed and added. * * Note: This is implemented only for modify operations. */ cib_score_update = (1 << 11), // NOTE: sbd (as of at least 1.5.2) uses this value cib_sync_call = (1 << 12), cib_no_mtime = (1 << 13), cib_inhibit_notify = (1 << 16), cib_force_diff = (1 << 28), }; typedef struct cib_s cib_t; typedef struct cib_api_operations_s { // NOTE: sbd (as of at least 1.5.2) uses this // @COMPAT At compatibility break, drop name (always use crm_system_name) int (*signon) (cib_t *cib, const char *name, enum cib_conn_type type); // NOTE: sbd (as of at least 1.5.2) uses this int (*signoff) (cib_t *cib); int (*free) (cib_t *cib); // NOTE: sbd (as of at least 1.5.2) uses this int (*add_notify_callback) (cib_t *cib, const char *event, void (*callback) (const char *event, xmlNode *msg)); // NOTE: sbd (as of at least 1.5.2) uses this int (*del_notify_callback) (cib_t *cib, const char *event, void (*callback) (const char *event, xmlNode *msg)); // NOTE: sbd (as of at least 1.5.2) uses this int (*set_connection_dnotify) (cib_t *cib, void (*dnotify) (gpointer user_data)); // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated This method will be removed and should not be used int (*noop) (cib_t *cib, int call_options); int (*ping) (cib_t *cib, xmlNode **output_data, int call_options); // NOTE: sbd (as of at least 1.5.2) uses this int (*query) (cib_t *cib, const char *section, xmlNode **output_data, int call_options); int (*query_from) (cib_t *cib, const char *host, const char *section, xmlNode **output_data, int call_options); int (*sync) (cib_t *cib, const char *section, int call_options); int (*sync_from) (cib_t *cib, const char *host, const char *section, int call_options); int (*upgrade) (cib_t *cib, int call_options); int (*bump_epoch) (cib_t *cib, int call_options); /*! * The \c element in the reply to a failed creation call is * deprecated since 2.1.8. */ int (*create) (cib_t *cib, const char *section, xmlNode *data, int call_options); int (*modify) (cib_t *cib, const char *section, xmlNode *data, int call_options); int (*replace) (cib_t *cib, const char *section, xmlNode *data, int call_options); int (*remove) (cib_t *cib, const char *section, xmlNode *data, int call_options); int (*erase) (cib_t *cib, xmlNode **output_data, int call_options); int (*register_notification) (cib_t *cib, const char *callback, int enabled); gboolean (*register_callback) (cib_t *cib, int call_id, int timeout, gboolean only_success, void *user_data, const char *callback_name, void (*callback) (xmlNode*, int, int, xmlNode*, void *)); gboolean (*register_callback_full)(cib_t *cib, int call_id, int timeout, gboolean only_success, void *user_data, const char *callback_name, void (*callback)(xmlNode *, int, int, xmlNode *, void *), void (*free_func)(void *)); /*! * \brief Set the local CIB manager as the cluster's primary instance * * \param[in,out] cib CIB connection * \param[in] call_options Group of enum cib_call_options flags * * \return Legacy Pacemaker return code (in particular, pcmk_ok on success) */ int (*set_primary)(cib_t *cib, int call_options); /*! * \brief Set the local CIB manager as a secondary instance * * \param[in,out] cib CIB connection * \param[in] call_options Group of enum cib_call_options flags * * \return Legacy Pacemaker return code (in particular, pcmk_ok on success) */ int (*set_secondary)(cib_t *cib, int call_options); /*! * \brief Get the given CIB connection's unique client identifier(s) * * These can be used to check whether this client requested the action that * triggered a CIB notification. * * \param[in] cib CIB connection * \param[out] async_id If not \p NULL, where to store asynchronous client * ID * \param[out] sync_id If not \p NULL, where to store synchronous client * ID * * \return Legacy Pacemaker return code * * \note Some variants may have only one client for both asynchronous and * synchronous requests. */ int (*client_id)(const cib_t *cib, const char **async_id, const char **sync_id); /*! * \brief Initiate an atomic CIB transaction for this client * * If the client has initiated a transaction and a new request's call * options contain \p cib_transaction, the new request is appended to the * transaction for later processing. * * Supported requests are those that meet the following conditions: * * can be processed synchronously (with any changes applied to a working * CIB copy) * * are not queries * * do not involve other nodes - * * do not affect the state of pacemaker-based itself + * * do not affect the state of the CIB manager itself * * Currently supported CIB API functions include: * * \p bump_epoch() * * \p create() * * \p erase() * * \p modify() * * \p remove() * * \p replace() * * \p upgrade() * * Because the transaction is atomic, individual requests do not trigger * callbacks or notifications when they are processed, and they do not * receive output XML. The commit request itself can trigger callbacks and * notifications if any are registered. * * An \c init_transaction() call is always synchronous. * * \param[in,out] cib CIB connection * * \return Legacy Pacemaker return code */ int (*init_transaction)(cib_t *cib); /*! * \brief End and optionally commit this client's CIB transaction * * When a client commits a transaction, all requests in the transaction are * processed in a FIFO manner until either a request fails or all requests * have been processed. Changes are applied to a working copy of the CIB. * If a request fails, the transaction and working CIB copy are discarded, * and an error is returned. If all requests succeed, the working CIB copy * replaces the initial CIB copy. * * Callbacks and notifications can be triggered by the commit request itself * but not by the individual requests in a transaction. * * An \c end_transaction() call with \p commit set to \c false is always * synchronous. * * \param[in,out] cib CIB connection * \param[in] commit If \p true, commit transaction; otherwise, * discard it * \param[in] call_options Group of enum cib_call_options * flags * * \return Legacy Pacemaker return code */ int (*end_transaction)(cib_t *cib, bool commit, int call_options); /*! * \brief Set the user as whom all CIB requests via methods will be executed * * By default, the value of the \c CIB_user environment variable is used if * set. Otherwise, the current effective user is used. * * \param[in,out] cib CIB connection * \param[in] user Name of user whose permissions to use when * processing requests */ void (*set_user)(cib_t *cib, const char *user); int (*fetch_schemas)(cib_t *cib, xmlNode **output_data, const char *after_ver, int call_options); } cib_api_operations_t; struct cib_s { // NOTE: sbd (as of at least 1.5.2) uses this enum cib_state state; enum cib_conn_type type; enum cib_variant variant; int call_id; int call_timeout; void *variant_opaque; void *delegate_fn; GList *notify_list; // NOTE: sbd (as of at least 1.5.2) uses this cib_api_operations_t *cmds; xmlNode *transaction; char *user; }; #ifdef __cplusplus } #endif #endif // PCMK__CRM_CIB_CIB_TYPES__H diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h index de88e4089d..15450a6679 100644 --- a/include/crm/common/ipc_attrd_internal.h +++ b/include/crm/common/ipc_attrd_internal.h @@ -1,199 +1,199 @@ /* * Copyright 2022-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_IPC_ATTRD_INTERNAL__H #define PCMK__CRM_COMMON_IPC_ATTRD_INTERNAL__H #include // GList #include // pcmk_ipc_api_t #ifdef __cplusplus extern "C" { #endif //! Possible types of attribute manager replies enum pcmk__attrd_api_reply { pcmk__attrd_reply_unknown, pcmk__attrd_reply_query, }; // Information passed with pcmk__attrd_reply_query typedef struct { const char *node; const char *name; const char *value; } pcmk__attrd_query_pair_t; /*! * Attribute manager reply passed to event callback * * \note The pointers in the reply are only guaranteed to be meaningful for the * execution of the callback; if the values are needed for later, the * callback should copy them. */ typedef struct { enum pcmk__attrd_api_reply reply_type; union { // pcmk__attrd_reply_query GList *pairs; } data; } pcmk__attrd_api_reply_t; /*! * \internal - * \brief Send a request to pacemaker-attrd to clear resource failure + * \brief Send a request to the attribute manager to clear resource failure * - * \param[in,out] api pacemaker-attrd IPC object + * \param[in,out] api Attribute manager IPC object * \param[in] node Affect only this node (or NULL for all nodes) * \param[in] resource Name of resource to clear (or NULL for all) * \param[in] operation Name of operation to clear (or NULL for all) * \param[in] interval_spec If operation is not NULL, its interval - * \param[in] user_name ACL user to pass to pacemaker-attrd + * \param[in] user_name ACL user to pass to the attribute manager * \param[in] options Bitmask of pcmk__node_attr_opts * * \note If \p api is NULL, a new temporary connection will be created * just for this operation and destroyed afterwards. If \p api is - * not NULL but is not yet connected to pacemaker-attrd, the object + * not NULL but is not yet connected to the attribute manager, the object * will be connected for this operation and left connected afterwards. * This allows for reusing an IPC connection. * * \return Standard Pacemaker return code */ int pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, const char *resource, const char *operation, const char *interval_spec, const char *user_name, uint32_t options); /*! * \internal * * \brief Delete a previously set attribute by setting its value to NULL * - * \param[in,out] api Connection to pacemaker-attrd (or NULL to use + * \param[in,out] api Connection to the attribute manager (or NULL to use * a temporary new connection) * \param[in] node Delete attribute for this node (or NULL for local) * \param[in] name Attribute name * \param[in] options Bitmask of pcmk__node_attr_opts * * \return Standard Pacemaker return code */ int pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *name, uint32_t options); /*! * \internal * \brief Request removal of a node's transient attributes * - * \param[in,out] api pacemaker-attrd IPC object + * \param[in,out] api Attribute manager IPC object * \param[in] node Node whose attributes should be purged * \param[in] reap If true, also request removal from node caches * * \note If \p api is NULL, a new temporary connection will be created * just for this operation and destroyed afterwards. If \p api is - * not NULL but is not yet connected to pacemaker-attrd, the object + * not NULL but is not yet connected to the attribute manager, the object * will be connected for this operation and left connected afterwards. * This allows for reusing an IPC connection. * * \return Standard Pacemaker return code */ int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap); /*! * \internal - * \brief Get the value of an attribute from pacemaker-attrd + * \brief Get the value of an attribute from the attribute manager * - * \param[in,out] api Connection to pacemaker-attrd + * \param[in,out] api Connection to the attribute manager * \param[in] node Look up the attribute for this node * (or NULL for the local node) * \param[in] name Attribute name * \param[in] options Bitmask of pcmk__node_attr_opts * * \note Passing pcmk__node_attr_query_all will cause the function to query * the value of \p name on all nodes, regardless of the value of \p node. * * \return Standard Pacemaker return code */ int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, uint32_t options); /*! * \internal - * \brief Tell pacemaker-attrd to update the CIB with current values + * \brief Tell the attribute manager to update the CIB with current values * - * \param[in,out] api pacemaker-attrd IPC object + * \param[in,out] api Attribute manager IPC object * \param[in] node Affect only this node (or NULL for all nodes) * * \note If \p api is NULL, a new temporary connection will be created * just for this operation and destroyed afterwards. If \p api is - * not NULL but is not yet connected to pacemaker-attrd, the object + * not NULL but is not yet connected to the attribute manager, the object * will be connected for this operation and left connected afterwards. * This allows for reusing an IPC connection. * * \return Standard Pacemaker return code */ int pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node); /*! * \internal * \brief Update an attribute's value, time to wait, or both * - * \param[in,out] api pacemaker-attrd IPC object + * \param[in,out] api Attribute manager IPC object * \param[in] node Affect only this node (or NULL for current node) * \param[in] name Attribute name * \param[in] value The attribute's new value, or NULL to unset * \param[in] dampen The new time to wait value, or NULL to unset * \param[in] set ID of attribute set to use (or NULL for first) - * \param[in] user_name ACL user to pass to pacemaker-attrd + * \param[in] user_name ACL user to pass to the attribute manager * \param[in] options Bitmask of pcmk__node_attr_opts * * \note If \p api is NULL, a new temporary connection will be created * just for this operation and destroyed afterwards. If \p api is - * not NULL but is not yet connected to pacemaker-attrd, the object + * not NULL but is not yet connected to the attribute manager, the object * will be connected for this operation and left connected afterwards. * This allows for reusing an IPC connection. * * \return Standard Pacemaker return code */ int pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, const char *value, const char *dampen, const char *set, const char *user_name, uint32_t options); /*! * \internal * \brief Like pcmk__attrd_api_update, but for multiple attributes at once * - * \param[in,out] api pacemaker-attrd IPC object + * \param[in,out] api Attribute manager IPC object * \param[in,out] attrs A list of pcmk__attr_query_pair_t structs * \param[in] dampen The new time to wait value, or NULL to unset * \param[in] set ID of attribute set to use (or NULL for first) - * \param[in] user_name ACL user to pass to pacemaker-attrd + * \param[in] user_name ACL user to pass to the attribute manager * \param[in] options Bitmask of pcmk__node_attr_opts * * \note If \p api is NULL, a new temporary connection will be created * just for this operation and destroyed afterwards. If \p api is - * not NULL but is not yet connected to pacemaker-attrd, the object + * not NULL but is not yet connected to the attribute manager, the object * will be connected for this operation and left connected afterwards. * This allows for reusing an IPC connection. * * \note Not all attrd versions support setting multiple attributes at once. * For those servers that do not, this function will fall back to just * sending a separate IPC request for each attribute. * * \return Standard Pacemaker return code */ int pcmk__attrd_api_update_list(pcmk_ipc_api_t *api, GList *attrs, const char *dampen, const char *set, const char *user_name, uint32_t options); #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_IPC_ATTRD_INTERNAL__H diff --git a/include/crm/common/servers_internal.h b/include/crm/common/servers_internal.h index 071fc50dbd..b092c9e7cb 100644 --- a/include/crm/common/servers_internal.h +++ b/include/crm/common/servers_internal.h @@ -1,28 +1,39 @@ /* * Copyright 2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_SERVERS_INTERNAL__H #define PCMK__CRM_COMMON_SERVERS_INTERNAL__H #include // enum pcmk_ipc_server #ifdef __cplusplus extern "C" { #endif +// Pacemaker server names +#define PCMK__SERVER_ATTRD "pacemaker-attrd" +#define PCMK__SERVER_BASED "pacemaker-based" +#define PCMK__SERVER_CONTROLD "pacemaker-controld" +#define PCMK__SERVER_EXECD "pacemaker-execd" +#define PCMK__SERVER_FENCED "pacemaker-fenced" +#define PCMK__SERVER_PACEMAKERD "pacemakerd" +#define PCMK__SERVER_REMOTED "pacemaker-remoted" +#define PCMK__SERVER_SCHEDULERD "pacemaker-schedulerd" + +const char *pcmk__server_name(enum pcmk_ipc_server server); const char *pcmk__server_log_name(enum pcmk_ipc_server server); const char *pcmk__server_ipc_name(enum pcmk_ipc_server server); const char *pcmk__server_message_type(enum pcmk_ipc_server server); enum pcmk_ipc_server pcmk__parse_server(const char *text); #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_SERVERS_INTERNAL__H diff --git a/include/pacemaker.h b/include/pacemaker.h index 75d1b1f961..c9fb326f28 100644 --- a/include/pacemaker.h +++ b/include/pacemaker.h @@ -1,714 +1,714 @@ /* * Copyright 2019-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__PACEMAKER__H # define PCMK__PACEMAKER__H # include # include # include # include # include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief High Level API * \ingroup pacemaker */ /*! * \brief Modify operation of running a cluster simulation. */ enum pcmk_sim_flags { pcmk_sim_none = 0, pcmk_sim_all_actions = 1 << 0, pcmk_sim_show_pending = 1 << 1, pcmk_sim_process = 1 << 2, pcmk_sim_show_scores = 1 << 3, pcmk_sim_show_utilization = 1 << 4, pcmk_sim_simulate = 1 << 5, pcmk_sim_sanitized = 1 << 6, pcmk_sim_verbose = 1 << 7, }; /*! * \brief Synthetic cluster events that can be injected into the cluster * for running simulations. */ typedef struct { /*! A list of node names (gchar *) to simulate bringing online */ GList *node_up; /*! A list of node names (gchar *) to simulate bringing offline */ GList *node_down; /*! A list of node names (gchar *) to simulate failing */ GList *node_fail; /*! A list of operations (gchar *) to inject. The format of these strings * is described in the "Operation Specification" section of crm_simulate * help output. */ GList *op_inject; /*! A list of operations (gchar *) that should return a given error code * if they fail. The format of these strings is described in the * "Operation Specification" section of crm_simulate help output. */ GList *op_fail; /*! A list of tickets (gchar *) to simulate granting */ GList *ticket_grant; /*! A list of tickets (gchar *) to simulate revoking */ GList *ticket_revoke; /*! A list of tickets (gchar *) to simulate putting on standby */ GList *ticket_standby; /*! A list of tickets (gchar *) to simulate activating */ GList *ticket_activate; /*! Does the cluster have an active watchdog device? */ char *watchdog; /*! Does the cluster have quorum? */ char *quorum; } pcmk_injections_t; /*! * \brief Get and output controller status * * \param[in,out] xml Destination for the result, as an XML tree * \param[in] node_name Name of node whose status is desired * (\p NULL for DC) * \param[in] message_timeout_ms How long to wait for a reply from the - * \p pacemaker-controld API. If 0, + * controller API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code */ int pcmk_controller_status(xmlNodePtr *xml, const char *node_name, unsigned int message_timeout_ms); /*! * \brief Get and output designated controller node name * * \param[in,out] xml Destination for the result, as an XML tree * \param[in] message_timeout_ms How long to wait for a reply from the - * \p pacemaker-controld API. If 0, + * controller API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code */ int pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms); /*! * \brief Free a :pcmk_injections_t structure * * \param[in,out] injections The structure to be freed */ void pcmk_free_injections(pcmk_injections_t *injections); /*! * \brief Get and optionally output node info corresponding to a node ID from * the controller * * \param[in,out] xml Destination for the result, as an XML tree * \param[in,out] node_id ID of node whose name to get. If \p NULL * or 0, get the local node name. If not * \p NULL, store the true node ID here on * success. * \param[out] node_name If not \p NULL, where to store the node * name * \param[out] uuid If not \p NULL, where to store the node * UUID * \param[out] state If not \p NULL, where to store the * membership state * \param[out] is_remote If not \p NULL, where to store whether the * node is a Pacemaker Remote node * \param[out] have_quorum If not \p NULL, where to store whether the * node has quorum * \param[in] show_output Whether to output the node info * \param[in] message_timeout_ms How long to wait for a reply from the - * \p pacemaker-controld API. If 0, + * controller API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code * * \note The caller is responsible for freeing \p *node_name, \p *uuid, and * \p *state using \p free(). */ int pcmk_query_node_info(xmlNodePtr *xml, uint32_t *node_id, char **node_name, char **uuid, char **state, bool *have_quorum, bool *is_remote, bool show_output, unsigned int message_timeout_ms); /*! * \brief Get the node name corresponding to a node ID from the controller * * \param[in,out] xml Destination for the result, as an XML tree * \param[in,out] node_id ID of node whose name to get (or 0 for the * local node) * \param[out] node_name If not \p NULL, where to store the node * name * \param[in] message_timeout_ms How long to wait for a reply from the - * \p pacemaker-controld API. If 0, + * controller API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code * * \note The caller is responsible for freeing \p *node_name using \p free(). */ static inline int pcmk_query_node_name(xmlNodePtr *xml, uint32_t node_id, char **node_name, unsigned int message_timeout_ms) { return pcmk_query_node_info(xml, &node_id, node_name, NULL, NULL, NULL, NULL, false, message_timeout_ms); } /*! * \brief Get and output \p pacemakerd status * * \param[in,out] xml Destination for the result, as an XML tree * \param[in] ipc_name IPC name for request * \param[in] message_timeout_ms How long to wait for a reply from the * \p pacemakerd API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code */ int pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, unsigned int message_timeout_ms); /*! * \brief Remove a resource * * \param[in,out] xml Destination for the result, as an XML tree * \param[in] rsc_id Resource to remove * \param[in] rsc_type Type of the resource ("primitive", "group", etc.) * * \return Standard Pacemaker return code * \note This function will return \p pcmk_rc_ok if \p rsc_id doesn't exist * or if \p rsc_type is incorrect for \p rsc_id (deleting something * that doesn't exist always succeeds). */ int pcmk_resource_delete(xmlNodePtr *xml, const char *rsc_id, const char *rsc_type); /*! * \brief Calculate and output resource operation digests * * \param[out] xml Where to store XML with result * \param[in,out] rsc Resource to calculate digests for * \param[in] node Node whose operation history should be used * \param[in] overrides Hash table of configuration parameters to override * * \return Standard Pacemaker return code */ int pcmk_resource_digests(xmlNodePtr *xml, pcmk_resource_t *rsc, const pcmk_node_t *node, GHashTable *overrides); /*! * \brief Simulate a cluster's response to events * * This high-level function essentially implements crm_simulate(8). It operates * on an input CIB file and various lists of events that can be simulated. It * optionally writes out a variety of artifacts to show the results of the * simulation. Output can be modified with various flags. * * \param[in,out] xml The destination for the result, as an XML tree * \param[in,out] scheduler Scheduler data * \param[in] injections A structure containing cluster events * (node up/down, tickets, injected operations) * \param[in] flags A bitfield of :pcmk_sim_flags to modify * operation of the simulation * \param[in] section_opts Which portions of the cluster status output * should be displayed? * \param[in] use_date Date to set the cluster's time to (may be NULL) * \param[in] input_file The source CIB file, which may be overwritten by * this function (may be NULL) * \param[in] graph_file Where to write the XML-formatted transition graph * (may be NULL, in which case no file will be * written) * \param[in] dot_file Where to write the dot(1) formatted transition * graph (may be NULL, in which case no file will * be written) * * \return Standard Pacemaker return code */ int pcmk_simulate(xmlNodePtr *xml, pcmk_scheduler_t *scheduler, const pcmk_injections_t *injections, unsigned int flags, unsigned int section_opts, const char *use_date, const char *input_file, const char *graph_file, const char *dot_file); /*! * \brief Verify that a CIB is error-free or output errors and warnings * * This high-level function essentially implements crm_verify(8). It operates * on an input CIB file, which can be inputted through one of several ways. It * writes out XML-formatted output. * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] cib_source Source of the CIB: * NULL -> use live cib, "-" -> stdin * "<..." -> xml str, otherwise -> xml file name * * \return Standard Pacemaker return code */ int pcmk_verify(xmlNodePtr *xml, const char *cib_source); /*! * \brief Get nodes list * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] node_types Node type(s) to return (default: all) * * \return Standard Pacemaker return code */ int pcmk_list_nodes(xmlNodePtr *xml, const char *node_types); /*! * \brief Output cluster status formatted like `crm_mon --output-as=xml` * * \param[in,out] xml The destination for the result, as an XML tree * * \return Standard Pacemaker return code */ int pcmk_status(xmlNodePtr *xml); /*! * \brief Check whether each rule in a list is in effect * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] input The CIB XML to check (if \c NULL, use current CIB) * \param[in] date Check whether the rule is in effect at this date and * time (if \c NULL, use current date and time) * \param[in] rule_ids The IDs of the rules to check, as a NULL- * terminated list. * * \return Standard Pacemaker return code */ int pcmk_check_rules(xmlNodePtr *xml, xmlNodePtr input, const crm_time_t *date, const char **rule_ids); /*! * \brief Check whether a given rule is in effect * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] input The CIB XML to check (if \c NULL, use current CIB) * \param[in] date Check whether the rule is in effect at this date and * time (if \c NULL, use current date and time) * \param[in] rule_ids The ID of the rule to check * * \return Standard Pacemaker return code */ static inline int pcmk_check_rule(xmlNodePtr *xml, xmlNodePtr input, const crm_time_t *date, const char *rule_id) { const char *rule_ids[] = {rule_id, NULL}; return pcmk_check_rules(xml, input, date, rule_ids); } /*! * \enum pcmk_rc_disp_flags * \brief Bit flags to control which fields of result code info are displayed */ enum pcmk_rc_disp_flags { pcmk_rc_disp_none = 0, //!< (Does nothing) pcmk_rc_disp_code = (1 << 0), //!< Display result code number pcmk_rc_disp_name = (1 << 1), //!< Display result code name pcmk_rc_disp_desc = (1 << 2), //!< Display result code description }; /*! * \brief Display the name and/or description of a result code * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] code The result code * \param[in] type Interpret \c code as this type of result code. * Supported values: \c pcmk_result_legacy, * \c pcmk_result_rc, \c pcmk_result_exitcode. * \param[in] flags Group of \c pcmk_rc_disp_flags * * \return Standard Pacemaker return code */ int pcmk_show_result_code(xmlNodePtr *xml, int code, enum pcmk_result_type type, uint32_t flags); /*! * \brief List all valid result codes in a particular family * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] type The family of result codes to list. Supported * values: \c pcmk_result_legacy, \c pcmk_result_rc, * \c pcmk_result_exitcode. * \param[in] flags Group of \c pcmk_rc_disp_flags * * \return Standard Pacemaker return code */ int pcmk_list_result_codes(xmlNodePtr *xml, enum pcmk_result_type type, uint32_t flags); /*! * \brief List available providers for the given OCF agent * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] agent_spec Resource agent name * * \return Standard Pacemaker return code */ int pcmk_list_alternatives(xmlNodePtr *xml, const char *agent_spec); /*! * \brief List all agents available for the named standard and/or provider * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] agent_spec STD[:PROV] * * \return Standard Pacemaker return code */ int pcmk_list_agents(xmlNodePtr *xml, char *agent_spec); /*! * \brief List all available OCF providers for the given agent * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] agent_spec Resource agent name * * \return Standard Pacemaker return code */ int pcmk_list_providers(xmlNodePtr *xml, const char *agent_spec); /*! * \brief List all available resource agent standards * * \param[in,out] xml The destination for the result, as an XML tree * * \return Standard Pacemaker return code */ int pcmk_list_standards(xmlNodePtr *xml); /*! * \brief List all available cluster options * * These are options that affect the entire cluster. * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] all If \c true, include advanced and deprecated options * (currently always treated as true) * * \return Standard Pacemaker return code */ int pcmk_list_cluster_options(xmlNode **xml, bool all); /*! * \brief List common fencing resource parameters * * These are parameters that are available for all fencing resources, regardless * of type. They are processed by Pacemaker, rather than by the fence agent or * the fencing library. * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] all If \c true, include advanced and deprecated options * (currently always treated as true) * * \return Standard Pacemaker return code */ int pcmk_list_fencing_params(xmlNode **xml, bool all); /*! * \internal * \brief List meta-attributes applicable to primitive resources as OCF-like XML * * \param[in,out] out Output object * \param[in] all If \c true, include advanced and deprecated options (this * is always treated as true for XML output objects) * * \return Standard Pacemaker return code */ int pcmk_list_primitive_meta(xmlNode **xml, bool all); /*! * \brief Return constraints that apply to the given ticket * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] ticket_id Ticket to find constraint for, or \c NULL for * all ticket constraints * * \return Standard Pacemaker return code */ int pcmk_ticket_constraints(xmlNodePtr *xml, const char *ticket_id); /*! * \brief Delete a ticket's state from the local cluster site * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] ticket_id Ticket to delete * \param[in] force If \c true, delete the ticket even if it has * been granted * * \return Standard Pacemaker return code */ int pcmk_ticket_delete(xmlNodePtr *xml, const char *ticket_id, bool force); /*! * \brief Return the value of a ticket's attribute * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] ticket_id Ticket to find attribute value for * \param[in] attr_name Attribute's name to find value for * \param[in] attr_default If either the ticket or the attribute do not * exist, use this as the value in \p xml * * \return Standard Pacemaker return code */ int pcmk_ticket_get_attr(xmlNodePtr *xml, const char *ticket_id, const char *attr_name, const char *attr_default); /*! * \brief Return information about the given ticket * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] ticket_id Ticket to find info value for, or \c NULL for * all tickets * * \return Standard Pacemaker return code */ int pcmk_ticket_info(xmlNodePtr *xml, const char *ticket_id); /*! * \brief Remove the given attribute(s) from a ticket * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] ticket_id Ticket to remove attributes from * \param[in] attr_delete A list of attribute names * \param[in] force Attempting to remove the granted attribute of * \p ticket_id will cause this function to return * \c EACCES unless \p force is set to \c true * * \return Standard Pacemaker return code */ int pcmk_ticket_remove_attr(xmlNodePtr *xml, const char *ticket_id, GList *attr_delete, bool force); /*! * \brief Set the given attribute(s) on a ticket * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] ticket_id Ticket to set attributes on * \param[in] attr_set A hash table of attributes, where keys are the * attribute names and the values are the attribute * values * \param[in] force Attempting to change the granted status of * \p ticket_id will cause this function to return * \c EACCES unless \p force is set to \c true * * \return Standard Pacemaker return code * * \note If no \p ticket_id attribute exists but \p attr_set is non-NULL, the * ticket will be created with the given attributes. */ int pcmk_ticket_set_attr(xmlNodePtr *xml, const char *ticket_id, GHashTable *attr_set, bool force); /*! * \brief Return a ticket's state XML * * \param[in,out] xml The destination for the result, as an XML tree * \param[in] ticket_id Ticket to find state for, or \c NULL for all * tickets * * \return Standard Pacemaker return code * * \note If \p ticket_id is not \c NULL and more than one ticket exists with * that ID, this function returns \c pcmk_rc_duplicate_id. */ int pcmk_ticket_state(xmlNodePtr *xml, const char *ticket_id); #ifdef BUILD_PUBLIC_LIBPACEMAKER /*! * \brief Ask the cluster to perform fencing * * \param[in,out] st A connection to the fencer API * \param[in] target The node that should be fenced * \param[in] action The fencing action (on, off, reboot) to perform * \param[in] name Who requested the fence action? * \param[in] timeout How long to wait for operation to complete (in ms) * \param[in] tolerance If a successful action for \p target happened within * this many ms, return 0 without performing the action * again * \param[in] delay Apply this delay (in milliseconds) before initiating * fencing action (-1 applies no delay and also * disables any fencing delay from pcmk_delay_base and * pcmk_delay_max) * \param[out] reason If not NULL, where to put descriptive failure reason * * \return Standard Pacemaker return code * \note If \p reason is not NULL, the caller is responsible for freeing its * returned value. */ int pcmk_request_fencing(stonith_t *st, const char *target, const char *action, const char *name, unsigned int timeout, unsigned int tolerance, int delay, char **reason); /*! * \brief List the fencing operations that have occurred for a specific node * * \note If \p xml is not NULL, it will be freed first and the previous * contents lost. * * \param[in,out] xml The destination for the result, as an XML tree * \param[in,out] st A connection to the fencer API * \param[in] target The node to get history for * \param[in] timeout How long to wait for operation to complete (in ms) * \param[in] quiet Suppress most output * \param[in] verbose Include additional output * \param[in] broadcast Gather fencing history from all nodes * \param[in] cleanup Clean up fencing history after listing * * \return Standard Pacemaker return code */ int pcmk_fence_history(xmlNodePtr *xml, stonith_t *st, const char *target, unsigned int timeout, bool quiet, int verbose, bool broadcast, bool cleanup); /*! * \brief List all installed fence agents * * \param[in,out] xml The destination for the result, as an XML tree (if * not NULL, previous contents will be freed and lost) * \param[in,out] st A connection to the fencer API * \param[in] timeout How long to wait for operation to complete (in ms) * * \return Standard Pacemaker return code */ int pcmk_fence_installed(xmlNodePtr *xml, stonith_t *st, unsigned int timeout); /*! * \brief When was a device last fenced? * * \param[in,out] xml The destination for the result, as an XML tree (if * not NULL, previous contents will be freed and lost) * \param[in] target The node that was fenced * \param[in] as_nodeid If true, \p target has node ID rather than name * * \return Standard Pacemaker return code */ int pcmk_fence_last(xmlNodePtr *xml, const char *target, bool as_nodeid); /*! * \brief List nodes that can be fenced * * \param[in,out] xml The destination for the result, as an XML tree (if * not NULL, previous contents will be freed and lost) * \param[in,out] st A connection to the fencer API * \param[in] device_id Resource ID of fence device to check * \param[in] timeout How long to wait for operation to complete (in ms) * * \return Standard Pacemaker return code */ int pcmk_fence_list_targets(xmlNodePtr *xml, stonith_t *st, const char *device_id, unsigned int timeout); /*! * \brief Get metadata for a fence agent * * \note If \p xml is not NULL, it will be freed first and the previous * contents lost. * * \param[in,out] xml The destination for the result, as an XML tree (if * not NULL, previous contents will be freed and lost) * \param[in,out] st A connection to the fencer API * \param[in] agent The fence agent to get metadata for * \param[in] timeout How long to wait for operation to complete (in ms) * * \return Standard Pacemaker return code */ int pcmk_fence_metadata(xmlNodePtr *xml, stonith_t *st, const char *agent, unsigned int timeout); /*! * \brief List registered fence devices * * \param[in,out] xml The destination for the result, as an XML tree (if * not NULL, previous contents will be freed and lost) * \param[in,out] st A connection to the fencer API * \param[in] target If not NULL, return only devices that can fence this * \param[in] timeout How long to wait for operation to complete (in ms) * * \return Standard Pacemaker return code */ int pcmk_fence_registered(xmlNodePtr *xml, stonith_t *st, const char *target, unsigned int timeout); /*! * \brief Register a fencing topology level * * \param[in,out] st A connection to the fencer API * \param[in] target What fencing level targets (as "name=value" to * target by given node attribute, or "@pattern" to * target by node name pattern, or a node name) * \param[in] fence_level Index number of level to add * \param[in] devices Devices to use in level * * \return Standard Pacemaker return code */ int pcmk_fence_register_level(stonith_t *st, const char *target, int fence_level, const stonith_key_value_t *devices); /*! * \brief Unregister a fencing topology level * * \param[in,out] st A connection to the fencer API * \param[in] target What fencing level targets (as "name=value" to * target by given node attribute, or "@pattern" to * target by node name pattern, or a node name) * \param[in] fence_level Index number of level to remove * * \return Standard Pacemaker return code */ int pcmk_fence_unregister_level(stonith_t *st, const char *target, int fence_level); /*! * \brief Validate a fence device configuration * * \param[in,out] xml The destination for the result, as an XML tree (if * not NULL, previous contents will be freed and lost) * \param[in,out] st A connection to the fencer API * \param[in] agent The agent to validate (for example, "fence_xvm") * \param[in] id Fence device ID (may be NULL) * \param[in] params Fence device configuration parameters * \param[in] timeout How long to wait for operation to complete (in ms) * * \return Standard Pacemaker return code */ int pcmk_fence_validate(xmlNodePtr *xml, stonith_t *st, const char *agent, const char *id, const stonith_key_value_t *params, unsigned int timeout); #endif #ifdef __cplusplus } #endif #endif diff --git a/include/pcmki/pcmki_cluster_queries.h b/include/pcmki/pcmki_cluster_queries.h index d023066ebe..dddda1e165 100644 --- a/include/pcmki/pcmki_cluster_queries.h +++ b/include/pcmki/pcmki_cluster_queries.h @@ -1,78 +1,78 @@ /* * Copyright 2020-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__PCMKI_PCMKI_CLUSTER_QUERIES__H #define PCMK__PCMKI_PCMKI_CLUSTER_QUERIES__H #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif // CIB queries int pcmk__list_nodes(pcmk__output_t *out, const char *node_types, bool bash_export); // Controller queries int pcmk__controller_status(pcmk__output_t *out, const char *node_name, unsigned int message_timeout_ms); int pcmk__designated_controller(pcmk__output_t *out, unsigned int message_timeout_ms); int pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, unsigned int message_timeout_ms, bool show_output, enum pcmk_pacemakerd_state *state); int pcmk__query_node_info(pcmk__output_t *out, uint32_t *node_id, char **node_name, char **uuid, char **state, bool *have_quorum, bool *is_remote, bool show_output, unsigned int message_timeout_ms); /*! * \internal * \brief Get the node name corresponding to a node ID from the controller * * \param[in,out] out Output object * \param[in] node_id ID of node whose name to get (or 0 for * the local node) * \param[out] node_name If not \p NULL, where to store the node * name * \param[in] message_timeout_ms How long to wait for a reply from the - * \p pacemaker-controld API. If 0, + * controller API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code * * \note The caller is responsible for freeing \p *node_name using \p free(). */ static inline int pcmk__query_node_name(pcmk__output_t *out, uint32_t nodeid, char **node_name, unsigned int message_timeout_ms) { return pcmk__query_node_info(out, &nodeid, node_name, NULL, NULL, NULL, NULL, false, message_timeout_ms); } // pacemakerd queries int pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, unsigned int message_timeout_ms, bool show_output, enum pcmk_pacemakerd_state *state); #ifdef __cplusplus } #endif #endif // PCMK__PCMKI_PCMKI_CLUSTER_QUERIES__H diff --git a/lib/cib/cib_file.c b/lib/cib/cib_file.c index d38ba55706..4a5b45340e 100644 --- a/lib/cib/cib_file.c +++ b/lib/cib/cib_file.c @@ -1,1164 +1,1164 @@ /* * Original copyright 2004 International Business Machines * Later changes copyright 2008-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define CIB_SERIES "cib" #define CIB_SERIES_MAX 100 #define CIB_SERIES_BZIP FALSE /* Must be false because archived copies are created with hard links */ #define CIB_LIVE_NAME CIB_SERIES ".xml" // key: client ID (const char *) -> value: client (cib_t *) static GHashTable *client_table = NULL; enum cib_file_flags { cib_file_flag_dirty = (1 << 0), cib_file_flag_live = (1 << 1), }; typedef struct cib_file_opaque_s { char *id; char *filename; uint32_t flags; // Group of enum cib_file_flags xmlNode *cib_xml; } cib_file_opaque_t; static int cib_file_process_commit_transaction(const char *op, int options, const char *section, xmlNode *req, xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, xmlNode **answer); /*! * \internal * \brief Add a CIB file client to client table * * \param[in] cib CIB client */ static void register_client(const cib_t *cib) { cib_file_opaque_t *private = cib->variant_opaque; if (client_table == NULL) { client_table = pcmk__strkey_table(NULL, NULL); } g_hash_table_insert(client_table, private->id, (gpointer) cib); } /*! * \internal * \brief Remove a CIB file client from client table * * \param[in] cib CIB client */ static void unregister_client(const cib_t *cib) { cib_file_opaque_t *private = cib->variant_opaque; if (client_table == NULL) { return; } g_hash_table_remove(client_table, private->id); /* @COMPAT: Add to crm_exit() when libcib and libcrmcommon are merged, * instead of destroying the client table when there are no more clients. */ if (g_hash_table_size(client_table) == 0) { g_hash_table_destroy(client_table); client_table = NULL; } } /*! * \internal * \brief Look up a CIB file client by its ID * * \param[in] client_id CIB client ID * * \return CIB client with matching ID if found, or \p NULL otherwise */ static cib_t * get_client(const char *client_id) { if (client_table == NULL) { return NULL; } return g_hash_table_lookup(client_table, (gpointer) client_id); } static const cib__op_fn_t cib_op_functions[] = { [cib__op_apply_patch] = cib_process_diff, [cib__op_bump] = cib_process_bump, [cib__op_commit_transact] = cib_file_process_commit_transaction, [cib__op_create] = cib_process_create, [cib__op_delete] = cib_process_delete, [cib__op_erase] = cib_process_erase, [cib__op_modify] = cib_process_modify, [cib__op_query] = cib_process_query, [cib__op_replace] = cib_process_replace, [cib__op_upgrade] = cib_process_upgrade, }; /* cib_file_backup() and cib_file_write_with_digest() need to chown the * written files only in limited circumstances, so these variables allow * that to be indicated without affecting external callers */ static uid_t cib_file_owner = 0; static uid_t cib_file_group = 0; static gboolean cib_do_chown = FALSE; #define cib_set_file_flags(cibfile, flags_to_set) do { \ (cibfile)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "CIB file", \ cibfile->filename, \ (cibfile)->flags, \ (flags_to_set), \ #flags_to_set); \ } while (0) #define cib_clear_file_flags(cibfile, flags_to_clear) do { \ (cibfile)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "CIB file", \ cibfile->filename, \ (cibfile)->flags, \ (flags_to_clear), \ #flags_to_clear); \ } while (0) /*! * \internal * \brief Get the function that performs a given CIB file operation * * \param[in] operation Operation whose function to look up * * \return Function that performs \p operation for a CIB file client */ static cib__op_fn_t file_get_op_function(const cib__operation_t *operation) { enum cib__op_type type = operation->type; CRM_ASSERT(type >= 0); if (type >= PCMK__NELEM(cib_op_functions)) { return NULL; } return cib_op_functions[type]; } /*! * \internal * \brief Check whether a file is the live CIB * * \param[in] filename Name of file to check * * \return TRUE if file exists and its real path is same as live CIB's */ static gboolean cib_file_is_live(const char *filename) { gboolean same = FALSE; if (filename != NULL) { // Canonicalize file names for true comparison char *real_filename = NULL; if (pcmk__real_path(filename, &real_filename) == pcmk_rc_ok) { char *real_livename = NULL; if (pcmk__real_path(CRM_CONFIG_DIR "/" CIB_LIVE_NAME, &real_livename) == pcmk_rc_ok) { same = !strcmp(real_filename, real_livename); free(real_livename); } free(real_filename); } } return same; } static int cib_file_process_request(cib_t *cib, xmlNode *request, xmlNode **output) { int rc = pcmk_ok; const cib__operation_t *operation = NULL; cib__op_fn_t op_function = NULL; int call_id = 0; int call_options = cib_none; const char *op = crm_element_value(request, PCMK__XA_CIB_OP); const char *section = crm_element_value(request, PCMK__XA_CIB_SECTION); xmlNode *wrapper = pcmk__xe_first_child(request, PCMK__XE_CIB_CALLDATA, NULL, NULL); xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); bool changed = false; bool read_only = false; xmlNode *result_cib = NULL; xmlNode *cib_diff = NULL; cib_file_opaque_t *private = cib->variant_opaque; // We error checked these in callers cib__get_operation(op, &operation); op_function = file_get_op_function(operation); crm_element_value_int(request, PCMK__XA_CIB_CALLID, &call_id); crm_element_value_int(request, PCMK__XA_CIB_CALLOPT, &call_options); read_only = !pcmk_is_set(operation->flags, cib__op_attr_modifies); - // Mirror the logic in prepare_input() in pacemaker-based + // Mirror the logic in prepare_input() in the CIB manager if ((section != NULL) && pcmk__xe_is(data, PCMK_XE_CIB)) { data = pcmk_find_cib_element(data, section); } rc = cib_perform_op(cib, op, call_options, op_function, read_only, section, request, data, true, &changed, &private->cib_xml, &result_cib, &cib_diff, output); if (pcmk_is_set(call_options, cib_transaction)) { /* The rest of the logic applies only to the transaction as a whole, not * to individual requests. */ goto done; } if (rc == -pcmk_err_schema_validation) { // Show validation errors to stderr pcmk__validate_xml(result_cib, NULL, NULL, NULL); } else if ((rc == pcmk_ok) && !read_only) { pcmk__log_xml_patchset(LOG_DEBUG, cib_diff); if (result_cib != private->cib_xml) { pcmk__xml_free(private->cib_xml); private->cib_xml = result_cib; } cib_set_file_flags(private, cib_file_flag_dirty); } done: if ((result_cib != private->cib_xml) && (result_cib != *output)) { pcmk__xml_free(result_cib); } pcmk__xml_free(cib_diff); return rc; } static int cib_file_perform_op_delegate(cib_t *cib, const char *op, const char *host, const char *section, xmlNode *data, xmlNode **output_data, int call_options, const char *user_name) { int rc = pcmk_ok; xmlNode *request = NULL; xmlNode *output = NULL; cib_file_opaque_t *private = cib->variant_opaque; const cib__operation_t *operation = NULL; crm_info("Handling %s operation for %s as %s", pcmk__s(op, "invalid"), pcmk__s(section, "entire CIB"), pcmk__s(user_name, "default user")); if (output_data != NULL) { *output_data = NULL; } if (cib->state == cib_disconnected) { return -ENOTCONN; } rc = cib__get_operation(op, &operation); rc = pcmk_rc2legacy(rc); if (rc != pcmk_ok) { // @COMPAT: At compatibility break, use rc directly return -EPROTONOSUPPORT; } if (file_get_op_function(operation) == NULL) { // @COMPAT: At compatibility break, use EOPNOTSUPP crm_err("Operation %s is not supported by CIB file clients", op); return -EPROTONOSUPPORT; } cib__set_call_options(call_options, "file operation", cib_no_mtime); rc = cib__create_op(cib, op, host, section, data, call_options, user_name, NULL, &request); if (rc != pcmk_ok) { return rc; } crm_xml_add(request, PCMK__XA_ACL_TARGET, user_name); crm_xml_add(request, PCMK__XA_CIB_CLIENTID, private->id); if (pcmk_is_set(call_options, cib_transaction)) { rc = cib__extend_transaction(cib, request); goto done; } rc = cib_file_process_request(cib, request, &output); if ((output_data != NULL) && (output != NULL)) { if (output->doc == private->cib_xml->doc) { *output_data = pcmk__xml_copy(NULL, output); } else { *output_data = output; } } done: if ((output != NULL) && (output->doc != private->cib_xml->doc) && ((output_data == NULL) || (output != *output_data))) { pcmk__xml_free(output); } pcmk__xml_free(request); return rc; } /*! * \internal * \brief Read CIB from disk and validate it against XML schema * * \param[in] filename Name of file to read CIB from * \param[out] output Where to store the read CIB XML * * \return pcmk_ok on success, * -ENXIO if file does not exist (or stat() otherwise fails), or * -pcmk_err_schema_validation if XML doesn't parse or validate * \note If filename is the live CIB, this will *not* verify its digest, * though that functionality would be trivial to add here. * Also, this will *not* verify that the file is writable, * because some callers might not need to write. */ static int load_file_cib(const char *filename, xmlNode **output) { struct stat buf; xmlNode *root = NULL; /* Ensure file is readable */ if (strcmp(filename, "-") && (stat(filename, &buf) < 0)) { return -ENXIO; } /* Parse XML from file */ root = pcmk__xml_read(filename); if (root == NULL) { return -pcmk_err_schema_validation; } /* Add a status section if not already present */ if (pcmk__xe_first_child(root, PCMK_XE_STATUS, NULL, NULL) == NULL) { pcmk__xe_create(root, PCMK_XE_STATUS); } /* Validate XML against its specified schema */ if (!pcmk__configured_schema_validates(root)) { const char *schema = crm_element_value(root, PCMK_XA_VALIDATE_WITH); crm_err("CIB does not validate against %s, or that schema is unknown", schema); pcmk__xml_free(root); return -pcmk_err_schema_validation; } /* Remember the parsed XML for later use */ *output = root; return pcmk_ok; } static int cib_file_signon(cib_t *cib, const char *name, enum cib_conn_type type) { int rc = pcmk_ok; cib_file_opaque_t *private = cib->variant_opaque; if (private->filename == NULL) { rc = -EINVAL; } else { rc = load_file_cib(private->filename, &private->cib_xml); } if (rc == pcmk_ok) { crm_debug("Opened connection to local file '%s' for %s", private->filename, pcmk__s(name, "client")); cib->state = cib_connected_command; cib->type = cib_command; register_client(cib); } else { crm_info("Connection to local file '%s' for %s (client %s) failed: %s", private->filename, pcmk__s(name, "client"), private->id, pcmk_strerror(rc)); } return rc; } /*! * \internal * \brief Write out the in-memory CIB to a live CIB file * * \param[in] cib_root Root of XML tree to write * \param[in,out] path Full path to file to write * * \return 0 on success, -1 on failure */ static int cib_file_write_live(xmlNode *cib_root, char *path) { uid_t uid = geteuid(); struct passwd *daemon_pwent; char *sep = strrchr(path, '/'); const char *cib_dirname, *cib_filename; int rc = 0; /* Get the desired uid/gid */ errno = 0; daemon_pwent = getpwnam(CRM_DAEMON_USER); if (daemon_pwent == NULL) { crm_perror(LOG_ERR, "Could not find %s user", CRM_DAEMON_USER); return -1; } /* If we're root, we can change the ownership; * if we're daemon, anything we create will be OK; * otherwise, block access so we don't create wrong owner */ if ((uid != 0) && (uid != daemon_pwent->pw_uid)) { crm_perror(LOG_ERR, "Must be root or %s to modify live CIB", CRM_DAEMON_USER); return 0; } /* fancy footwork to separate dirname from filename * (we know the canonical name maps to the live CIB, * but the given name might be relative, or symlinked) */ if (sep == NULL) { /* no directory component specified */ cib_dirname = "./"; cib_filename = path; } else if (sep == path) { /* given name is in / */ cib_dirname = "/"; cib_filename = path + 1; } else { /* typical case; split given name into parts */ *sep = '\0'; cib_dirname = path; cib_filename = sep + 1; } /* if we're root, we want to update the file ownership */ if (uid == 0) { cib_file_owner = daemon_pwent->pw_uid; cib_file_group = daemon_pwent->pw_gid; cib_do_chown = TRUE; } /* write the file */ if (cib_file_write_with_digest(cib_root, cib_dirname, cib_filename) != pcmk_ok) { rc = -1; } /* turn off file ownership changes, for other callers */ if (uid == 0) { cib_do_chown = FALSE; } /* undo fancy stuff */ if ((sep != NULL) && (*sep == '\0')) { *sep = '/'; } return rc; } /*! * \internal * \brief Sign-off method for CIB file variants * * This will write the file to disk if needed, and free the in-memory CIB. If * the file is the live CIB, it will compute and write a signature as well. * * \param[in,out] cib CIB object to sign off * * \return pcmk_ok on success, pcmk_err_generic on failure * \todo This method should refuse to write the live CIB if the CIB manager is * running. */ static int cib_file_signoff(cib_t *cib) { int rc = pcmk_ok; cib_file_opaque_t *private = cib->variant_opaque; crm_debug("Disconnecting from the CIB manager"); cib->state = cib_disconnected; cib->type = cib_no_connection; unregister_client(cib); cib->cmds->end_transaction(cib, false, cib_none); /* If the in-memory CIB has been changed, write it to disk */ if (pcmk_is_set(private->flags, cib_file_flag_dirty)) { /* If this is the live CIB, write it out with a digest */ if (pcmk_is_set(private->flags, cib_file_flag_live)) { if (cib_file_write_live(private->cib_xml, private->filename) < 0) { rc = pcmk_err_generic; } /* Otherwise, it's a simple write */ } else { bool compress = pcmk__ends_with_ext(private->filename, ".bz2"); if (pcmk__xml_write_file(private->cib_xml, private->filename, compress) != pcmk_rc_ok) { rc = pcmk_err_generic; } } if (rc == pcmk_ok) { crm_info("Wrote CIB to %s", private->filename); cib_clear_file_flags(private, cib_file_flag_dirty); } else { crm_err("Could not write CIB to %s", private->filename); } } /* Free the in-memory CIB */ pcmk__xml_free(private->cib_xml); private->cib_xml = NULL; return rc; } static int cib_file_free(cib_t *cib) { int rc = pcmk_ok; if (cib->state != cib_disconnected) { rc = cib_file_signoff(cib); } if (rc == pcmk_ok) { cib_file_opaque_t *private = cib->variant_opaque; free(private->id); free(private->filename); free(private); free(cib->cmds); free(cib->user); free(cib); } else { fprintf(stderr, "Couldn't sign off: %d\n", rc); } return rc; } static int cib_file_register_notification(cib_t *cib, const char *callback, int enabled) { return -EPROTONOSUPPORT; } static int cib_file_set_connection_dnotify(cib_t *cib, void (*dnotify) (gpointer user_data)) { return -EPROTONOSUPPORT; } /*! * \internal * \brief Get the given CIB connection's unique client identifier * * \param[in] cib CIB connection * \param[out] async_id If not \p NULL, where to store asynchronous client ID * \param[out] sync_id If not \p NULL, where to store synchronous client ID * * \return Legacy Pacemaker return code * * \note This is the \p cib_file variant implementation of * \p cib_api_operations_t:client_id(). */ static int cib_file_client_id(const cib_t *cib, const char **async_id, const char **sync_id) { cib_file_opaque_t *private = cib->variant_opaque; if (async_id != NULL) { *async_id = private->id; } if (sync_id != NULL) { *sync_id = private->id; } return pcmk_ok; } cib_t * cib_file_new(const char *cib_location) { cib_file_opaque_t *private = NULL; cib_t *cib = cib_new_variant(); if (cib == NULL) { return NULL; } private = calloc(1, sizeof(cib_file_opaque_t)); if (private == NULL) { free(cib); return NULL; } private->id = crm_generate_uuid(); cib->variant = cib_file; cib->variant_opaque = private; if (cib_location == NULL) { cib_location = getenv("CIB_file"); CRM_CHECK(cib_location != NULL, return NULL); // Shouldn't be possible } private->flags = 0; if (cib_file_is_live(cib_location)) { cib_set_file_flags(private, cib_file_flag_live); crm_trace("File %s detected as live CIB", cib_location); } private->filename = strdup(cib_location); /* assign variant specific ops */ cib->delegate_fn = cib_file_perform_op_delegate; cib->cmds->signon = cib_file_signon; cib->cmds->signoff = cib_file_signoff; cib->cmds->free = cib_file_free; cib->cmds->register_notification = cib_file_register_notification; cib->cmds->set_connection_dnotify = cib_file_set_connection_dnotify; cib->cmds->client_id = cib_file_client_id; return cib; } /*! * \internal * \brief Compare the calculated digest of an XML tree against a signature file * * \param[in] root Root of XML tree to compare * \param[in] sigfile Name of signature file containing digest to compare * * \return TRUE if digests match or signature file does not exist, else FALSE */ static gboolean cib_file_verify_digest(xmlNode *root, const char *sigfile) { gboolean passed = FALSE; char *expected; int rc = pcmk__file_contents(sigfile, &expected); switch (rc) { case pcmk_rc_ok: if (expected == NULL) { crm_err("On-disk digest at %s is empty", sigfile); return FALSE; } break; case ENOENT: crm_warn("No on-disk digest present at %s", sigfile); return TRUE; default: crm_err("Could not read on-disk digest from %s: %s", sigfile, pcmk_rc_str(rc)); return FALSE; } passed = pcmk__verify_digest(root, expected); free(expected); return passed; } /*! * \internal * \brief Read an XML tree from a file and verify its digest * * \param[in] filename Name of XML file to read * \param[in] sigfile Name of signature file containing digest to compare * \param[out] root If non-NULL, will be set to pointer to parsed XML tree * * \return 0 if file was successfully read, parsed and verified, otherwise: * -errno on stat() failure, * -pcmk_err_cib_corrupt if file size is 0 or XML is not parseable, or * -pcmk_err_cib_modified if digests do not match * \note If root is non-NULL, it is the caller's responsibility to free *root on * successful return. */ int cib_file_read_and_verify(const char *filename, const char *sigfile, xmlNode **root) { int s_res; struct stat buf; char *local_sigfile = NULL; xmlNode *local_root = NULL; CRM_ASSERT(filename != NULL); if (root) { *root = NULL; } /* Verify that file exists and its size is nonzero */ s_res = stat(filename, &buf); if (s_res < 0) { crm_perror(LOG_WARNING, "Could not verify cluster configuration file %s", filename); return -errno; } else if (buf.st_size == 0) { crm_warn("Cluster configuration file %s is corrupt (size is zero)", filename); return -pcmk_err_cib_corrupt; } /* Parse XML */ local_root = pcmk__xml_read(filename); if (local_root == NULL) { crm_warn("Cluster configuration file %s is corrupt (unparseable as XML)", filename); return -pcmk_err_cib_corrupt; } /* If sigfile is not specified, use original file name plus .sig */ if (sigfile == NULL) { sigfile = local_sigfile = crm_strdup_printf("%s.sig", filename); } /* Verify that digests match */ if (cib_file_verify_digest(local_root, sigfile) == FALSE) { free(local_sigfile); pcmk__xml_free(local_root); return -pcmk_err_cib_modified; } free(local_sigfile); if (root) { *root = local_root; } else { pcmk__xml_free(local_root); } return pcmk_ok; } /*! * \internal * \brief Back up a CIB * * \param[in] cib_dirname Directory containing CIB file and backups * \param[in] cib_filename Name (relative to cib_dirname) of CIB file to back up * * \return 0 on success, -1 on error */ static int cib_file_backup(const char *cib_dirname, const char *cib_filename) { int rc = 0; unsigned int seq; char *cib_path = crm_strdup_printf("%s/%s", cib_dirname, cib_filename); char *cib_digest = crm_strdup_printf("%s.sig", cib_path); char *backup_path; char *backup_digest; // Determine backup and digest file names if (pcmk__read_series_sequence(cib_dirname, CIB_SERIES, &seq) != pcmk_rc_ok) { // @TODO maybe handle errors better ... seq = 0; } backup_path = pcmk__series_filename(cib_dirname, CIB_SERIES, seq, CIB_SERIES_BZIP); backup_digest = crm_strdup_printf("%s.sig", backup_path); /* Remove the old backups if they exist */ unlink(backup_path); unlink(backup_digest); /* Back up the CIB, by hard-linking it to the backup name */ if ((link(cib_path, backup_path) < 0) && (errno != ENOENT)) { crm_perror(LOG_ERR, "Could not archive %s by linking to %s", cib_path, backup_path); rc = -1; /* Back up the CIB signature similarly */ } else if ((link(cib_digest, backup_digest) < 0) && (errno != ENOENT)) { crm_perror(LOG_ERR, "Could not archive %s by linking to %s", cib_digest, backup_digest); rc = -1; /* Update the last counter and ensure everything is sync'd to media */ } else { pcmk__write_series_sequence(cib_dirname, CIB_SERIES, ++seq, CIB_SERIES_MAX); if (cib_do_chown) { int rc2; if ((chown(backup_path, cib_file_owner, cib_file_group) < 0) && (errno != ENOENT)) { crm_perror(LOG_ERR, "Could not set owner of %s", backup_path); rc = -1; } if ((chown(backup_digest, cib_file_owner, cib_file_group) < 0) && (errno != ENOENT)) { crm_perror(LOG_ERR, "Could not set owner of %s", backup_digest); rc = -1; } rc2 = pcmk__chown_series_sequence(cib_dirname, CIB_SERIES, cib_file_owner, cib_file_group); if (rc2 != pcmk_rc_ok) { crm_err("Could not set owner of sequence file in %s: %s", cib_dirname, pcmk_rc_str(rc2)); rc = -1; } } pcmk__sync_directory(cib_dirname); crm_info("Archived previous version as %s", backup_path); } free(cib_path); free(cib_digest); free(backup_path); free(backup_digest); return rc; } /*! * \internal * \brief Prepare CIB XML to be written to disk * * Set \c PCMK_XA_NUM_UPDATES to 0, set \c PCMK_XA_CIB_LAST_WRITTEN to the * current timestamp, and strip out the status section. * * \param[in,out] root Root of CIB XML tree * * \return void */ static void cib_file_prepare_xml(xmlNode *root) { xmlNode *cib_status_root = NULL; /* Always write out with num_updates=0 and current last-written timestamp */ crm_xml_add(root, PCMK_XA_NUM_UPDATES, "0"); pcmk__xe_add_last_written(root); /* Delete status section before writing to file, because * we discard it on startup anyway, and users get confused by it */ cib_status_root = pcmk__xe_first_child(root, PCMK_XE_STATUS, NULL, NULL); CRM_CHECK(cib_status_root != NULL, return); pcmk__xml_free(cib_status_root); } /*! * \internal * \brief Write CIB to disk, along with a signature file containing its digest * * \param[in,out] cib_root Root of XML tree to write * \param[in] cib_dirname Directory containing CIB and signature files * \param[in] cib_filename Name (relative to cib_dirname) of file to write * * \return pcmk_ok on success, * pcmk_err_cib_modified if existing cib_filename doesn't match digest, * pcmk_err_cib_backup if existing cib_filename couldn't be backed up, * or pcmk_err_cib_save if new cib_filename couldn't be saved */ int cib_file_write_with_digest(xmlNode *cib_root, const char *cib_dirname, const char *cib_filename) { int exit_rc = pcmk_ok; int rc, fd; char *digest = NULL; /* Detect CIB version for diagnostic purposes */ const char *epoch = crm_element_value(cib_root, PCMK_XA_EPOCH); const char *admin_epoch = crm_element_value(cib_root, PCMK_XA_ADMIN_EPOCH); /* Determine full CIB and signature pathnames */ char *cib_path = crm_strdup_printf("%s/%s", cib_dirname, cib_filename); char *digest_path = crm_strdup_printf("%s.sig", cib_path); /* Create temporary file name patterns for writing out CIB and signature */ char *tmp_cib = crm_strdup_printf("%s/cib.XXXXXX", cib_dirname); char *tmp_digest = crm_strdup_printf("%s/cib.XXXXXX", cib_dirname); /* Ensure the admin didn't modify the existing CIB underneath us */ crm_trace("Reading cluster configuration file %s", cib_path); rc = cib_file_read_and_verify(cib_path, NULL, NULL); if ((rc != pcmk_ok) && (rc != -ENOENT)) { crm_err("%s was manually modified while the cluster was active!", cib_path); exit_rc = pcmk_err_cib_modified; goto cleanup; } /* Back up the existing CIB */ if (cib_file_backup(cib_dirname, cib_filename) < 0) { exit_rc = pcmk_err_cib_backup; goto cleanup; } crm_debug("Writing CIB to disk"); umask(S_IWGRP | S_IWOTH | S_IROTH); cib_file_prepare_xml(cib_root); /* Write the CIB to a temporary file, so we can deploy (near) atomically */ fd = mkstemp(tmp_cib); if (fd < 0) { crm_perror(LOG_ERR, "Couldn't open temporary file %s for writing CIB", tmp_cib); exit_rc = pcmk_err_cib_save; goto cleanup; } /* Protect the temporary file */ if (fchmod(fd, S_IRUSR | S_IWUSR) < 0) { crm_perror(LOG_ERR, "Couldn't protect temporary file %s for writing CIB", tmp_cib); exit_rc = pcmk_err_cib_save; goto cleanup; } if (cib_do_chown && (fchown(fd, cib_file_owner, cib_file_group) < 0)) { crm_perror(LOG_ERR, "Couldn't protect temporary file %s for writing CIB", tmp_cib); exit_rc = pcmk_err_cib_save; goto cleanup; } /* Write out the CIB */ if (pcmk__xml_write_fd(cib_root, tmp_cib, fd) != pcmk_rc_ok) { crm_err("Changes couldn't be written to %s", tmp_cib); exit_rc = pcmk_err_cib_save; goto cleanup; } /* Calculate CIB digest */ digest = pcmk__digest_on_disk_cib(cib_root); CRM_ASSERT(digest != NULL); crm_info("Wrote version %s.%s.0 of the CIB to disk (digest: %s)", (admin_epoch ? admin_epoch : "0"), (epoch ? epoch : "0"), digest); /* Write the CIB digest to a temporary file */ fd = mkstemp(tmp_digest); if (fd < 0) { crm_perror(LOG_ERR, "Could not create temporary file for CIB digest"); exit_rc = pcmk_err_cib_save; goto cleanup; } if (cib_do_chown && (fchown(fd, cib_file_owner, cib_file_group) < 0)) { crm_perror(LOG_ERR, "Couldn't protect temporary file %s for writing CIB", tmp_cib); exit_rc = pcmk_err_cib_save; close(fd); goto cleanup; } rc = pcmk__write_sync(fd, digest); if (rc != pcmk_rc_ok) { crm_err("Could not write digest to %s: %s", tmp_digest, pcmk_rc_str(rc)); exit_rc = pcmk_err_cib_save; close(fd); goto cleanup; } close(fd); crm_debug("Wrote digest %s to disk", digest); /* Verify that what we wrote is sane */ crm_info("Reading cluster configuration file %s (digest: %s)", tmp_cib, tmp_digest); rc = cib_file_read_and_verify(tmp_cib, tmp_digest, NULL); CRM_ASSERT(rc == 0); /* Rename temporary files to live, and sync directory changes to media */ crm_debug("Activating %s", tmp_cib); if (rename(tmp_cib, cib_path) < 0) { crm_perror(LOG_ERR, "Couldn't rename %s as %s", tmp_cib, cib_path); exit_rc = pcmk_err_cib_save; } if (rename(tmp_digest, digest_path) < 0) { crm_perror(LOG_ERR, "Couldn't rename %s as %s", tmp_digest, digest_path); exit_rc = pcmk_err_cib_save; } pcmk__sync_directory(cib_dirname); cleanup: free(cib_path); free(digest_path); free(digest); free(tmp_digest); free(tmp_cib); return exit_rc; } /*! * \internal * \brief Process requests in a CIB transaction * * Stop when a request fails or when all requests have been processed. * * \param[in,out] cib CIB client * \param[in,out] transaction CIB transaction * * \return Standard Pacemaker return code */ static int cib_file_process_transaction_requests(cib_t *cib, xmlNode *transaction) { cib_file_opaque_t *private = cib->variant_opaque; for (xmlNode *request = pcmk__xe_first_child(transaction, PCMK__XE_CIB_COMMAND, NULL, NULL); request != NULL; request = pcmk__xe_next_same(request)) { xmlNode *output = NULL; const char *op = crm_element_value(request, PCMK__XA_CIB_OP); int rc = cib_file_process_request(cib, request, &output); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Aborting transaction for CIB file client (%s) on file " "'%s' due to failed %s request: %s", private->id, private->filename, op, pcmk_rc_str(rc)); crm_log_xml_info(request, "Failed request"); return rc; } crm_trace("Applied %s request to transaction working CIB for CIB file " "client (%s) on file '%s'", op, private->id, private->filename); crm_log_xml_trace(request, "Successful request"); } return pcmk_rc_ok; } /*! * \internal * \brief Commit a given CIB file client's transaction to a working CIB copy * * \param[in,out] cib CIB file client * \param[in] transaction CIB transaction * \param[in,out] result_cib Where to store result CIB * * \return Standard Pacemaker return code * * \note The caller is responsible for replacing the \p cib argument's * \p private->cib_xml with \p result_cib on success, and for freeing * \p result_cib using \p pcmk__xml_free() on failure. */ static int cib_file_commit_transaction(cib_t *cib, xmlNode *transaction, xmlNode **result_cib) { int rc = pcmk_rc_ok; cib_file_opaque_t *private = cib->variant_opaque; xmlNode *saved_cib = private->cib_xml; CRM_CHECK(pcmk__xe_is(transaction, PCMK__XE_CIB_TRANSACTION), return pcmk_rc_no_transaction); /* *result_cib should be a copy of private->cib_xml (created by * cib_perform_op()). If not, make a copy now. Change tracking isn't * strictly required here because: * * Each request in the transaction will have changes tracked and ACLs * checked if appropriate. * * cib_perform_op() will infer changes for the commit request at the end. */ CRM_CHECK((*result_cib != NULL) && (*result_cib != private->cib_xml), *result_cib = pcmk__xml_copy(NULL, private->cib_xml)); crm_trace("Committing transaction for CIB file client (%s) on file '%s' to " "working CIB", private->id, private->filename); // Apply all changes to a working copy of the CIB private->cib_xml = *result_cib; rc = cib_file_process_transaction_requests(cib, transaction); crm_trace("Transaction commit %s for CIB file client (%s) on file '%s'", ((rc == pcmk_rc_ok)? "succeeded" : "failed"), private->id, private->filename); /* Some request types (for example, erase) may have freed private->cib_xml * (the working copy) and pointed it at a new XML object. In that case, it * follows that *result_cib (the working copy) was freed. * * Point *result_cib at the updated working copy stored in private->cib_xml. */ *result_cib = private->cib_xml; // Point private->cib_xml back to the unchanged original copy private->cib_xml = saved_cib; return rc; } static int cib_file_process_commit_transaction(const char *op, int options, const char *section, xmlNode *req, xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, xmlNode **answer) { int rc = pcmk_rc_ok; const char *client_id = crm_element_value(req, PCMK__XA_CIB_CLIENTID); cib_t *cib = NULL; CRM_CHECK(client_id != NULL, return -EINVAL); cib = get_client(client_id); CRM_CHECK(cib != NULL, return -EINVAL); rc = cib_file_commit_transaction(cib, input, result_cib); if (rc != pcmk_rc_ok) { cib_file_opaque_t *private = cib->variant_opaque; crm_err("Could not commit transaction for CIB file client (%s) on " "file '%s': %s", private->id, private->filename, pcmk_rc_str(rc)); } return pcmk_rc2legacy(rc); } diff --git a/lib/cluster/election.c b/lib/cluster/election.c index 3ccbea75c3..ce4d822b7b 100644 --- a/lib/cluster/election.c +++ b/lib/cluster/election.c @@ -1,732 +1,738 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #define STORM_INTERVAL 2 /* in seconds */ struct election_s { enum pcmk_ipc_server server; // For message type enum election_result state; guint count; // How many times local node has voted char *name; // Descriptive name for this election char *uname; // Local node's name GSourceFunc cb; // Function to call if election is won GHashTable *voted; // Key = node name, value = how node voted mainloop_timer_t *timeout; // When to abort if all votes not received int election_wins; // Track wins, for storm detection bool wrote_blackbox; // Write a storm blackbox at most once time_t expires; // When storm detection period ends time_t last_election_loss; // When dampening period ends }; static void election_complete(election_t *e) { e->state = election_won; if (e->cb != NULL) { e->cb(e); } election_reset(e); } static gboolean election_timer_cb(gpointer user_data) { election_t *e = user_data; crm_info("%s timed out, declaring local node as winner", e->name); election_complete(e); return FALSE; } /*! * \brief Get current state of an election * * \param[in] e Election object * * \return Current state of \e */ enum election_result election_state(const election_t *e) { return (e == NULL)? election_error : e->state; } /*! * \brief Create a new election object * * Every node that wishes to participate in an election must create an election * object. Typically, this should be done once, at start-up. A caller should * only create a single election object. * * \param[in] server Server to use for message type in election messages * \param[in] name Label for election (for logging) * \param[in] uname Local node's name * \param[in] period_ms How long to wait for all peers to vote * \param[in] cb Function to call if local node wins election * * \return Newly allocated election object on success, NULL on error * \note The caller is responsible for freeing the returned value using * election_fini(). */ election_t * election_init(enum pcmk_ipc_server server, const char *name, const char *uname, guint period_ms, GSourceFunc cb) { election_t *e = NULL; static guint count = 0; CRM_CHECK(uname != NULL, return NULL); e = calloc(1, sizeof(election_t)); if (e == NULL) { crm_perror(LOG_CRIT, "Cannot create election"); return NULL; } e->server = server; e->uname = strdup(uname); if (e->uname == NULL) { crm_perror(LOG_CRIT, "Cannot create election"); free(e); return NULL; } e->name = name? crm_strdup_printf("election-%s", name) : crm_strdup_printf("election-%u", count++); e->cb = cb; e->timeout = mainloop_timer_add(e->name, period_ms, FALSE, election_timer_cb, e); crm_trace("Created %s", e->name); return e; } /*! * \brief Disregard any previous vote by specified peer * * This discards any recorded vote from a specified peer. Election users should * call this whenever a voting peer becomes inactive. * * \param[in,out] e Election object * \param[in] uname Name of peer to disregard */ void election_remove(election_t *e, const char *uname) { if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) { crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname); g_hash_table_remove(e->voted, uname); } } /*! * \brief Stop election timer and disregard all votes * * \param[in,out] e Election object */ void election_reset(election_t *e) { if (e != NULL) { crm_trace("Resetting election %s", e->name); mainloop_timer_stop(e->timeout); if (e->voted) { crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted)); g_hash_table_destroy(e->voted); e->voted = NULL; } } } /*! * \brief Free an election object * * Free all memory associated with an election object, stopping its * election timer (if running). * * \param[in,out] e Election object */ void election_fini(election_t *e) { if (e != NULL) { election_reset(e); crm_trace("Destroying %s", e->name); mainloop_timer_del(e->timeout); free(e->uname); free(e->name); free(e); } } static void election_timeout_start(election_t *e) { if (e != NULL) { mainloop_timer_start(e->timeout); } } /*! * \brief Stop an election's timer, if running * * \param[in,out] e Election object */ void election_timeout_stop(election_t *e) { if (e != NULL) { mainloop_timer_stop(e->timeout); } } /*! * \brief Change an election's timeout (restarting timer if running) * * \param[in,out] e Election object * \param[in] period New timeout */ void election_timeout_set_period(election_t *e, guint period) { if (e != NULL) { mainloop_timer_set_period(e->timeout, period); } else { crm_err("No election defined"); } } static int get_uptime(struct timeval *output) { static time_t expires = 0; static struct rusage info; time_t tm_now = time(NULL); if (expires < tm_now) { int rc = 0; info.ru_utime.tv_sec = 0; info.ru_utime.tv_usec = 0; rc = getrusage(RUSAGE_SELF, &info); output->tv_sec = 0; output->tv_usec = 0; if (rc < 0) { crm_perror(LOG_ERR, "Could not calculate the current uptime"); expires = 0; return -1; } crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec, (long)info.ru_utime.tv_usec); } expires = tm_now + STORM_INTERVAL; /* N seconds after the last _access_ */ output->tv_sec = info.ru_utime.tv_sec; output->tv_usec = info.ru_utime.tv_usec; return 1; } static int compare_age(struct timeval your_age) { struct timeval our_age; get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */ if (our_age.tv_sec > your_age.tv_sec) { crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return 1; } else if (our_age.tv_sec < your_age.tv_sec) { crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec); return -1; } else if (our_age.tv_usec > your_age.tv_usec) { crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec); return 1; } else if (our_age.tv_usec < your_age.tv_usec) { crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)", (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec); return -1; } return 0; } /*! * \brief Start a new election by offering local node's candidacy * * Broadcast a "vote" election message containing the local node's ID, * (incremented) election counter, and uptime, and start the election timer. * * \param[in,out] e Election object * * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if * all active peers do so, or if the election times out, the local node * wins the election. (If we lose to any peer vote, we will stop the * timer, so a timeout means we did not lose -- either some peer did not * vote, or we did not call election_check() in time.) */ void election_vote(election_t *e) { struct timeval age; xmlNode *vote = NULL; pcmk__node_status_t *our_node = NULL; + const char *message_type = NULL; if (e == NULL) { crm_trace("Election vote requested, but no election available"); return; } our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster_member); if (!pcmk__cluster_is_node_active(our_node)) { crm_trace("Cannot vote in %s yet: local node not connected to cluster", e->name); return; } election_reset(e); e->state = election_in_progress; - vote = pcmk__new_request(e->server, CRM_SYSTEM_CRMD, NULL, - CRM_SYSTEM_CRMD, CRM_OP_VOTE, NULL); + message_type = pcmk__server_message_type(e->server); + + /* @COMPAT We use message_type as the sender and recipient system for + * backward compatibility (see T566). + */ + vote = pcmk__new_request(e->server, message_type, NULL, + message_type, CRM_OP_VOTE, NULL); e->count++; crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->xml_id); crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count); // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds get_uptime(&age); crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC, PCMK__XA_ELECTION_AGE_NANO_SEC, &age); - pcmk__cluster_send_message(NULL, pcmk_ipc_controld, vote); + pcmk__cluster_send_message(NULL, e->server, vote); pcmk__xml_free(vote); crm_debug("Started %s round %d", e->name, e->count); election_timeout_start(e); return; } /*! * \brief Check whether local node has won an election * * If all known peers have sent no-vote messages, stop the election timer, set * the election state to won, and call any registered win callback. * * \param[in,out] e Election object * * \return TRUE if local node has won, FALSE otherwise * \note If all known peers have sent no-vote messages, but the election owner * does not call this function, the election will not be won (and the * callback will not be called) until the election times out. * \note This should be called when election_count_vote() returns * \c election_in_progress. */ bool election_check(election_t *e) { int voted_size = 0; int num_members = 0; if (e == NULL) { crm_trace("Election check requested, but no election available"); return FALSE; } if (e->voted == NULL) { crm_trace("%s check requested, but no votes received yet", e->name); return FALSE; } voted_size = g_hash_table_size(e->voted); num_members = pcmk__cluster_num_active_nodes(); /* in the case of #voted > #members, it is better to * wait for the timeout and give the cluster time to * stabilize */ if (voted_size >= num_members) { /* we won and everyone has voted */ election_timeout_stop(e); if (voted_size > num_members) { GHashTableIter gIter; const pcmk__node_status_t *node = NULL; char *key = NULL; crm_warn("Received too many votes in %s", e->name); g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) { if (pcmk__cluster_is_node_active(node)) { crm_warn("* expected vote: %s", node->name); } } g_hash_table_iter_init(&gIter, e->voted); while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) { crm_warn("* actual vote: %s", key); } } crm_info("%s won by local node", e->name); election_complete(e); return TRUE; } else { crm_debug("%s still waiting on %d of %d votes", e->name, num_members - voted_size, num_members); } return FALSE; } #define LOSS_DAMPEN 2 /* in seconds */ struct vote { const char *op; const char *from; const char *version; const char *election_owner; int election_id; struct timeval age; }; /*! * \brief Unpack an election message * * \param[in] e Election object (for logging only) * \param[in] message Election message XML * \param[out] vote Parsed fields from message * * \return TRUE if election message and election are valid, FALSE otherwise * \note The parsed struct's pointer members are valid only for the lifetime of * the message argument. */ static bool parse_election_message(const election_t *e, const xmlNode *message, struct vote *vote) { CRM_CHECK(message && vote, return FALSE); vote->election_id = -1; vote->age.tv_sec = -1; vote->age.tv_usec = -1; vote->op = crm_element_value(message, PCMK__XA_CRM_TASK); vote->from = crm_element_value(message, PCMK__XA_SRC); vote->version = crm_element_value(message, PCMK_XA_VERSION); vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER); crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id)); if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL) || (vote->election_owner == NULL) || (vote->election_id < 0)) { crm_warn("Invalid %s message from %s in %s ", (vote->op? vote->op : "election"), (vote->from? vote->from : "unspecified node"), (e? e->name : "election")); return FALSE; } // Op-specific validation if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) { /* Only vote ops have uptime. Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds. */ crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC, PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age)); if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) { crm_warn("Cannot count %s %s from %s because it is missing uptime", (e? e->name : "election"), vote->op, vote->from); return FALSE; } } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) { crm_info("Cannot process %s message from %s because %s is not a known election op", (e? e->name : "election"), vote->from, vote->op); return FALSE; } // Election validation if (e == NULL) { crm_info("Cannot count %s from %s because no election available", vote->op, vote->from); return FALSE; } /* If the membership cache is NULL, we REALLY shouldn't be voting -- * the question is how we managed to get here. */ if (pcmk__peer_cache == NULL) { crm_info("Cannot count %s %s from %s because no peer information available", e->name, vote->op, vote->from); return FALSE; } return TRUE; } static void record_vote(election_t *e, struct vote *vote) { CRM_ASSERT(e && vote && vote->from && vote->op); if (e->voted == NULL) { e->voted = pcmk__strkey_table(free, free); } pcmk__insert_dup(e->voted, vote->from, vote->op); } static void send_no_vote(election_t *e, pcmk__node_status_t *peer, struct vote *vote) { - // @TODO shouldn't hardcode CRM_SYSTEM_CRMD - xmlNode *novote = pcmk__new_request(e->server, CRM_SYSTEM_CRMD, - vote->from, CRM_SYSTEM_CRMD, + const char *message_type = pcmk__server_message_type(e->server); + xmlNode *novote = pcmk__new_request(e->server, message_type, + vote->from, message_type, CRM_OP_NOVOTE, NULL); crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner); crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id); - pcmk__cluster_send_message(peer, pcmk_ipc_controld, novote); + pcmk__cluster_send_message(peer, e->server, novote); pcmk__xml_free(novote); } /*! * \brief Process an election message (vote or no-vote) from a peer * * \param[in,out] e Election object * \param[in] message Election message XML from peer * \param[in] can_win Whether local node is eligible to win * * \return Election state after new vote is considered * \note If the peer message is a vote, and we prefer the peer to win, this will * send a no-vote reply to the peer. * \note The situations "we lost to this vote" from "this is a late no-vote * after we've already lost" both return election_lost. If a caller needs * to distinguish them, it should save the current state before calling * this function, and then compare the result. */ enum election_result election_count_vote(election_t *e, const xmlNode *message, bool can_win) { int log_level = LOG_INFO; gboolean done = FALSE; gboolean we_lose = FALSE; const char *reason = "unknown"; bool we_are_owner = FALSE; pcmk__node_status_t *our_node = NULL; pcmk__node_status_t *your_node = NULL; time_t tm_now = time(NULL); struct vote vote; CRM_CHECK(message != NULL, return election_error); if (parse_election_message(e, message, &vote) == FALSE) { return election_error; } your_node = pcmk__get_node(0, vote.from, NULL, pcmk__node_search_cluster_member); our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster_member); we_are_owner = (our_node != NULL) && pcmk__str_eq(our_node->xml_id, vote.election_owner, pcmk__str_none); if (!can_win) { reason = "Not eligible"; we_lose = TRUE; } else if (!pcmk__cluster_is_node_active(our_node)) { reason = "We are not part of the cluster"; log_level = LOG_ERR; we_lose = TRUE; } else if (we_are_owner && (vote.election_id != e->count)) { log_level = LOG_TRACE; reason = "Superseded"; done = TRUE; } else if (!pcmk__cluster_is_node_active(your_node)) { /* Possibly we cached the message in the FSA queue at a point that it wasn't */ reason = "Peer is not part of our cluster"; log_level = LOG_WARNING; done = TRUE; } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none) || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) { /* Receiving our own broadcast vote, or a no-vote from peer, is a vote * for us to win */ if (!we_are_owner) { crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)", e->name, vote.election_id, vote.op, vote.from, vote.election_owner); return election_error; } if (e->state != election_in_progress) { // Should only happen if we already lost crm_debug("Not counting %s round %d %s from %s because no election in progress", e->name, vote.election_id, vote.op, vote.from); return e->state; } record_vote(e, &vote); reason = "Recorded"; done = TRUE; } else { // A peer vote requires a comparison to determine which node is better int age_result = compare_age(vote.age); int version_result = compare_version(vote.version, CRM_FEATURE_SET); if (version_result < 0) { reason = "Version"; we_lose = TRUE; } else if (version_result > 0) { reason = "Version"; } else if (age_result < 0) { reason = "Uptime"; we_lose = TRUE; } else if (age_result > 0) { reason = "Uptime"; } else if (strcasecmp(e->uname, vote.from) > 0) { reason = "Host name"; we_lose = TRUE; } else { reason = "Host name"; } } if (e->expires < tm_now) { e->election_wins = 0; e->expires = tm_now + STORM_INTERVAL; } else if (done == FALSE && we_lose == FALSE) { int peers = 1 + g_hash_table_size(pcmk__peer_cache); /* If every node has to vote down every other node, thats N*(N-1) total elections * Allow some leeway before _really_ complaining */ e->election_wins++; if (e->election_wins > (peers * peers)) { crm_warn("%s election storm detected: %d wins in %d seconds", e->name, e->election_wins, STORM_INTERVAL); e->election_wins = 0; e->expires = tm_now + STORM_INTERVAL; if (e->wrote_blackbox == FALSE) { /* It's questionable whether a black box (from every node in the * cluster) would be truly helpful in diagnosing an election * storm. It's also highly doubtful a production environment * would get multiple election storms from distinct causes, so * saving one blackbox per process lifetime should be * sufficient. Alternatives would be to save a timestamp of the * last blackbox write instead of a boolean, and write a new one * if some amount of time has passed; or to save a storm count, * write a blackbox on every Nth occurrence. */ crm_write_blackbox(0, NULL); e->wrote_blackbox = TRUE; } } } if (done) { do_crm_log(log_level + 1, "Processed %s round %d %s (current round %d) from %s (%s)", e->name, vote.election_id, vote.op, e->count, vote.from, reason); return e->state; } else if (we_lose == FALSE) { /* We track the time of the last election loss to implement an election * dampening period, reducing the likelihood of an election storm. If * this node has lost within the dampening period, don't start a new * election, even if we win against a peer's vote -- the peer we lost to * should win again. * * @TODO This has a problem case: if an election winner immediately * leaves the cluster, and a new election is immediately called, all * nodes could lose, with no new winner elected. The ideal solution * would be to tie the election structure with the peer caches, which * would allow us to clear the dampening when the previous winner * leaves (and would allow other improvements as well). */ if ((e->last_election_loss == 0) || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) { do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)", e->name, vote.election_id, vote.election_owner, vote.op, vote.from, reason); e->last_election_loss = 0; election_timeout_stop(e); /* Start a new election by voting down this, and other, peers */ e->state = election_start; return e->state; } else { char *loss_time = ctime(&e->last_election_loss); if (loss_time) { // Show only HH:MM:SS loss_time += 11; loss_time[8] = '\0'; } crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s", e->name, vote.election_id, vote.election_owner, vote.from, LOSS_DAMPEN, (loss_time? loss_time : "unknown")); } } e->last_election_loss = tm_now; do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)", e->name, vote.election_id, vote.election_owner, vote.op, vote.from, reason); election_reset(e); send_no_vote(e, your_node, &vote); e->state = election_lost; return e->state; } /*! * \brief Reset any election dampening currently in effect * * \param[in,out] e Election object to clear */ void election_clear_dampening(election_t *e) { e->last_election_loss = 0; } diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c index 6e91819c19..4a3808b873 100644 --- a/lib/common/ipc_attrd.c +++ b/lib/common/ipc_attrd.c @@ -1,481 +1,481 @@ /* * Copyright 2011-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "crmcommon_private.h" static void set_pairs_data(pcmk__attrd_api_reply_t *data, xmlNode *msg_data) { const char *name = NULL; pcmk__attrd_query_pair_t *pair; name = crm_element_value(msg_data, PCMK__XA_ATTR_NAME); for (xmlNode *node = pcmk__xe_first_child(msg_data, PCMK_XE_NODE, NULL, NULL); node != NULL; node = pcmk__xe_next_same(node)) { pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t)); pair->node = crm_element_value(node, PCMK__XA_ATTR_HOST); pair->name = name; pair->value = crm_element_value(node, PCMK__XA_ATTR_VALUE); data->data.pairs = g_list_prepend(data->data.pairs, pair); } } static bool reply_expected(pcmk_ipc_api_t *api, const xmlNode *request) { const char *command = crm_element_value(request, PCMK_XA_TASK); return pcmk__str_any_of(command, PCMK__ATTRD_CMD_CLEAR_FAILURE, PCMK__ATTRD_CMD_QUERY, PCMK__ATTRD_CMD_REFRESH, PCMK__ATTRD_CMD_UPDATE, PCMK__ATTRD_CMD_UPDATE_BOTH, PCMK__ATTRD_CMD_UPDATE_DELAY, NULL); } static bool dispatch(pcmk_ipc_api_t *api, xmlNode *reply) { const char *value = NULL; crm_exit_t status = CRM_EX_OK; pcmk__attrd_api_reply_t reply_data = { pcmk__attrd_reply_unknown }; if (pcmk__xe_is(reply, PCMK__XE_ACK)) { return false; } /* Do some basic validation of the reply */ value = crm_element_value(reply, PCMK__XA_T); if (pcmk__str_empty(value) || !pcmk__str_eq(value, PCMK__VALUE_ATTRD, pcmk__str_none)) { crm_info("Unrecognizable message from attribute manager: " "message type '%s' not '" PCMK__VALUE_ATTRD "'", pcmk__s(value, "")); status = CRM_EX_PROTOCOL; goto done; } value = crm_element_value(reply, PCMK__XA_SUBT); /* Only the query command gets a reply for now. NULL counts as query for * backward compatibility with attribute managers <2.1.3 that didn't set it. */ if (pcmk__str_eq(value, PCMK__ATTRD_CMD_QUERY, pcmk__str_null_matches)) { if (!xmlHasProp(reply, (pcmkXmlStr) PCMK__XA_ATTR_NAME)) { status = ENXIO; // Most likely, the attribute doesn't exist goto done; } reply_data.reply_type = pcmk__attrd_reply_query; set_pairs_data(&reply_data, reply); } else { crm_info("Unrecognizable message from attribute manager: " "message subtype '%s' unknown", pcmk__s(value, "")); status = CRM_EX_PROTOCOL; goto done; } done: pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); /* Free any reply data that was allocated */ if (reply_data.data.pairs) { g_list_free_full(reply_data.data.pairs, free); } return false; } pcmk__ipc_methods_t * pcmk__attrd_api_methods(void) { pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); if (cmds != NULL) { cmds->new_data = NULL; cmds->free_data = NULL; cmds->post_connect = NULL; cmds->reply_expected = reply_expected; cmds->dispatch = dispatch; } return cmds; } /*! * \internal - * \brief Create a generic pacemaker-attrd operation + * \brief Create a generic attribute manager operation * * \param[in] user_name If not NULL, ACL user to set for operation * - * \return XML of pacemaker-attrd operation + * \return XML of attribute manager operation */ static xmlNode * create_attrd_op(const char *user_name) { xmlNode *attrd_op = pcmk__xe_create(NULL, __func__); crm_xml_add(attrd_op, PCMK__XA_T, PCMK__VALUE_ATTRD); crm_xml_add(attrd_op, PCMK__XA_SRC, pcmk__s(crm_system_name, "unknown")); crm_xml_add(attrd_op, PCMK__XA_ATTR_USER, user_name); return attrd_op; } static int connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) { int rc = pcmk_rc_ok; bool created_api = false; if (api == NULL) { rc = pcmk_new_ipc_api(&api, pcmk_ipc_attrd); if (rc != pcmk_rc_ok) { return rc; } created_api = true; } rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5); if (rc == pcmk_rc_ok) { rc = pcmk__send_ipc_request(api, request); } if (created_api) { pcmk_free_ipc_api(api); } return rc; } int pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, const char *resource, const char *operation, const char *interval_spec, const char *user_name, uint32_t options) { int rc = pcmk_rc_ok; xmlNode *request = create_attrd_op(user_name); const char *interval_desc = NULL; const char *op_desc = NULL; const char *target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } if (operation) { interval_desc = pcmk__s(interval_spec, "nonrecurring"); op_desc = operation; } else { interval_desc = "all"; op_desc = "operations"; } crm_debug("Asking %s to clear failure of %s %s for %s on %s", pcmk_ipc_name(api, true), interval_desc, op_desc, pcmk__s(resource, "all resources"), pcmk__s(node, "all nodes")); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_CLEAR_FAILURE); pcmk__xe_add_node(request, node, 0); crm_xml_add(request, PCMK__XA_ATTR_RESOURCE, resource); crm_xml_add(request, PCMK__XA_ATTR_CLEAR_OPERATION, operation); crm_xml_add(request, PCMK__XA_ATTR_CLEAR_INTERVAL, interval_spec); crm_xml_add_int(request, PCMK__XA_ATTR_IS_REMOTE, pcmk_is_set(options, pcmk__node_attr_remote)); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } int pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *name, uint32_t options) { const char *target = NULL; if (name == NULL) { return EINVAL; } target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } /* Make sure the right update option is set. */ options &= ~pcmk__node_attr_delay; options |= pcmk__node_attr_value; return pcmk__attrd_api_update(api, node, name, NULL, NULL, NULL, NULL, options); } int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) { int rc = pcmk_rc_ok; xmlNode *request = NULL; const char *target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } crm_debug("Asking %s to purge transient attributes%s for %s", pcmk_ipc_name(api, true), (reap? " and node cache entries" : ""), pcmk__s(node, "local node")); request = create_attrd_op(NULL); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, reap); pcmk__xe_add_node(request, node, 0); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, uint32_t options) { int rc = pcmk_rc_ok; xmlNode *request = NULL; const char *target = NULL; if (name == NULL) { return EINVAL; } if (pcmk_is_set(options, pcmk__node_attr_query_all)) { node = NULL; } else { target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } } crm_debug("Querying %s for value of '%s'%s%s", pcmk_ipc_name(api, true), name, ((node == NULL)? "" : " on "), pcmk__s(node, "")); request = create_attrd_op(NULL); crm_xml_add(request, PCMK__XA_ATTR_NAME, name); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_QUERY); pcmk__xe_add_node(request, node, 0); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } int pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) { int rc = pcmk_rc_ok; xmlNode *request = NULL; const char *target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } crm_debug("Asking %s to write all transient attributes for %s to CIB", pcmk_ipc_name(api, true), pcmk__s(node, "local node")); request = create_attrd_op(NULL); crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_REFRESH); pcmk__xe_add_node(request, node, 0); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } static void add_op_attr(xmlNode *op, uint32_t options) { if (pcmk_all_flags_set(options, pcmk__node_attr_value | pcmk__node_attr_delay)) { crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE_BOTH); } else if (pcmk_is_set(options, pcmk__node_attr_value)) { crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE); } else if (pcmk_is_set(options, pcmk__node_attr_delay)) { crm_xml_add(op, PCMK_XA_TASK, PCMK__ATTRD_CMD_UPDATE_DELAY); } } static void populate_update_op(xmlNode *op, const char *node, const char *name, const char *value, const char *dampen, const char *set, uint32_t options) { if (pcmk_is_set(options, pcmk__node_attr_pattern)) { crm_xml_add(op, PCMK__XA_ATTR_REGEX, name); } else { crm_xml_add(op, PCMK__XA_ATTR_NAME, name); } if (pcmk_is_set(options, pcmk__node_attr_utilization)) { crm_xml_add(op, PCMK__XA_ATTR_SET_TYPE, PCMK_XE_UTILIZATION); } else { crm_xml_add(op, PCMK__XA_ATTR_SET_TYPE, PCMK_XE_INSTANCE_ATTRIBUTES); } add_op_attr(op, options); crm_xml_add(op, PCMK__XA_ATTR_VALUE, value); crm_xml_add(op, PCMK__XA_ATTR_DAMPENING, dampen); pcmk__xe_add_node(op, node, 0); crm_xml_add(op, PCMK__XA_ATTR_SET, set); crm_xml_add_int(op, PCMK__XA_ATTR_IS_REMOTE, pcmk_is_set(options, pcmk__node_attr_remote)); crm_xml_add_int(op, PCMK__XA_ATTR_IS_PRIVATE, pcmk_is_set(options, pcmk__node_attr_private)); if (pcmk_is_set(options, pcmk__node_attr_sync_local)) { crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_LOCAL); } else if (pcmk_is_set(options, pcmk__node_attr_sync_cluster)) { crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_CLUSTER); } } int pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, const char *value, const char *dampen, const char *set, const char *user_name, uint32_t options) { int rc = pcmk_rc_ok; xmlNode *request = NULL; const char *target = NULL; if (name == NULL) { return EINVAL; } target = pcmk__node_attr_target(node); if (target != NULL) { node = target; } crm_debug("Asking %s to update '%s' to '%s' for %s", pcmk_ipc_name(api, true), name, pcmk__s(value, "(null)"), pcmk__s(node, "local node")); request = create_attrd_op(user_name); populate_update_op(request, node, name, value, dampen, set, options); rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); return rc; } int pcmk__attrd_api_update_list(pcmk_ipc_api_t *api, GList *attrs, const char *dampen, const char *set, const char *user_name, uint32_t options) { int rc = pcmk_rc_ok; xmlNode *request = NULL; if (attrs == NULL) { return EINVAL; } /* There are two different ways of handling a list of attributes: * * (1) For messages originating from some command line tool, we have to send * them one at a time. In this loop, we just call pcmk__attrd_api_update * for each, letting it deal with creating the API object if it doesn't * already exist. * * The reason we can't use a single message in this case is that we can't * trust that the server supports it. Remote nodes could be involved * here, and there's no guarantee that a newer client running on a remote * node is talking to (or proxied through) a cluster node with a newer * attrd. We also can't just try sending a single message and then falling * back on multiple. There's no handshake with the attrd server to * determine its version. And then we would need to do that fallback in the * dispatch function for this to work for all connection types (mainloop in * particular), and at that point we won't know what the original message * was in order to break it apart and resend as individual messages. * * (2) For messages between daemons, we can be assured that the local attrd * will support the new message and that it can send to the other attrds * as one request or split up according to the minimum supported version. */ for (GList *iter = attrs; iter != NULL; iter = iter->next) { pcmk__attrd_query_pair_t *pair = (pcmk__attrd_query_pair_t *) iter->data; if (pcmk__is_daemon) { const char *target = NULL; xmlNode *child = NULL; /* First time through this loop - create the basic request. */ if (request == NULL) { request = create_attrd_op(user_name); add_op_attr(request, options); } /* Add a child node for this operation. We add the task to the top * level XML node so attrd_ipc_dispatch doesn't need changes. And * then we also add the task to each child node in populate_update_op * so attrd_client_update knows what form of update is taking place. */ child = pcmk__xe_create(request, PCMK_XE_OP); target = pcmk__node_attr_target(pair->node); if (target != NULL) { pair->node = target; } populate_update_op(child, pair->node, pair->name, pair->value, dampen, set, options); } else { rc = pcmk__attrd_api_update(api, pair->node, pair->name, pair->value, dampen, set, user_name, options); } } /* If we were doing multiple attributes at once, we still need to send the * request. Do that now, creating and destroying the API object if needed. */ if (pcmk__is_daemon) { rc = connect_and_send_attrd_request(api, request); pcmk__xml_free(request); } return rc; } diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c index 663bed50ad..aba4299ffe 100644 --- a/lib/common/ipc_pacemakerd.c +++ b/lib/common/ipc_pacemakerd.c @@ -1,335 +1,336 @@ /* * Copyright 2020-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "crmcommon_private.h" typedef struct pacemakerd_api_private_s { enum pcmk_pacemakerd_state state; char *client_uuid; } pacemakerd_api_private_t; static const char *pacemakerd_state_str[] = { PCMK__VALUE_INIT, PCMK__VALUE_STARTING_DAEMONS, PCMK__VALUE_WAIT_FOR_PING, PCMK__VALUE_RUNNING, PCMK__VALUE_SHUTTING_DOWN, PCMK__VALUE_SHUTDOWN_COMPLETE, PCMK_VALUE_REMOTE, }; enum pcmk_pacemakerd_state pcmk_pacemakerd_api_daemon_state_text2enum(const char *state) { int i; if (state == NULL) { return pcmk_pacemakerd_state_invalid; } for (i=pcmk_pacemakerd_state_init; i <= pcmk_pacemakerd_state_max; i++) { if (pcmk__str_eq(state, pacemakerd_state_str[i], pcmk__str_none)) { return i; } } return pcmk_pacemakerd_state_invalid; } const char * pcmk_pacemakerd_api_daemon_state_enum2text( enum pcmk_pacemakerd_state state) { if ((state >= pcmk_pacemakerd_state_init) && (state <= pcmk_pacemakerd_state_max)) { return pacemakerd_state_str[state]; } return "invalid"; } /*! * \internal * \brief Return a friendly string representation of a \p pacemakerd state * * \param[in] state \p pacemakerd state * * \return A user-friendly string representation of \p state, or * "Invalid pacemakerd state" */ const char * pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state) { switch (state) { case pcmk_pacemakerd_state_init: return "Initializing pacemaker"; case pcmk_pacemakerd_state_starting_daemons: return "Pacemaker daemons are starting"; case pcmk_pacemakerd_state_wait_for_ping: return "Waiting for startup trigger from SBD"; case pcmk_pacemakerd_state_running: return "Pacemaker is running"; case pcmk_pacemakerd_state_shutting_down: return "Pacemaker daemons are shutting down"; case pcmk_pacemakerd_state_shutdown_complete: /* Assuming pacemakerd won't process messages while in * shutdown_complete state unless reporting to SBD */ return "Pacemaker daemons are shut down (reporting to SBD)"; case pcmk_pacemakerd_state_remote: - return "pacemaker-remoted is running (on a Pacemaker Remote node)"; + return PCMK__SERVER_REMOTED " is running " + "(on a Pacemaker Remote node)"; default: return "Invalid pacemakerd state"; } } /*! * \internal * \brief Get a string representation of a \p pacemakerd API reply type * * \param[in] reply \p pacemakerd API reply type * * \return String representation of a \p pacemakerd API reply type */ const char * pcmk__pcmkd_api_reply2str(enum pcmk_pacemakerd_api_reply reply) { switch (reply) { case pcmk_pacemakerd_reply_ping: return "ping"; case pcmk_pacemakerd_reply_shutdown: return "shutdown"; default: return "unknown"; } } // \return Standard Pacemaker return code static int new_data(pcmk_ipc_api_t *api) { struct pacemakerd_api_private_s *private = NULL; api->api_data = calloc(1, sizeof(struct pacemakerd_api_private_s)); if (api->api_data == NULL) { return errno; } private = api->api_data; private->state = pcmk_pacemakerd_state_invalid; /* other as with cib, controld, ... we are addressing pacemakerd just from the local node -> pid is unique and thus sufficient as an ID */ private->client_uuid = pcmk__getpid_s(); return pcmk_rc_ok; } static void free_data(void *data) { free(((struct pacemakerd_api_private_s *) data)->client_uuid); free(data); } // \return Standard Pacemaker return code static int post_connect(pcmk_ipc_api_t *api) { struct pacemakerd_api_private_s *private = NULL; if (api->api_data == NULL) { return EINVAL; } private = api->api_data; private->state = pcmk_pacemakerd_state_invalid; return pcmk_rc_ok; } static void post_disconnect(pcmk_ipc_api_t *api) { struct pacemakerd_api_private_s *private = NULL; if (api->api_data == NULL) { return; } private = api->api_data; private->state = pcmk_pacemakerd_state_invalid; return; } static bool reply_expected(pcmk_ipc_api_t *api, const xmlNode *request) { const char *command = crm_element_value(request, PCMK__XA_CRM_TASK); if (command == NULL) { return false; } // We only need to handle commands that functions in this file can send return pcmk__str_any_of(command, CRM_OP_PING, CRM_OP_QUIT, NULL); } static bool dispatch(pcmk_ipc_api_t *api, xmlNode *reply) { crm_exit_t status = CRM_EX_OK; xmlNode *wrapper = NULL; xmlNode *msg_data = NULL; pcmk_pacemakerd_api_reply_t reply_data = { pcmk_pacemakerd_reply_unknown }; const char *value = NULL; long long value_ll = 0; if (pcmk__xe_is(reply, PCMK__XE_ACK)) { long long int ack_status = 0; pcmk__scan_ll(crm_element_value(reply, PCMK_XA_STATUS), &ack_status, CRM_EX_OK); return ack_status == CRM_EX_INDETERMINATE; } value = crm_element_value(reply, PCMK__XA_T); if (pcmk__parse_server(value) != pcmk_ipc_pacemakerd) { /* @COMPAT pacemakerd <3.0.0 sets PCMK__VALUE_CRMD as the message type, * so we can't enforce this check until we no longer support * Pacemaker Remote nodes connecting to cluster nodes older than that. */ crm_trace("Message from %s has unexpected message type '%s' " "(bug if not from pacemakerd <3.0.0)", pcmk_ipc_name(api, true), pcmk__s(value, "")); } value = crm_element_value(reply, PCMK__XA_SUBT); if (!pcmk__str_eq(value, PCMK__VALUE_RESPONSE, pcmk__str_none)) { crm_info("Unrecognizable message from %s: " "message type '%s' not '" PCMK__VALUE_RESPONSE "'", pcmk_ipc_name(api, true), pcmk__s(value, "")); status = CRM_EX_PROTOCOL; goto done; } if (pcmk__str_empty(crm_element_value(reply, PCMK_XA_REFERENCE))) { crm_info("Unrecognizable message from %s: no reference", pcmk_ipc_name(api, true)); status = CRM_EX_PROTOCOL; goto done; } value = crm_element_value(reply, PCMK__XA_CRM_TASK); // Parse useful info from reply wrapper = pcmk__xe_first_child(reply, PCMK__XE_CRM_XML, NULL, NULL); msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); crm_element_value_ll(msg_data, PCMK_XA_CRM_TIMESTAMP, &value_ll); if (pcmk__str_eq(value, CRM_OP_PING, pcmk__str_none)) { reply_data.reply_type = pcmk_pacemakerd_reply_ping; reply_data.data.ping.state = pcmk_pacemakerd_api_daemon_state_text2enum( crm_element_value(msg_data, PCMK__XA_PACEMAKERD_STATE)); reply_data.data.ping.status = pcmk__str_eq(crm_element_value(msg_data, PCMK_XA_RESULT), "ok", pcmk__str_casei)?pcmk_rc_ok:pcmk_rc_error; reply_data.data.ping.last_good = (value_ll < 0)? 0 : (time_t) value_ll; reply_data.data.ping.sys_from = crm_element_value(msg_data, PCMK__XA_CRM_SUBSYSTEM); } else if (pcmk__str_eq(value, CRM_OP_QUIT, pcmk__str_none)) { const char *op_status = crm_element_value(msg_data, PCMK__XA_OP_STATUS); reply_data.reply_type = pcmk_pacemakerd_reply_shutdown; reply_data.data.shutdown.status = atoi(op_status); } else { crm_info("Unrecognizable message from %s: unknown command '%s'", pcmk_ipc_name(api, true), pcmk__s(value, "")); status = CRM_EX_PROTOCOL; goto done; } done: pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); return false; } pcmk__ipc_methods_t * pcmk__pacemakerd_api_methods(void) { pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); if (cmds != NULL) { cmds->new_data = new_data; cmds->free_data = free_data; cmds->post_connect = post_connect; cmds->reply_expected = reply_expected; cmds->dispatch = dispatch; cmds->post_disconnect = post_disconnect; } return cmds; } static int do_pacemakerd_api_call(pcmk_ipc_api_t *api, const char *ipc_name, const char *task) { pacemakerd_api_private_t *private; char *sender_system = NULL; xmlNode *cmd; int rc; if (api == NULL) { return EINVAL; } private = api->api_data; CRM_ASSERT(private != NULL); sender_system = crm_strdup_printf("%s_%s", private->client_uuid, pcmk__ipc_sys_name(ipc_name, "client")); cmd = pcmk__new_request(pcmk_ipc_pacemakerd, sender_system, NULL, CRM_SYSTEM_MCP, task, NULL); free(sender_system); if (cmd) { rc = pcmk__send_ipc_request(api, cmd); if (rc != pcmk_rc_ok) { crm_debug("Couldn't send request to %s: %s rc=%d", pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc); } pcmk__xml_free(cmd); } else { rc = ENOMSG; } return rc; } int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name) { return do_pacemakerd_api_call(api, ipc_name, CRM_OP_PING); } int pcmk_pacemakerd_api_shutdown(pcmk_ipc_api_t *api, const char *ipc_name) { return do_pacemakerd_api_call(api, ipc_name, CRM_OP_QUIT); } diff --git a/lib/common/ipc_schedulerd.c b/lib/common/ipc_schedulerd.c index 7e23132779..9d7844c56d 100644 --- a/lib/common/ipc_schedulerd.c +++ b/lib/common/ipc_schedulerd.c @@ -1,196 +1,196 @@ /* * Copyright 2021-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "crmcommon_private.h" typedef struct schedulerd_api_private_s { char *client_uuid; } schedulerd_api_private_t; // \return Standard Pacemaker return code static int new_data(pcmk_ipc_api_t *api) { struct schedulerd_api_private_s *private = NULL; api->api_data = calloc(1, sizeof(struct schedulerd_api_private_s)); if (api->api_data == NULL) { return errno; } private = api->api_data; /* See comments in ipc_pacemakerd.c. */ private->client_uuid = pcmk__getpid_s(); return pcmk_rc_ok; } static void free_data(void *data) { free(((struct schedulerd_api_private_s *) data)->client_uuid); free(data); } // \return Standard Pacemaker return code static int post_connect(pcmk_ipc_api_t *api) { if (api->api_data == NULL) { return EINVAL; } return pcmk_rc_ok; } static bool reply_expected(pcmk_ipc_api_t *api, const xmlNode *request) { const char *command = crm_element_value(request, PCMK__XA_CRM_TASK); if (command == NULL) { return false; } // We only need to handle commands that functions in this file can send return pcmk__str_any_of(command, CRM_OP_PECALC, NULL); } static bool dispatch(pcmk_ipc_api_t *api, xmlNode *reply) { crm_exit_t status = CRM_EX_OK; xmlNode *wrapper = NULL; xmlNode *msg_data = NULL; pcmk_schedulerd_api_reply_t reply_data = { pcmk_schedulerd_reply_unknown }; const char *value = NULL; if (pcmk__xe_is(reply, PCMK__XE_ACK)) { return false; } value = crm_element_value(reply, PCMK__XA_T); if (pcmk__parse_server(value) != pcmk_ipc_schedulerd) { crm_info("Unrecognizable message from schedulerd: " "unexpected message type '%s'", pcmk__s(value, "")); status = CRM_EX_PROTOCOL; goto done; } value = crm_element_value(reply, PCMK__XA_SUBT); if (!pcmk__str_eq(value, PCMK__VALUE_RESPONSE, pcmk__str_none)) { crm_info("Unrecognizable message from schedulerd: " - "message type '%s' not '" PCMK__VALUE_RESPONSE "'", - pcmk__s(value, "")); + "message type '%s' not '" PCMK__VALUE_RESPONSE "'", + pcmk__s(value, "")); status = CRM_EX_PROTOCOL; goto done; } if (pcmk__str_empty(crm_element_value(reply, PCMK_XA_REFERENCE))) { crm_info("Unrecognizable message from schedulerd: no reference"); status = CRM_EX_PROTOCOL; goto done; } // Parse useful info from reply wrapper = pcmk__xe_first_child(reply, PCMK__XE_CRM_XML, NULL, NULL); msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); value = crm_element_value(reply, PCMK__XA_CRM_TASK); if (pcmk__str_eq(value, CRM_OP_PECALC, pcmk__str_none)) { reply_data.reply_type = pcmk_schedulerd_reply_graph; reply_data.data.graph.reference = crm_element_value(reply, PCMK_XA_REFERENCE); reply_data.data.graph.input = crm_element_value(reply, PCMK__XA_CRM_TGRAPH_IN); reply_data.data.graph.tgraph = msg_data; } else { crm_info("Unrecognizable message from schedulerd: " "unknown command '%s'", pcmk__s(value, "")); status = CRM_EX_PROTOCOL; goto done; } done: pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); return false; } pcmk__ipc_methods_t * pcmk__schedulerd_api_methods(void) { pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); if (cmds != NULL) { cmds->new_data = new_data; cmds->free_data = free_data; cmds->post_connect = post_connect; cmds->reply_expected = reply_expected; cmds->dispatch = dispatch; } return cmds; } static int do_schedulerd_api_call(pcmk_ipc_api_t *api, const char *task, xmlNode *cib, char **ref) { schedulerd_api_private_t *private; xmlNode *cmd = NULL; int rc; char *sender_system = NULL; if (!pcmk_ipc_is_connected(api)) { return ENOTCONN; } private = api->api_data; CRM_ASSERT(private != NULL); sender_system = crm_strdup_printf("%s_%s", private->client_uuid, pcmk__s(crm_system_name, "client")); cmd = pcmk__new_request(pcmk_ipc_schedulerd, sender_system, NULL, CRM_SYSTEM_PENGINE, task, cib); free(sender_system); if (cmd) { rc = pcmk__send_ipc_request(api, cmd); if (rc != pcmk_rc_ok) { crm_debug("Couldn't send request to schedulerd: %s rc=%d", pcmk_rc_str(rc), rc); } *ref = strdup(crm_element_value(cmd, PCMK_XA_REFERENCE)); pcmk__xml_free(cmd); } else { rc = ENOMSG; } return rc; } int pcmk_schedulerd_api_graph(pcmk_ipc_api_t *api, xmlNode *cib, char **ref) { return do_schedulerd_api_call(api, CRM_OP_PECALC, cib, ref); } diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c index 21a69c8dbc..65f7854381 100644 --- a/lib/common/ipc_server.c +++ b/lib/common/ipc_server.c @@ -1,988 +1,991 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "crmcommon_private.h" /* Evict clients whose event queue grows this large (by default) */ #define PCMK_IPC_DEFAULT_QUEUE_MAX 500 static GHashTable *client_connections = NULL; /*! * \internal * \brief Count IPC clients * * \return Number of active IPC client connections */ guint pcmk__ipc_client_count(void) { return client_connections? g_hash_table_size(client_connections) : 0; } /*! * \internal * \brief Execute a function for each active IPC client connection * * \param[in] func Function to call * \param[in,out] user_data Pointer to pass to function * * \note The parameters are the same as for g_hash_table_foreach(). */ void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data) { if ((func != NULL) && (client_connections != NULL)) { g_hash_table_foreach(client_connections, func, user_data); } } pcmk__client_t * pcmk__find_client(const qb_ipcs_connection_t *c) { if (client_connections) { return g_hash_table_lookup(client_connections, c); } crm_trace("No client found for %p", c); return NULL; } pcmk__client_t * pcmk__find_client_by_id(const char *id) { if ((client_connections != NULL) && (id != NULL)) { gpointer key; pcmk__client_t *client = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { if (strcmp(client->id, id) == 0) { return client; } } } crm_trace("No client found with id='%s'", pcmk__s(id, "")); return NULL; } /*! * \internal * \brief Get a client identifier for use in log messages * * \param[in] c Client * * \return Client's name, client's ID, or a string literal, as available * \note This is intended to be used in format strings like "client %s". */ const char * pcmk__client_name(const pcmk__client_t *c) { if (c == NULL) { return "(unspecified)"; } else if (c->name != NULL) { return c->name; } else if (c->id != NULL) { return c->id; } else { return "(unidentified)"; } } void pcmk__client_cleanup(void) { if (client_connections != NULL) { int active = g_hash_table_size(client_connections); if (active > 0) { crm_warn("Exiting with %d active IPC client%s", active, pcmk__plural_s(active)); } g_hash_table_destroy(client_connections); client_connections = NULL; } } void pcmk__drop_all_clients(qb_ipcs_service_t *service) { qb_ipcs_connection_t *c = NULL; if (service == NULL) { return; } c = qb_ipcs_connection_first_get(service); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(service, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, pcmk__client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } } /*! * \internal * \brief Allocate a new pcmk__client_t object based on an IPC connection * * \param[in] c IPC connection (NULL to allocate generic client) * \param[in] key Connection table key (NULL to use sane default) * \param[in] uid_client UID corresponding to c (ignored if c is NULL) * * \return Pointer to new pcmk__client_t (guaranteed not to be \c NULL) */ static pcmk__client_t * client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client) { pcmk__client_t *client = pcmk__assert_alloc(1, sizeof(pcmk__client_t)); if (c) { client->user = pcmk__uid2username(uid_client); if (client->user == NULL) { client->user = pcmk__str_copy("#unprivileged"); crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged", uid_client); } client->ipcs = c; pcmk__set_client_flags(client, pcmk__client_ipc); client->pid = pcmk__client_pid(c); if (key == NULL) { key = c; } } client->id = crm_generate_uuid(); if (key == NULL) { key = client->id; } if (client_connections == NULL) { crm_trace("Creating IPC client table"); client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); } g_hash_table_insert(client_connections, key, client); return client; } /*! * \brief Allocate a new pcmk__client_t object and generate its ID * * \param[in] key What to use as connections hash table key (NULL to use ID) * * \return Pointer to new pcmk__client_t (asserts on failure) */ pcmk__client_t * pcmk__new_unauth_client(void *key) { return client_from_connection(NULL, key, 0); } pcmk__client_t * pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) { gid_t uid_cluster = 0; gid_t gid_cluster = 0; pcmk__client_t *client = NULL; CRM_CHECK(c != NULL, return NULL); if (pcmk_daemon_user(&uid_cluster, &gid_cluster) < 0) { static bool need_log = TRUE; if (need_log) { crm_warn("Could not find user and group IDs for user %s", CRM_DAEMON_USER); need_log = FALSE; } } if (uid_client != 0) { crm_trace("Giving group %u access to new IPC connection", gid_cluster); /* Passing -1 to chown(2) means don't change */ qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } /* TODO: Do our own auth checking, return NULL if unauthorized */ client = client_from_connection(c, NULL, uid_client); if ((uid_client == 0) || (uid_client == uid_cluster)) { /* Remember when a connection came from root or hacluster */ pcmk__set_client_flags(client, pcmk__client_privileged); } crm_debug("New IPC client %s for PID %u with uid %d and gid %d", client->id, client->pid, uid_client, gid_client); return client; } static struct iovec * pcmk__new_ipc_event(void) { return (struct iovec *) pcmk__assert_alloc(2, sizeof(struct iovec)); } /*! * \brief Free an I/O vector created by pcmk__ipc_prepare_iov() * * \param[in,out] event I/O vector to free */ void pcmk_free_ipc_event(struct iovec *event) { if (event != NULL) { free(event[0].iov_base); free(event[1].iov_base); free(event); } } static void free_event(gpointer data) { pcmk_free_ipc_event((struct iovec *) data); } static void add_event(pcmk__client_t *c, struct iovec *iov) { if (c->event_queue == NULL) { c->event_queue = g_queue_new(); } g_queue_push_tail(c->event_queue, iov); } void pcmk__free_client(pcmk__client_t *c) { if (c == NULL) { return; } if (client_connections) { if (c->ipcs) { crm_trace("Destroying %p/%p (%d remaining)", c, c->ipcs, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->ipcs); } else { crm_trace("Destroying remote connection %p (%d remaining)", c, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->id); } } if (c->event_timer) { g_source_remove(c->event_timer); } if (c->event_queue) { crm_debug("Destroying %d events", g_queue_get_length(c->event_queue)); g_queue_free_full(c->event_queue, free_event); } free(c->id); free(c->name); free(c->user); if (c->remote) { if (c->remote->auth_timeout) { g_source_remove(c->remote->auth_timeout); } if (c->remote->tls_session != NULL) { /* @TODO Reduce duplication at callers. Put here everything * necessary to tear down and free tls_session. */ gnutls_free(c->remote->tls_session); } free(c->remote->buffer); free(c->remote); } free(c); } /*! * \internal * \brief Raise IPC eviction threshold for a client, if allowed * * \param[in,out] client Client to modify * \param[in] qmax New threshold (as non-NULL string) * * \return true if change was allowed, false otherwise */ bool pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax) { if (pcmk_is_set(client->flags, pcmk__client_privileged)) { long long qmax_ll; if ((pcmk__scan_ll(qmax, &qmax_ll, 0LL) == pcmk_rc_ok) && (qmax_ll > 0LL) && (qmax_ll <= UINT_MAX)) { client->queue_max = (unsigned int) qmax_ll; return true; } } return false; } int pcmk__client_pid(qb_ipcs_connection_t *c) { struct qb_ipcs_connection_stats stats; stats.client_pid = 0; qb_ipcs_connection_stats_get(c, &stats, 0); return stats.client_pid; } /*! * \internal * \brief Retrieve message XML from data read from client IPC * * \param[in,out] c IPC client connection * \param[in] data Data read from client connection * \param[out] id Where to store message ID from libqb header * \param[out] flags Where to store flags from libqb header * * \return Message XML on success, NULL otherwise */ xmlNode * pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags) { xmlNode *xml = NULL; char *uncompressed = NULL; char *text = ((char *)data) + sizeof(pcmk__ipc_header_t); pcmk__ipc_header_t *header = data; if (!pcmk__valid_ipc_header(header)) { return NULL; } if (id) { *id = ((struct qb_ipc_response_header *)data)->id; } if (flags) { *flags = header->flags; } if (pcmk_is_set(header->flags, crm_ipc_proxied)) { /* Mark this client as being the endpoint of a proxy connection. * Proxy connections responses are sent on the event channel, to avoid * blocking the controller serving as proxy. */ pcmk__set_client_flags(c, pcmk__client_proxied); } if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; uncompressed = pcmk__assert_alloc(1, size_u); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); text = uncompressed; rc = pcmk__bzlib2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Decompression failed: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); free(uncompressed); return NULL; } } CRM_ASSERT(text[header->size_uncompressed - 1] == 0); xml = pcmk__xml_parse(text); crm_log_xml_trace(xml, "[IPC received]"); free(uncompressed); return xml; } static int crm_ipcs_flush_events(pcmk__client_t *c); static gboolean crm_ipcs_flush_events_cb(gpointer data) { pcmk__client_t *c = data; c->event_timer = 0; crm_ipcs_flush_events(c); return FALSE; } /*! * \internal * \brief Add progressive delay before next event queue flush * * \param[in,out] c Client connection to add delay to * \param[in] queue_len Current event queue length */ static inline void delay_next_flush(pcmk__client_t *c, unsigned int queue_len) { /* Delay a maximum of 1.5 seconds */ guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500; c->event_timer = g_timeout_add(delay, crm_ipcs_flush_events_cb, c); } /*! * \internal * \brief Send client any messages in its queue * * \param[in,out] c Client to flush * * \return Standard Pacemaker return value */ static int crm_ipcs_flush_events(pcmk__client_t *c) { int rc = pcmk_rc_ok; ssize_t qb_rc = 0; unsigned int sent = 0; unsigned int queue_len = 0; if (c == NULL) { return rc; } else if (c->event_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); return rc; } if (c->event_queue) { queue_len = g_queue_get_length(c->event_queue); } while (sent < 100) { pcmk__ipc_header_t *header = NULL; struct iovec *event = NULL; if (c->event_queue) { // We don't pop unless send is successful event = g_queue_peek_head(c->event_queue); } if (event == NULL) { // Queue is empty break; } qb_rc = qb_ipcs_event_sendv(c->ipcs, event, 2); if (qb_rc < 0) { rc = (int) -qb_rc; break; } event = g_queue_pop_head(c->event_queue); sent++; header = event[0].iov_base; if (header->size_compressed) { crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent", header->qb.id, c->ipcs, c->pid, (long long) qb_rc); } else { crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s", header->qb.id, c->ipcs, c->pid, (long long) qb_rc, (char *) (event[1].iov_base)); } pcmk_free_ipc_event(event); } queue_len -= sent; if (sent > 0 || queue_len) { crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)", sent, queue_len, c->ipcs, c->pid, pcmk_rc_str(rc), (long long) qb_rc); } if (queue_len) { /* Allow clients to briefly fall behind on processing incoming messages, * but drop completely unresponsive clients so the connection doesn't * consume resources indefinitely. */ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { /* Don't evict for a new or shrinking backlog */ crm_warn("Client with process ID %u has a backlog of %u messages " QB_XS " %p", c->pid, queue_len, c->ipcs); } else { crm_err("Evicting client with process ID %u due to backlog of %u messages " QB_XS " %p", c->pid, queue_len, c->ipcs); c->queue_backlog = 0; qb_ipcs_disconnect(c->ipcs); return rc; } } c->queue_backlog = queue_len; delay_next_flush(c, queue_len); } else { /* Event queue is empty, there is no backlog */ c->queue_backlog = 0; } return rc; } /*! * \internal * \brief Create an I/O vector for sending an IPC XML message * * \param[in] request Identifier for libqb response header * \param[in] message XML message to send * \param[in] max_send_size If 0, default IPC buffer size is used * \param[out] result Where to store prepared I/O vector * \param[out] bytes Size of prepared data in bytes * * \return Standard Pacemaker return code */ int pcmk__ipc_prepare_iov(uint32_t request, const xmlNode *message, uint32_t max_send_size, struct iovec **result, ssize_t *bytes) { struct iovec *iov; unsigned int total = 0; GString *buffer = NULL; pcmk__ipc_header_t *header = NULL; int rc = pcmk_rc_ok; if ((message == NULL) || (result == NULL)) { rc = EINVAL; goto done; } header = calloc(1, sizeof(pcmk__ipc_header_t)); if (header == NULL) { rc = ENOMEM; goto done; } buffer = g_string_sized_new(1024); pcmk__xml_string(message, 0, buffer, 0); if (max_send_size == 0) { max_send_size = crm_ipc_default_buffer_size(); } CRM_LOG_ASSERT(max_send_size != 0); *result = NULL; iov = pcmk__new_ipc_event(); iov[0].iov_len = sizeof(pcmk__ipc_header_t); iov[0].iov_base = header; header->version = PCMK__IPC_VERSION; header->size_uncompressed = 1 + buffer->len; total = iov[0].iov_len + header->size_uncompressed; if (total < max_send_size) { iov[1].iov_base = pcmk__str_copy(buffer->str); iov[1].iov_len = header->size_uncompressed; } else { static unsigned int biggest = 0; char *compressed = NULL; unsigned int new_size = 0; if (pcmk__compress(buffer->str, (unsigned int) header->size_uncompressed, (unsigned int) max_send_size, &compressed, &new_size) == pcmk_rc_ok) { pcmk__set_ipc_flags(header->flags, "send data", crm_ipc_compressed); header->size_compressed = new_size; iov[1].iov_len = header->size_compressed; iov[1].iov_base = compressed; biggest = QB_MAX(header->size_compressed, biggest); } else { crm_log_xml_trace(message, "EMSGSIZE"); biggest = QB_MAX(header->size_uncompressed, biggest); crm_err("Could not compress %u-byte message into less than IPC " "limit of %u bytes; set PCMK_ipc_buffer to higher value " "(%u bytes suggested)", header->size_uncompressed, max_send_size, 4 * biggest); free(compressed); pcmk_free_ipc_event(iov); rc = EMSGSIZE; goto done; } } header->qb.size = iov[0].iov_len + iov[1].iov_len; header->qb.id = (int32_t)request; /* Replying to a specific request */ *result = iov; CRM_ASSERT(header->qb.size > 0); if (bytes != NULL) { *bytes = header->qb.size; } done: if (buffer != NULL) { g_string_free(buffer, TRUE); } return rc; } int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags) { int rc = pcmk_rc_ok; static uint32_t id = 1; pcmk__ipc_header_t *header = iov[0].iov_base; if (c->flags & pcmk__client_proxied) { /* _ALL_ replies to proxied connections need to be sent as events */ if (!pcmk_is_set(flags, crm_ipc_server_event)) { /* The proxied flag lets us know this was originally meant to be a * response, even though we're sending it over the event channel. */ pcmk__set_ipc_flags(flags, "server event", crm_ipc_server_event |crm_ipc_proxied_relay_response); } } pcmk__set_ipc_flags(header->flags, "server event", flags); if (flags & crm_ipc_server_event) { header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ if (flags & crm_ipc_server_free) { crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); add_event(c, iov); } else { struct iovec *iov_copy = pcmk__new_ipc_event(); crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); iov_copy[0].iov_len = iov[0].iov_len; iov_copy[0].iov_base = malloc(iov[0].iov_len); memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); iov_copy[1].iov_len = iov[1].iov_len; iov_copy[1].iov_base = malloc(iov[1].iov_len); memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); add_event(c, iov_copy); } } else { ssize_t qb_rc; CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ qb_rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); if (qb_rc < header->qb.size) { if (qb_rc < 0) { rc = (int) -qb_rc; } crm_notice("Response %d to pid %d failed: %s " QB_XS " bytes=%u rc=%lld ipcs=%p", header->qb.id, c->pid, pcmk_rc_str(rc), header->qb.size, (long long) qb_rc, c->ipcs); } else { crm_trace("Response %d sent, %lld bytes to %p[%d]", header->qb.id, (long long) qb_rc, c->ipcs, c->pid); } if (flags & crm_ipc_server_free) { pcmk_free_ipc_event(iov); } } if (flags & crm_ipc_server_event) { rc = crm_ipcs_flush_events(c); } else { crm_ipcs_flush_events(c); } if ((rc == EPIPE) || (rc == ENOTCONN)) { crm_trace("Client %p disconnected", c->ipcs); } return rc; } int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, const xmlNode *message, uint32_t flags) { struct iovec *iov = NULL; int rc = pcmk_rc_ok; if (c == NULL) { return EINVAL; } rc = pcmk__ipc_prepare_iov(request, message, crm_ipc_default_buffer_size(), &iov, NULL); if (rc == pcmk_rc_ok) { pcmk__set_ipc_flags(flags, "send data", crm_ipc_server_free); rc = pcmk__ipc_send_iov(c, iov, flags); } else { pcmk_free_ipc_event(iov); crm_notice("IPC message to pid %d failed: %s " QB_XS " rc=%d", c->pid, pcmk_rc_str(rc), rc); } return rc; } /*! * \internal * \brief Create an acknowledgement with a status code to send to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Exit status code to add to ack * * \return Newly created XML for ack * * \note The caller is responsible for freeing the return value with * \c pcmk__xml_free(). */ xmlNode * pcmk__ipc_create_ack_as(const char *function, int line, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { xmlNode *ack = NULL; if (pcmk_is_set(flags, crm_ipc_client_response)) { ack = pcmk__xe_create(NULL, tag); crm_xml_add(ack, PCMK_XA_FUNCTION, function); crm_xml_add_int(ack, PCMK__XA_LINE, line); crm_xml_add_int(ack, PCMK_XA_STATUS, (int) status); crm_xml_add(ack, PCMK__XA_IPC_PROTO_VERSION, ver); } return ack; } /*! * \internal * \brief Send an acknowledgement with a status code to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] c Client to send ack to * \param[in] request Request ID being replied to * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Status code to send with acknowledgement * * \return Standard Pacemaker return code */ int pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, uint32_t request, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { int rc = pcmk_rc_ok; xmlNode *ack = pcmk__ipc_create_ack_as(function, line, flags, tag, ver, status); if (ack != NULL) { crm_trace("Ack'ing IPC message from client %s as <%s status=%d>", pcmk__client_name(c), tag, status); crm_log_xml_trace(ack, "sent-ack"); c->request_id = 0; rc = pcmk__ipc_send_xml(c, request, ack, flags); pcmk__xml_free(ack); } return rc; } /*! * \internal - * \brief Add an IPC server to the main loop for the pacemaker-based API + * \brief Add an IPC server to the main loop for the CIB manager API * - * \param[out] ipcs_ro New IPC server for read-only pacemaker-based API - * \param[out] ipcs_rw New IPC server for read/write pacemaker-based API - * \param[out] ipcs_shm New IPC server for shared-memory pacemaker-based API + * \param[out] ipcs_ro New IPC server for read-only CIB manager API + * \param[out] ipcs_rw New IPC server for read/write CIB manager API + * \param[out] ipcs_shm New IPC server for shared-memory CIB manager API * \param[in] ro_cb IPC callbacks for read-only API * \param[in] rw_cb IPC callbacks for read/write and shared-memory APIs * * \note This function exits fatally if unable to create the servers. + * \note There is no actual difference between the three IPC endpoints other + * than their names. */ void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(PCMK__SERVER_BASED_RO, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(PCMK__SERVER_BASED_RW, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(PCMK__SERVER_BASED_SHM, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create the CIB manager: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } /*! * \internal - * \brief Destroy IPC servers for pacemaker-based API + * \brief Destroy IPC servers for the CIB manager API * - * \param[out] ipcs_ro IPC server for read-only pacemaker-based API - * \param[out] ipcs_rw IPC server for read/write pacemaker-based API - * \param[out] ipcs_shm IPC server for shared-memory pacemaker-based API + * \param[out] ipcs_ro IPC server for read-only the CIB manager API + * \param[out] ipcs_rw IPC server for read/write the CIB manager API + * \param[out] ipcs_shm IPC server for shared-memory the CIB manager API * * \note This is a convenience function for calling qb_ipcs_destroy() for each * argument. */ void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } /*! * \internal - * \brief Add an IPC server to the main loop for the pacemaker-controld API + * \brief Add an IPC server to the main loop for the controller API * * \param[in] cb IPC callbacks * * \return Newly created IPC server */ qb_ipcs_service_t * pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } /*! * \internal - * \brief Add an IPC server to the main loop for the pacemaker-attrd API + * \brief Add an IPC server to the main loop for the attribute manager API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(PCMK__VALUE_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { - crm_err("Failed to create pacemaker-attrd server: exiting and inhibiting respawn"); - crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); + crm_crit("Exiting fatally because unable to serve " PCMK__SERVER_ATTRD + " IPC (verify pacemaker and pacemaker_remote are not both " + "enabled)"); crm_exit(CRM_EX_FATAL); } } /*! * \internal - * \brief Add an IPC server to the main loop for the pacemaker-fenced API + * \brief Add an IPC server to the main loop for the fencer API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb, QB_LOOP_HIGH); if (*ipcs == NULL) { crm_err("Failed to create fencer: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemakerd API * * \param[out] ipcs Where to store newly created IPC server * \param[in] cb IPC callbacks * * \note This function exits with CRM_EX_OSERR if unable to create the servers. */ void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Couldn't start pacemakerd IPC server"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); /* sub-daemons are observed by pacemakerd. Thus we exit CRM_EX_FATAL * if we want to prevent pacemakerd from restarting them. * With pacemakerd we leave the exit-code shown to e.g. systemd * to what it was prior to moving the code here from pacemakerd.c */ crm_exit(CRM_EX_OSERR); } } /*! * \internal - * \brief Add an IPC server to the main loop for the pacemaker-schedulerd API + * \brief Add an IPC server to the main loop for the scheduler API * * \param[in] cb IPC callbacks * * \return Newly created IPC server * \note This function exits fatally if unable to create the servers. */ qb_ipcs_service_t * pcmk__serve_schedulerd_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_NATIVE, cb); } diff --git a/lib/common/servers.c b/lib/common/servers.c index 222e717b2c..f2e6d79ef5 100644 --- a/lib/common/servers.c +++ b/lib/common/servers.c @@ -1,187 +1,211 @@ /* * Copyright 2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // NULL #include /* Each Pacemaker subdaemon offers an IPC interface, and most exchange cluster * messages as well. Particular names need to be used for logging, connecting * IPC, and IPC/cluster message types. * * This array is indexed by enum pcmk_ipc_server and gathers all those names for * easier mapping. Most members are lists with the first value listed being the * "main" one returned if another value is mapped to it. * * @COMPAT Ideally, we'd use a single string (such as the server's * crm_system_name) as the sole IPC name and sole message type for each server, * making most of this unnecessary. However, backward compatiblity with older * nodes involved in a rolling upgrade or Pacemaker Remote connection would * be a nightmare: we'd have to add duplicate message attributes, struct * members, and libqb IPC server endpoints for both the old and new names, and * could drop the old names only after we no longer supported connections with * older nodes. */ static struct { const char *log_name; // Readable server name for use in logs const char *system_names[2]; // crm_system_name values (subdaemon names) const char *ipc_names[3]; // libqb IPC names used to contact server const char *message_types[3]; // IPC/cluster message types sent to server } server_info[] = { [pcmk_ipc_unknown] = { NULL, { NULL, NULL, }, { NULL, NULL, NULL, }, { NULL, NULL, NULL, }, }, [pcmk_ipc_attrd] = { "attribute manager", - { "pacemaker-attrd", NULL, }, + { PCMK__SERVER_ATTRD, NULL, }, { PCMK__VALUE_ATTRD, NULL, NULL, }, { PCMK__VALUE_ATTRD, NULL, NULL, }, }, [pcmk_ipc_based] = { "CIB manager", - { "pacemaker-based", NULL, }, + { PCMK__SERVER_BASED, NULL, }, { PCMK__SERVER_BASED_RW, PCMK__SERVER_BASED_RO, PCMK__SERVER_BASED_SHM, }, { CRM_SYSTEM_CIB, NULL, NULL, }, }, [pcmk_ipc_controld] = { "controller", - { "pacemaker-controld", NULL, }, + { PCMK__SERVER_CONTROLD, NULL, }, { PCMK__VALUE_CRMD, NULL, NULL, }, { PCMK__VALUE_CRMD, CRM_SYSTEM_DC, CRM_SYSTEM_TENGINE, }, }, [pcmk_ipc_execd] = { "executor", - { "pacemaker-execd", "pacemaker-remoted", }, + { PCMK__SERVER_EXECD, PCMK__SERVER_REMOTED, }, { PCMK__VALUE_LRMD, NULL, NULL, }, { PCMK__VALUE_LRMD, NULL, NULL, }, }, [pcmk_ipc_fenced] = { "fencer", - { "pacemaker-fenced", NULL, }, + { PCMK__SERVER_FENCED, NULL, }, { PCMK__VALUE_STONITH_NG, NULL, NULL, }, { PCMK__VALUE_STONITH_NG, NULL, NULL, }, }, [pcmk_ipc_pacemakerd] = { "launcher", - { "pacemakerd", NULL, }, + { PCMK__SERVER_PACEMAKERD, NULL, }, { CRM_SYSTEM_MCP, NULL, NULL, }, { CRM_SYSTEM_MCP, NULL, NULL, }, }, [pcmk_ipc_schedulerd] = { "scheduler", - { "pacemaker-schedulerd", NULL, }, + { PCMK__SERVER_SCHEDULERD, NULL, }, { CRM_SYSTEM_PENGINE, NULL, NULL, }, { CRM_SYSTEM_PENGINE, NULL, NULL, }, }, }; +/*! + * \internal + * \brief Return server's (primary) system name + * + * \param[in] server Server to get system name for + * + * \return System name for server (or NULL if invalid) + * \note If \p server is an \c enum pcmk_ipc_server value other than + * \c pcmk_ipc_unknown, the return value is guaranteed to be non-NULL. + */ +const char * +pcmk__server_name(enum pcmk_ipc_server server) +{ + CRM_CHECK((server > 0) && (server < PCMK__NELEM(server_info)), + return NULL); + return server_info[server].system_names[0]; +} + /*! * \internal * \brief Return a readable description of server for logging * * \param[in] server Server to get log name for * * \return Log name for server (or NULL if invalid) + * \note If \p server is an \c enum pcmk_ipc_server value other than + * \c pcmk_ipc_unknown, the return value is guaranteed to be non-NULL. */ const char * pcmk__server_log_name(enum pcmk_ipc_server server) { CRM_CHECK((server > 0) && (server < PCMK__NELEM(server_info)), return NULL); return server_info[server].log_name; } /*! * \internal * \brief Return the (primary) IPC endpoint name for a server * * \param[in] server Server to get IPC endpoint for * * \return IPC endpoint for server (or NULL if invalid) + * \note If \p server is an \c enum pcmk_ipc_server value other than + * \c pcmk_ipc_unknown, the return value is guaranteed to be non-NULL. */ const char * pcmk__server_ipc_name(enum pcmk_ipc_server server) { CRM_CHECK((server > 0) && (server < PCMK__NELEM(server_info)), return NULL); return server_info[server].ipc_names[0]; } /*! * \internal * \brief Return the (primary) message type for a server * * \param[in] server Server to get message type for * * \return Message type for server (or NULL if invalid) + * \note If \p server is an \c enum pcmk_ipc_server value other than + * \c pcmk_ipc_unknown, the return value is guaranteed to be non-NULL. */ const char * pcmk__server_message_type(enum pcmk_ipc_server server) { CRM_CHECK((server > 0) && (server < PCMK__NELEM(server_info)), return NULL); return server_info[server].message_types[0]; } /*! * \internal * \brief Get the server corresponding to a name * * \param[in] text A system name, IPC endpoint name, or message type * * \return Server corresponding to \p text */ enum pcmk_ipc_server pcmk__parse_server(const char *text) { if (text == NULL) { return pcmk_ipc_unknown; } for (enum pcmk_ipc_server server = pcmk_ipc_attrd; server <= pcmk_ipc_schedulerd; ++server) { int name; for (name = 0; (name < 2) && (server_info[server].system_names[name] != NULL); ++name) { if (strcmp(text, server_info[server].system_names[name]) == 0) { return server; } } for (name = 0; (name < 3) && (server_info[server].ipc_names[name] != NULL); ++name) { if (strcmp(text, server_info[server].ipc_names[name]) == 0) { return server; } } for (name = 0; (name < 3) && (server_info[server].message_types[name] != NULL); ++name) { if (strcmp(text, server_info[server].message_types[name]) == 0) { return server; } } } return pcmk_ipc_unknown; } diff --git a/lib/common/tests/options/pcmk__env_option_enabled_test.c b/lib/common/tests/options/pcmk__env_option_enabled_test.c index b7d5d25daa..575711ab5a 100644 --- a/lib/common/tests/options/pcmk__env_option_enabled_test.c +++ b/lib/common/tests/options/pcmk__env_option_enabled_test.c @@ -1,101 +1,102 @@ /* - * Copyright 2022 the Pacemaker project contributors + * Copyright 2022-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include "mock_private.h" static void disabled_null_value(void **state) { // Return false if option value not found (NULL accomplishes this) assert_false(pcmk__env_option_enabled(NULL, NULL)); - assert_false(pcmk__env_option_enabled("pacemaker-execd", NULL)); + assert_false(pcmk__env_option_enabled(PCMK__SERVER_EXECD, NULL)); } static void enabled_true_value(void **state) { // Return true if option value is true, with or without daemon name pcmk__mock_getenv = true; expect_string(__wrap_getenv, name, "PCMK_env_var"); will_return(__wrap_getenv, "true"); assert_true(pcmk__env_option_enabled(NULL, "env_var")); expect_string(__wrap_getenv, name, "PCMK_env_var"); will_return(__wrap_getenv, "true"); - assert_true(pcmk__env_option_enabled("pacemaker-execd", "env_var")); + assert_true(pcmk__env_option_enabled(PCMK__SERVER_EXECD, "env_var")); pcmk__mock_getenv = false; } static void disabled_false_value(void **state) { // Return false if option value is false (no daemon list) pcmk__mock_getenv = true; expect_string(__wrap_getenv, name, "PCMK_env_var"); will_return(__wrap_getenv, "false"); assert_false(pcmk__env_option_enabled(NULL, "env_var")); expect_string(__wrap_getenv, name, "PCMK_env_var"); will_return(__wrap_getenv, "false"); - assert_false(pcmk__env_option_enabled("pacemaker-execd", "env_var")); + assert_false(pcmk__env_option_enabled(PCMK__SERVER_EXECD, "env_var")); pcmk__mock_getenv = false; } static void enabled_daemon_in_list(void **state) { // Return true if daemon is in the option's value pcmk__mock_getenv = true; expect_string(__wrap_getenv, name, "PCMK_env_var"); - will_return(__wrap_getenv, "pacemaker-execd"); - assert_true(pcmk__env_option_enabled("pacemaker-execd", "env_var")); + will_return(__wrap_getenv, PCMK__SERVER_EXECD); + assert_true(pcmk__env_option_enabled(PCMK__SERVER_EXECD, "env_var")); expect_string(__wrap_getenv, name, "PCMK_env_var"); - will_return(__wrap_getenv, "pacemaker-execd,pacemaker-fenced"); - assert_true(pcmk__env_option_enabled("pacemaker-execd", "env_var")); + will_return(__wrap_getenv, PCMK__SERVER_EXECD "," PCMK__SERVER_FENCED); + assert_true(pcmk__env_option_enabled(PCMK__SERVER_EXECD, "env_var")); expect_string(__wrap_getenv, name, "PCMK_env_var"); - will_return(__wrap_getenv, "pacemaker-controld,pacemaker-execd"); - assert_true(pcmk__env_option_enabled("pacemaker-execd", "env_var")); + will_return(__wrap_getenv, PCMK__SERVER_CONTROLD "," PCMK__SERVER_EXECD); + assert_true(pcmk__env_option_enabled(PCMK__SERVER_EXECD, "env_var")); expect_string(__wrap_getenv, name, "PCMK_env_var"); will_return(__wrap_getenv, - "pacemaker-controld,pacemaker-execd,pacemaker-fenced"); - assert_true(pcmk__env_option_enabled("pacemaker-execd", "env_var")); + PCMK__SERVER_CONTROLD "," PCMK__SERVER_EXECD + "," PCMK__SERVER_FENCED); + assert_true(pcmk__env_option_enabled(PCMK__SERVER_EXECD, "env_var")); pcmk__mock_getenv = false; } static void disabled_daemon_not_in_list(void **state) { // Return false if value is not true and daemon is not in the option's value pcmk__mock_getenv = true; expect_string(__wrap_getenv, name, "PCMK_env_var"); - will_return(__wrap_getenv, "pacemaker-controld,pacemaker-fenced"); - assert_false(pcmk__env_option_enabled("pacemaker-execd", "env_var")); + will_return(__wrap_getenv, PCMK__SERVER_CONTROLD "," PCMK__SERVER_FENCED); + assert_false(pcmk__env_option_enabled(PCMK__SERVER_EXECD, "env_var")); pcmk__mock_getenv = false; } PCMK__UNIT_TEST(NULL, NULL, cmocka_unit_test(disabled_null_value), cmocka_unit_test(enabled_true_value), cmocka_unit_test(disabled_false_value), cmocka_unit_test(enabled_daemon_in_list), cmocka_unit_test(disabled_daemon_not_in_list)) diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index bb964a806f..ad57ccd872 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -1,327 +1,327 @@ /* * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include static pid_t sbd_pid = 0; static void sysrq_trigger(char t) { #if HAVE_LINUX_PROCFS FILE *procf; // Root can always write here, regardless of kernel.sysrq value procf = fopen("/proc/sysrq-trigger", "a"); if (!procf) { crm_perror(LOG_WARNING, "Opening sysrq-trigger failed"); return; } crm_info("sysrq-trigger: %c", t); fprintf(procf, "%c\n", t); fclose(procf); #endif // HAVE_LINUX_PROCFS return; } /*! * \internal * \brief Panic the local host (if root) or tell pacemakerd to do so */ static void panic_local(void) { int rc = pcmk_ok; uid_t uid = geteuid(); pid_t ppid = getppid(); const char *panic_action = pcmk__env_option(PCMK__ENV_PANIC_ACTION); // Default panic action is to reboot char sysrq = 'b'; int reboot_cmd = RB_AUTOBOOT; if(uid != 0 && ppid > 1) { /* We're a non-root pacemaker daemon (pacemaker-based, * pacemaker-controld, pacemaker-schedulerd, pacemaker-attrd, etc.) with * the original pacemakerd parent. * * Of these, only the controller is likely to be initiating resets. */ crm_emerg("Signaling parent %lld to panic", (long long) ppid); crm_exit(CRM_EX_PANIC); return; } else if (uid != 0) { #if HAVE_LINUX_PROCFS /* * No permissions, and no pacemakerd parent to escalate to. * Track down the new pacemakerd process and send a signal instead. */ union sigval signal_value; memset(&signal_value, 0, sizeof(signal_value)); - ppid = pcmk__procfs_pid_of("pacemakerd"); + ppid = pcmk__procfs_pid_of(PCMK__SERVER_PACEMAKERD); crm_emerg("Signaling pacemakerd[%lld] to panic", (long long) ppid); if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal pacemakerd[%lld] to panic", (long long) ppid); } #endif // HAVE_LINUX_PROCFS /* The best we can do now is die */ crm_exit(CRM_EX_PANIC); return; } /* We're either pacemakerd, or a pacemaker daemon running as root */ if (pcmk__starts_with(panic_action, "sync-")) { sync(); panic_action += strlen("sync-"); }; if (pcmk__str_eq(panic_action, "crash", pcmk__str_casei)) { sysrq = 'c'; } else if (pcmk__str_eq(panic_action, "off", pcmk__str_casei)) { sysrq = 'o'; #ifdef RB_POWER_OFF reboot_cmd = RB_POWER_OFF; #elif defined(RB_POWEROFF) reboot_cmd = RB_POWEROFF; #endif } sysrq_trigger(sysrq); reboot(reboot_cmd); rc = errno; crm_emerg("Reboot failed, escalating to parent %lld: %s " QB_XS " rc=%d", (long long) ppid, pcmk_rc_str(rc), rc); if(ppid > 1) { /* child daemon */ crm_exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Tell sbd to kill the local host, then exit */ static void panic_sbd(void) { union sigval signal_value; pid_t ppid = getppid(); crm_emerg("Signaling sbd[%lld] to panic", (long long) sbd_pid); memset(&signal_value, 0, sizeof(signal_value)); /* TODO: Arrange for a slightly less brutal option? */ if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal sbd[%lld] to terminate", (long long) sbd_pid); panic_local(); } if(ppid > 1) { /* child daemon */ crm_exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Panic the local host * * Panic the local host either by sbd (if running), directly, or by asking * pacemakerd. If trace logging this function, exit instead. * * \param[in] origin Function caller (for logging only) */ void pcmk__panic(const char *origin) { /* Ensure sbd_pid is set */ (void) pcmk__locate_sbd(); pcmk__if_tracing( { // getppid() == 1 means our original parent no longer exists crm_emerg("Shutting down instead of panicking the node " QB_XS " origin=%s sbd=%lld parent=%d", origin, (long long) sbd_pid, getppid()); crm_exit(CRM_EX_FATAL); return; }, {} ); if(sbd_pid > 1) { crm_emerg("Signaling sbd[%lld] to panic the system: %s", (long long) sbd_pid, origin); panic_sbd(); } else { crm_emerg("Panicking the system directly: %s", origin); panic_local(); } } /*! * \internal * \brief Return the process ID of sbd (or 0 if it is not running) */ pid_t pcmk__locate_sbd(void) { char *pidfile = NULL; char *sbd_path = NULL; int rc; if(sbd_pid > 1) { return sbd_pid; } /* Look for the pid file */ pidfile = crm_strdup_printf(PCMK_RUN_DIR "/sbd.pid"); sbd_path = crm_strdup_printf("%s/sbd", SBIN_DIR); /* Read the pid file */ rc = pcmk__pidfile_matches(pidfile, 0, sbd_path, &sbd_pid); if (rc == pcmk_rc_ok) { crm_trace("SBD detected at pid %lld (via PID file %s)", (long long) sbd_pid, pidfile); #if HAVE_LINUX_PROCFS } else { /* Fall back to /proc for systems that support it */ sbd_pid = pcmk__procfs_pid_of("sbd"); crm_trace("SBD detected at pid %lld (via procfs)", (long long) sbd_pid); #endif // HAVE_LINUX_PROCFS } if(sbd_pid < 0) { sbd_pid = 0; crm_trace("SBD not detected"); } free(pidfile); free(sbd_path); return sbd_pid; } long pcmk__get_sbd_watchdog_timeout(void) { static long sbd_timeout = -2; if (sbd_timeout == -2) { sbd_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT")); } return sbd_timeout; } bool pcmk__get_sbd_sync_resource_startup(void) { static int sync_resource_startup = PCMK__SBD_SYNC_DEFAULT; static bool checked_sync_resource_startup = false; if (!checked_sync_resource_startup) { const char *sync_env = getenv("SBD_SYNC_RESOURCE_STARTUP"); if (sync_env == NULL) { crm_trace("Defaulting to %sstart-up synchronization with sbd", (PCMK__SBD_SYNC_DEFAULT? "" : "no ")); } else if (crm_str_to_boolean(sync_env, &sync_resource_startup) < 0) { crm_warn("Defaulting to %sstart-up synchronization with sbd " "because environment value '%s' is invalid", (PCMK__SBD_SYNC_DEFAULT? "" : "no "), sync_env); } checked_sync_resource_startup = true; } return sync_resource_startup != 0; } long pcmk__auto_stonith_watchdog_timeout(void) { long sbd_timeout = pcmk__get_sbd_watchdog_timeout(); return (sbd_timeout <= 0)? 0 : (2 * sbd_timeout); } bool pcmk__valid_stonith_watchdog_timeout(const char *value) { /* @COMPAT At a compatibility break, accept either negative values or a * specific string like "auto" (but not both) to mean "auto-calculate the * timeout." Reject other values that aren't parsable as timeouts. */ long st_timeout = value? crm_get_msec(value) : 0; if (st_timeout < 0) { st_timeout = pcmk__auto_stonith_watchdog_timeout(); crm_debug("Using calculated value %ld for " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " (%s)", st_timeout, value); } if (st_timeout == 0) { crm_debug("Watchdog may be enabled but " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " is disabled (%s)", value? value : "default"); } else if (pcmk__locate_sbd() == 0) { crm_emerg("Shutting down: " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " configured (%s) but SBD not active", pcmk__s(value, "auto")); crm_exit(CRM_EX_FATAL); return false; } else { long sbd_timeout = pcmk__get_sbd_watchdog_timeout(); if (st_timeout < sbd_timeout) { crm_emerg("Shutting down: " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " (%s) too short (must be >%ldms)", value, sbd_timeout); crm_exit(CRM_EX_FATAL); return false; } crm_info("Watchdog configured with " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %s and SBD timeout %ldms", value, sbd_timeout); } return true; } diff --git a/lib/lrmd/proxy_common.c b/lib/lrmd/proxy_common.c index c85cb54d14..35d272a919 100644 --- a/lib/lrmd/proxy_common.c +++ b/lib/lrmd/proxy_common.c @@ -1,327 +1,327 @@ /* * Copyright 2015-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); GHashTable *proxy_table = NULL; static void remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) { /* sending to the remote node that an ipc connection has been destroyed */ xmlNode *msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_SESSION, session_id); lrmd_internal_proxy_send(lrmd, msg); pcmk__xml_free(msg); } /*! * \internal * \brief Acknowledge a remote proxy shutdown request * * \param[in,out] lrmd Connection to proxy */ void remote_proxy_ack_shutdown(lrmd_t *lrmd) { xmlNode *msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_ACK); lrmd_internal_proxy_send(lrmd, msg); pcmk__xml_free(msg); } /*! * \internal * \brief Reject a remote proxy shutdown request * * \param[in,out] lrmd Connection to proxy */ void remote_proxy_nack_shutdown(lrmd_t *lrmd) { xmlNode *msg = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); crm_xml_add(msg, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_NACK); lrmd_internal_proxy_send(lrmd, msg); pcmk__xml_free(msg); } void remote_proxy_relay_event(remote_proxy_t *proxy, xmlNode *msg) { /* sending to the remote node an event msg. */ xmlNode *event = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); xmlNode *wrapper = NULL; crm_xml_add(event, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_EVENT); crm_xml_add(event, PCMK__XA_LRMD_IPC_SESSION, proxy->session_id); wrapper = pcmk__xe_create(event, PCMK__XE_LRMD_IPC_MSG); pcmk__xml_copy(wrapper, msg); crm_log_xml_explicit(event, "EventForProxy"); lrmd_internal_proxy_send(proxy->lrm, event); pcmk__xml_free(event); } void remote_proxy_relay_response(remote_proxy_t *proxy, xmlNode *msg, int msg_id) { /* sending to the remote node a response msg. */ xmlNode *response = pcmk__xe_create(NULL, PCMK__XE_LRMD_IPC_PROXY); xmlNode *wrapper = NULL; crm_xml_add(response, PCMK__XA_LRMD_IPC_OP, LRMD_IPC_OP_RESPONSE); crm_xml_add(response, PCMK__XA_LRMD_IPC_SESSION, proxy->session_id); crm_xml_add_int(response, PCMK__XA_LRMD_IPC_MSG_ID, msg_id); wrapper = pcmk__xe_create(response, PCMK__XE_LRMD_IPC_MSG); pcmk__xml_copy(wrapper, msg); lrmd_internal_proxy_send(proxy->lrm, response); pcmk__xml_free(response); } static void remote_proxy_end_session(remote_proxy_t *proxy) { if (proxy == NULL) { return; } crm_trace("ending session ID %s", proxy->session_id); if (proxy->source) { mainloop_del_ipc_client(proxy->source); } } void remote_proxy_free(gpointer data) { remote_proxy_t *proxy = data; crm_trace("freed proxy session ID %s", proxy->session_id); free(proxy->node_name); free(proxy->session_id); free(proxy); } int remote_proxy_dispatch(const char *buffer, ssize_t length, gpointer userdata) { - // Async responses from cib and friends to clients via pacemaker-remoted + // Async responses from servers to clients via the remote executor xmlNode *xml = NULL; uint32_t flags = 0; remote_proxy_t *proxy = userdata; xml = pcmk__xml_parse(buffer); if (xml == NULL) { crm_warn("Received a NULL msg from IPC service."); return 1; } flags = crm_ipc_buffer_flags(proxy->ipc); if (flags & crm_ipc_proxied_relay_response) { crm_trace("Passing response back to %.8s on %s: %.200s - request id: %d", proxy->session_id, proxy->node_name, buffer, proxy->last_request_id); remote_proxy_relay_response(proxy, xml, proxy->last_request_id); proxy->last_request_id = 0; } else { crm_trace("Passing event back to %.8s on %s: %.200s", proxy->session_id, proxy->node_name, buffer); remote_proxy_relay_event(proxy, xml); } pcmk__xml_free(xml); return 1; } void remote_proxy_disconnected(gpointer userdata) { remote_proxy_t *proxy = userdata; crm_trace("destroying %p", proxy); proxy->source = NULL; proxy->ipc = NULL; if(proxy->lrm) { remote_proxy_notify_destroy(proxy->lrm, proxy->session_id); proxy->lrm = NULL; } g_hash_table_remove(proxy_table, proxy->session_id); } remote_proxy_t * remote_proxy_new(lrmd_t *lrmd, struct ipc_client_callbacks *proxy_callbacks, const char *node_name, const char *session_id, const char *channel) { remote_proxy_t *proxy = NULL; if(channel == NULL) { crm_err("No channel specified to proxy"); remote_proxy_notify_destroy(lrmd, session_id); return NULL; } proxy = pcmk__assert_alloc(1, sizeof(remote_proxy_t)); proxy->node_name = strdup(node_name); proxy->session_id = strdup(session_id); proxy->lrm = lrmd; if ((pcmk__parse_server(crm_system_name) == pcmk_ipc_controld) && (pcmk__parse_server(channel) == pcmk_ipc_controld)) { // The controller doesn't need to connect to itself proxy->is_local = TRUE; } else { proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 0, proxy, proxy_callbacks); proxy->ipc = mainloop_get_ipc_client(proxy->source); if (proxy->source == NULL) { remote_proxy_free(proxy); remote_proxy_notify_destroy(lrmd, session_id); return NULL; } } crm_trace("new remote proxy client established to %s on %s, session id %s", channel, node_name, session_id); g_hash_table_insert(proxy_table, proxy->session_id, proxy); return proxy; } void remote_proxy_cb(lrmd_t *lrmd, const char *node_name, xmlNode *msg) { const char *op = crm_element_value(msg, PCMK__XA_LRMD_IPC_OP); const char *session = crm_element_value(msg, PCMK__XA_LRMD_IPC_SESSION); remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); int msg_id = 0; /* sessions are raw ipc connections to IPC, * all we do is proxy requests/responses exactly * like they are given to us at the ipc level. */ CRM_CHECK(op != NULL, return); CRM_CHECK(session != NULL, return); crm_element_value_int(msg, PCMK__XA_LRMD_IPC_MSG_ID, &msg_id); /* This is msg from remote ipc client going to real ipc server */ if (pcmk__str_eq(op, LRMD_IPC_OP_DESTROY, pcmk__str_casei)) { remote_proxy_end_session(proxy); } else if (pcmk__str_eq(op, LRMD_IPC_OP_REQUEST, pcmk__str_casei)) { int flags = 0; const char *name = crm_element_value(msg, PCMK__XA_LRMD_IPC_CLIENT); xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_LRMD_IPC_MSG, NULL, NULL); xmlNode *request = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); CRM_CHECK(request != NULL, return); if (proxy == NULL) { /* proxy connection no longer exists */ remote_proxy_notify_destroy(lrmd, session); return; } // Controller requests MUST be handled by the controller, not us CRM_CHECK(proxy->is_local == FALSE, remote_proxy_end_session(proxy); return); if (!crm_ipc_connected(proxy->ipc)) { remote_proxy_end_session(proxy); return; } proxy->last_request_id = 0; crm_element_value_int(msg, PCMK__XA_LRMD_IPC_MSG_FLAGS, &flags); crm_xml_add(request, PCMK_XE_ACL_ROLE, "pacemaker-remote"); CRM_ASSERT(node_name); pcmk__update_acl_user(request, PCMK__XA_LRMD_IPC_USER, node_name); if (pcmk_is_set(flags, crm_ipc_proxied)) { const char *type = crm_element_value(request, PCMK__XA_T); int rc = 0; if (pcmk__str_eq(type, PCMK__VALUE_ATTRD, pcmk__str_none) && (crm_element_value(request, PCMK__XA_ATTR_HOST) == NULL) && pcmk__str_any_of(crm_element_value(request, PCMK_XA_TASK), PCMK__ATTRD_CMD_UPDATE, PCMK__ATTRD_CMD_UPDATE_BOTH, PCMK__ATTRD_CMD_UPDATE_DELAY, NULL)) { pcmk__xe_add_node(request, proxy->node_name, 0); } rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL); if(rc < 0) { xmlNode *op_reply = pcmk__xe_create(NULL, PCMK__XE_NACK); crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc); /* Send a n'ack so the caller doesn't block */ crm_xml_add(op_reply, PCMK_XA_FUNCTION, __func__); crm_xml_add_int(op_reply, PCMK__XA_LINE, __LINE__); crm_xml_add_int(op_reply, PCMK_XA_RC, rc); remote_proxy_relay_response(proxy, op_reply, msg_id); pcmk__xml_free(op_reply); } else { crm_trace("Relayed %s request %d from %s to %s for %s", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); proxy->last_request_id = msg_id; } } else { int rc = pcmk_ok; xmlNode *op_reply = NULL; // @COMPAT pacemaker_remoted <= 1.1.10 crm_trace("Relaying %s request %d from %s to %s for %s", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); rc = crm_ipc_send(proxy->ipc, request, flags, 10000, &op_reply); if(rc < 0) { crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc); } else { crm_trace("Relayed %s request %d from %s to %s for %s", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); } if(op_reply) { remote_proxy_relay_response(proxy, op_reply, msg_id); pcmk__xml_free(op_reply); } } } else { crm_err("Unknown proxy operation: %s", op); } } diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c index 027c69511f..b125f6b36f 100644 --- a/lib/pacemaker/pcmk_cluster_queries.c +++ b/lib/pacemaker/pcmk_cluster_queries.c @@ -1,902 +1,902 @@ /* * Copyright 2020-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // xmlNode #include #include #include #include #include #include #include #include #include #include #include //! Object to store node info from the controller API typedef struct { /* Adapted from pcmk_controld_api_reply_t:data:node_info. * (char **) are convenient here for use within callbacks: we can skip * copying strings unless the caller passes a non-NULL value. */ uint32_t id; char **node_name; char **uuid; char **state; bool have_quorum; bool is_remote; } node_info_t; //! Object to store API results, a timeout, and an output object typedef struct { pcmk__output_t *out; bool show_output; int rc; unsigned int message_timeout_ms; enum pcmk_pacemakerd_state pcmkd_state; node_info_t node_info; } data_t; /*! * \internal * \brief Validate that an IPC API event is a good reply * * \param[in,out] data API results and options * \param[in] api IPC API connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * * \return Standard Pacemaker return code */ static int validate_reply_event(data_t *data, const pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status) { pcmk__output_t *out = data->out; switch (event_type) { case pcmk_ipc_event_reply: break; case pcmk_ipc_event_disconnect: if (data->rc == ECONNRESET) { // Unexpected out->err(out, "error: Lost connection to %s", pcmk_ipc_name(api, true)); } // Nothing bad but not the reply we're looking for return ENOTSUP; default: // Ditto return ENOTSUP; } if (status != CRM_EX_OK) { out->err(out, "error: Bad reply from %s: %s", pcmk_ipc_name(api, true), crm_exit_str(status)); data->rc = EBADMSG; return data->rc; } return pcmk_rc_ok; } /*! * \internal * \brief Validate that a controller API event is a good reply of expected type * * \param[in,out] data API results and options * \param[in] api Controller connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in] event_data Event-specific data * \param[in] expected_type Expected reply type * * \return Standard Pacemaker return code */ static int validate_controld_reply(data_t *data, const pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, const void *event_data, enum pcmk_controld_api_reply expected_type) { pcmk__output_t *out = data->out; int rc = pcmk_rc_ok; const pcmk_controld_api_reply_t *reply = NULL; rc = validate_reply_event(data, api, event_type, status); if (rc != pcmk_rc_ok) { return rc; } reply = (const pcmk_controld_api_reply_t *) event_data; if (reply->reply_type != expected_type) { out->err(out, "error: Unexpected reply type '%s' from controller", pcmk__controld_api_reply2str(reply->reply_type)); data->rc = EBADMSG; return data->rc; } return pcmk_rc_ok; } /*! * \internal * \brief Validate that a \p pacemakerd API event is a good reply of expected * type * * \param[in,out] data API results and options * \param[in] api \p pacemakerd connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in] event_data Event-specific data * \param[in] expected_type Expected reply type * * \return Standard Pacemaker return code */ static int validate_pcmkd_reply(data_t *data, const pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, const void *event_data, enum pcmk_pacemakerd_api_reply expected_type) { pcmk__output_t *out = data->out; const pcmk_pacemakerd_api_reply_t *reply = NULL; int rc = validate_reply_event(data, api, event_type, status); if (rc != pcmk_rc_ok) { return rc; } reply = (const pcmk_pacemakerd_api_reply_t *) event_data; if (reply->reply_type != expected_type) { out->err(out, "error: Unexpected reply type '%s' from pacemakerd", pcmk__pcmkd_api_reply2str(reply->reply_type)); data->rc = EBADMSG; return data->rc; } return pcmk_rc_ok; } /*! * \internal * \brief Process a controller status IPC event * * \param[in,out] controld_api Controller connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in,out] event_data \p pcmk_controld_api_reply_t object containing * event-specific data * \param[in,out] user_data \p data_t object for API results and options */ static void controller_status_event_cb(pcmk_ipc_api_t *controld_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { data_t *data = (data_t *) user_data; pcmk__output_t *out = data->out; const pcmk_controld_api_reply_t *reply = NULL; int rc = validate_controld_reply(data, controld_api, event_type, status, event_data, pcmk_controld_reply_ping); if (rc != pcmk_rc_ok) { return; } reply = (const pcmk_controld_api_reply_t *) event_data; out->message(out, "health", reply->data.ping.sys_from, reply->host_from, reply->data.ping.fsa_state, reply->data.ping.result); data->rc = pcmk_rc_ok; } /*! * \internal * \brief Process a designated controller IPC event * * \param[in,out] controld_api Controller connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in,out] event_data \p pcmk_controld_api_reply_t object containing * event-specific data * \param[in,out] user_data \p data_t object for API results and options */ static void designated_controller_event_cb(pcmk_ipc_api_t *controld_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { data_t *data = (data_t *) user_data; pcmk__output_t *out = data->out; const pcmk_controld_api_reply_t *reply = NULL; int rc = validate_controld_reply(data, controld_api, event_type, status, event_data, pcmk_controld_reply_ping); if (rc != pcmk_rc_ok) { return; } reply = (const pcmk_controld_api_reply_t *) event_data; out->message(out, "dc", reply->host_from); data->rc = pcmk_rc_ok; } /*! * \internal * \brief Process a node info IPC event * * \param[in,out] controld_api Controller connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in,out] event_data \p pcmk_controld_api_reply_t object containing * event-specific data * \param[in,out] user_data \p data_t object for API results and options */ static void node_info_event_cb(pcmk_ipc_api_t *controld_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { data_t *data = (data_t *) user_data; pcmk__output_t *out = data->out; const pcmk_controld_api_reply_t *reply = NULL; int rc = validate_controld_reply(data, controld_api, event_type, status, event_data, pcmk_controld_reply_info); if (rc != pcmk_rc_ok) { return; } reply = (const pcmk_controld_api_reply_t *) event_data; if (reply->data.node_info.uname == NULL) { out->err(out, "Node is not known to cluster"); data->rc = pcmk_rc_node_unknown; return; } data->node_info.have_quorum = reply->data.node_info.have_quorum; data->node_info.is_remote = reply->data.node_info.is_remote; data->node_info.id = (uint32_t) reply->data.node_info.id; pcmk__str_update(data->node_info.node_name, reply->data.node_info.uname); pcmk__str_update(data->node_info.uuid, reply->data.node_info.uuid); pcmk__str_update(data->node_info.state, reply->data.node_info.state); if (data->show_output) { out->message(out, "node-info", (uint32_t) reply->data.node_info.id, reply->data.node_info.uname, reply->data.node_info.uuid, reply->data.node_info.state, reply->data.node_info.have_quorum, reply->data.node_info.is_remote); } data->rc = pcmk_rc_ok; } /*! * \internal * \brief Process a \p pacemakerd status IPC event * * \param[in,out] pacemakerd_api \p pacemakerd connection * \param[in] event_type Type of event that occurred * \param[in] status Event status * \param[in,out] event_data \p pcmk_pacemakerd_api_reply_t object * containing event-specific data * \param[in,out] user_data \p data_t object for API results and options */ static void pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { data_t *data = user_data; pcmk__output_t *out = data->out; const pcmk_pacemakerd_api_reply_t *reply = NULL; int rc = validate_pcmkd_reply(data, pacemakerd_api, event_type, status, event_data, pcmk_pacemakerd_reply_ping); if (rc != pcmk_rc_ok) { return; } // Parse desired information from reply reply = (const pcmk_pacemakerd_api_reply_t *) event_data; data->pcmkd_state = reply->data.ping.state; data->rc = pcmk_rc_ok; if (!data->show_output) { return; } if (reply->data.ping.status == pcmk_rc_ok) { out->message(out, "pacemakerd-health", reply->data.ping.sys_from, reply->data.ping.state, NULL, reply->data.ping.last_good); } else { out->message(out, "pacemakerd-health", reply->data.ping.sys_from, reply->data.ping.state, "query failed", time(NULL)); } } static pcmk_ipc_api_t * ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, enum pcmk_ipc_dispatch dispatch_type, bool eremoteio_ok) { int rc; pcmk__output_t *out = data->out; pcmk_ipc_api_t *api = NULL; rc = pcmk_new_ipc_api(&api, server); if (api == NULL) { out->err(out, "error: Could not connect to %s: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); data->rc = rc; return NULL; } if (cb != NULL) { pcmk_register_ipc_callback(api, cb, data); } rc = pcmk__connect_ipc(api, dispatch_type, 5); if (rc != pcmk_rc_ok) { if (rc == EREMOTEIO) { data->pcmkd_state = pcmk_pacemakerd_state_remote; if (eremoteio_ok) { /* EREMOTEIO may be expected and acceptable for some callers * on a Pacemaker Remote node */ crm_debug("Ignoring %s connection failure: No " "Pacemaker Remote connection", pcmk_ipc_name(api, true)); rc = pcmk_rc_ok; } else { out->err(out, "error: Could not connect to %s: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); } } data->rc = rc; pcmk_free_ipc_api(api); return NULL; } return api; } /*! * \internal * \brief Poll an IPC API connection until timeout or a reply is received * * \param[in,out] data API results and options * \param[in,out] api IPC API connection * \param[in] on_node If not \p NULL, name of the node to poll (used only * for logging) * * \note Sets the \p rc member of \p data on error */ static void poll_until_reply(data_t *data, pcmk_ipc_api_t *api, const char *on_node) { pcmk__output_t *out = data->out; uint64_t start_nsec = qb_util_nano_current_get(); uint64_t end_nsec = 0; uint64_t elapsed_ms = 0; uint64_t remaining_ms = data->message_timeout_ms; while (remaining_ms > 0) { int rc = pcmk_poll_ipc(api, remaining_ms); if (rc == EAGAIN) { // Poll timed out break; } if (rc != pcmk_rc_ok) { out->err(out, "error: Failed to poll %s API%s%s: %s", pcmk_ipc_name(api, true), (on_node != NULL)? " on " : "", pcmk__s(on_node, ""), pcmk_rc_str(rc)); data->rc = rc; return; } pcmk_dispatch_ipc(api); if (data->rc != EAGAIN) { // Received a reply return; } end_nsec = qb_util_nano_current_get(); elapsed_ms = (end_nsec - start_nsec) / QB_TIME_NS_IN_MSEC; remaining_ms = data->message_timeout_ms - elapsed_ms; } out->err(out, "error: Timed out after %ums waiting for reply from %s API%s%s", data->message_timeout_ms, pcmk_ipc_name(api, true), (on_node != NULL)? " on " : "", pcmk__s(on_node, "")); data->rc = EAGAIN; } /*! * \internal * \brief Get and output controller status * * \param[in,out] out Output object * \param[in] node_name Name of node whose status is desired * (\p NULL for DC) * \param[in] message_timeout_ms How long to wait for a reply from the - * \p pacemaker-controld API. If 0, + * the controller API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code */ int pcmk__controller_status(pcmk__output_t *out, const char *node_name, unsigned int message_timeout_ms) { data_t data = { .out = out, .rc = EAGAIN, .message_timeout_ms = message_timeout_ms, }; enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; pcmk_ipc_api_t *controld_api = NULL; if (message_timeout_ms == 0) { dispatch_type = pcmk_ipc_dispatch_sync; } controld_api = ipc_connect(&data, pcmk_ipc_controld, controller_status_event_cb, dispatch_type, false); if (controld_api != NULL) { int rc = pcmk_controld_api_ping(controld_api, node_name); if (rc != pcmk_rc_ok) { out->err(out, "error: Could not ping controller API on %s: %s", pcmk__s(node_name, "DC"), pcmk_rc_str(rc)); data.rc = rc; } if (dispatch_type == pcmk_ipc_dispatch_poll) { poll_until_reply(&data, controld_api, pcmk__s(node_name, "DC")); } pcmk_free_ipc_api(controld_api); } return data.rc; } // Documented in header int pcmk_controller_status(xmlNodePtr *xml, const char *node_name, unsigned int message_timeout_ms) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__controller_status(out, node_name, message_timeout_ms); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } /*! * \internal * \brief Get and output designated controller node name * * \param[in,out] out Output object * \param[in] message_timeout_ms How long to wait for a reply from the - * \p pacemaker-controld API. If 0, + * the controller API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code */ int pcmk__designated_controller(pcmk__output_t *out, unsigned int message_timeout_ms) { data_t data = { .out = out, .rc = EAGAIN, .message_timeout_ms = message_timeout_ms, }; enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; pcmk_ipc_api_t *controld_api = NULL; if (message_timeout_ms == 0) { dispatch_type = pcmk_ipc_dispatch_sync; } controld_api = ipc_connect(&data, pcmk_ipc_controld, designated_controller_event_cb, dispatch_type, false); if (controld_api != NULL) { int rc = pcmk_controld_api_ping(controld_api, NULL); if (rc != pcmk_rc_ok) { out->err(out, "error: Could not ping controller API on DC: %s", pcmk_rc_str(rc)); data.rc = rc; } if (dispatch_type == pcmk_ipc_dispatch_poll) { poll_until_reply(&data, controld_api, "DC"); } pcmk_free_ipc_api(controld_api); } return data.rc; } // Documented in header int pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__designated_controller(out, message_timeout_ms); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } /*! * \internal * \brief Get and optionally output node info corresponding to a node ID from * the controller * * \param[in,out] out Output object * \param[in,out] node_id ID of node whose info to get. If \p NULL * or 0, get the local node's info. If not * \c NULL, store the true node ID here on * success. * \param[out] node_name If not \c NULL, where to store the node * name * \param[out] uuid If not \c NULL, where to store the node * UUID * \param[out] state If not \c NULL, where to store the * membership state * \param[out] is_remote If not \c NULL, where to store whether the * node is a Pacemaker Remote node * \param[out] have_quorum If not \c NULL, where to store whether the * node has quorum * \param[in] show_output Whether to show the node info * \param[in] message_timeout_ms How long to wait for a reply from the - * \c pacemaker-controld API. If 0, + * the controller API. If 0, * \c pcmk_ipc_dispatch_sync will be used. * Otherwise, \c pcmk_ipc_dispatch_poll will * be used. * * \return Standard Pacemaker return code * * \note The caller is responsible for freeing \p *node_name, \p *uuid, and * \p *state using \p free(). */ int pcmk__query_node_info(pcmk__output_t *out, uint32_t *node_id, char **node_name, char **uuid, char **state, bool *have_quorum, bool *is_remote, bool show_output, unsigned int message_timeout_ms) { data_t data = { .out = out, .show_output = show_output, .rc = EAGAIN, .message_timeout_ms = message_timeout_ms, .node_info = { .id = (node_id == NULL)? 0 : *node_id, .node_name = node_name, .uuid = uuid, .state = state, }, }; enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; pcmk_ipc_api_t *controld_api = NULL; if (node_name != NULL) { *node_name = NULL; } if (uuid != NULL) { *uuid = NULL; } if (state != NULL) { *state = NULL; } if (message_timeout_ms == 0) { dispatch_type = pcmk_ipc_dispatch_sync; } controld_api = ipc_connect(&data, pcmk_ipc_controld, node_info_event_cb, dispatch_type, false); if (controld_api != NULL) { int rc = pcmk_controld_api_node_info(controld_api, (node_id != NULL)? *node_id : 0); if (rc != pcmk_rc_ok) { out->err(out, "error: Could not send request to controller API on local " "node: %s", pcmk_rc_str(rc)); data.rc = rc; } if (dispatch_type == pcmk_ipc_dispatch_poll) { poll_until_reply(&data, controld_api, "local node"); } pcmk_free_ipc_api(controld_api); } if (data.rc != pcmk_rc_ok) { return data.rc; } // String outputs are set in callback if (node_id != NULL) { *node_id = data.node_info.id; } if (have_quorum != NULL) { *have_quorum = data.node_info.have_quorum; } if (is_remote != NULL) { *is_remote = data.node_info.is_remote; } return data.rc; } // Documented in header int pcmk_query_node_info(xmlNodePtr *xml, uint32_t *node_id, char **node_name, char **uuid, char **state, bool *have_quorum, bool *is_remote, bool show_output, unsigned int message_timeout_ms) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; CRM_ASSERT(node_name != NULL); rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__query_node_info(out, node_id, node_name, uuid, state, have_quorum, is_remote, show_output, message_timeout_ms); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } /*! * \internal * \brief Get and optionally output \p pacemakerd status * * \param[in,out] out Output object * \param[in] ipc_name IPC name for request * \param[in] message_timeout_ms How long to wait for a reply from the * \p pacemakerd API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * Otherwise, \p pcmk_ipc_dispatch_poll will * be used. * \param[in] show_output Whether to output the \p pacemakerd state * \param[out] state Where to store the \p pacemakerd state, if * not \p NULL * * \return Standard Pacemaker return code * * \note This function sets \p state to \p pcmk_pacemakerd_state_remote and * returns \p pcmk_rc_ok if the IPC connection attempt returns * \p EREMOTEIO. That code indicates that this is a Pacemaker Remote node - * with \p pacemaker-remoted running. The node may be connected to the + * with the remote executor running. The node may be connected to the * cluster. */ int pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, unsigned int message_timeout_ms, bool show_output, enum pcmk_pacemakerd_state *state) { data_t data = { .out = out, .show_output = show_output, .rc = EAGAIN, .message_timeout_ms = message_timeout_ms, .pcmkd_state = pcmk_pacemakerd_state_invalid, }; enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_poll; pcmk_ipc_api_t *pacemakerd_api = NULL; if (message_timeout_ms == 0) { dispatch_type = pcmk_ipc_dispatch_sync; } pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, pacemakerd_event_cb, dispatch_type, true); if (pacemakerd_api != NULL) { int rc = pcmk_pacemakerd_api_ping(pacemakerd_api, ipc_name); if (rc != pcmk_rc_ok) { out->err(out, "error: Could not ping launcher API: %s", pcmk_rc_str(rc)); data.rc = rc; } if (dispatch_type == pcmk_ipc_dispatch_poll) { poll_until_reply(&data, pacemakerd_api, NULL); } pcmk_free_ipc_api(pacemakerd_api); } else if ((data.pcmkd_state == pcmk_pacemakerd_state_remote) && show_output) { // No API connection so the callback wasn't run out->message(out, "pacemakerd-health", NULL, data.pcmkd_state, NULL, time(NULL)); } if (state != NULL) { *state = data.pcmkd_state; } return data.rc; } // Documented in header int pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, unsigned int message_timeout_ms) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__pacemakerd_status(out, ipc_name, message_timeout_ms, true, NULL); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } /* user data for looping through remote node xpath searches */ struct node_data { pcmk__output_t *out; int found; const char *field; /* XML attribute to check for node name */ const char *type; bool bash_export; }; static void remote_node_print_helper(xmlNode *result, void *user_data) { struct node_data *data = user_data; pcmk__output_t *out = data->out; const char *name = crm_element_value(result, PCMK_XA_UNAME); const char *id = crm_element_value(result, data->field); // node name and node id are the same for remote/guest nodes out->message(out, "crmadmin-node", data->type, pcmk__s(name, id), id, data->bash_export); data->found++; } // \return Standard Pacemaker return code int pcmk__list_nodes(pcmk__output_t *out, const char *node_types, bool bash_export) { xmlNode *xml_node = NULL; int rc; rc = cib__signon_query(out, NULL, &xml_node); if (rc == pcmk_rc_ok) { struct node_data data = { .out = out, .found = 0, .bash_export = bash_export }; /* PCMK_XE_NODES acts as the list's element name for CLI tools that * use pcmk__output_enable_list_element. Otherwise PCMK_XE_NODES is * the value of the list's PCMK_XA_NAME attribute. */ out->begin_list(out, NULL, NULL, PCMK_XE_NODES); if (!pcmk__str_empty(node_types) && strstr(node_types, "all")) { node_types = NULL; } if (pcmk__str_empty(node_types) || strstr(node_types, "cluster")) { data.field = PCMK_XA_ID; data.type = "cluster"; crm_foreach_xpath_result(xml_node, PCMK__XP_MEMBER_NODE_CONFIG, remote_node_print_helper, &data); } if (pcmk__str_empty(node_types) || strstr(node_types, "guest")) { data.field = PCMK_XA_VALUE; data.type = "guest"; crm_foreach_xpath_result(xml_node, PCMK__XP_GUEST_NODE_CONFIG, remote_node_print_helper, &data); } if (pcmk__str_empty(node_types) || pcmk__str_eq(node_types, ",|^remote", pcmk__str_regex)) { data.field = PCMK_XA_ID; data.type = "remote"; crm_foreach_xpath_result(xml_node, PCMK__XP_REMOTE_NODE_CONFIG, remote_node_print_helper, &data); } out->end_list(out); if (data.found == 0) { out->info(out, "No nodes configured"); } pcmk__xml_free(xml_node); } return rc; } int pcmk_list_nodes(xmlNodePtr *xml, const char *node_types) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__list_nodes(out, node_types, FALSE); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c index 5c09012742..e682f108a1 100644 --- a/lib/pacemaker/pcmk_output.c +++ b/lib/pacemaker/pcmk_output.c @@ -1,2715 +1,2715 @@ /* * Copyright 2019-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include // stonith__* #include #include #include #include #include static char * colocations_header(pcmk_resource_t *rsc, pcmk__colocation_t *cons, bool dependents) { char *retval = NULL; if (cons->primary_role > pcmk_role_started) { retval = crm_strdup_printf("%s (score=%s, %s role=%s, id=%s)", rsc->id, pcmk_readable_score(cons->score), (dependents? "needs" : "with"), pcmk_role_text(cons->primary_role), cons->id); } else { retval = crm_strdup_printf("%s (score=%s, id=%s)", rsc->id, pcmk_readable_score(cons->score), cons->id); } return retval; } static void colocations_xml_node(pcmk__output_t *out, pcmk_resource_t *rsc, pcmk__colocation_t *cons) { xmlNodePtr node = NULL; node = pcmk__output_create_xml_node(out, PCMK_XE_RSC_COLOCATION, PCMK_XA_ID, cons->id, PCMK_XA_RSC, cons->dependent->id, PCMK_XA_WITH_RSC, cons->primary->id, PCMK_XA_SCORE, pcmk_readable_score(cons->score), NULL); if (cons->node_attribute) { xmlSetProp(node, (pcmkXmlStr) PCMK_XA_NODE_ATTRIBUTE, (pcmkXmlStr) cons->node_attribute); } if (cons->dependent_role != pcmk_role_unknown) { xmlSetProp(node, (pcmkXmlStr) PCMK_XA_RSC_ROLE, (pcmkXmlStr) pcmk_role_text(cons->dependent_role)); } if (cons->primary_role != pcmk_role_unknown) { xmlSetProp(node, (pcmkXmlStr) PCMK_XA_WITH_RSC_ROLE, (pcmkXmlStr) pcmk_role_text(cons->primary_role)); } } static int do_locations_list_xml(pcmk__output_t *out, pcmk_resource_t *rsc, bool add_header) { GList *lpc = NULL; int rc = pcmk_rc_no_output; for (lpc = rsc->priv->location_constraints; lpc != NULL; lpc = lpc->next) { pcmk__location_t *cons = lpc->data; GList *lpc2 = NULL; for (lpc2 = cons->nodes; lpc2 != NULL; lpc2 = lpc2->next) { pcmk_node_t *node = (pcmk_node_t *) lpc2->data; if (add_header) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "locations"); } pcmk__output_create_xml_node(out, PCMK_XE_RSC_LOCATION, PCMK_XA_NODE, node->priv->name, PCMK_XA_RSC, rsc->id, PCMK_XA_ID, cons->id, PCMK_XA_SCORE, pcmk_readable_score(node->assign->score), NULL); } } if (add_header) { PCMK__OUTPUT_LIST_FOOTER(out, rc); } return rc; } PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pcmk_resource_t *", "pcmk_node_t *", "pcmk_node_t *", "pcmk_action_t *", "pcmk_action_t *") static int rsc_action_item(pcmk__output_t *out, va_list args) { const char *change = va_arg(args, const char *); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *origin = va_arg(args, pcmk_node_t *); pcmk_node_t *destination = va_arg(args, pcmk_node_t *); pcmk_action_t *action = va_arg(args, pcmk_action_t *); pcmk_action_t *source = va_arg(args, pcmk_action_t *); int len = 0; char *reason = NULL; char *details = NULL; bool same_host = false; bool same_role = false; bool need_role = false; static int rsc_width = 5; static int detail_width = 5; CRM_ASSERT(action); CRM_ASSERT(destination != NULL || origin != NULL); if (source == NULL) { source = action; } len = strlen(rsc->id); if (len > rsc_width) { rsc_width = len + 2; } if ((rsc->priv->orig_role > pcmk_role_started) || (rsc->priv->next_role > pcmk_role_unpromoted)) { need_role = true; } if (pcmk__same_node(origin, destination)) { same_host = true; } if (rsc->priv->orig_role == rsc->priv->next_role) { same_role = true; } if (need_role && (origin == NULL)) { /* Starting and promoting a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", pcmk_role_text(rsc->priv->orig_role), pcmk_role_text(rsc->priv->next_role), pcmk__node_name(destination)); } else if (origin == NULL) { /* Starting a resource */ details = crm_strdup_printf("%s", pcmk__node_name(destination)); } else if (need_role && (destination == NULL)) { /* Stopping a promotable clone instance */ details = crm_strdup_printf("%s %s", pcmk_role_text(rsc->priv->orig_role), pcmk__node_name(origin)); } else if (destination == NULL) { /* Stopping a resource */ details = crm_strdup_printf("%s", pcmk__node_name(origin)); } else if (need_role && same_role && same_host) { /* Recovering, restarting or re-promoting a promotable clone instance */ details = crm_strdup_printf("%s %s", pcmk_role_text(rsc->priv->orig_role), pcmk__node_name(origin)); } else if (same_role && same_host) { /* Recovering or Restarting a normal resource */ details = crm_strdup_printf("%s", pcmk__node_name(origin)); } else if (need_role && same_role) { /* Moving a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", pcmk__node_name(origin), pcmk__node_name(destination), pcmk_role_text(rsc->priv->orig_role)); } else if (same_role) { /* Moving a normal resource */ details = crm_strdup_printf("%s -> %s", pcmk__node_name(origin), pcmk__node_name(destination)); } else if (same_host) { /* Promoting or demoting a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", pcmk_role_text(rsc->priv->orig_role), pcmk_role_text(rsc->priv->next_role), pcmk__node_name(origin)); } else { /* Moving and promoting/demoting */ details = crm_strdup_printf("%s %s -> %s %s", pcmk_role_text(rsc->priv->orig_role), pcmk__node_name(origin), pcmk_role_text(rsc->priv->next_role), pcmk__node_name(destination)); } len = strlen(details); if (len > detail_width) { detail_width = len; } if ((source->reason != NULL) && !pcmk_is_set(action->flags, pcmk__action_runnable)) { reason = crm_strdup_printf("due to %s (blocked)", source->reason); } else if (source->reason) { reason = crm_strdup_printf("due to %s", source->reason); } else if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { reason = strdup("blocked"); } out->list_item(out, NULL, "%-8s %-*s ( %*s )%s%s", change, rsc_width, rsc->id, detail_width, details, ((reason == NULL)? "" : " "), pcmk__s(reason, "")); free(details); free(reason); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pcmk_resource_t *", "pcmk_node_t *", "pcmk_node_t *", "pcmk_action_t *", "pcmk_action_t *") static int rsc_action_item_xml(pcmk__output_t *out, va_list args) { const char *change = va_arg(args, const char *); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *origin = va_arg(args, pcmk_node_t *); pcmk_node_t *destination = va_arg(args, pcmk_node_t *); pcmk_action_t *action = va_arg(args, pcmk_action_t *); pcmk_action_t *source = va_arg(args, pcmk_action_t *); char *change_str = NULL; bool same_host = false; bool same_role = false; bool need_role = false; xmlNode *xml = NULL; CRM_ASSERT(action); CRM_ASSERT(destination != NULL || origin != NULL); if (source == NULL) { source = action; } if ((rsc->priv->orig_role > pcmk_role_started) || (rsc->priv->next_role > pcmk_role_unpromoted)) { need_role = true; } if (pcmk__same_node(origin, destination)) { same_host = true; } if (rsc->priv->orig_role == rsc->priv->next_role) { same_role = true; } change_str = g_ascii_strdown(change, -1); xml = pcmk__output_create_xml_node(out, PCMK_XE_RSC_ACTION, PCMK_XA_ACTION, change_str, PCMK_XA_RESOURCE, rsc->id, NULL); g_free(change_str); if (need_role && (origin == NULL)) { /* Starting and promoting a promotable clone instance */ pcmk__xe_set_props(xml, PCMK_XA_ROLE, pcmk_role_text(rsc->priv->orig_role), PCMK_XA_NEXT_ROLE, pcmk_role_text(rsc->priv->next_role), PCMK_XA_DEST, destination->priv->name, NULL); } else if (origin == NULL) { /* Starting a resource */ crm_xml_add(xml, PCMK_XA_NODE, destination->priv->name); } else if (need_role && (destination == NULL)) { /* Stopping a promotable clone instance */ pcmk__xe_set_props(xml, PCMK_XA_ROLE, pcmk_role_text(rsc->priv->orig_role), PCMK_XA_NODE, origin->priv->name, NULL); } else if (destination == NULL) { /* Stopping a resource */ crm_xml_add(xml, PCMK_XA_NODE, origin->priv->name); } else if (need_role && same_role && same_host) { /* Recovering, restarting or re-promoting a promotable clone instance */ pcmk__xe_set_props(xml, PCMK_XA_ROLE, pcmk_role_text(rsc->priv->orig_role), PCMK_XA_SOURCE, origin->priv->name, NULL); } else if (same_role && same_host) { /* Recovering or Restarting a normal resource */ crm_xml_add(xml, PCMK_XA_SOURCE, origin->priv->name); } else if (need_role && same_role) { /* Moving a promotable clone instance */ pcmk__xe_set_props(xml, PCMK_XA_SOURCE, origin->priv->name, PCMK_XA_DEST, destination->priv->name, PCMK_XA_ROLE, pcmk_role_text(rsc->priv->orig_role), NULL); } else if (same_role) { /* Moving a normal resource */ pcmk__xe_set_props(xml, PCMK_XA_SOURCE, origin->priv->name, PCMK_XA_DEST, destination->priv->name, NULL); } else if (same_host) { /* Promoting or demoting a promotable clone instance */ pcmk__xe_set_props(xml, PCMK_XA_ROLE, pcmk_role_text(rsc->priv->orig_role), PCMK_XA_NEXT_ROLE, pcmk_role_text(rsc->priv->next_role), PCMK_XA_SOURCE, origin->priv->name, NULL); } else { /* Moving and promoting/demoting */ pcmk__xe_set_props(xml, PCMK_XA_ROLE, pcmk_role_text(rsc->priv->orig_role), PCMK_XA_SOURCE, origin->priv->name, PCMK_XA_NEXT_ROLE, pcmk_role_text(rsc->priv->next_role), PCMK_XA_DEST, destination->priv->name, NULL); } if ((source->reason != NULL) && !pcmk_is_set(action->flags, pcmk__action_runnable)) { pcmk__xe_set_props(xml, PCMK_XA_REASON, source->reason, PCMK_XA_BLOCKED, PCMK_VALUE_TRUE, NULL); } else if (source->reason != NULL) { crm_xml_add(xml, PCMK_XA_REASON, source->reason); } else if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { pcmk__xe_set_bool_attr(xml, PCMK_XA_BLOCKED, true); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pcmk_resource_t *", "bool") static int rsc_is_colocated_with_list(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); bool recursive = va_arg(args, int); int rc = pcmk_rc_no_output; if (pcmk_is_set(rsc->flags, pcmk__rsc_detect_loop)) { return rc; } /* We're listing constraints explicitly involving rsc, so use * rsc->private->this_with_colocations directly rather than call * rsc->private->cmds->this_with_colocations(). */ pcmk__set_rsc_flags(rsc, pcmk__rsc_detect_loop); for (GList *lpc = rsc->priv->this_with_colocations; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; char *hdr = NULL; PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources %s is colocated with", rsc->id); if (pcmk_is_set(cons->primary->flags, pcmk__rsc_detect_loop)) { out->list_item(out, NULL, "%s (id=%s - loop)", cons->primary->id, cons->id); continue; } hdr = colocations_header(cons->primary, cons, false); out->list_item(out, NULL, "%s", hdr); free(hdr); // Empty list header for indentation of information about this resource out->begin_list(out, NULL, NULL, NULL); out->message(out, "locations-list", cons->primary); if (recursive) { out->message(out, "rsc-is-colocated-with-list", cons->primary, recursive); } out->end_list(out); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pcmk_resource_t *", "bool") static int rsc_is_colocated_with_list_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); bool recursive = va_arg(args, int); int rc = pcmk_rc_no_output; if (pcmk_is_set(rsc->flags, pcmk__rsc_detect_loop)) { return rc; } /* We're listing constraints explicitly involving rsc, so use * rsc->private->this_with_colocations directly rather than call * rsc->private->cmds->this_with_colocations(). */ pcmk__set_rsc_flags(rsc, pcmk__rsc_detect_loop); for (GList *lpc = rsc->priv->this_with_colocations; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; if (pcmk_is_set(cons->primary->flags, pcmk__rsc_detect_loop)) { colocations_xml_node(out, cons->primary, cons); continue; } colocations_xml_node(out, cons->primary, cons); do_locations_list_xml(out, cons->primary, false); if (recursive) { out->message(out, "rsc-is-colocated-with-list", cons->primary, recursive); } } return rc; } PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pcmk_resource_t *", "bool") static int rscs_colocated_with_list(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); bool recursive = va_arg(args, int); int rc = pcmk_rc_no_output; if (pcmk_is_set(rsc->flags, pcmk__rsc_detect_loop)) { return rc; } /* We're listing constraints explicitly involving rsc, so use * rsc->private->with_this_colocations directly rather than * rsc->private->cmds->with_this_colocations(). */ pcmk__set_rsc_flags(rsc, pcmk__rsc_detect_loop); for (GList *lpc = rsc->priv->with_this_colocations; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; char *hdr = NULL; PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources colocated with %s", rsc->id); if (pcmk_is_set(cons->dependent->flags, pcmk__rsc_detect_loop)) { out->list_item(out, NULL, "%s (id=%s - loop)", cons->dependent->id, cons->id); continue; } hdr = colocations_header(cons->dependent, cons, true); out->list_item(out, NULL, "%s", hdr); free(hdr); // Empty list header for indentation of information about this resource out->begin_list(out, NULL, NULL, NULL); out->message(out, "locations-list", cons->dependent); if (recursive) { out->message(out, "rscs-colocated-with-list", cons->dependent, recursive); } out->end_list(out); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pcmk_resource_t *", "bool") static int rscs_colocated_with_list_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); bool recursive = va_arg(args, int); int rc = pcmk_rc_no_output; if (pcmk_is_set(rsc->flags, pcmk__rsc_detect_loop)) { return rc; } /* We're listing constraints explicitly involving rsc, so use * rsc->private->with_this_colocations directly rather than * rsc->private->cmds->with_this_colocations(). */ pcmk__set_rsc_flags(rsc, pcmk__rsc_detect_loop); for (GList *lpc = rsc->priv->with_this_colocations; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; if (pcmk_is_set(cons->dependent->flags, pcmk__rsc_detect_loop)) { colocations_xml_node(out, cons->dependent, cons); continue; } colocations_xml_node(out, cons->dependent, cons); do_locations_list_xml(out, cons->dependent, false); if (recursive) { out->message(out, "rscs-colocated-with-list", cons->dependent, recursive); } } return rc; } PCMK__OUTPUT_ARGS("locations-list", "pcmk_resource_t *") static int locations_list(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); GList *lpc = NULL; int rc = pcmk_rc_no_output; for (lpc = rsc->priv->location_constraints; lpc != NULL; lpc = lpc->next) { pcmk__location_t *cons = lpc->data; GList *lpc2 = NULL; for (lpc2 = cons->nodes; lpc2 != NULL; lpc2 = lpc2->next) { pcmk_node_t *node = (pcmk_node_t *) lpc2->data; PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Locations"); out->list_item(out, NULL, "Node %s (score=%s, id=%s, rsc=%s)", pcmk__node_name(node), pcmk_readable_score(node->assign->score), cons->id, rsc->id); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("locations-list", "pcmk_resource_t *") static int locations_list_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); return do_locations_list_xml(out, rsc, true); } PCMK__OUTPUT_ARGS("locations-and-colocations", "pcmk_resource_t *", "bool", "bool") static int locations_and_colocations(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); bool recursive = va_arg(args, int); bool force = va_arg(args, int); pcmk__unpack_constraints(rsc->priv->scheduler); // Constraints apply to group/clone, not member/instance if (!force) { rsc = uber_parent(rsc); } out->message(out, "locations-list", rsc); pe__clear_resource_flags_on_all(rsc->priv->scheduler, pcmk__rsc_detect_loop); out->message(out, "rscs-colocated-with-list", rsc, recursive); pe__clear_resource_flags_on_all(rsc->priv->scheduler, pcmk__rsc_detect_loop); out->message(out, "rsc-is-colocated-with-list", rsc, recursive); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("locations-and-colocations", "pcmk_resource_t *", "bool", "bool") static int locations_and_colocations_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); bool recursive = va_arg(args, int); bool force = va_arg(args, int); pcmk__unpack_constraints(rsc->priv->scheduler); // Constraints apply to group/clone, not member/instance if (!force) { rsc = uber_parent(rsc); } pcmk__output_xml_create_parent(out, PCMK_XE_CONSTRAINTS, NULL); do_locations_list_xml(out, rsc, false); pe__clear_resource_flags_on_all(rsc->priv->scheduler, pcmk__rsc_detect_loop); out->message(out, "rscs-colocated-with-list", rsc, recursive); pe__clear_resource_flags_on_all(rsc->priv->scheduler, pcmk__rsc_detect_loop); out->message(out, "rsc-is-colocated-with-list", rsc, recursive); pcmk__output_xml_pop_parent(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *") static int health(pcmk__output_t *out, va_list args) { const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *); const char *host_from = va_arg(args, const char *); const char *fsa_state = va_arg(args, const char *); const char *result = va_arg(args, const char *); return out->info(out, "Controller on %s in state %s: %s", pcmk__s(host_from, "unknown node"), pcmk__s(fsa_state, "unknown"), pcmk__s(result, "unknown result")); } PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *") static int health_text(pcmk__output_t *out, va_list args) { if (!out->is_quiet(out)) { return health(out, args); } else { const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *); const char *host_from G_GNUC_UNUSED = va_arg(args, const char *); const char *fsa_state = va_arg(args, const char *); const char *result G_GNUC_UNUSED = va_arg(args, const char *); if (fsa_state != NULL) { pcmk__formatted_printf(out, "%s\n", fsa_state); return pcmk_rc_ok; } } return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *") static int health_xml(pcmk__output_t *out, va_list args) { const char *sys_from = va_arg(args, const char *); const char *host_from = va_arg(args, const char *); const char *fsa_state = va_arg(args, const char *); const char *result = va_arg(args, const char *); pcmk__output_create_xml_node(out, pcmk__s(sys_from, ""), PCMK_XA_NODE_NAME, pcmk__s(host_from, ""), PCMK_XA_STATE, pcmk__s(fsa_state, ""), PCMK_XA_RESULT, pcmk__s(result, ""), NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "enum pcmk_pacemakerd_state", "const char *", "time_t") static int pacemakerd_health(pcmk__output_t *out, va_list args) { const char *sys_from = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = va_arg(args, const char *); time_t last_updated = va_arg(args, time_t); char *last_updated_s = NULL; int rc = pcmk_rc_ok; if (sys_from == NULL) { if (state == pcmk_pacemakerd_state_remote) { - sys_from = "pacemaker-remoted"; + sys_from = PCMK__SERVER_REMOTED; } else { sys_from = CRM_SYSTEM_MCP; } } if (state_s == NULL) { state_s = pcmk__pcmkd_state_enum2friendly(state); } if (last_updated != 0) { last_updated_s = pcmk__epoch2str(&last_updated, crm_time_log_date |crm_time_log_timeofday |crm_time_log_with_timezone); } rc = out->info(out, "Status of %s: '%s' (last updated %s)", sys_from, state_s, pcmk__s(last_updated_s, "at unknown time")); free(last_updated_s); return rc; } PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "enum pcmk_pacemakerd_state", "const char *", "time_t") static int pacemakerd_health_html(pcmk__output_t *out, va_list args) { const char *sys_from = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = va_arg(args, const char *); time_t last_updated = va_arg(args, time_t); char *last_updated_s = NULL; char *msg = NULL; if (sys_from == NULL) { if (state == pcmk_pacemakerd_state_remote) { - sys_from = "pacemaker-remoted"; + sys_from = PCMK__SERVER_REMOTED; } else { sys_from = CRM_SYSTEM_MCP; } } if (state_s == NULL) { state_s = pcmk__pcmkd_state_enum2friendly(state); } if (last_updated != 0) { last_updated_s = pcmk__epoch2str(&last_updated, crm_time_log_date |crm_time_log_timeofday |crm_time_log_with_timezone); } msg = crm_strdup_printf("Status of %s: '%s' (last updated %s)", sys_from, state_s, pcmk__s(last_updated_s, "at unknown time")); pcmk__output_create_html_node(out, "li", NULL, NULL, msg); free(msg); free(last_updated_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "enum pcmk_pacemakerd_state", "const char *", "time_t") static int pacemakerd_health_text(pcmk__output_t *out, va_list args) { if (!out->is_quiet(out)) { return pacemakerd_health(out, args); } else { const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = va_arg(args, const char *); time_t last_updated G_GNUC_UNUSED = va_arg(args, time_t); if (state_s == NULL) { state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); } pcmk__formatted_printf(out, "%s\n", state_s); return pcmk_rc_ok; } } PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "enum pcmk_pacemakerd_state", "const char *", "time_t") static int pacemakerd_health_xml(pcmk__output_t *out, va_list args) { const char *sys_from = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = va_arg(args, const char *); time_t last_updated = va_arg(args, time_t); char *last_updated_s = NULL; if (sys_from == NULL) { if (state == pcmk_pacemakerd_state_remote) { - sys_from = "pacemaker-remoted"; + sys_from = PCMK__SERVER_REMOTED; } else { sys_from = CRM_SYSTEM_MCP; } } if (state_s == NULL) { state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); } if (last_updated != 0) { last_updated_s = pcmk__epoch2str(&last_updated, crm_time_log_date |crm_time_log_timeofday |crm_time_log_with_timezone); } pcmk__output_create_xml_node(out, PCMK_XE_PACEMAKERD, PCMK_XA_SYS_FROM, sys_from, PCMK_XA_STATE, state_s, PCMK_XA_LAST_UPDATED, last_updated_s, NULL); free(last_updated_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t") static int profile_default(pcmk__output_t *out, va_list args) { const char *xml_file = va_arg(args, const char *); clock_t start = va_arg(args, clock_t); clock_t end = va_arg(args, clock_t); out->list_item(out, NULL, "Testing %s ... %.2f secs", xml_file, (end - start) / (float) CLOCKS_PER_SEC); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t") static int profile_xml(pcmk__output_t *out, va_list args) { const char *xml_file = va_arg(args, const char *); clock_t start = va_arg(args, clock_t); clock_t end = va_arg(args, clock_t); char *duration = pcmk__ftoa((end - start) / (float) CLOCKS_PER_SEC); pcmk__output_create_xml_node(out, PCMK_XE_TIMING, PCMK_XA_FILE, xml_file, PCMK_XA_DURATION, duration, NULL); free(duration); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("dc", "const char *") static int dc(pcmk__output_t *out, va_list args) { const char *dc = va_arg(args, const char *); return out->info(out, "Designated Controller is: %s", pcmk__s(dc, "not yet elected")); } PCMK__OUTPUT_ARGS("dc", "const char *") static int dc_text(pcmk__output_t *out, va_list args) { if (!out->is_quiet(out)) { return dc(out, args); } else { const char *dc = va_arg(args, const char *); if (dc != NULL) { pcmk__formatted_printf(out, "%s\n", pcmk__s(dc, "")); return pcmk_rc_ok; } } return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("dc", "const char *") static int dc_xml(pcmk__output_t *out, va_list args) { const char *dc = va_arg(args, const char *); pcmk__output_create_xml_node(out, PCMK_XE_DC, PCMK_XA_NODE_NAME, pcmk__s(dc, ""), NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "bool") static int crmadmin_node(pcmk__output_t *out, va_list args) { const char *type = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *id = va_arg(args, const char *); bool bash_export = va_arg(args, int); if (bash_export) { return out->info(out, "export %s=%s", pcmk__s(name, ""), pcmk__s(id, "")); } else { return out->info(out, "%s node: %s (%s)", type ? type : "cluster", pcmk__s(name, ""), pcmk__s(id, "")); } } PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "bool") static int crmadmin_node_text(pcmk__output_t *out, va_list args) { if (!out->is_quiet(out)) { return crmadmin_node(out, args); } else { const char *type G_GNUC_UNUSED = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *id G_GNUC_UNUSED = va_arg(args, const char *); bool bash_export G_GNUC_UNUSED = va_arg(args, int); pcmk__formatted_printf(out, "%s\n", pcmk__s(name, "")); return pcmk_rc_ok; } } PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "bool") static int crmadmin_node_xml(pcmk__output_t *out, va_list args) { const char *type = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *id = va_arg(args, const char *); bool bash_export G_GNUC_UNUSED = va_arg(args, int); pcmk__output_create_xml_node(out, PCMK_XE_NODE, PCMK_XA_TYPE, pcmk__s(type, "cluster"), PCMK_XA_NAME, pcmk__s(name, ""), PCMK_XA_ID, pcmk__s(id, ""), NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("digests", "const pcmk_resource_t *", "const pcmk_node_t *", "const char *", "guint", "const pcmk__op_digest_t *") static int digests_text(pcmk__output_t *out, va_list args) { const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *); const pcmk_node_t *node = va_arg(args, const pcmk_node_t *); const char *task = va_arg(args, const char *); guint interval_ms = va_arg(args, guint); const pcmk__op_digest_t *digests = va_arg(args, const pcmk__op_digest_t *); char *action_desc = NULL; const char *rsc_desc = "unknown resource"; const char *node_desc = "unknown node"; if (interval_ms != 0) { action_desc = crm_strdup_printf("%ums-interval %s action", interval_ms, ((task == NULL)? "unknown" : task)); } else if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_none)) { action_desc = strdup("probe action"); } else { action_desc = crm_strdup_printf("%s action", ((task == NULL)? "unknown" : task)); } if ((rsc != NULL) && (rsc->id != NULL)) { rsc_desc = rsc->id; } if ((node != NULL) && (node->priv->name != NULL)) { node_desc = node->priv->name; } out->begin_list(out, NULL, NULL, "Digests for %s %s on %s", rsc_desc, action_desc, node_desc); free(action_desc); if (digests == NULL) { out->list_item(out, NULL, "none"); out->end_list(out); return pcmk_rc_ok; } if (digests->digest_all_calc != NULL) { out->list_item(out, NULL, "%s (all parameters)", digests->digest_all_calc); } if (digests->digest_secure_calc != NULL) { out->list_item(out, NULL, "%s (non-private parameters)", digests->digest_secure_calc); } if (digests->digest_restart_calc != NULL) { out->list_item(out, NULL, "%s (non-reloadable parameters)", digests->digest_restart_calc); } out->end_list(out); return pcmk_rc_ok; } static void add_digest_xml(xmlNode *parent, const char *type, const char *digest, xmlNode *digest_source) { if (digest != NULL) { xmlNodePtr digest_xml = pcmk__xe_create(parent, PCMK_XE_DIGEST); crm_xml_add(digest_xml, PCMK_XA_TYPE, pcmk__s(type, "unspecified")); crm_xml_add(digest_xml, PCMK_XA_HASH, digest); pcmk__xml_copy(digest_xml, digest_source); } } PCMK__OUTPUT_ARGS("digests", "const pcmk_resource_t *", "const pcmk_node_t *", "const char *", "guint", "const pcmk__op_digest_t *") static int digests_xml(pcmk__output_t *out, va_list args) { const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *); const pcmk_node_t *node = va_arg(args, const pcmk_node_t *); const char *task = va_arg(args, const char *); guint interval_ms = va_arg(args, guint); const pcmk__op_digest_t *digests = va_arg(args, const pcmk__op_digest_t *); char *interval_s = crm_strdup_printf("%ums", interval_ms); xmlNode *xml = NULL; xml = pcmk__output_create_xml_node(out, PCMK_XE_DIGESTS, PCMK_XA_RESOURCE, pcmk__s(rsc->id, ""), PCMK_XA_NODE, pcmk__s(node->priv->name, ""), PCMK_XA_TASK, pcmk__s(task, ""), PCMK_XA_INTERVAL, interval_s, NULL); free(interval_s); if (digests != NULL) { add_digest_xml(xml, "all", digests->digest_all_calc, digests->params_all); add_digest_xml(xml, "nonprivate", digests->digest_secure_calc, digests->params_secure); add_digest_xml(xml, "nonreloadable", digests->digest_restart_calc, digests->params_restart); } return pcmk_rc_ok; } #define STOP_SANITY_ASSERT(lineno) do { \ if ((current != NULL) && current->details->unclean) { \ /* It will be a pseudo op */ \ } else if (stop == NULL) { \ crm_err("%s:%d: No stop action exists for %s", \ __func__, lineno, rsc->id); \ CRM_ASSERT(stop != NULL); \ } else if (pcmk_is_set(stop->flags, pcmk__action_optional)) { \ crm_err("%s:%d: Action %s is still optional", \ __func__, lineno, stop->uuid); \ CRM_ASSERT(!pcmk_is_set(stop->flags, pcmk__action_optional)); \ } \ } while (0) PCMK__OUTPUT_ARGS("rsc-action", "pcmk_resource_t *", "pcmk_node_t *", "pcmk_node_t *") static int rsc_action_default(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *current = va_arg(args, pcmk_node_t *); pcmk_node_t *next = va_arg(args, pcmk_node_t *); GList *possible_matches = NULL; char *key = NULL; int rc = pcmk_rc_no_output; bool moving = false; pcmk_node_t *start_node = NULL; pcmk_action_t *start = NULL; pcmk_action_t *stop = NULL; pcmk_action_t *promote = NULL; pcmk_action_t *demote = NULL; pcmk_action_t *reason_op = NULL; if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed) || (current == NULL && next == NULL)) { const bool managed = pcmk_is_set(rsc->flags, pcmk__rsc_managed); pcmk__rsc_info(rsc, "Leave %s\t(%s%s)", rsc->id, pcmk_role_text(rsc->priv->orig_role), (managed? "" : " unmanaged")); return rc; } moving = (current != NULL) && (next != NULL) && !pcmk__same_node(current, next); possible_matches = pe__resource_actions(rsc, next, PCMK_ACTION_START, false); if (possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } if ((start == NULL) || !pcmk_is_set(start->flags, pcmk__action_runnable)) { start_node = NULL; } else { start_node = current; } possible_matches = pe__resource_actions(rsc, start_node, PCMK_ACTION_STOP, false); if (possible_matches) { stop = possible_matches->data; g_list_free(possible_matches); } else if (pcmk_is_set(rsc->flags, pcmk__rsc_stop_unexpected)) { /* The resource is multiply active with PCMK_META_MULTIPLE_ACTIVE set to * PCMK_VALUE_STOP_UNEXPECTED, and not stopping on its current node, but * it should be stopping elsewhere. */ possible_matches = pe__resource_actions(rsc, NULL, PCMK_ACTION_STOP, false); if (possible_matches != NULL) { stop = possible_matches->data; g_list_free(possible_matches); } } possible_matches = pe__resource_actions(rsc, next, PCMK_ACTION_PROMOTE, false); if (possible_matches) { promote = possible_matches->data; g_list_free(possible_matches); } possible_matches = pe__resource_actions(rsc, next, PCMK_ACTION_DEMOTE, false); if (possible_matches) { demote = possible_matches->data; g_list_free(possible_matches); } if (rsc->priv->orig_role == rsc->priv->next_role) { pcmk_action_t *migrate_op = NULL; CRM_CHECK(next != NULL, return rc); possible_matches = pe__resource_actions(rsc, next, PCMK_ACTION_MIGRATE_FROM, false); if (possible_matches) { migrate_op = possible_matches->data; } if ((migrate_op != NULL) && (current != NULL) && pcmk_is_set(migrate_op->flags, pcmk__action_runnable)) { rc = out->message(out, "rsc-action-item", "Migrate", rsc, current, next, start, NULL); } else if (pcmk_is_set(rsc->flags, pcmk__rsc_reload)) { rc = out->message(out, "rsc-action-item", "Reload", rsc, current, next, start, NULL); } else if ((start == NULL) || pcmk_is_set(start->flags, pcmk__action_optional)) { if ((demote != NULL) && (promote != NULL) && !pcmk_is_set(demote->flags, pcmk__action_optional) && !pcmk_is_set(promote->flags, pcmk__action_optional)) { rc = out->message(out, "rsc-action-item", "Re-promote", rsc, current, next, promote, demote); } else { pcmk__rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, pcmk_role_text(rsc->priv->orig_role), pcmk__node_name(next)); } } else if (!pcmk_is_set(start->flags, pcmk__action_runnable)) { if ((stop == NULL) || (stop->reason == NULL)) { reason_op = start; } else { reason_op = stop; } rc = out->message(out, "rsc-action-item", "Stop", rsc, current, NULL, stop, reason_op); STOP_SANITY_ASSERT(__LINE__); } else if (moving && current) { const bool failed = pcmk_is_set(rsc->flags, pcmk__rsc_failed); rc = out->message(out, "rsc-action-item", (failed? "Recover" : "Move"), rsc, current, next, stop, NULL); } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { rc = out->message(out, "rsc-action-item", "Recover", rsc, current, NULL, stop, NULL); STOP_SANITY_ASSERT(__LINE__); } else { rc = out->message(out, "rsc-action-item", "Restart", rsc, current, next, start, NULL); #if 0 /* @TODO This can be reached in situations that should really be * "Start" (see for example the migrate-fail-7 regression test) */ STOP_SANITY_ASSERT(__LINE__); #endif } g_list_free(possible_matches); return rc; } if ((stop != NULL) && ((rsc->priv->next_role == pcmk_role_stopped) || ((start != NULL) && !pcmk_is_set(start->flags, pcmk__action_runnable)))) { key = stop_key(rsc); for (GList *iter = rsc->priv->active_nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = iter->data; pcmk_action_t *stop_op = NULL; reason_op = start; possible_matches = find_actions(rsc->priv->actions, key, node); if (possible_matches) { stop_op = possible_matches->data; g_list_free(possible_matches); } if (stop_op != NULL) { if (pcmk_is_set(stop_op->flags, pcmk__action_runnable)) { STOP_SANITY_ASSERT(__LINE__); } if (stop_op->reason != NULL) { reason_op = stop_op; } } if (out->message(out, "rsc-action-item", "Stop", rsc, node, NULL, stop_op, reason_op) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } free(key); } else if ((stop != NULL) && pcmk_all_flags_set(rsc->flags, pcmk__rsc_failed |pcmk__rsc_stop_if_failed)) { /* 'stop' may be NULL if the failure was ignored */ rc = out->message(out, "rsc-action-item", "Recover", rsc, current, next, stop, start); STOP_SANITY_ASSERT(__LINE__); } else if (moving) { rc = out->message(out, "rsc-action-item", "Move", rsc, current, next, stop, NULL); STOP_SANITY_ASSERT(__LINE__); } else if (pcmk_is_set(rsc->flags, pcmk__rsc_reload)) { rc = out->message(out, "rsc-action-item", "Reload", rsc, current, next, start, NULL); } else if ((stop != NULL) && !pcmk_is_set(stop->flags, pcmk__action_optional)) { rc = out->message(out, "rsc-action-item", "Restart", rsc, current, next, start, NULL); STOP_SANITY_ASSERT(__LINE__); } else if (rsc->priv->orig_role == pcmk_role_promoted) { CRM_LOG_ASSERT(current != NULL); rc = out->message(out, "rsc-action-item", "Demote", rsc, current, next, demote, NULL); } else if (rsc->priv->next_role == pcmk_role_promoted) { CRM_LOG_ASSERT(next); rc = out->message(out, "rsc-action-item", "Promote", rsc, current, next, promote, NULL); } else if ((rsc->priv->orig_role == pcmk_role_stopped) && (rsc->priv->next_role > pcmk_role_stopped)) { rc = out->message(out, "rsc-action-item", "Start", rsc, current, next, start, NULL); } return rc; } PCMK__OUTPUT_ARGS("node-action", "const char *", "const char *", "const char *") static int node_action(pcmk__output_t *out, va_list args) { const char *task = va_arg(args, const char *); const char *node_name = va_arg(args, const char *); const char *reason = va_arg(args, const char *); if (task == NULL) { return pcmk_rc_no_output; } else if (reason) { out->list_item(out, NULL, "%s %s '%s'", task, node_name, reason); } else { crm_notice(" * %s %s", task, node_name); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-action", "const char *", "const char *", "const char *") static int node_action_xml(pcmk__output_t *out, va_list args) { const char *task = va_arg(args, const char *); const char *node_name = va_arg(args, const char *); const char *reason = va_arg(args, const char *); if (task == NULL) { return pcmk_rc_no_output; } else if (reason) { pcmk__output_create_xml_node(out, PCMK_XE_NODE_ACTION, PCMK_XA_TASK, task, PCMK_XA_NODE, node_name, PCMK_XA_REASON, reason, NULL); } else { crm_notice(" * %s %s", task, node_name); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-info", "uint32_t", "const char *", "const char *", "const char *", "bool", "bool") static int node_info_default(pcmk__output_t *out, va_list args) { uint32_t node_id = va_arg(args, uint32_t); const char *node_name = va_arg(args, const char *); const char *uuid = va_arg(args, const char *); const char *state = va_arg(args, const char *); bool have_quorum = (bool) va_arg(args, int); bool is_remote = (bool) va_arg(args, int); return out->info(out, "Node %" PRIu32 ": %s " "(uuid=%s, state=%s, have_quorum=%s, is_remote=%s)", node_id, pcmk__s(node_name, "unknown"), pcmk__s(uuid, "unknown"), pcmk__s(state, "unknown"), pcmk__btoa(have_quorum), pcmk__btoa(is_remote)); } PCMK__OUTPUT_ARGS("node-info", "uint32_t", "const char *", "const char *", "const char *", "bool", "bool") static int node_info_xml(pcmk__output_t *out, va_list args) { uint32_t node_id = va_arg(args, uint32_t); const char *node_name = va_arg(args, const char *); const char *uuid = va_arg(args, const char *); const char *state = va_arg(args, const char *); bool have_quorum = (bool) va_arg(args, int); bool is_remote = (bool) va_arg(args, int); char *id_s = crm_strdup_printf("%" PRIu32, node_id); pcmk__output_create_xml_node(out, PCMK_XE_NODE_INFO, PCMK_XA_NODEID, id_s, PCMK_XA_UNAME, node_name, PCMK_XA_ID, uuid, PCMK_XA_CRMD, state, PCMK_XA_HAVE_QUORUM, pcmk__btoa(have_quorum), PCMK_XA_REMOTE_NODE, pcmk__btoa(is_remote), NULL); free(id_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *", "xmlNode *") static int inject_cluster_action(pcmk__output_t *out, va_list args) { const char *node = va_arg(args, const char *); const char *task = va_arg(args, const char *); xmlNodePtr rsc = va_arg(args, xmlNodePtr); if (out->is_quiet(out)) { return pcmk_rc_no_output; } if (rsc != NULL) { out->list_item(out, NULL, "Cluster action: %s for %s on %s", task, pcmk__xe_id(rsc), node); } else { out->list_item(out, NULL, "Cluster action: %s on %s", task, node); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *", "xmlNode *") static int inject_cluster_action_xml(pcmk__output_t *out, va_list args) { const char *node = va_arg(args, const char *); const char *task = va_arg(args, const char *); xmlNodePtr rsc = va_arg(args, xmlNodePtr); xmlNodePtr xml_node = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } xml_node = pcmk__output_create_xml_node(out, PCMK_XE_CLUSTER_ACTION, PCMK_XA_TASK, task, PCMK_XA_NODE, node, NULL); if (rsc) { crm_xml_add(xml_node, PCMK_XA_ID, pcmk__xe_id(rsc)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-fencing-action", "const char *", "const char *") static int inject_fencing_action(pcmk__output_t *out, va_list args) { const char *target = va_arg(args, const char *); const char *op = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } out->list_item(out, NULL, "Fencing %s (%s)", target, op); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-fencing-action", "const char *", "const char *") static int inject_fencing_action_xml(pcmk__output_t *out, va_list args) { const char *target = va_arg(args, const char *); const char *op = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } pcmk__output_create_xml_node(out, PCMK_XE_FENCING_ACTION, PCMK_XA_TARGET, target, PCMK_XA_OP, op, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNode *") static int inject_attr(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); xmlNodePtr cib_node = va_arg(args, xmlNodePtr); xmlChar *node_path = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } node_path = xmlGetNodePath(cib_node); out->list_item(out, NULL, "Injecting attribute %s=%s into %s '%s'", name, value, node_path, pcmk__xe_id(cib_node)); free(node_path); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNode *") static int inject_attr_xml(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); xmlNodePtr cib_node = va_arg(args, xmlNodePtr); xmlChar *node_path = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } node_path = xmlGetNodePath(cib_node); pcmk__output_create_xml_node(out, PCMK_XE_INJECT_ATTR, PCMK_XA_NAME, name, PCMK_XA_VALUE, value, PCMK_XA_NODE_PATH, node_path, PCMK_XA_CIB_NODE, pcmk__xe_id(cib_node), NULL); free(node_path); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-spec", "const char *") static int inject_spec(pcmk__output_t *out, va_list args) { const char *spec = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } out->list_item(out, NULL, "Injecting %s into the configuration", spec); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-spec", "const char *") static int inject_spec_xml(pcmk__output_t *out, va_list args) { const char *spec = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } pcmk__output_create_xml_node(out, PCMK_XE_INJECT_SPEC, PCMK_XA_SPEC, spec, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *") static int inject_modify_config(pcmk__output_t *out, va_list args) { const char *quorum = va_arg(args, const char *); const char *watchdog = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } out->begin_list(out, NULL, NULL, "Performing Requested Modifications"); if (quorum) { out->list_item(out, NULL, "Setting quorum: %s", quorum); } if (watchdog) { out->list_item(out, NULL, "Setting watchdog: %s", watchdog); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *") static int inject_modify_config_xml(pcmk__output_t *out, va_list args) { const char *quorum = va_arg(args, const char *); const char *watchdog = va_arg(args, const char *); xmlNodePtr node = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } node = pcmk__output_xml_create_parent(out, PCMK_XE_MODIFICATIONS, NULL); if (quorum) { crm_xml_add(node, PCMK_XA_QUORUM, quorum); } if (watchdog) { crm_xml_add(node, PCMK_XA_WATCHDOG, watchdog); } pcmk__output_xml_pop_parent(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "const char *") static int inject_modify_node(pcmk__output_t *out, va_list args) { const char *action = va_arg(args, const char *); const char *node = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } if (pcmk__str_eq(action, "Online", pcmk__str_none)) { out->list_item(out, NULL, "Bringing node %s online", node); return pcmk_rc_ok; } else if (pcmk__str_eq(action, "Offline", pcmk__str_none)) { out->list_item(out, NULL, "Taking node %s offline", node); return pcmk_rc_ok; } else if (pcmk__str_eq(action, "Failing", pcmk__str_none)) { out->list_item(out, NULL, "Failing node %s", node); return pcmk_rc_ok; } return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "const char *") static int inject_modify_node_xml(pcmk__output_t *out, va_list args) { const char *action = va_arg(args, const char *); const char *node = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } pcmk__output_create_xml_node(out, PCMK_XE_MODIFY_NODE, PCMK_XA_ACTION, action, PCMK_XA_NODE, node, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "const char *") static int inject_modify_ticket(pcmk__output_t *out, va_list args) { const char *action = va_arg(args, const char *); const char *ticket = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } if (pcmk__str_eq(action, "Standby", pcmk__str_none)) { out->list_item(out, NULL, "Making ticket %s standby", ticket); } else { out->list_item(out, NULL, "%s ticket %s", action, ticket); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "const char *") static int inject_modify_ticket_xml(pcmk__output_t *out, va_list args) { const char *action = va_arg(args, const char *); const char *ticket = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } pcmk__output_create_xml_node(out, PCMK_XE_MODIFY_TICKET, PCMK_XA_ACTION, action, PCMK_XA_TICKET, ticket, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *") static int inject_pseudo_action(pcmk__output_t *out, va_list args) { const char *node = va_arg(args, const char *); const char *task = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } out->list_item(out, NULL, "Pseudo action: %s%s%s", task, ((node == NULL)? "" : " on "), pcmk__s(node, "")); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *") static int inject_pseudo_action_xml(pcmk__output_t *out, va_list args) { const char *node = va_arg(args, const char *); const char *task = va_arg(args, const char *); xmlNodePtr xml_node = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } xml_node = pcmk__output_create_xml_node(out, PCMK_XE_PSEUDO_ACTION, PCMK_XA_TASK, task, NULL); if (node) { crm_xml_add(xml_node, PCMK_XA_NODE, node); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *", "const char *", "guint") static int inject_rsc_action(pcmk__output_t *out, va_list args) { const char *rsc = va_arg(args, const char *); const char *operation = va_arg(args, const char *); const char *node = va_arg(args, const char *); guint interval_ms = va_arg(args, guint); if (out->is_quiet(out)) { return pcmk_rc_no_output; } if (interval_ms) { out->list_item(out, NULL, "Resource action: %-15s %s=%u on %s", rsc, operation, interval_ms, node); } else { out->list_item(out, NULL, "Resource action: %-15s %s on %s", rsc, operation, node); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *", "const char *", "guint") static int inject_rsc_action_xml(pcmk__output_t *out, va_list args) { const char *rsc = va_arg(args, const char *); const char *operation = va_arg(args, const char *); const char *node = va_arg(args, const char *); guint interval_ms = va_arg(args, guint); xmlNodePtr xml_node = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } xml_node = pcmk__output_create_xml_node(out, PCMK_XE_RSC_ACTION, PCMK_XA_RESOURCE, rsc, PCMK_XA_OP, operation, PCMK_XA_NODE, node, NULL); if (interval_ms) { char *interval_s = pcmk__itoa(interval_ms); crm_xml_add(xml_node, PCMK_XA_INTERVAL, interval_s); free(interval_s); } return pcmk_rc_ok; } #define CHECK_RC(retcode, retval) \ if (retval == pcmk_rc_ok) { \ retcode = pcmk_rc_ok; \ } PCMK__OUTPUT_ARGS("cluster-status", "pcmk_scheduler_t *", "enum pcmk_pacemakerd_state", "crm_exit_t", "stonith_history_t *", "enum pcmk__fence_history", "uint32_t", "uint32_t", "const char *", "GList *", "GList *") int pcmk__cluster_status_text(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); crm_exit_t history_rc = va_arg(args, crm_exit_t); stonith_history_t *stonith_history = va_arg(args, stonith_history_t *); enum pcmk__fence_history fence_history = va_arg(args, int); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); const char *prefix = va_arg(args, const char *); GList *unames = va_arg(args, GList *); GList *resources = va_arg(args, GList *); int rc = pcmk_rc_no_output; bool already_printed_failure = false; CHECK_RC(rc, out->message(out, "cluster-summary", scheduler, pcmkd_state, section_opts, show_opts)); if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) { CHECK_RC(rc, out->message(out, "node-list", scheduler->nodes, unames, resources, show_opts, rc == pcmk_rc_ok)); } /* Print resources section, if needed */ if (pcmk_is_set(section_opts, pcmk_section_resources)) { CHECK_RC(rc, out->message(out, "resource-list", scheduler, show_opts, true, unames, resources, rc == pcmk_rc_ok)); } /* print Node Attributes section if requested */ if (pcmk_is_set(section_opts, pcmk_section_attributes)) { CHECK_RC(rc, out->message(out, "node-attribute-list", scheduler, show_opts, (rc == pcmk_rc_ok), unames, resources)); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (pcmk_any_flags_set(section_opts, pcmk_section_operations|pcmk_section_failcounts)) { CHECK_RC(rc, out->message(out, "node-summary", scheduler, unames, resources, section_opts, show_opts, (rc == pcmk_rc_ok))); } /* If there were any failed actions, print them */ if (pcmk_is_set(section_opts, pcmk_section_failures) && (scheduler->priv->failed != NULL) && (scheduler->priv->failed->children != NULL)) { CHECK_RC(rc, out->message(out, "failed-action-list", scheduler, unames, resources, show_opts, rc == pcmk_rc_ok)); } /* Print failed stonith actions */ if (pcmk_is_set(section_opts, pcmk_section_fence_failed) && fence_history != pcmk__fence_history_none) { if (history_rc == 0) { stonith_history_t *hp = NULL; hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, GINT_TO_POINTER(st_failed)); if (hp) { CHECK_RC(rc, out->message(out, "failed-fencing-list", stonith_history, unames, section_opts, show_opts, rc == pcmk_rc_ok)); } } else { PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); out->list_item(out, NULL, "Failed to get fencing history: %s", crm_exit_str(history_rc)); out->end_list(out); already_printed_failure = true; } } /* Print tickets if requested */ if (pcmk_is_set(section_opts, pcmk_section_tickets)) { CHECK_RC(rc, out->message(out, "ticket-list", scheduler->priv->ticket_constraints, (rc == pcmk_rc_ok), false, false)); } /* Print negative location constraints if requested */ if (pcmk_is_set(section_opts, pcmk_section_bans)) { CHECK_RC(rc, out->message(out, "ban-list", scheduler, prefix, resources, show_opts, rc == pcmk_rc_ok)); } /* Print stonith history */ if (pcmk_any_flags_set(section_opts, pcmk_section_fencing_all) && fence_history != pcmk__fence_history_none) { if (history_rc != 0) { if (!already_printed_failure) { PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); out->list_item(out, NULL, "Failed to get fencing history: %s", crm_exit_str(history_rc)); out->end_list(out); } } else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) { stonith_history_t *hp = NULL; hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, GINT_TO_POINTER(st_failed)); if (hp) { CHECK_RC(rc, out->message(out, "fencing-list", hp, unames, section_opts, show_opts, rc == pcmk_rc_ok)); } } else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) { stonith_history_t *hp = NULL; hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); if (hp) { CHECK_RC(rc, out->message(out, "pending-fencing-list", hp, unames, section_opts, show_opts, rc == pcmk_rc_ok)); } } } return rc; } PCMK__OUTPUT_ARGS("cluster-status", "pcmk_scheduler_t *", "enum pcmk_pacemakerd_state", "crm_exit_t", "stonith_history_t *", "enum pcmk__fence_history", "uint32_t", "uint32_t", "const char *", "GList *", "GList *") static int cluster_status_xml(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); crm_exit_t history_rc = va_arg(args, crm_exit_t); stonith_history_t *stonith_history = va_arg(args, stonith_history_t *); enum pcmk__fence_history fence_history = va_arg(args, int); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); const char *prefix = va_arg(args, const char *); GList *unames = va_arg(args, GList *); GList *resources = va_arg(args, GList *); out->message(out, "cluster-summary", scheduler, pcmkd_state, section_opts, show_opts); /*** NODES ***/ if (pcmk_is_set(section_opts, pcmk_section_nodes)) { out->message(out, "node-list", scheduler->nodes, unames, resources, show_opts, false); } /* Print resources section, if needed */ if (pcmk_is_set(section_opts, pcmk_section_resources)) { /* XML output always displays full details. */ uint32_t full_show_opts = show_opts & ~pcmk_show_brief; out->message(out, "resource-list", scheduler, full_show_opts, false, unames, resources, false); } /* print Node Attributes section if requested */ if (pcmk_is_set(section_opts, pcmk_section_attributes)) { out->message(out, "node-attribute-list", scheduler, show_opts, false, unames, resources); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (pcmk_any_flags_set(section_opts, pcmk_section_operations|pcmk_section_failcounts)) { out->message(out, "node-summary", scheduler, unames, resources, section_opts, show_opts, false); } /* If there were any failed actions, print them */ if (pcmk_is_set(section_opts, pcmk_section_failures) && (scheduler->priv->failed != NULL) && (scheduler->priv->failed->children != NULL)) { out->message(out, "failed-action-list", scheduler, unames, resources, show_opts, false); } /* Print stonith history */ if (pcmk_is_set(section_opts, pcmk_section_fencing_all) && fence_history != pcmk__fence_history_none) { out->message(out, "full-fencing-list", history_rc, stonith_history, unames, section_opts, show_opts, false); } /* Print tickets if requested */ if (pcmk_is_set(section_opts, pcmk_section_tickets)) { out->message(out, "ticket-list", scheduler->priv->ticket_constraints, false, false, false); } /* Print negative location constraints if requested */ if (pcmk_is_set(section_opts, pcmk_section_bans)) { out->message(out, "ban-list", scheduler, prefix, resources, show_opts, false); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-status", "pcmk_scheduler_t *", "enum pcmk_pacemakerd_state", "crm_exit_t", "stonith_history_t *", "enum pcmk__fence_history", "uint32_t", "uint32_t", "const char *", "GList *", "GList *") static int cluster_status_html(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); crm_exit_t history_rc = va_arg(args, crm_exit_t); stonith_history_t *stonith_history = va_arg(args, stonith_history_t *); enum pcmk__fence_history fence_history = va_arg(args, int); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); const char *prefix = va_arg(args, const char *); GList *unames = va_arg(args, GList *); GList *resources = va_arg(args, GList *); bool already_printed_failure = false; out->message(out, "cluster-summary", scheduler, pcmkd_state, section_opts, show_opts); /*** NODE LIST ***/ if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) { out->message(out, "node-list", scheduler->nodes, unames, resources, show_opts, false); } /* Print resources section, if needed */ if (pcmk_is_set(section_opts, pcmk_section_resources)) { out->message(out, "resource-list", scheduler, show_opts, true, unames, resources, false); } /* print Node Attributes section if requested */ if (pcmk_is_set(section_opts, pcmk_section_attributes)) { out->message(out, "node-attribute-list", scheduler, show_opts, false, unames, resources); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (pcmk_any_flags_set(section_opts, pcmk_section_operations|pcmk_section_failcounts)) { out->message(out, "node-summary", scheduler, unames, resources, section_opts, show_opts, false); } /* If there were any failed actions, print them */ if (pcmk_is_set(section_opts, pcmk_section_failures) && (scheduler->priv->failed != NULL) && (scheduler->priv->failed->children != NULL)) { out->message(out, "failed-action-list", scheduler, unames, resources, show_opts, false); } /* Print failed stonith actions */ if (pcmk_is_set(section_opts, pcmk_section_fence_failed) && fence_history != pcmk__fence_history_none) { if (history_rc == 0) { stonith_history_t *hp = NULL; hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, GINT_TO_POINTER(st_failed)); if (hp) { out->message(out, "failed-fencing-list", stonith_history, unames, section_opts, show_opts, false); } } else { out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); out->list_item(out, NULL, "Failed to get fencing history: %s", crm_exit_str(history_rc)); out->end_list(out); } } /* Print stonith history */ if (pcmk_any_flags_set(section_opts, pcmk_section_fencing_all) && fence_history != pcmk__fence_history_none) { if (history_rc != 0) { if (!already_printed_failure) { out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); out->list_item(out, NULL, "Failed to get fencing history: %s", crm_exit_str(history_rc)); out->end_list(out); } } else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) { stonith_history_t *hp = NULL; hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, GINT_TO_POINTER(st_failed)); if (hp) { out->message(out, "fencing-list", hp, unames, section_opts, show_opts, false); } } else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) { stonith_history_t *hp = NULL; hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); if (hp) { out->message(out, "pending-fencing-list", hp, unames, section_opts, show_opts, false); } } } /* Print tickets if requested */ if (pcmk_is_set(section_opts, pcmk_section_tickets)) { out->message(out, "ticket-list", scheduler->priv->ticket_constraints, false, false, false); } /* Print negative location constraints if requested */ if (pcmk_is_set(section_opts, pcmk_section_bans)) { out->message(out, "ban-list", scheduler, prefix, resources, show_opts, false); } return pcmk_rc_ok; } #define KV_PAIR(k, v) do { \ if (legacy) { \ pcmk__g_strcat(s, k "=", pcmk__s(v, ""), " ", NULL); \ } else { \ pcmk__g_strcat(s, k "=\"", pcmk__s(v, ""), "\" ", NULL); \ } \ } while (0) PCMK__OUTPUT_ARGS("attribute", "const char *", "const char *", "const char *", "const char *", "const char *", "bool", "bool") static int attribute_default(pcmk__output_t *out, va_list args) { const char *scope = va_arg(args, const char *); const char *instance = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); const char *host = va_arg(args, const char *); bool quiet = va_arg(args, int); bool legacy = va_arg(args, int); gchar *value_esc = NULL; GString *s = NULL; if (quiet) { if (value != NULL) { /* Quiet needs to be turned off for ->info() to do anything */ bool was_quiet = out->is_quiet(out); if (was_quiet) { out->quiet = false; } out->info(out, "%s", value); out->quiet = was_quiet; } return pcmk_rc_ok; } s = g_string_sized_new(50); if (pcmk__xml_needs_escape(value, pcmk__xml_escape_attr_pretty)) { value_esc = pcmk__xml_escape(value, pcmk__xml_escape_attr_pretty); value = value_esc; } if (!pcmk__str_empty(scope)) { KV_PAIR(PCMK_XA_SCOPE, scope); } if (!pcmk__str_empty(instance)) { KV_PAIR(PCMK_XA_ID, instance); } KV_PAIR(PCMK_XA_NAME, name); if (!pcmk__str_empty(host)) { KV_PAIR(PCMK_XA_HOST, host); } if (legacy) { pcmk__g_strcat(s, PCMK_XA_VALUE "=", pcmk__s(value, "(null)"), NULL); } else { pcmk__g_strcat(s, PCMK_XA_VALUE "=\"", pcmk__s(value, ""), "\"", NULL); } out->info(out, "%s", s->str); g_free(value_esc); g_string_free(s, TRUE); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("attribute", "const char *", "const char *", "const char *", "const char *", "const char *", "bool", "bool") static int attribute_xml(pcmk__output_t *out, va_list args) { const char *scope = va_arg(args, const char *); const char *instance = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); const char *host = va_arg(args, const char *); bool quiet G_GNUC_UNUSED = va_arg(args, int); bool legacy G_GNUC_UNUSED = va_arg(args, int); xmlNodePtr node = NULL; node = pcmk__output_create_xml_node(out, PCMK_XE_ATTRIBUTE, PCMK_XA_NAME, name, PCMK_XA_VALUE, pcmk__s(value, ""), NULL); if (!pcmk__str_empty(scope)) { crm_xml_add(node, PCMK_XA_SCOPE, scope); } if (!pcmk__str_empty(instance)) { crm_xml_add(node, PCMK_XA_ID, instance); } if (!pcmk__str_empty(host)) { crm_xml_add(node, PCMK_XA_HOST, host); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("rule-check", "const char *", "int", "const char *") static int rule_check_default(pcmk__output_t *out, va_list args) { const char *rule_id = va_arg(args, const char *); int result = va_arg(args, int); const char *error = va_arg(args, const char *); switch (result) { case pcmk_rc_within_range: return out->info(out, "Rule %s is still in effect", rule_id); case pcmk_rc_ok: return out->info(out, "Rule %s satisfies conditions", rule_id); case pcmk_rc_after_range: return out->info(out, "Rule %s is expired", rule_id); case pcmk_rc_before_range: return out->info(out, "Rule %s has not yet taken effect", rule_id); case pcmk_rc_op_unsatisfied: return out->info(out, "Rule %s does not satisfy conditions", rule_id); default: out->err(out, "Could not determine whether rule %s is in effect: %s", rule_id, ((error != NULL)? error : "unexpected error")); return pcmk_rc_ok; } } PCMK__OUTPUT_ARGS("rule-check", "const char *", "int", "const char *") static int rule_check_xml(pcmk__output_t *out, va_list args) { const char *rule_id = va_arg(args, const char *); int result = va_arg(args, int); const char *error = va_arg(args, const char *); char *rc_str = pcmk__itoa(pcmk_rc2exitc(result)); pcmk__output_create_xml_node(out, PCMK_XE_RULE_CHECK, PCMK_XA_RULE_ID, rule_id, PCMK_XA_RC, rc_str, NULL); free(rc_str); switch (result) { case pcmk_rc_within_range: case pcmk_rc_ok: case pcmk_rc_after_range: case pcmk_rc_before_range: case pcmk_rc_op_unsatisfied: return pcmk_rc_ok; default: out->err(out, "Could not determine whether rule %s is in effect: %s", rule_id, ((error != NULL)? error : "unexpected error")); return pcmk_rc_ok; } } PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *") static int result_code_none(pcmk__output_t *out, va_list args) { return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *") static int result_code_text(pcmk__output_t *out, va_list args) { int code = va_arg(args, int); const char *name = va_arg(args, const char *); const char *desc = va_arg(args, const char *); static int code_width = 0; if (out->is_quiet(out)) { /* If out->is_quiet(), don't print the code. Print name and/or desc in a * compact format for text output, or print nothing at all for none-type * output. */ if ((name != NULL) && (desc != NULL)) { pcmk__formatted_printf(out, "%s - %s\n", name, desc); } else if ((name != NULL) || (desc != NULL)) { pcmk__formatted_printf(out, "%s\n", ((name != NULL)? name : desc)); } return pcmk_rc_ok; } /* Get length of longest (most negative) standard Pacemaker return code * This should be longer than all the values of any other type of return * code. */ if (code_width == 0) { long long most_negative = pcmk_rc_error - (long long) pcmk__n_rc + 1; code_width = (int) snprintf(NULL, 0, "%lld", most_negative); } if ((name != NULL) && (desc != NULL)) { static int name_width = 0; if (name_width == 0) { // Get length of longest standard Pacemaker return code name for (int lpc = 0; lpc < pcmk__n_rc; lpc++) { int len = (int) strlen(pcmk_rc_name(pcmk_rc_error - lpc)); name_width = QB_MAX(name_width, len); } } return out->info(out, "% *d: %-*s %s", code_width, code, name_width, name, desc); } if ((name != NULL) || (desc != NULL)) { return out->info(out, "% *d: %s", code_width, code, ((name != NULL)? name : desc)); } return out->info(out, "% *d", code_width, code); } PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *") static int result_code_xml(pcmk__output_t *out, va_list args) { int code = va_arg(args, int); const char *name = va_arg(args, const char *); const char *desc = va_arg(args, const char *); char *code_str = pcmk__itoa(code); pcmk__output_create_xml_node(out, PCMK_XE_RESULT_CODE, PCMK_XA_CODE, code_str, PCMK_XA_NAME, name, PCMK_XA_DESCRIPTION, desc, NULL); free(code_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-attribute", "const char *", "const char *", "const char *") static int ticket_attribute_default(pcmk__output_t *out, va_list args) { const char *ticket_id G_GNUC_UNUSED = va_arg(args, const char *); const char *name G_GNUC_UNUSED = va_arg(args, const char *); const char *value = va_arg(args, const char *); out->info(out, "%s", value); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-attribute", "const char *", "const char *", "const char *") static int ticket_attribute_xml(pcmk__output_t *out, va_list args) { const char *ticket_id = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); /* Create: * * * * * */ pcmk__output_xml_create_parent(out, PCMK_XE_TICKETS, NULL); pcmk__output_xml_create_parent(out, PCMK_XE_TICKET, PCMK_XA_ID, ticket_id, NULL); pcmk__output_create_xml_node(out, PCMK_XA_ATTRIBUTE, PCMK_XA_NAME, name, PCMK_XA_VALUE, value, NULL); pcmk__output_xml_pop_parent(out); pcmk__output_xml_pop_parent(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-constraints", "xmlNode *") static int ticket_constraints_default(pcmk__output_t *out, va_list args) { xmlNode *constraint_xml = va_arg(args, xmlNode *); /* constraint_xml can take two forms: * * * * for when there's only one ticket in the CIB, or when the user asked * for a specific ticket (crm_ticket -c -t for instance) * * * * * * * for when there's multiple tickets in the and the user did not ask for * a specific one. * * In both cases, we simply output a element for each ticket * in the results. */ out->info(out, "Constraints XML:\n"); if (pcmk__xe_is(constraint_xml, PCMK__XE_XPATH_QUERY)) { xmlNode *child = pcmk__xe_first_child(constraint_xml, NULL, NULL, NULL); do { GString *buf = g_string_sized_new(1024); pcmk__xml_string(child, pcmk__xml_fmt_pretty, buf, 0); out->output_xml(out, PCMK_XE_CONSTRAINT, buf->str); g_string_free(buf, TRUE); child = pcmk__xe_next(child); } while (child != NULL); } else { GString *buf = g_string_sized_new(1024); pcmk__xml_string(constraint_xml, pcmk__xml_fmt_pretty, buf, 0); out->output_xml(out, PCMK_XE_CONSTRAINT, buf->str); g_string_free(buf, TRUE); } return pcmk_rc_ok; } static int add_ticket_element_with_constraints(xmlNode *node, void *userdata) { pcmk__output_t *out = (pcmk__output_t *) userdata; const char *ticket_id = crm_element_value(node, PCMK_XA_TICKET); pcmk__output_xml_create_parent(out, PCMK_XE_TICKET, PCMK_XA_ID, ticket_id, NULL); pcmk__output_xml_create_parent(out, PCMK_XE_CONSTRAINTS, NULL); pcmk__output_xml_add_node_copy(out, node); /* Pop two parents so now we are back under the element */ pcmk__output_xml_pop_parent(out); pcmk__output_xml_pop_parent(out); return pcmk_rc_ok; } static int add_resource_element(xmlNode *node, void *userdata) { pcmk__output_t *out = (pcmk__output_t *) userdata; const char *rsc = crm_element_value(node, PCMK_XA_RSC); pcmk__output_create_xml_node(out, PCMK_XE_RESOURCE, PCMK_XA_ID, rsc, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-constraints", "xmlNode *") static int ticket_constraints_xml(pcmk__output_t *out, va_list args) { xmlNode *constraint_xml = va_arg(args, xmlNode *); /* Create: * * * * * * * ... * */ pcmk__output_xml_create_parent(out, PCMK_XE_TICKETS, NULL); if (pcmk__xe_is(constraint_xml, PCMK__XE_XPATH_QUERY)) { /* Iterate through the list of children once to create all the * ticket/constraint elements. */ pcmk__xe_foreach_child(constraint_xml, NULL, add_ticket_element_with_constraints, out); /* Put us back at the same level as where was created. */ pcmk__output_xml_pop_parent(out); /* Constraints can reference a resource ID that is defined in the XML * schema as an IDREF. This requires some other element to be present * with an id= attribute that matches. * * Iterate through the list of children a second time to create the * following: * * * * ... * */ pcmk__output_xml_create_parent(out, PCMK_XE_RESOURCES, NULL); pcmk__xe_foreach_child(constraint_xml, NULL, add_resource_element, out); pcmk__output_xml_pop_parent(out); } else { /* Creating the output for a single constraint is much easier. All the * comments in the above block apply here. */ add_ticket_element_with_constraints(constraint_xml, out); pcmk__output_xml_pop_parent(out); pcmk__output_xml_create_parent(out, PCMK_XE_RESOURCES, NULL); add_resource_element(constraint_xml, out); pcmk__output_xml_pop_parent(out); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-state", "xmlNode *") static int ticket_state_default(pcmk__output_t *out, va_list args) { xmlNode *state_xml = va_arg(args, xmlNode *); GString *buf = g_string_sized_new(1024); out->info(out, "State XML:\n"); pcmk__xml_string(state_xml, pcmk__xml_fmt_pretty, buf, 0); out->output_xml(out, PCMK__XE_TICKET_STATE, buf->str); g_string_free(buf, TRUE); return pcmk_rc_ok; } static int add_ticket_element(xmlNode *node, void *userdata) { pcmk__output_t *out = (pcmk__output_t *) userdata; xmlNode *ticket_node = NULL; ticket_node = pcmk__output_create_xml_node(out, PCMK_XE_TICKET, NULL); pcmk__xe_copy_attrs(ticket_node, node, pcmk__xaf_none); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-state", "xmlNode *") static int ticket_state_xml(pcmk__output_t *out, va_list args) { xmlNode *state_xml = va_arg(args, xmlNode *); /* Create: * * * ... * */ pcmk__output_xml_create_parent(out, PCMK_XE_TICKETS, NULL); if (state_xml->children != NULL) { /* Iterate through the list of children once to create all the * ticket elements. */ pcmk__xe_foreach_child(state_xml, PCMK__XE_TICKET_STATE, add_ticket_element, out); } else { add_ticket_element(state_xml, out); } pcmk__output_xml_pop_parent(out); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "attribute", "default", attribute_default }, { "attribute", "xml", attribute_xml }, { "cluster-status", "default", pcmk__cluster_status_text }, { "cluster-status", "html", cluster_status_html }, { "cluster-status", "xml", cluster_status_xml }, { "crmadmin-node", "default", crmadmin_node }, { "crmadmin-node", "text", crmadmin_node_text }, { "crmadmin-node", "xml", crmadmin_node_xml }, { "dc", "default", dc }, { "dc", "text", dc_text }, { "dc", "xml", dc_xml }, { "digests", "default", digests_text }, { "digests", "xml", digests_xml }, { "health", "default", health }, { "health", "text", health_text }, { "health", "xml", health_xml }, { "inject-attr", "default", inject_attr }, { "inject-attr", "xml", inject_attr_xml }, { "inject-cluster-action", "default", inject_cluster_action }, { "inject-cluster-action", "xml", inject_cluster_action_xml }, { "inject-fencing-action", "default", inject_fencing_action }, { "inject-fencing-action", "xml", inject_fencing_action_xml }, { "inject-modify-config", "default", inject_modify_config }, { "inject-modify-config", "xml", inject_modify_config_xml }, { "inject-modify-node", "default", inject_modify_node }, { "inject-modify-node", "xml", inject_modify_node_xml }, { "inject-modify-ticket", "default", inject_modify_ticket }, { "inject-modify-ticket", "xml", inject_modify_ticket_xml }, { "inject-pseudo-action", "default", inject_pseudo_action }, { "inject-pseudo-action", "xml", inject_pseudo_action_xml }, { "inject-rsc-action", "default", inject_rsc_action }, { "inject-rsc-action", "xml", inject_rsc_action_xml }, { "inject-spec", "default", inject_spec }, { "inject-spec", "xml", inject_spec_xml }, { "locations-and-colocations", "default", locations_and_colocations }, { "locations-and-colocations", "xml", locations_and_colocations_xml }, { "locations-list", "default", locations_list }, { "locations-list", "xml", locations_list_xml }, { "node-action", "default", node_action }, { "node-action", "xml", node_action_xml }, { "node-info", "default", node_info_default }, { "node-info", "xml", node_info_xml }, { "pacemakerd-health", "default", pacemakerd_health }, { "pacemakerd-health", "html", pacemakerd_health_html }, { "pacemakerd-health", "text", pacemakerd_health_text }, { "pacemakerd-health", "xml", pacemakerd_health_xml }, { "profile", "default", profile_default, }, { "profile", "xml", profile_xml }, { "result-code", PCMK_VALUE_NONE, result_code_none }, { "result-code", "text", result_code_text }, { "result-code", "xml", result_code_xml }, { "rsc-action", "default", rsc_action_default }, { "rsc-action-item", "default", rsc_action_item }, { "rsc-action-item", "xml", rsc_action_item_xml }, { "rsc-is-colocated-with-list", "default", rsc_is_colocated_with_list }, { "rsc-is-colocated-with-list", "xml", rsc_is_colocated_with_list_xml }, { "rscs-colocated-with-list", "default", rscs_colocated_with_list }, { "rscs-colocated-with-list", "xml", rscs_colocated_with_list_xml }, { "rule-check", "default", rule_check_default }, { "rule-check", "xml", rule_check_xml }, { "ticket-attribute", "default", ticket_attribute_default }, { "ticket-attribute", "xml", ticket_attribute_xml }, { "ticket-constraints", "default", ticket_constraints_default }, { "ticket-constraints", "xml", ticket_constraints_xml }, { "ticket-state", "default", ticket_state_default }, { "ticket-state", "xml", ticket_state_xml }, { NULL, NULL, NULL } }; void pcmk__register_lib_messages(pcmk__output_t *out) { pcmk__register_messages(out, fmt_functions); } diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c index 34579ffde5..0d6f2729aa 100644 --- a/lib/pacemaker/pcmk_sched_bundle.c +++ b/lib/pacemaker/pcmk_sched_bundle.c @@ -1,1075 +1,1075 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include "libpacemaker_private.h" struct assign_data { const pcmk_node_t *prefer; bool stop_if_fail; }; /*! * \internal * \brief Assign a single bundle replica's resources (other than container) * * \param[in,out] replica Replica to assign * \param[in] user_data Preferred node, if any * * \return true (to indicate that any further replicas should be processed) */ static bool assign_replica(pcmk__bundle_replica_t *replica, void *user_data) { pcmk_node_t *container_host = NULL; struct assign_data *assign_data = user_data; const pcmk_node_t *prefer = assign_data->prefer; bool stop_if_fail = assign_data->stop_if_fail; const pcmk_resource_t *bundle = pe__const_top_resource(replica->container, true); if (replica->ip != NULL) { pcmk__rsc_trace(bundle, "Assigning bundle %s IP %s", bundle->id, replica->ip->id); replica->ip->priv->cmds->assign(replica->ip, prefer, stop_if_fail); } container_host = replica->container->priv->assigned_node; if (replica->remote != NULL) { if (pcmk__is_pacemaker_remote_node(container_host)) { /* REMOTE_CONTAINER_HACK: "Nested" connection resources must be on * the same host because Pacemaker Remote only supports a single * active connection. */ pcmk__new_colocation("#replica-remote-with-host-remote", NULL, PCMK_SCORE_INFINITY, replica->remote, container_host->priv->remote, NULL, NULL, pcmk__coloc_influence); } pcmk__rsc_trace(bundle, "Assigning bundle %s connection %s", bundle->id, replica->remote->id); replica->remote->priv->cmds->assign(replica->remote, prefer, stop_if_fail); } if (replica->child != NULL) { pcmk_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, replica->child->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (!pcmk__same_node(node, replica->node)) { node->assign->score = -PCMK_SCORE_INFINITY; } else if (!pcmk__threshold_reached(replica->child, node, NULL)) { node->assign->score = PCMK_SCORE_INFINITY; } } pcmk__set_rsc_flags(replica->child->priv->parent, pcmk__rsc_assigning); pcmk__rsc_trace(bundle, "Assigning bundle %s replica child %s", bundle->id, replica->child->id); replica->child->priv->cmds->assign(replica->child, replica->node, stop_if_fail); pcmk__clear_rsc_flags(replica->child->priv->parent, pcmk__rsc_assigning); } return true; } /*! * \internal * \brief Assign a bundle resource to a node * * \param[in,out] rsc Resource to assign to a node * \param[in] prefer Node to prefer, if all else is equal * \param[in] stop_if_fail If \c true and a primitive descendant of \p rsc * can't be assigned to a node, set the * descendant's next role to stopped and update * existing actions * * \return Node that \p rsc is assigned to, if assigned entirely to one node * * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can * completely undo the assignment. A successful assignment can be either * undone or left alone as final. A failed assignment has the same effect * as calling pcmk__unassign_resource(); there are no side effects on * roles or actions. */ pcmk_node_t * pcmk__bundle_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer, bool stop_if_fail) { GList *containers = NULL; pcmk_resource_t *bundled_resource = NULL; struct assign_data assign_data = { prefer, stop_if_fail }; CRM_ASSERT(pcmk__is_bundle(rsc)); pcmk__rsc_trace(rsc, "Assigning bundle %s", rsc->id); pcmk__set_rsc_flags(rsc, pcmk__rsc_assigning); pe__show_node_scores(!pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_output_scores), rsc, __func__, rsc->priv->allowed_nodes, rsc->priv->scheduler); // Assign all containers first, so we know what nodes the bundle will be on containers = g_list_sort(pe__bundle_containers(rsc), pcmk__cmp_instance); pcmk__assign_instances(rsc, containers, pe__bundle_max(rsc), rsc->priv->fns->max_per_node(rsc)); g_list_free(containers); // Then assign remaining replica resources pe__foreach_bundle_replica(rsc, assign_replica, (void *) &assign_data); // Finally, assign the bundled resources to each bundle node bundled_resource = pe__bundled_resource(rsc); if (bundled_resource != NULL) { pcmk_node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, bundled_resource->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { if (pe__node_is_bundle_instance(rsc, node)) { node->assign->score = 0; } else { node->assign->score = -PCMK_SCORE_INFINITY; } } bundled_resource->priv->cmds->assign(bundled_resource, prefer, stop_if_fail); } pcmk__clear_rsc_flags(rsc, pcmk__rsc_assigning|pcmk__rsc_unassigned); return NULL; } /*! * \internal * \brief Create actions for a bundle replica's resources (other than child) * * \param[in,out] replica Replica to create actions for * \param[in] user_data Unused * * \return true (to indicate that any further replicas should be processed) */ static bool create_replica_actions(pcmk__bundle_replica_t *replica, void *user_data) { if (replica->ip != NULL) { replica->ip->priv->cmds->create_actions(replica->ip); } if (replica->container != NULL) { replica->container->priv->cmds->create_actions(replica->container); } if (replica->remote != NULL) { replica->remote->priv->cmds->create_actions(replica->remote); } return true; } /*! * \internal * \brief Create all actions needed for a given bundle resource * * \param[in,out] rsc Bundle resource to create actions for */ void pcmk__bundle_create_actions(pcmk_resource_t *rsc) { pcmk_action_t *action = NULL; GList *containers = NULL; pcmk_resource_t *bundled_resource = NULL; CRM_ASSERT(pcmk__is_bundle(rsc)); pe__foreach_bundle_replica(rsc, create_replica_actions, NULL); containers = pe__bundle_containers(rsc); pcmk__create_instance_actions(rsc, containers); g_list_free(containers); bundled_resource = pe__bundled_resource(rsc); if (bundled_resource != NULL) { bundled_resource->priv->cmds->create_actions(bundled_resource); if (pcmk_is_set(bundled_resource->flags, pcmk__rsc_promotable)) { pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTE, true, true); action = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTED, true, true); action->priority = PCMK_SCORE_INFINITY; pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTE, true, true); action = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTED, true, true); action->priority = PCMK_SCORE_INFINITY; } } } /*! * \internal * \brief Create internal constraints for a bundle replica's resources * * \param[in,out] replica Replica to create internal constraints for * \param[in,out] user_data Replica's parent bundle * * \return true (to indicate that any further replicas should be processed) */ static bool replica_internal_constraints(pcmk__bundle_replica_t *replica, void *user_data) { pcmk_resource_t *bundle = user_data; replica->container->priv->cmds->internal_constraints(replica->container); // Start bundle -> start replica container pcmk__order_starts(bundle, replica->container, pcmk__ar_unrunnable_first_blocks |pcmk__ar_then_implies_first_graphed); // Stop bundle -> stop replica child and container if (replica->child != NULL) { pcmk__order_stops(bundle, replica->child, pcmk__ar_then_implies_first_graphed); } pcmk__order_stops(bundle, replica->container, pcmk__ar_then_implies_first_graphed); // Start replica container -> bundle is started pcmk__order_resource_actions(replica->container, PCMK_ACTION_START, bundle, PCMK_ACTION_RUNNING, pcmk__ar_first_implies_then_graphed); // Stop replica container -> bundle is stopped pcmk__order_resource_actions(replica->container, PCMK_ACTION_STOP, bundle, PCMK_ACTION_STOPPED, pcmk__ar_first_implies_then_graphed); if (replica->ip != NULL) { replica->ip->priv->cmds->internal_constraints(replica->ip); // Replica IP address -> replica container (symmetric) pcmk__order_starts(replica->ip, replica->container, pcmk__ar_unrunnable_first_blocks |pcmk__ar_guest_allowed); pcmk__order_stops(replica->container, replica->ip, pcmk__ar_then_implies_first|pcmk__ar_guest_allowed); pcmk__new_colocation("#ip-with-container", NULL, PCMK_SCORE_INFINITY, replica->ip, replica->container, NULL, NULL, pcmk__coloc_influence); } if (replica->remote != NULL) { /* This handles ordering and colocating remote relative to container * (via "#resource-with-container"). Since IP is also ordered and * colocated relative to the container, we don't need to do anything * explicit here with IP. */ replica->remote->priv->cmds->internal_constraints(replica->remote); } if (replica->child != NULL) { CRM_ASSERT(replica->remote != NULL); // "Start remote then child" is implicit in scheduler's remote logic } return true; } /*! * \internal * \brief Create implicit constraints needed for a bundle resource * * \param[in,out] rsc Bundle resource to create implicit constraints for */ void pcmk__bundle_internal_constraints(pcmk_resource_t *rsc) { pcmk_resource_t *bundled_resource = NULL; CRM_ASSERT(pcmk__is_bundle(rsc)); pe__foreach_bundle_replica(rsc, replica_internal_constraints, rsc); bundled_resource = pe__bundled_resource(rsc); if (bundled_resource == NULL) { return; } // Start bundle -> start bundled clone pcmk__order_resource_actions(rsc, PCMK_ACTION_START, bundled_resource, PCMK_ACTION_START, pcmk__ar_then_implies_first_graphed); // Bundled clone is started -> bundle is started pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_RUNNING, rsc, PCMK_ACTION_RUNNING, pcmk__ar_first_implies_then_graphed); // Stop bundle -> stop bundled clone pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP, bundled_resource, PCMK_ACTION_STOP, pcmk__ar_then_implies_first_graphed); // Bundled clone is stopped -> bundle is stopped pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_STOPPED, rsc, PCMK_ACTION_STOPPED, pcmk__ar_first_implies_then_graphed); bundled_resource->priv->cmds->internal_constraints(bundled_resource); if (!pcmk_is_set(bundled_resource->flags, pcmk__rsc_promotable)) { return; } pcmk__promotable_restart_ordering(rsc); // Demote bundle -> demote bundled clone pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTE, bundled_resource, PCMK_ACTION_DEMOTE, pcmk__ar_then_implies_first_graphed); // Bundled clone is demoted -> bundle is demoted pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_DEMOTED, rsc, PCMK_ACTION_DEMOTED, pcmk__ar_first_implies_then_graphed); // Promote bundle -> promote bundled clone pcmk__order_resource_actions(rsc, PCMK_ACTION_PROMOTE, bundled_resource, PCMK_ACTION_PROMOTE, pcmk__ar_then_implies_first_graphed); // Bundled clone is promoted -> bundle is promoted pcmk__order_resource_actions(bundled_resource, PCMK_ACTION_PROMOTED, rsc, PCMK_ACTION_PROMOTED, pcmk__ar_first_implies_then_graphed); } struct match_data { const pcmk_node_t *node; // Node to compare against replica pcmk_resource_t *container; // Replica container corresponding to node }; /*! * \internal * \brief Check whether a replica container is assigned to a given node * * \param[in] replica Replica to check * \param[in,out] user_data struct match_data with node to compare against * * \return true if the replica does not match (to indicate further replicas * should be processed), otherwise false */ static bool match_replica_container(const pcmk__bundle_replica_t *replica, void *user_data) { struct match_data *match_data = user_data; if (pcmk__instance_matches(replica->container, match_data->node, pcmk_role_unknown, false)) { match_data->container = replica->container; return false; // Match found, don't bother searching further replicas } return true; // No match, keep searching } /*! * \internal * \brief Get the host to which a bundle node is assigned * * \param[in] node Possible bundle node to check * * \return Node to which the container for \p node is assigned if \p node is a * bundle node, otherwise \p node itself */ static const pcmk_node_t * get_bundle_node_host(const pcmk_node_t *node) { if (pcmk__is_bundle_node(node)) { const pcmk_resource_t *container = NULL; container = node->priv->remote->priv->launcher; return container->priv->fns->location(container, NULL, 0); } return node; } /*! * \internal * \brief Find a bundle container compatible with a dependent resource * * \param[in] dependent Dependent resource in colocation with bundle * \param[in] bundle Bundle that \p dependent is colocated with * * \return A container from \p bundle assigned to the same node as \p dependent * if assigned, otherwise assigned to any of dependent's allowed nodes, * otherwise NULL. */ static pcmk_resource_t * compatible_container(const pcmk_resource_t *dependent, const pcmk_resource_t *bundle) { GList *scratch = NULL; struct match_data match_data = { NULL, NULL }; // If dependent is assigned, only check there match_data.node = dependent->priv->fns->location(dependent, NULL, 0); match_data.node = get_bundle_node_host(match_data.node); if (match_data.node != NULL) { pe__foreach_const_bundle_replica(bundle, match_replica_container, &match_data); return match_data.container; } // Otherwise, check for any of the dependent's allowed nodes scratch = g_hash_table_get_values(dependent->priv->allowed_nodes); scratch = pcmk__sort_nodes(scratch, NULL); for (const GList *iter = scratch; iter != NULL; iter = iter->next) { match_data.node = iter->data; match_data.node = get_bundle_node_host(match_data.node); if (match_data.node == NULL) { continue; } pe__foreach_const_bundle_replica(bundle, match_replica_container, &match_data); if (match_data.container != NULL) { break; } } g_list_free(scratch); return match_data.container; } struct coloc_data { const pcmk__colocation_t *colocation; pcmk_resource_t *dependent; GList *container_hosts; int priority_delta; }; /*! * \internal * \brief Apply a colocation score to replica node scores or resource priority * * \param[in] replica Replica of primary bundle resource in colocation * \param[in,out] user_data struct coloc_data for colocation being applied * * \return true (to indicate that any further replicas should be processed) */ static bool replica_apply_coloc_score(const pcmk__bundle_replica_t *replica, void *user_data) { struct coloc_data *coloc_data = user_data; pcmk_node_t *chosen = NULL; pcmk_resource_t *container = replica->container; if (coloc_data->colocation->score < PCMK_SCORE_INFINITY) { int priority_delta = container->priv->cmds->apply_coloc_score(coloc_data->dependent, container, coloc_data->colocation, false); coloc_data->priority_delta = pcmk__add_scores(coloc_data->priority_delta, priority_delta); return true; } chosen = container->priv->fns->location(container, NULL, 0); if ((chosen == NULL) || is_set_recursive(container, pcmk__rsc_blocked, true)) { return true; } if ((coloc_data->colocation->primary_role >= pcmk_role_promoted) && ((replica->child == NULL) || (replica->child->priv->next_role < pcmk_role_promoted))) { return true; } pcmk__rsc_trace(pe__const_top_resource(container, true), "Allowing mandatory colocation %s using %s @%d", coloc_data->colocation->id, pcmk__node_name(chosen), chosen->assign->score); coloc_data->container_hosts = g_list_prepend(coloc_data->container_hosts, chosen); return true; } /*! * \internal * \brief Apply a colocation's score to node scores or resource priority * * Given a colocation constraint, apply its score to the dependent's * allowed node scores (if we are still placing resources) or priority (if * we are choosing promotable clone instance roles). * * \param[in,out] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] colocation Colocation constraint to apply * \param[in] for_dependent true if called on behalf of dependent * * \return The score added to the dependent's priority */ int pcmk__bundle_apply_coloc_score(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent) { struct coloc_data coloc_data = { colocation, dependent, NULL, 0 }; /* This should never be called for the bundle itself as a dependent. * Instead, we add its colocation constraints to its containers and bundled * primitive and call the apply_coloc_score() method for them as dependents. */ CRM_ASSERT(pcmk__is_bundle(primary) && pcmk__is_primitive(dependent) && (colocation != NULL) && !for_dependent); if (pcmk_is_set(primary->flags, pcmk__rsc_unassigned)) { pcmk__rsc_trace(primary, "Skipping applying colocation %s " "because %s is still provisional", colocation->id, primary->id); return 0; } pcmk__rsc_trace(primary, "Applying colocation %s (%s with %s at %s)", colocation->id, dependent->id, primary->id, pcmk_readable_score(colocation->score)); /* If the constraint dependent is a clone or bundle, "dependent" here is one * of its instances. Look for a compatible instance of this bundle. */ if (colocation->dependent->priv->variant > pcmk__rsc_variant_group) { const pcmk_resource_t *primary_container = NULL; primary_container = compatible_container(dependent, primary); if (primary_container != NULL) { // Success, we found one pcmk__rsc_debug(primary, "Pairing %s with %s", dependent->id, primary_container->id); return dependent->priv->cmds->apply_coloc_score(dependent, primary_container, colocation, true); } if (colocation->score >= PCMK_SCORE_INFINITY) { // Failure, and it's fatal crm_notice("%s cannot run because there is no compatible " "instance of %s to colocate with", dependent->id, primary->id); pcmk__assign_resource(dependent, NULL, true, true); } else { // Failure, but we can ignore it pcmk__rsc_debug(primary, "%s cannot be colocated with any instance of %s", dependent->id, primary->id); } return 0; } pe__foreach_const_bundle_replica(primary, replica_apply_coloc_score, &coloc_data); if (colocation->score >= PCMK_SCORE_INFINITY) { pcmk__colocation_intersect_nodes(dependent, primary, colocation, coloc_data.container_hosts, false); } g_list_free(coloc_data.container_hosts); return coloc_data.priority_delta; } // Bundle implementation of pcmk__assignment_methods_t:with_this_colocations() void pcmk__with_bundle_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list) { const pcmk_resource_t *bundled_rsc = NULL; CRM_ASSERT(pcmk__is_bundle(rsc) && (orig_rsc != NULL) && (list != NULL)); // The bundle itself and its containers always get its colocations if ((orig_rsc == rsc) || pcmk_is_set(orig_rsc->flags, pcmk__rsc_replica_container)) { pcmk__add_with_this_list(list, rsc->priv->with_this_colocations, orig_rsc); return; } /* The bundled resource gets the colocations if it's promotable and we've * begun choosing roles */ bundled_rsc = pe__bundled_resource(rsc); if ((bundled_rsc == NULL) || !pcmk_is_set(bundled_rsc->flags, pcmk__rsc_promotable) || (pe__const_top_resource(orig_rsc, false) != bundled_rsc)) { return; } if (orig_rsc == bundled_rsc) { if (pe__clone_flag_is_set(orig_rsc, pcmk__clone_promotion_constrained)) { /* orig_rsc is the clone and we're setting roles (or have already * done so) */ pcmk__add_with_this_list(list, rsc->priv->with_this_colocations, orig_rsc); } } else if (!pcmk_is_set(orig_rsc->flags, pcmk__rsc_unassigned)) { /* orig_rsc is an instance and is already assigned. If something * requests colocations for orig_rsc now, it's for setting roles. */ pcmk__add_with_this_list(list, rsc->priv->with_this_colocations, orig_rsc); } } // Bundle implementation of pcmk__assignment_methods_t:this_with_colocations() void pcmk__bundle_with_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list) { const pcmk_resource_t *bundled_rsc = NULL; CRM_ASSERT(pcmk__is_bundle(rsc) && (orig_rsc != NULL) && (list != NULL)); // The bundle itself and its containers always get its colocations if ((orig_rsc == rsc) || pcmk_is_set(orig_rsc->flags, pcmk__rsc_replica_container)) { pcmk__add_this_with_list(list, rsc->priv->this_with_colocations, orig_rsc); return; } /* The bundled resource gets the colocations if it's promotable and we've * begun choosing roles */ bundled_rsc = pe__bundled_resource(rsc); if ((bundled_rsc == NULL) || !pcmk_is_set(bundled_rsc->flags, pcmk__rsc_promotable) || (pe__const_top_resource(orig_rsc, false) != bundled_rsc)) { return; } if (orig_rsc == bundled_rsc) { if (pe__clone_flag_is_set(orig_rsc, pcmk__clone_promotion_constrained)) { /* orig_rsc is the clone and we're setting roles (or have already * done so) */ pcmk__add_this_with_list(list, rsc->priv->this_with_colocations, orig_rsc); } } else if (!pcmk_is_set(orig_rsc->flags, pcmk__rsc_unassigned)) { /* orig_rsc is an instance and is already assigned. If something * requests colocations for orig_rsc now, it's for setting roles. */ pcmk__add_this_with_list(list, rsc->priv->this_with_colocations, orig_rsc); } } /*! * \internal * \brief Return action flags for a given bundle resource action * * \param[in,out] action Bundle resource action to get flags for * \param[in] node If not NULL, limit effects to this node * * \return Flags appropriate to \p action on \p node */ uint32_t pcmk__bundle_action_flags(pcmk_action_t *action, const pcmk_node_t *node) { GList *containers = NULL; uint32_t flags = 0; pcmk_resource_t *bundled_resource = NULL; CRM_ASSERT((action != NULL) && pcmk__is_bundle(action->rsc)); bundled_resource = pe__bundled_resource(action->rsc); if (bundled_resource != NULL) { GList *children = bundled_resource->priv->children; // Clone actions are done on the bundled clone resource, not container switch (get_complex_task(bundled_resource, action->task)) { case pcmk__action_unspecified: case pcmk__action_notify: case pcmk__action_notified: case pcmk__action_promote: case pcmk__action_promoted: case pcmk__action_demote: case pcmk__action_demoted: return pcmk__collective_action_flags(action, children, node); default: break; } } containers = pe__bundle_containers(action->rsc); flags = pcmk__collective_action_flags(action, containers, node); g_list_free(containers); return flags; } /*! * \internal * \brief Apply a location constraint to a bundle replica * * \param[in,out] replica Replica to apply constraint to * \param[in,out] user_data Location constraint to apply * * \return true (to indicate that any further replicas should be processed) */ static bool apply_location_to_replica(pcmk__bundle_replica_t *replica, void *user_data) { pcmk__location_t *location = user_data; replica->container->priv->cmds->apply_location(replica->container, location); if (replica->ip != NULL) { replica->ip->priv->cmds->apply_location(replica->ip, location); } return true; } /*! * \internal * \brief Apply a location constraint to a bundle resource's allowed node scores * * \param[in,out] rsc Bundle resource to apply constraint to * \param[in,out] location Location constraint to apply */ void pcmk__bundle_apply_location(pcmk_resource_t *rsc, pcmk__location_t *location) { pcmk_resource_t *bundled_resource = NULL; CRM_ASSERT((location != NULL) && pcmk__is_bundle(rsc)); pcmk__apply_location(rsc, location); pe__foreach_bundle_replica(rsc, apply_location_to_replica, location); bundled_resource = pe__bundled_resource(rsc); if ((bundled_resource != NULL) && ((location->role_filter == pcmk_role_unpromoted) || (location->role_filter == pcmk_role_promoted))) { bundled_resource->priv->cmds->apply_location(bundled_resource, location); bundled_resource->priv->location_constraints = g_list_prepend(bundled_resource->priv->location_constraints, location); } } #define XPATH_REMOTE "//nvpair[@name='" PCMK_REMOTE_RA_ADDR "']" /*! * \internal * \brief Add a bundle replica's actions to transition graph * * \param[in,out] replica Replica to add to graph * \param[in] user_data Bundle that replica belongs to (for logging only) * * \return true (to indicate that any further replicas should be processed) */ static bool add_replica_actions_to_graph(pcmk__bundle_replica_t *replica, void *user_data) { if ((replica->remote != NULL) && pe__bundle_needs_remote_name(replica->remote)) { /* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that - * run pacemaker-remoted inside, without needing a separate IP for + * run the remote executor inside, without needing a separate IP for * the container. This is done by configuring the inner remote's * connection host as the magic string "#uname", then * replacing it with the underlying host when needed. */ xmlNode *nvpair = get_xpath_object(XPATH_REMOTE, replica->remote->priv->xml, LOG_ERR); const char *calculated_addr = NULL; // Replace the value in replica->remote->xml (if appropriate) calculated_addr = pe__add_bundle_remote_name(replica->remote, nvpair, PCMK_XA_VALUE); if (calculated_addr != NULL) { /* Since this is for the bundle as a resource, and not any * particular action, replace the value in the default * parameters (not evaluated for node). create_graph_action() * will grab it from there to replace it in node-evaluated * parameters. */ GHashTable *params = NULL; params = pe_rsc_params(replica->remote, NULL, replica->remote->priv->scheduler); pcmk__insert_dup(params, PCMK_REMOTE_RA_ADDR, calculated_addr); } else { pcmk_resource_t *bundle = user_data; /* The only way to get here is if the remote connection is * neither currently running nor scheduled to run. That means we * won't be doing any operations that require addr (only start * requires it; we additionally use it to compare digests when * unpacking status, promote, and migrate_from history, but * that's already happened by this point). */ pcmk__rsc_info(bundle, "Unable to determine address for bundle %s " "remote connection", bundle->id); } } if (replica->ip != NULL) { replica->ip->priv->cmds->add_actions_to_graph(replica->ip); } replica->container->priv->cmds->add_actions_to_graph(replica->container); if (replica->remote != NULL) { replica->remote->priv->cmds->add_actions_to_graph(replica->remote); } return true; } /*! * \internal * \brief Add a bundle resource's actions to the transition graph * * \param[in,out] rsc Bundle resource whose actions should be added */ void pcmk__bundle_add_actions_to_graph(pcmk_resource_t *rsc) { pcmk_resource_t *bundled_resource = NULL; CRM_ASSERT(pcmk__is_bundle(rsc)); bundled_resource = pe__bundled_resource(rsc); if (bundled_resource != NULL) { bundled_resource->priv->cmds->add_actions_to_graph(bundled_resource); } pe__foreach_bundle_replica(rsc, add_replica_actions_to_graph, rsc); } struct probe_data { pcmk_resource_t *bundle; // Bundle being probed pcmk_node_t *node; // Node to create probes on bool any_created; // Whether any probes have been created }; /*! * \internal * \brief Order a bundle replica's start after another replica's probe * * \param[in,out] replica Replica to order start for * \param[in,out] user_data Replica with probe to order after * * \return true (to indicate that any further replicas should be processed) */ static bool order_replica_start_after(pcmk__bundle_replica_t *replica, void *user_data) { pcmk__bundle_replica_t *probed_replica = user_data; if ((replica == probed_replica) || (replica->container == NULL)) { return true; } pcmk__new_ordering(probed_replica->container, pcmk__op_key(probed_replica->container->id, PCMK_ACTION_MONITOR, 0), NULL, replica->container, pcmk__op_key(replica->container->id, PCMK_ACTION_START, 0), NULL, pcmk__ar_ordered|pcmk__ar_if_on_same_node, replica->container->priv->scheduler); return true; } /*! * \internal * \brief Create probes for a bundle replica's resources * * \param[in,out] replica Replica to create probes for * \param[in,out] user_data struct probe_data * * \return true (to indicate that any further replicas should be processed) */ static bool create_replica_probes(pcmk__bundle_replica_t *replica, void *user_data) { struct probe_data *probe_data = user_data; pcmk_resource_t *bundle = probe_data->bundle; if ((replica->ip != NULL) && replica->ip->priv->cmds->create_probe(replica->ip, probe_data->node)) { probe_data->any_created = true; } if ((replica->child != NULL) && pcmk__same_node(probe_data->node, replica->node) && replica->child->priv->cmds->create_probe(replica->child, probe_data->node)) { probe_data->any_created = true; } if (replica->container->priv->cmds->create_probe(replica->container, probe_data->node)) { probe_data->any_created = true; /* If we're limited to one replica per host (due to * the lack of an IP range probably), then we don't * want any of our peer containers starting until * we've established that no other copies are already * running. * * Partly this is to ensure that the maximum replicas per host is * observed, but also to ensure that the containers * don't fail to start because the necessary port * mappings (which won't include an IP for uniqueness) * are already taken */ if (bundle->priv->fns->max_per_node(bundle) == 1) { pe__foreach_bundle_replica(bundle, order_replica_start_after, replica); } } if ((replica->remote != NULL) && replica->remote->priv->cmds->create_probe(replica->remote, probe_data->node)) { /* Do not probe the remote resource until we know where the container is * running. This is required for REMOTE_CONTAINER_HACK to correctly * probe remote resources. */ char *probe_uuid = pcmk__op_key(replica->remote->id, PCMK_ACTION_MONITOR, 0); pcmk_action_t *probe = NULL; probe = find_first_action(replica->remote->priv->actions, probe_uuid, NULL, probe_data->node); free(probe_uuid); if (probe != NULL) { probe_data->any_created = true; pcmk__rsc_trace(bundle, "Ordering %s probe on %s", replica->remote->id, pcmk__node_name(probe_data->node)); pcmk__new_ordering(replica->container, pcmk__op_key(replica->container->id, PCMK_ACTION_START, 0), NULL, replica->remote, NULL, probe, pcmk__ar_nested_remote_probe, bundle->priv->scheduler); } } return true; } /*! * \internal * * \brief Schedule any probes needed for a bundle resource on a node * * \param[in,out] rsc Bundle resource to create probes for * \param[in,out] node Node to create probe on * * \return true if any probe was created, otherwise false */ bool pcmk__bundle_create_probe(pcmk_resource_t *rsc, pcmk_node_t *node) { struct probe_data probe_data = { rsc, node, false }; CRM_ASSERT(pcmk__is_bundle(rsc)); pe__foreach_bundle_replica(rsc, create_replica_probes, &probe_data); return probe_data.any_created; } /*! * \internal * \brief Output actions for one bundle replica * * \param[in,out] replica Replica to output actions for * \param[in] user_data Unused * * \return true (to indicate that any further replicas should be processed) */ static bool output_replica_actions(pcmk__bundle_replica_t *replica, void *user_data) { if (replica->ip != NULL) { replica->ip->priv->cmds->output_actions(replica->ip); } replica->container->priv->cmds->output_actions(replica->container); if (replica->remote != NULL) { replica->remote->priv->cmds->output_actions(replica->remote); } if (replica->child != NULL) { replica->child->priv->cmds->output_actions(replica->child); } return true; } /*! * \internal * \brief Output a summary of scheduled actions for a bundle resource * * \param[in,out] rsc Bundle resource to output actions for */ void pcmk__output_bundle_actions(pcmk_resource_t *rsc) { CRM_ASSERT(pcmk__is_bundle(rsc)); pe__foreach_bundle_replica(rsc, output_replica_actions, NULL); } // Bundle implementation of pcmk__assignment_methods_t:add_utilization() void pcmk__bundle_add_utilization(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization) { pcmk_resource_t *container = NULL; CRM_ASSERT(pcmk__is_bundle(rsc)); if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) { return; } /* All bundle replicas are identical, so using the utilization of the first * is sufficient for any. Only the implicit container resource can have * utilization values. */ container = pe__first_container(rsc); if (container != NULL) { container->priv->cmds->add_utilization(container, orig_rsc, all_rscs, utilization); } } // Bundle implementation of pcmk__assignment_methods_t:shutdown_lock() void pcmk__bundle_shutdown_lock(pcmk_resource_t *rsc) { CRM_ASSERT(pcmk__is_bundle(rsc)); // Bundles currently don't support shutdown locks } diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c index 985a5a652e..d61201f214 100644 --- a/lib/pengine/bundle.c +++ b/lib/pengine/bundle.c @@ -1,2146 +1,2146 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include enum pe__bundle_mount_flags { pe__bundle_mount_none = 0x00, // mount instance-specific subdirectory rather than source directly pe__bundle_mount_subdir = 0x01 }; typedef struct { char *source; char *target; char *options; uint32_t flags; // bitmask of pe__bundle_mount_flags } pe__bundle_mount_t; typedef struct { char *source; char *target; } pe__bundle_port_t; enum pe__container_agent { PE__CONTAINER_AGENT_UNKNOWN, PE__CONTAINER_AGENT_DOCKER, PE__CONTAINER_AGENT_RKT, PE__CONTAINER_AGENT_PODMAN, }; #define PE__CONTAINER_AGENT_UNKNOWN_S "unknown" #define PE__CONTAINER_AGENT_DOCKER_S "docker" #define PE__CONTAINER_AGENT_RKT_S "rkt" #define PE__CONTAINER_AGENT_PODMAN_S "podman" typedef struct pe__bundle_variant_data_s { int promoted_max; int nreplicas; int nreplicas_per_host; char *prefix; char *image; const char *ip_last; char *host_network; char *host_netmask; char *control_port; char *container_network; char *ip_range_start; gboolean add_host; gchar *container_host_options; char *container_command; char *launcher_options; const char *attribute_target; pcmk_resource_t *child; GList *replicas; // pcmk__bundle_replica_t * GList *ports; // pe__bundle_port_t * GList *mounts; // pe__bundle_mount_t * enum pe__container_agent agent_type; } pe__bundle_variant_data_t; #define get_bundle_variant_data(data, rsc) do { \ CRM_ASSERT(pcmk__is_bundle(rsc)); \ data = rsc->priv->variant_opaque; \ } while (0) /*! * \internal * \brief Get maximum number of bundle replicas allowed to run * * \param[in] rsc Bundle or bundled resource to check * * \return Maximum replicas for bundle corresponding to \p rsc */ int pe__bundle_max(const pcmk_resource_t *rsc) { const pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, pe__const_top_resource(rsc, true)); return bundle_data->nreplicas; } /*! * \internal * \brief Get the resource inside a bundle * * \param[in] bundle Bundle to check * * \return Resource inside \p bundle if any, otherwise NULL */ pcmk_resource_t * pe__bundled_resource(const pcmk_resource_t *rsc) { const pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, pe__const_top_resource(rsc, true)); return bundle_data->child; } /*! * \internal * \brief Get containerized resource corresponding to a given bundle container * * \param[in] instance Collective instance that might be a bundle container * * \return Bundled resource instance inside \p instance if it is a bundle * container instance, otherwise NULL */ const pcmk_resource_t * pe__get_rsc_in_container(const pcmk_resource_t *instance) { const pe__bundle_variant_data_t *data = NULL; const pcmk_resource_t *top = pe__const_top_resource(instance, true); if (!pcmk__is_bundle(top)) { return NULL; } get_bundle_variant_data(data, top); for (const GList *iter = data->replicas; iter != NULL; iter = iter->next) { const pcmk__bundle_replica_t *replica = iter->data; if (instance == replica->container) { return replica->child; } } return NULL; } /*! * \internal * \brief Check whether a given node is created by a bundle * * \param[in] bundle Bundle resource to check * \param[in] node Node to check * * \return true if \p node is an instance of \p bundle, otherwise false */ bool pe__node_is_bundle_instance(const pcmk_resource_t *bundle, const pcmk_node_t *node) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, bundle); for (GList *iter = bundle_data->replicas; iter != NULL; iter = iter->next) { pcmk__bundle_replica_t *replica = iter->data; if (pcmk__same_node(node, replica->node)) { return true; } } return false; } /*! * \internal * \brief Get the container of a bundle's first replica * * \param[in] bundle Bundle resource to get container for * * \return Container resource from first replica of \p bundle if any, * otherwise NULL */ pcmk_resource_t * pe__first_container(const pcmk_resource_t *bundle) { const pe__bundle_variant_data_t *bundle_data = NULL; const pcmk__bundle_replica_t *replica = NULL; get_bundle_variant_data(bundle_data, bundle); if (bundle_data->replicas == NULL) { return NULL; } replica = bundle_data->replicas->data; return replica->container; } /*! * \internal * \brief Iterate over bundle replicas * * \param[in,out] bundle Bundle to iterate over * \param[in] fn Function to call for each replica (its return value * indicates whether to continue iterating) * \param[in,out] user_data Pointer to pass to \p fn */ void pe__foreach_bundle_replica(pcmk_resource_t *bundle, bool (*fn)(pcmk__bundle_replica_t *, void *), void *user_data) { const pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, bundle); for (GList *iter = bundle_data->replicas; iter != NULL; iter = iter->next) { if (!fn((pcmk__bundle_replica_t *) iter->data, user_data)) { break; } } } /*! * \internal * \brief Iterate over const bundle replicas * * \param[in] bundle Bundle to iterate over * \param[in] fn Function to call for each replica (its return value * indicates whether to continue iterating) * \param[in,out] user_data Pointer to pass to \p fn */ void pe__foreach_const_bundle_replica(const pcmk_resource_t *bundle, bool (*fn)(const pcmk__bundle_replica_t *, void *), void *user_data) { const pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, bundle); for (const GList *iter = bundle_data->replicas; iter != NULL; iter = iter->next) { if (!fn((const pcmk__bundle_replica_t *) iter->data, user_data)) { break; } } } static char * next_ip(const char *last_ip) { unsigned int oct1 = 0; unsigned int oct2 = 0; unsigned int oct3 = 0; unsigned int oct4 = 0; int rc = sscanf(last_ip, "%u.%u.%u.%u", &oct1, &oct2, &oct3, &oct4); if (rc != 4) { /*@ TODO check for IPv6 */ return NULL; } else if (oct3 > 253) { return NULL; } else if (oct4 > 253) { ++oct3; oct4 = 1; } else { ++oct4; } return crm_strdup_printf("%u.%u.%u.%u", oct1, oct2, oct3, oct4); } static void allocate_ip(pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica, GString *buffer) { if(data->ip_range_start == NULL) { return; } else if(data->ip_last) { replica->ipaddr = next_ip(data->ip_last); } else { replica->ipaddr = strdup(data->ip_range_start); } data->ip_last = replica->ipaddr; switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: case PE__CONTAINER_AGENT_PODMAN: if (data->add_host) { g_string_append_printf(buffer, " --add-host=%s-%d:%s", data->prefix, replica->offset, replica->ipaddr); } else { g_string_append_printf(buffer, " --hosts-entry=%s=%s-%d", replica->ipaddr, data->prefix, replica->offset); } break; case PE__CONTAINER_AGENT_RKT: g_string_append_printf(buffer, " --hosts-entry=%s=%s-%d", replica->ipaddr, data->prefix, replica->offset); break; default: // PE__CONTAINER_AGENT_UNKNOWN break; } } static xmlNode * create_resource(const char *name, const char *provider, const char *kind) { xmlNode *rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE); crm_xml_add(rsc, PCMK_XA_ID, name); crm_xml_add(rsc, PCMK_XA_CLASS, PCMK_RESOURCE_CLASS_OCF); crm_xml_add(rsc, PCMK_XA_PROVIDER, provider); crm_xml_add(rsc, PCMK_XA_TYPE, kind); return rsc; } /*! * \internal * \brief Check whether cluster can manage resource inside container * * \param[in,out] data Container variant data * * \return TRUE if networking configuration is acceptable, FALSE otherwise * * \note The resource is manageable if an IP range or control port has been * specified. If a control port is used without an IP range, replicas per * host must be 1. */ static bool valid_network(pe__bundle_variant_data_t *data) { if(data->ip_range_start) { return TRUE; } if(data->control_port) { if(data->nreplicas_per_host > 1) { pcmk__config_err("Specifying the '" PCMK_XA_CONTROL_PORT "' for %s " "requires '" PCMK_XA_REPLICAS_PER_HOST "=1'", data->prefix); data->nreplicas_per_host = 1; // @TODO to be sure: // pcmk__clear_rsc_flags(rsc, pcmk__rsc_unique); } return TRUE; } return FALSE; } static int create_ip_resource(pcmk_resource_t *parent, pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica) { if(data->ip_range_start) { char *id = NULL; xmlNode *xml_ip = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-ip-%s", data->prefix, replica->ipaddr); pcmk__xml_sanitize_id(id); xml_ip = create_resource(id, "heartbeat", "IPaddr2"); free(id); xml_obj = pcmk__xe_create(xml_ip, PCMK_XE_INSTANCE_ATTRIBUTES); pcmk__xe_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset); crm_create_nvpair_xml(xml_obj, NULL, "ip", replica->ipaddr); if(data->host_network) { crm_create_nvpair_xml(xml_obj, NULL, "nic", data->host_network); } if(data->host_netmask) { crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", data->host_netmask); } else { crm_create_nvpair_xml(xml_obj, NULL, "cidr_netmask", "32"); } xml_obj = pcmk__xe_create(xml_ip, PCMK_XE_OPERATIONS); crm_create_op_xml(xml_obj, pcmk__xe_id(xml_ip), PCMK_ACTION_MONITOR, "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? if (pe__unpack_resource(xml_ip, &replica->ip, parent, parent->priv->scheduler) != pcmk_rc_ok) { return pcmk_rc_unpack_error; } parent->priv->children = g_list_append(parent->priv->children, replica->ip); } return pcmk_rc_ok; } static const char* container_agent_str(enum pe__container_agent t) { switch (t) { case PE__CONTAINER_AGENT_DOCKER: return PE__CONTAINER_AGENT_DOCKER_S; case PE__CONTAINER_AGENT_RKT: return PE__CONTAINER_AGENT_RKT_S; case PE__CONTAINER_AGENT_PODMAN: return PE__CONTAINER_AGENT_PODMAN_S; default: // PE__CONTAINER_AGENT_UNKNOWN break; } return PE__CONTAINER_AGENT_UNKNOWN_S; } static int create_container_resource(pcmk_resource_t *parent, const pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica) { char *id = NULL; xmlNode *xml_container = NULL; xmlNode *xml_obj = NULL; // Agent-specific const char *hostname_opt = NULL; const char *env_opt = NULL; const char *agent_str = NULL; int volid = 0; // rkt-only GString *buffer = NULL; GString *dbuffer = NULL; // Where syntax differences are drop-in replacements, set them now switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: case PE__CONTAINER_AGENT_PODMAN: hostname_opt = "-h "; env_opt = "-e "; break; case PE__CONTAINER_AGENT_RKT: hostname_opt = "--hostname="; env_opt = "--environment="; break; default: // PE__CONTAINER_AGENT_UNKNOWN return pcmk_rc_unpack_error; } agent_str = container_agent_str(data->agent_type); buffer = g_string_sized_new(4096); id = crm_strdup_printf("%s-%s-%d", data->prefix, agent_str, replica->offset); pcmk__xml_sanitize_id(id); xml_container = create_resource(id, "heartbeat", agent_str); free(id); xml_obj = pcmk__xe_create(xml_container, PCMK_XE_INSTANCE_ATTRIBUTES); pcmk__xe_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset); crm_create_nvpair_xml(xml_obj, NULL, "image", data->image); crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", PCMK_VALUE_TRUE); crm_create_nvpair_xml(xml_obj, NULL, "force_kill", PCMK_VALUE_FALSE); crm_create_nvpair_xml(xml_obj, NULL, "reuse", PCMK_VALUE_FALSE); if (data->agent_type == PE__CONTAINER_AGENT_DOCKER) { g_string_append(buffer, " --restart=no"); } /* Set a container hostname only if we have an IP to map it to. The user can * set -h or --uts=host themselves if they want a nicer name for logs, but * this makes applications happy who need their hostname to match the IP * they bind to. */ if (data->ip_range_start != NULL) { g_string_append_printf(buffer, " %s%s-%d", hostname_opt, data->prefix, replica->offset); } pcmk__g_strcat(buffer, " ", env_opt, "PCMK_stderr=1", NULL); if (data->container_network != NULL) { pcmk__g_strcat(buffer, " --net=", data->container_network, NULL); } if (data->control_port != NULL) { pcmk__g_strcat(buffer, " ", env_opt, "PCMK_" PCMK__ENV_REMOTE_PORT "=", data->control_port, NULL); } else { g_string_append_printf(buffer, " %sPCMK_" PCMK__ENV_REMOTE_PORT "=%d", env_opt, DEFAULT_REMOTE_PORT); } for (GList *iter = data->mounts; iter != NULL; iter = iter->next) { pe__bundle_mount_t *mount = (pe__bundle_mount_t *) iter->data; char *source = NULL; if (pcmk_is_set(mount->flags, pe__bundle_mount_subdir)) { source = crm_strdup_printf("%s/%s-%d", mount->source, data->prefix, replica->offset); pcmk__add_separated_word(&dbuffer, 1024, source, ","); } switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: case PE__CONTAINER_AGENT_PODMAN: pcmk__g_strcat(buffer, " -v ", pcmk__s(source, mount->source), ":", mount->target, NULL); if (mount->options != NULL) { pcmk__g_strcat(buffer, ":", mount->options, NULL); } break; case PE__CONTAINER_AGENT_RKT: g_string_append_printf(buffer, " --volume vol%d,kind=host," "source=%s%s%s " "--mount volume=vol%d,target=%s", volid, pcmk__s(source, mount->source), (mount->options != NULL)? "," : "", pcmk__s(mount->options, ""), volid, mount->target); volid++; break; default: break; } free(source); } for (GList *iter = data->ports; iter != NULL; iter = iter->next) { pe__bundle_port_t *port = (pe__bundle_port_t *) iter->data; switch (data->agent_type) { case PE__CONTAINER_AGENT_DOCKER: case PE__CONTAINER_AGENT_PODMAN: if (replica->ipaddr != NULL) { pcmk__g_strcat(buffer, " -p ", replica->ipaddr, ":", port->source, ":", port->target, NULL); } else if (!pcmk__str_eq(data->container_network, PCMK_VALUE_HOST, pcmk__str_none)) { // No need to do port mapping if net == host pcmk__g_strcat(buffer, " -p ", port->source, ":", port->target, NULL); } break; case PE__CONTAINER_AGENT_RKT: if (replica->ipaddr != NULL) { pcmk__g_strcat(buffer, " --port=", port->target, ":", replica->ipaddr, ":", port->source, NULL); } else { pcmk__g_strcat(buffer, " --port=", port->target, ":", port->source, NULL); } break; default: break; } } /* @COMPAT: We should use pcmk__add_word() here, but we can't yet, because * it would cause restarts during rolling upgrades. * * In a previous version of the container resource creation logic, if * data->launcher_options is not NULL, we append * (" %s", data->launcher_options) even if data->launcher_options is an * empty string. Likewise for data->container_host_options. Using * * pcmk__add_word(buffer, 0, data->launcher_options) * * removes that extra trailing space, causing a resource definition change. */ if (data->launcher_options != NULL) { pcmk__g_strcat(buffer, " ", data->launcher_options, NULL); } if (data->container_host_options != NULL) { pcmk__g_strcat(buffer, " ", data->container_host_options, NULL); } crm_create_nvpair_xml(xml_obj, NULL, "run_opts", (const char *) buffer->str); g_string_free(buffer, TRUE); crm_create_nvpair_xml(xml_obj, NULL, "mount_points", (dbuffer != NULL)? (const char *) dbuffer->str : ""); if (dbuffer != NULL) { g_string_free(dbuffer, TRUE); } if (replica->child != NULL) { if (data->container_command != NULL) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } else { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", - SBIN_DIR "/pacemaker-remoted"); + SBIN_DIR "/" PCMK__SERVER_REMOTED); } /* TODO: Allow users to specify their own? * * We just want to know if the container is alive; we'll monitor the * child independently. */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); #if 0 /* @TODO Consider supporting the use case where we can start and stop * resources, but not proxy local commands (such as setting node * attributes), by running the local executor in stand-alone mode. * However, this would probably be better done via ACLs as with other * Pacemaker Remote nodes. */ } else if ((child != NULL) && data->untrusted) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", - CRM_DAEMON_DIR "/pacemaker-execd"); + CRM_DAEMON_DIR "/" PCMK__SERVER_EXECD); crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", CRM_DAEMON_DIR "/pacemaker/cts-exec-helper -c poke"); #endif } else { if (data->container_command != NULL) { crm_create_nvpair_xml(xml_obj, NULL, "run_cmd", data->container_command); } /* TODO: Allow users to specify their own? * * We don't know what's in the container, so we just want to know if it * is alive. */ crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true"); } xml_obj = pcmk__xe_create(xml_container, PCMK_XE_OPERATIONS); crm_create_op_xml(xml_obj, pcmk__xe_id(xml_container), PCMK_ACTION_MONITOR, "60s", NULL); // TODO: Other ops? Timeouts and intervals from underlying resource? if (pe__unpack_resource(xml_container, &replica->container, parent, parent->priv->scheduler) != pcmk_rc_ok) { return pcmk_rc_unpack_error; } pcmk__set_rsc_flags(replica->container, pcmk__rsc_replica_container); parent->priv->children = g_list_append(parent->priv->children, replica->container); return pcmk_rc_ok; } /*! * \brief Ban a node from a resource's (and its children's) allowed nodes list * * \param[in,out] rsc Resource to modify * \param[in] uname Name of node to ban */ static void disallow_node(pcmk_resource_t *rsc, const char *uname) { gpointer match = g_hash_table_lookup(rsc->priv->allowed_nodes, uname); if (match) { ((pcmk_node_t *) match)->assign->score = -PCMK_SCORE_INFINITY; ((pcmk_node_t *) match)->assign->probe_mode = pcmk__probe_never; } g_list_foreach(rsc->priv->children, (GFunc) disallow_node, (gpointer) uname); } static int create_remote_resource(pcmk_resource_t *parent, pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica) { if (replica->child && valid_network(data)) { GHashTableIter gIter; pcmk_node_t *node = NULL; xmlNode *xml_remote = NULL; char *id = crm_strdup_printf("%s-%d", data->prefix, replica->offset); char *port_s = NULL; const char *uname = NULL; const char *connect_name = NULL; pcmk_scheduler_t *scheduler = parent->priv->scheduler; if (pe_find_resource(scheduler->priv->resources, id) != NULL) { free(id); // The biggest hammer we have id = crm_strdup_printf("pcmk-internal-%s-remote-%d", replica->child->id, replica->offset); //@TODO return error instead of asserting? CRM_ASSERT(pe_find_resource(scheduler->priv->resources, id) == NULL); } /* REMOTE_CONTAINER_HACK: Using "#uname" as the server name when the * connection does not have its own IP is a magic string that we use to * support nested remotes (i.e. a bundle running on a remote node). */ connect_name = (replica->ipaddr? replica->ipaddr : "#uname"); if (data->control_port == NULL) { port_s = pcmk__itoa(DEFAULT_REMOTE_PORT); } /* This sets replica->container as replica->remote's container, which is * similar to what happens with guest nodes. This is how the scheduler * knows that the bundle node is fenced by recovering the container, and * that remote should be ordered relative to the container. */ xml_remote = pe_create_remote_xml(NULL, id, replica->container->id, NULL, NULL, NULL, connect_name, (data->control_port? data->control_port : port_s)); free(port_s); /* Abandon our created ID, and pull the copy from the XML, because we * need something that will get freed during scheduler data cleanup to * use as the node ID and uname. */ free(id); id = NULL; uname = pcmk__xe_id(xml_remote); /* Ensure a node has been created for the guest (it may have already * been, if it has a permanent node attribute), and ensure its weight is * -INFINITY so no other resources can run on it. */ node = pcmk_find_node(scheduler, uname); if (node == NULL) { node = pe_create_node(uname, uname, PCMK_VALUE_REMOTE, PCMK_VALUE_MINUS_INFINITY, scheduler); } else { node->assign->score = -PCMK_SCORE_INFINITY; } node->assign->probe_mode = pcmk__probe_never; /* unpack_remote_nodes() ensures that each remote node and guest node * has a pcmk_node_t entry. Ideally, it would do the same for bundle * nodes. Unfortunately, a bundle has to be mostly unpacked before it's * obvious what nodes will be needed, so we do it just above. * * Worse, that means that the node may have been utilized while * unpacking other resources, without our weight correction. The most * likely place for this to happen is when pe__unpack_resource() calls * resource_location() to set a default score in symmetric clusters. * This adds a node *copy* to each resource's allowed nodes, and these * copies will have the wrong weight. * * As a hacky workaround, fix those copies here. * * @TODO Possible alternative: ensure bundles are unpacked before other * resources, so the weight is correct before any copies are made. */ g_list_foreach(scheduler->priv->resources, (GFunc) disallow_node, (gpointer) uname); replica->node = pe__copy_node(node); replica->node->assign->score = 500; replica->node->assign->probe_mode = pcmk__probe_exclusive; /* Ensure the node shows up as allowed and with the correct discovery set */ if (replica->child->priv->allowed_nodes != NULL) { g_hash_table_destroy(replica->child->priv->allowed_nodes); } replica->child->priv->allowed_nodes = pcmk__strkey_table(NULL, free); g_hash_table_insert(replica->child->priv->allowed_nodes, (gpointer) replica->node->priv->id, pe__copy_node(replica->node)); { const pcmk_resource_t *parent = replica->child->priv->parent; pcmk_node_t *copy = pe__copy_node(replica->node); copy->assign->score = -PCMK_SCORE_INFINITY; g_hash_table_insert(parent->priv->allowed_nodes, (gpointer) replica->node->priv->id, copy); } if (pe__unpack_resource(xml_remote, &replica->remote, parent, scheduler) != pcmk_rc_ok) { return pcmk_rc_unpack_error; } g_hash_table_iter_init(&gIter, replica->remote->priv->allowed_nodes); while (g_hash_table_iter_next(&gIter, NULL, (void **)&node)) { if (pcmk__is_pacemaker_remote_node(node)) { /* Remote resources can only run on 'normal' cluster node */ node->assign->score = -PCMK_SCORE_INFINITY; } } replica->node->priv->remote = replica->remote; // Ensure pcmk__is_guest_or_bundle_node() functions correctly replica->remote->priv->launcher = replica->container; /* A bundle's #kind is closer to "container" (guest node) than the * "remote" set by pe_create_node(). */ pcmk__insert_dup(replica->node->priv->attrs, CRM_ATTR_KIND, "container"); /* One effect of this is that unpack_launcher() will add * replica->remote to replica->container's launched resources, which * will make pe__resource_contains_guest_node() true for * replica->container. * * replica->child does NOT get added to replica->container's launched * resources. The only noticeable effect if it did would be for its * fail count to be taken into account when checking * replica->container's migration threshold. */ parent->priv->children = g_list_append(parent->priv->children, replica->remote); } return pcmk_rc_ok; } static int create_replica_resources(pcmk_resource_t *parent, pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica) { int rc = pcmk_rc_ok; rc = create_container_resource(parent, data, replica); if (rc != pcmk_rc_ok) { return rc; } rc = create_ip_resource(parent, data, replica); if (rc != pcmk_rc_ok) { return rc; } rc = create_remote_resource(parent, data, replica); if (rc != pcmk_rc_ok) { return rc; } if ((replica->child != NULL) && (replica->ipaddr != NULL)) { pcmk__insert_meta(replica->child->priv, "external-ip", replica->ipaddr); } if (replica->remote != NULL) { /* * Allow the remote connection resource to be allocated to a * different node than the one on which the container is active. * * This makes it possible to have Pacemaker Remote nodes running - * containers with pacemaker-remoted inside in order to start + * containers with the remote executor inside in order to start * services inside those containers. */ pcmk__set_rsc_flags(replica->remote, pcmk__rsc_remote_nesting_allowed); } return rc; } static void mount_add(pe__bundle_variant_data_t *bundle_data, const char *source, const char *target, const char *options, uint32_t flags) { pe__bundle_mount_t *mount = pcmk__assert_alloc(1, sizeof(pe__bundle_mount_t)); mount->source = pcmk__str_copy(source); mount->target = pcmk__str_copy(target); mount->options = pcmk__str_copy(options); mount->flags = flags; bundle_data->mounts = g_list_append(bundle_data->mounts, mount); } static void mount_free(pe__bundle_mount_t *mount) { free(mount->source); free(mount->target); free(mount->options); free(mount); } static void port_free(pe__bundle_port_t *port) { free(port->source); free(port->target); free(port); } static pcmk__bundle_replica_t * replica_for_remote(pcmk_resource_t *remote) { pcmk_resource_t *top = remote; pe__bundle_variant_data_t *bundle_data = NULL; if (top == NULL) { return NULL; } while (top->priv->parent != NULL) { top = top->priv->parent; } get_bundle_variant_data(bundle_data, top); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; if (replica->remote == remote) { return replica; } } CRM_LOG_ASSERT(FALSE); return NULL; } bool pe__bundle_needs_remote_name(pcmk_resource_t *rsc) { const char *value; GHashTable *params = NULL; if (rsc == NULL) { return false; } // Use NULL node since pcmk__bundle_expand() uses that to set value params = pe_rsc_params(rsc, NULL, rsc->priv->scheduler); value = g_hash_table_lookup(params, PCMK_REMOTE_RA_ADDR); return pcmk__str_eq(value, "#uname", pcmk__str_casei) && xml_contains_remote_node(rsc->priv->xml); } const char * pe__add_bundle_remote_name(pcmk_resource_t *rsc, xmlNode *xml, const char *field) { // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside pcmk_node_t *node = NULL; pcmk__bundle_replica_t *replica = NULL; if (!pe__bundle_needs_remote_name(rsc)) { return NULL; } replica = replica_for_remote(rsc); if (replica == NULL) { return NULL; } node = replica->container->priv->assigned_node; if (node == NULL) { /* If it won't be running anywhere after the * transition, go with where it's running now. */ node = pcmk__current_node(replica->container); } if(node == NULL) { crm_trace("Cannot determine address for bundle connection %s", rsc->id); return NULL; } crm_trace("Setting address for bundle connection %s to bundle host %s", rsc->id, pcmk__node_name(node)); if(xml != NULL && field != NULL) { crm_xml_add(xml, field, node->priv->name); } return node->priv->name; } #define pe__set_bundle_mount_flags(mount_xml, flags, flags_to_set) do { \ flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Bundle mount", pcmk__xe_id(mount_xml), \ flags, (flags_to_set), #flags_to_set); \ } while (0) gboolean pe__unpack_bundle(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) { const char *value = NULL; xmlNode *xml_obj = NULL; const xmlNode *xml_child = NULL; xmlNode *xml_resource = NULL; pe__bundle_variant_data_t *bundle_data = NULL; bool need_log_mount = TRUE; CRM_ASSERT(rsc != NULL); pcmk__rsc_trace(rsc, "Processing resource %s...", rsc->id); bundle_data = pcmk__assert_alloc(1, sizeof(pe__bundle_variant_data_t)); rsc->priv->variant_opaque = bundle_data; bundle_data->prefix = strdup(rsc->id); xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_DOCKER, NULL, NULL); if (xml_obj != NULL) { bundle_data->agent_type = PE__CONTAINER_AGENT_DOCKER; } else { xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK__XE_RKT, NULL, NULL); if (xml_obj != NULL) { pcmk__warn_once(pcmk__wo_rkt, "Support for " PCMK__XE_RKT " in bundles " "(such as %s) is deprecated and will be " "removed in a future release", rsc->id); bundle_data->agent_type = PE__CONTAINER_AGENT_RKT; } else { xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_PODMAN, NULL, NULL); if (xml_obj != NULL) { bundle_data->agent_type = PE__CONTAINER_AGENT_PODMAN; } else { return FALSE; } } } // Use 0 for default, minimum, and invalid PCMK_XA_PROMOTED_MAX value = crm_element_value(xml_obj, PCMK_XA_PROMOTED_MAX); if (value == NULL) { // @COMPAT deprecated since 2.0.0 value = crm_element_value(xml_obj, PCMK__XA_PROMOTED_MAX_LEGACY); if (value != NULL) { pcmk__warn_once(pcmk__wo_bundle_master, "Support for the " PCMK__XA_PROMOTED_MAX_LEGACY " attribute (such as in %s) is deprecated and " "will be removed in a future release. Use " PCMK_XA_PROMOTED_MAX " instead.", rsc->id); } } pcmk__scan_min_int(value, &bundle_data->promoted_max, 0); /* Default replicas to PCMK_XA_PROMOTED_MAX if it was specified and 1 * otherwise */ value = crm_element_value(xml_obj, PCMK_XA_REPLICAS); if ((value == NULL) && (bundle_data->promoted_max > 0)) { bundle_data->nreplicas = bundle_data->promoted_max; } else { pcmk__scan_min_int(value, &bundle_data->nreplicas, 1); } /* * Communication between containers on the same host via the * floating IPs only works if the container is started with: * --userland-proxy=false --ip-masq=false */ value = crm_element_value(xml_obj, PCMK_XA_REPLICAS_PER_HOST); pcmk__scan_min_int(value, &bundle_data->nreplicas_per_host, 1); if (bundle_data->nreplicas_per_host == 1) { pcmk__clear_rsc_flags(rsc, pcmk__rsc_unique); } bundle_data->container_command = crm_element_value_copy(xml_obj, PCMK_XA_RUN_COMMAND); bundle_data->launcher_options = crm_element_value_copy(xml_obj, PCMK_XA_OPTIONS); bundle_data->image = crm_element_value_copy(xml_obj, PCMK_XA_IMAGE); bundle_data->container_network = crm_element_value_copy(xml_obj, PCMK_XA_NETWORK); xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_NETWORK, NULL, NULL); if(xml_obj) { bundle_data->ip_range_start = crm_element_value_copy(xml_obj, PCMK_XA_IP_RANGE_START); bundle_data->host_netmask = crm_element_value_copy(xml_obj, PCMK_XA_HOST_NETMASK); bundle_data->host_network = crm_element_value_copy(xml_obj, PCMK_XA_HOST_INTERFACE); bundle_data->control_port = crm_element_value_copy(xml_obj, PCMK_XA_CONTROL_PORT); value = crm_element_value(xml_obj, PCMK_XA_ADD_HOST); if (crm_str_to_boolean(value, &bundle_data->add_host) != 1) { bundle_data->add_host = TRUE; } for (xml_child = pcmk__xe_first_child(xml_obj, PCMK_XE_PORT_MAPPING, NULL, NULL); xml_child != NULL; xml_child = pcmk__xe_next_same(xml_child)) { pe__bundle_port_t *port = pcmk__assert_alloc(1, sizeof(pe__bundle_port_t)); port->source = crm_element_value_copy(xml_child, PCMK_XA_PORT); if(port->source == NULL) { port->source = crm_element_value_copy(xml_child, PCMK_XA_RANGE); } else { port->target = crm_element_value_copy(xml_child, PCMK_XA_INTERNAL_PORT); } if(port->source != NULL && strlen(port->source) > 0) { if(port->target == NULL) { port->target = strdup(port->source); } bundle_data->ports = g_list_append(bundle_data->ports, port); } else { pcmk__config_err("Invalid " PCMK_XA_PORT " directive %s", pcmk__xe_id(xml_child)); port_free(port); } } } xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_STORAGE, NULL, NULL); for (xml_child = pcmk__xe_first_child(xml_obj, PCMK_XE_STORAGE_MAPPING, NULL, NULL); xml_child != NULL; xml_child = pcmk__xe_next_same(xml_child)) { const char *source = crm_element_value(xml_child, PCMK_XA_SOURCE_DIR); const char *target = crm_element_value(xml_child, PCMK_XA_TARGET_DIR); const char *options = crm_element_value(xml_child, PCMK_XA_OPTIONS); int flags = pe__bundle_mount_none; if (source == NULL) { source = crm_element_value(xml_child, PCMK_XA_SOURCE_DIR_ROOT); pe__set_bundle_mount_flags(xml_child, flags, pe__bundle_mount_subdir); } if (source && target) { mount_add(bundle_data, source, target, options, flags); if (strcmp(target, "/var/log") == 0) { need_log_mount = FALSE; } } else { pcmk__config_err("Invalid mount directive %s", pcmk__xe_id(xml_child)); } } xml_obj = pcmk__xe_first_child(rsc->priv->xml, PCMK_XE_PRIMITIVE, NULL, NULL); if (xml_obj && valid_network(bundle_data)) { const char *suffix = NULL; char *value = NULL; xmlNode *xml_set = NULL; xml_resource = pcmk__xe_create(NULL, PCMK_XE_CLONE); /* @COMPAT We no longer use the tag, but we need to keep it as * part of the resource name, so that bundles don't restart in a rolling * upgrade. (It also avoids needing to change regression tests.) */ suffix = (const char *) xml_resource->name; if (bundle_data->promoted_max > 0) { suffix = "master"; } pcmk__xe_set_id(xml_resource, "%s-%s", bundle_data->prefix, suffix); xml_set = pcmk__xe_create(xml_resource, PCMK_XE_META_ATTRIBUTES); pcmk__xe_set_id(xml_set, "%s-%s-meta", bundle_data->prefix, xml_resource->name); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_ORDERED, PCMK_VALUE_TRUE); value = pcmk__itoa(bundle_data->nreplicas); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_CLONE_MAX, value); free(value); value = pcmk__itoa(bundle_data->nreplicas_per_host); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_CLONE_NODE_MAX, value); free(value); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_GLOBALLY_UNIQUE, pcmk__btoa(bundle_data->nreplicas_per_host > 1)); if (bundle_data->promoted_max) { crm_create_nvpair_xml(xml_set, NULL, PCMK_META_PROMOTABLE, PCMK_VALUE_TRUE); value = pcmk__itoa(bundle_data->promoted_max); crm_create_nvpair_xml(xml_set, NULL, PCMK_META_PROMOTED_MAX, value); free(value); } //crm_xml_add(xml_obj, PCMK_XA_ID, bundle_data->prefix); pcmk__xml_copy(xml_resource, xml_obj); } else if(xml_obj) { pcmk__config_err("Cannot control %s inside %s without either " PCMK_XA_IP_RANGE_START " or " PCMK_XA_CONTROL_PORT, rsc->id, pcmk__xe_id(xml_obj)); return FALSE; } if(xml_resource) { int lpc = 0; GList *childIter = NULL; pe__bundle_port_t *port = NULL; GString *buffer = NULL; if (pe__unpack_resource(xml_resource, &(bundle_data->child), rsc, scheduler) != pcmk_rc_ok) { return FALSE; } /* Currently, we always map the default authentication key location * into the same location inside the container. * * Ideally, we would respect the host's PCMK_authkey_location, but: * - it may be different on different nodes; * - the actual connection will do extra checking to make sure the key * file exists and is readable, that we can't do here on the DC * - tools such as crm_resource and crm_simulate may not have the same * environment variables as the cluster, causing operation digests to * differ * * Always using the default location inside the container is fine, * because we control the pacemaker_remote environment, and it avoids * having to pass another environment variable to the container. * * @TODO A better solution may be to have only pacemaker_remote use the * environment variable, and have the cluster nodes use a new * cluster option for key location. This would introduce the limitation * of the location being the same on all cluster nodes, but that's * reasonable. */ mount_add(bundle_data, DEFAULT_REMOTE_KEY_LOCATION, DEFAULT_REMOTE_KEY_LOCATION, NULL, pe__bundle_mount_none); if (need_log_mount) { mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL, pe__bundle_mount_subdir); } port = pcmk__assert_alloc(1, sizeof(pe__bundle_port_t)); if(bundle_data->control_port) { port->source = strdup(bundle_data->control_port); } else { /* If we wanted to respect PCMK_remote_port, we could use * crm_default_remote_port() here and elsewhere in this file instead * of DEFAULT_REMOTE_PORT. * * However, it gains nothing, since we control both the container * environment and the connection resource parameters, and the user * can use a different port if desired by setting * PCMK_XA_CONTROL_PORT. */ port->source = pcmk__itoa(DEFAULT_REMOTE_PORT); } port->target = strdup(port->source); bundle_data->ports = g_list_append(bundle_data->ports, port); buffer = g_string_sized_new(1024); for (childIter = bundle_data->child->priv->children; childIter != NULL; childIter = childIter->next) { pcmk__bundle_replica_t *replica = NULL; replica = pcmk__assert_alloc(1, sizeof(pcmk__bundle_replica_t)); replica->child = childIter->data; pcmk__set_rsc_flags(replica->child, pcmk__rsc_exclusive_probes); replica->offset = lpc++; // Ensure the child's notify gets set based on the underlying primitive's value if (pcmk_is_set(replica->child->flags, pcmk__rsc_notify)) { pcmk__set_rsc_flags(bundle_data->child, pcmk__rsc_notify); } allocate_ip(bundle_data, replica, buffer); bundle_data->replicas = g_list_append(bundle_data->replicas, replica); bundle_data->attribute_target = g_hash_table_lookup(replica->child->priv->meta, PCMK_META_CONTAINER_ATTRIBUTE_TARGET); } bundle_data->container_host_options = g_string_free(buffer, FALSE); if (bundle_data->attribute_target) { pcmk__insert_dup(rsc->priv->meta, PCMK_META_CONTAINER_ATTRIBUTE_TARGET, bundle_data->attribute_target); pcmk__insert_dup(bundle_data->child->priv->meta, PCMK_META_CONTAINER_ATTRIBUTE_TARGET, bundle_data->attribute_target); } } else { // Just a naked container, no pacemaker-remote GString *buffer = g_string_sized_new(1024); for (int lpc = 0; lpc < bundle_data->nreplicas; lpc++) { pcmk__bundle_replica_t *replica = NULL; replica = pcmk__assert_alloc(1, sizeof(pcmk__bundle_replica_t)); replica->offset = lpc; allocate_ip(bundle_data, replica, buffer); bundle_data->replicas = g_list_append(bundle_data->replicas, replica); } bundle_data->container_host_options = g_string_free(buffer, FALSE); } for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; if (create_replica_resources(rsc, bundle_data, replica) != pcmk_rc_ok) { pcmk__config_err("Failed unpacking resource %s", rsc->id); rsc->priv->fns->free(rsc); return FALSE; } /* Utilization needs special handling for bundles. It makes no sense for * the inner primitive to have utilization, because it is tied * one-to-one to the guest node created by the container resource -- and * there's no way to set capacities for that guest node anyway. * * What the user really wants is to configure utilization for the * container. However, the schema only allows utilization for * primitives, and the container resource is implicit anyway, so the * user can *only* configure utilization for the inner primitive. If * they do, move the primitive's utilization values to the container. * * @TODO This means that bundles without an inner primitive can't have * utilization. An alternative might be to allow utilization values in * the top-level bundle XML in the schema, and copy those to each * container. */ if (replica->child != NULL) { GHashTable *empty = replica->container->priv->utilization; replica->container->priv->utilization = replica->child->priv->utilization; replica->child->priv->utilization = empty; } } if (bundle_data->child) { rsc->priv->children = g_list_append(rsc->priv->children, bundle_data->child); } return TRUE; } static int replica_resource_active(pcmk_resource_t *rsc, gboolean all) { if (rsc) { gboolean child_active = rsc->priv->fns->active(rsc, all); if (child_active && !all) { return TRUE; } else if (!child_active && all) { return FALSE; } } return -1; } gboolean pe__bundle_active(pcmk_resource_t *rsc, gboolean all) { pe__bundle_variant_data_t *bundle_data = NULL; GList *iter = NULL; get_bundle_variant_data(bundle_data, rsc); for (iter = bundle_data->replicas; iter != NULL; iter = iter->next) { pcmk__bundle_replica_t *replica = iter->data; int rsc_active; rsc_active = replica_resource_active(replica->ip, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } rsc_active = replica_resource_active(replica->child, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } rsc_active = replica_resource_active(replica->container, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } rsc_active = replica_resource_active(replica->remote, all); if (rsc_active >= 0) { return (gboolean) rsc_active; } } /* If "all" is TRUE, we've already checked that no resources were inactive, * so return TRUE; if "all" is FALSE, we didn't find any active resources, * so return FALSE. */ return all; } /*! * \internal * \brief Find the bundle replica corresponding to a given node * * \param[in] bundle Top-level bundle resource * \param[in] node Node to search for * * \return Bundle replica if found, NULL otherwise */ pcmk_resource_t * pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_ASSERT(bundle && node); get_bundle_variant_data(bundle_data, bundle); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; CRM_ASSERT(replica && replica->node); if (pcmk__same_node(replica->node, node)) { return replica->child; } } return NULL; } PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *", "GList *") int pe__bundle_xml(pcmk__output_t *out, va_list args) { uint32_t show_opts = va_arg(args, uint32_t); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); pe__bundle_variant_data_t *bundle_data = NULL; int rc = pcmk_rc_no_output; gboolean printed_header = FALSE; gboolean print_everything = TRUE; const char *desc = NULL; CRM_ASSERT(rsc != NULL); get_bundle_variant_data(bundle_data, rsc); if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) { return rc; } print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; pcmk_resource_t *ip = replica->ip; pcmk_resource_t *child = replica->child; pcmk_resource_t *container = replica->container; pcmk_resource_t *remote = replica->remote; char *id = NULL; gboolean print_ip, print_child, print_ctnr, print_remote; CRM_ASSERT(replica); if (pcmk__rsc_filtered_by_node(container, only_node)) { continue; } print_ip = (ip != NULL) && !ip->priv->fns->is_filtered(ip, only_rsc, print_everything); print_child = (child != NULL) && !child->priv->fns->is_filtered(child, only_rsc, print_everything); print_ctnr = !container->priv->fns->is_filtered(container, only_rsc, print_everything); print_remote = (remote != NULL) && !remote->priv->fns->is_filtered(remote, only_rsc, print_everything); if (!print_everything && !print_ip && !print_child && !print_ctnr && !print_remote) { continue; } if (!printed_header) { const char *type = container_agent_str(bundle_data->agent_type); const char *unique = pcmk__flag_text(rsc->flags, pcmk__rsc_unique); const char *maintenance = pcmk__flag_text(rsc->flags, pcmk__rsc_maintenance); const char *managed = pcmk__flag_text(rsc->flags, pcmk__rsc_managed); const char *failed = pcmk__flag_text(rsc->flags, pcmk__rsc_failed); printed_header = TRUE; desc = pe__resource_description(rsc, show_opts); rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_BUNDLE, PCMK_XA_ID, rsc->id, PCMK_XA_TYPE, type, PCMK_XA_IMAGE, bundle_data->image, PCMK_XA_UNIQUE, unique, PCMK_XA_MAINTENANCE, maintenance, PCMK_XA_MANAGED, managed, PCMK_XA_FAILED, failed, PCMK_XA_DESCRIPTION, desc, NULL); CRM_ASSERT(rc == pcmk_rc_ok); } id = pcmk__itoa(replica->offset); rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_REPLICA, PCMK_XA_ID, id, NULL); free(id); CRM_ASSERT(rc == pcmk_rc_ok); if (print_ip) { out->message(out, (const char *) ip->priv->xml->name, show_opts, ip, only_node, only_rsc); } if (print_child) { out->message(out, (const char *) child->priv->xml->name, show_opts, child, only_node, only_rsc); } if (print_ctnr) { out->message(out, (const char *) container->priv->xml->name, show_opts, container, only_node, only_rsc); } if (print_remote) { out->message(out, (const char *) remote->priv->xml->name, show_opts, remote, only_node, only_rsc); } pcmk__output_xml_pop_parent(out); // replica } if (printed_header) { pcmk__output_xml_pop_parent(out); // bundle } return rc; } static void pe__bundle_replica_output_html(pcmk__output_t *out, pcmk__bundle_replica_t *replica, pcmk_node_t *node, uint32_t show_opts) { pcmk_resource_t *rsc = replica->child; int offset = 0; char buffer[LINE_MAX]; if(rsc == NULL) { rsc = replica->container; } if (replica->remote) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->remote)); } else { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->container)); } if (replica->ipaddr) { offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", replica->ipaddr); } pe__common_output_html(out, rsc, buffer, node, show_opts); } /*! * \internal * \brief Get a string describing a resource's unmanaged state or lack thereof * * \param[in] rsc Resource to describe * * \return A string indicating that a resource is in maintenance mode or * otherwise unmanaged, or an empty string otherwise */ static const char * get_unmanaged_str(const pcmk_resource_t *rsc) { if (pcmk_is_set(rsc->flags, pcmk__rsc_maintenance)) { return " (maintenance)"; } if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { return " (unmanaged)"; } return ""; } PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *", "GList *") int pe__bundle_html(pcmk__output_t *out, va_list args) { uint32_t show_opts = va_arg(args, uint32_t); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); const char *desc = NULL; pe__bundle_variant_data_t *bundle_data = NULL; int rc = pcmk_rc_no_output; gboolean print_everything = TRUE; CRM_ASSERT(rsc != NULL); get_bundle_variant_data(bundle_data, rsc); desc = pe__resource_description(rsc, show_opts); if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) { return rc; } print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; pcmk_resource_t *ip = replica->ip; pcmk_resource_t *child = replica->child; pcmk_resource_t *container = replica->container; pcmk_resource_t *remote = replica->remote; gboolean print_ip, print_child, print_ctnr, print_remote; CRM_ASSERT(replica); if (pcmk__rsc_filtered_by_node(container, only_node)) { continue; } print_ip = (ip != NULL) && !ip->priv->fns->is_filtered(ip, only_rsc, print_everything); print_child = (child != NULL) && !child->priv->fns->is_filtered(child, only_rsc, print_everything); print_ctnr = !container->priv->fns->is_filtered(container, only_rsc, print_everything); print_remote = (remote != NULL) && !remote->priv->fns->is_filtered(remote, only_rsc, print_everything); if (pcmk_is_set(show_opts, pcmk_show_implicit_rscs) || (print_everything == FALSE && (print_ip || print_child || print_ctnr || print_remote))) { /* The text output messages used below require pe_print_implicit to * be set to do anything. */ uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "", desc ? " (" : "", desc ? desc : "", desc ? ")" : "", get_unmanaged_str(rsc)); if (pcmk__list_of_multiple(bundle_data->replicas)) { out->begin_list(out, NULL, NULL, "Replica[%d]", replica->offset); } if (print_ip) { out->message(out, (const char *) ip->priv->xml->name, new_show_opts, ip, only_node, only_rsc); } if (print_child) { out->message(out, (const char *) child->priv->xml->name, new_show_opts, child, only_node, only_rsc); } if (print_ctnr) { out->message(out, (const char *) container->priv->xml->name, new_show_opts, container, only_node, only_rsc); } if (print_remote) { out->message(out, (const char *) remote->priv->xml->name, new_show_opts, remote, only_node, only_rsc); } if (pcmk__list_of_multiple(bundle_data->replicas)) { out->end_list(out); } } else if (print_everything == FALSE && !(print_ip || print_child || print_ctnr || print_remote)) { continue; } else { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "", desc ? " (" : "", desc ? desc : "", desc ? ")" : "", get_unmanaged_str(rsc)); pe__bundle_replica_output_html(out, replica, pcmk__current_node(container), show_opts); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } static void pe__bundle_replica_output_text(pcmk__output_t *out, pcmk__bundle_replica_t *replica, pcmk_node_t *node, uint32_t show_opts) { const pcmk_resource_t *rsc = replica->child; int offset = 0; char buffer[LINE_MAX]; if(rsc == NULL) { rsc = replica->container; } if (replica->remote) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->remote)); } else { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(replica->container)); } if (replica->ipaddr) { offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", replica->ipaddr); } pe__common_output_text(out, rsc, buffer, node, show_opts); } PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *", "GList *") int pe__bundle_text(pcmk__output_t *out, va_list args) { uint32_t show_opts = va_arg(args, uint32_t); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); const char *desc = NULL; pe__bundle_variant_data_t *bundle_data = NULL; int rc = pcmk_rc_no_output; gboolean print_everything = TRUE; desc = pe__resource_description(rsc, show_opts); CRM_ASSERT(rsc != NULL); get_bundle_variant_data(bundle_data, rsc); if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) { return rc; } print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; pcmk_resource_t *ip = replica->ip; pcmk_resource_t *child = replica->child; pcmk_resource_t *container = replica->container; pcmk_resource_t *remote = replica->remote; gboolean print_ip, print_child, print_ctnr, print_remote; CRM_ASSERT(replica); if (pcmk__rsc_filtered_by_node(container, only_node)) { continue; } print_ip = (ip != NULL) && !ip->priv->fns->is_filtered(ip, only_rsc, print_everything); print_child = (child != NULL) && !child->priv->fns->is_filtered(child, only_rsc, print_everything); print_ctnr = !container->priv->fns->is_filtered(container, only_rsc, print_everything); print_remote = (remote != NULL) && !remote->priv->fns->is_filtered(remote, only_rsc, print_everything); if (pcmk_is_set(show_opts, pcmk_show_implicit_rscs) || (print_everything == FALSE && (print_ip || print_child || print_ctnr || print_remote))) { /* The text output messages used below require pe_print_implicit to * be set to do anything. */ uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "", desc ? " (" : "", desc ? desc : "", desc ? ")" : "", get_unmanaged_str(rsc)); if (pcmk__list_of_multiple(bundle_data->replicas)) { out->list_item(out, NULL, "Replica[%d]", replica->offset); } out->begin_list(out, NULL, NULL, NULL); if (print_ip) { out->message(out, (const char *) ip->priv->xml->name, new_show_opts, ip, only_node, only_rsc); } if (print_child) { out->message(out, (const char *) child->priv->xml->name, new_show_opts, child, only_node, only_rsc); } if (print_ctnr) { out->message(out, (const char *) container->priv->xml->name, new_show_opts, container, only_node, only_rsc); } if (print_remote) { out->message(out, (const char *) remote->priv->xml->name, new_show_opts, remote, only_node, only_rsc); } out->end_list(out); } else if (print_everything == FALSE && !(print_ip || print_child || print_ctnr || print_remote)) { continue; } else { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s", (bundle_data->nreplicas > 1)? " set" : "", rsc->id, bundle_data->image, pcmk_is_set(rsc->flags, pcmk__rsc_unique)? " (unique)" : "", desc ? " (" : "", desc ? desc : "", desc ? ")" : "", get_unmanaged_str(rsc)); pe__bundle_replica_output_text(out, replica, pcmk__current_node(container), show_opts); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } static void free_bundle_replica(pcmk__bundle_replica_t *replica) { if (replica == NULL) { return; } if (replica->node) { free(replica->node); replica->node = NULL; } if (replica->ip) { pcmk__xml_free(replica->ip->priv->xml); replica->ip->priv->xml = NULL; replica->ip->priv->fns->free(replica->ip); } if (replica->container) { pcmk__xml_free(replica->container->priv->xml); replica->container->priv->xml = NULL; replica->container->priv->fns->free(replica->container); } if (replica->remote) { pcmk__xml_free(replica->remote->priv->xml); replica->remote->priv->xml = NULL; replica->remote->priv->fns->free(replica->remote); } free(replica->ipaddr); free(replica); } void pe__free_bundle(pcmk_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; CRM_CHECK(rsc != NULL, return); get_bundle_variant_data(bundle_data, rsc); pcmk__rsc_trace(rsc, "Freeing %s", rsc->id); free(bundle_data->prefix); free(bundle_data->image); free(bundle_data->control_port); free(bundle_data->host_network); free(bundle_data->host_netmask); free(bundle_data->ip_range_start); free(bundle_data->container_network); free(bundle_data->launcher_options); free(bundle_data->container_command); g_free(bundle_data->container_host_options); g_list_free_full(bundle_data->replicas, (GDestroyNotify) free_bundle_replica); g_list_free_full(bundle_data->mounts, (GDestroyNotify)mount_free); g_list_free_full(bundle_data->ports, (GDestroyNotify)port_free); g_list_free(rsc->priv->children); if(bundle_data->child) { pcmk__xml_free(bundle_data->child->priv->xml); bundle_data->child->priv->xml = NULL; bundle_data->child->priv->fns->free(bundle_data->child); } common_free(rsc); } enum rsc_role_e pe__bundle_resource_state(const pcmk_resource_t *rsc, gboolean current) { enum rsc_role_e container_role = pcmk_role_unknown; return container_role; } /*! * \brief Get the number of configured replicas in a bundle * * \param[in] rsc Bundle resource * * \return Number of configured replicas, or 0 on error */ int pe_bundle_replicas(const pcmk_resource_t *rsc) { if (pcmk__is_bundle(rsc)) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); return bundle_data->nreplicas; } return 0; } void pe__count_bundle(pcmk_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); for (GList *item = bundle_data->replicas; item != NULL; item = item->next) { pcmk__bundle_replica_t *replica = item->data; if (replica->ip) { replica->ip->priv->fns->count(replica->ip); } if (replica->child) { replica->child->priv->fns->count(replica->child); } if (replica->container) { replica->container->priv->fns->count(replica->container); } if (replica->remote) { replica->remote->priv->fns->count(replica->remote); } } } gboolean pe__bundle_is_filtered(const pcmk_resource_t *rsc, GList *only_rsc, gboolean check_parent) { gboolean passes = FALSE; pe__bundle_variant_data_t *bundle_data = NULL; if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches)) { passes = TRUE; } else { get_bundle_variant_data(bundle_data, rsc); for (GList *gIter = bundle_data->replicas; gIter != NULL; gIter = gIter->next) { pcmk__bundle_replica_t *replica = gIter->data; pcmk_resource_t *ip = replica->ip; pcmk_resource_t *child = replica->child; pcmk_resource_t *container = replica->container; pcmk_resource_t *remote = replica->remote; if ((ip != NULL) && !ip->priv->fns->is_filtered(ip, only_rsc, FALSE)) { passes = TRUE; break; } if ((child != NULL) && !child->priv->fns->is_filtered(child, only_rsc, FALSE)) { passes = TRUE; break; } if (!container->priv->fns->is_filtered(container, only_rsc, FALSE)) { passes = TRUE; break; } if ((remote != NULL) && !remote->priv->fns->is_filtered(remote, only_rsc, FALSE)) { passes = TRUE; break; } } } return !passes; } /*! * \internal * \brief Get a list of a bundle's containers * * \param[in] bundle Bundle resource * * \return Newly created list of \p bundle's containers * \note It is the caller's responsibility to free the result with * g_list_free(). */ GList * pe__bundle_containers(const pcmk_resource_t *bundle) { GList *containers = NULL; const pe__bundle_variant_data_t *data = NULL; get_bundle_variant_data(data, bundle); for (GList *iter = data->replicas; iter != NULL; iter = iter->next) { pcmk__bundle_replica_t *replica = iter->data; containers = g_list_append(containers, replica->container); } return containers; } // Bundle implementation of pcmk__rsc_methods_t:active_node() pcmk_node_t * pe__bundle_active_node(const pcmk_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean) { pcmk_node_t *active = NULL; pcmk_node_t *node = NULL; pcmk_resource_t *container = NULL; GList *containers = NULL; GList *iter = NULL; GHashTable *nodes = NULL; const pe__bundle_variant_data_t *data = NULL; if (count_all != NULL) { *count_all = 0; } if (count_clean != NULL) { *count_clean = 0; } if (rsc == NULL) { return NULL; } /* For the purposes of this method, we only care about where the bundle's * containers are active, so build a list of active containers. */ get_bundle_variant_data(data, rsc); for (iter = data->replicas; iter != NULL; iter = iter->next) { pcmk__bundle_replica_t *replica = iter->data; if (replica->container->priv->active_nodes != NULL) { containers = g_list_append(containers, replica->container); } } if (containers == NULL) { return NULL; } /* If the bundle has only a single active container, just use that * container's method. If live migration is ever supported for bundle * containers, this will allow us to prefer the migration source when there * is only one container and it is migrating. For now, this just lets us * avoid creating the nodes table. */ if (pcmk__list_of_1(containers)) { container = containers->data; node = container->priv->fns->active_node(container, count_all, count_clean); g_list_free(containers); return node; } // Add all containers' active nodes to a hash table (for uniqueness) nodes = g_hash_table_new(NULL, NULL); for (iter = containers; iter != NULL; iter = iter->next) { container = iter->data; for (GList *node_iter = container->priv->active_nodes; node_iter != NULL; node_iter = node_iter->next) { node = node_iter->data; // If insert returns true, we haven't counted this node yet if (g_hash_table_insert(nodes, (gpointer) node->details, (gpointer) node) && !pe__count_active_node(rsc, node, &active, count_all, count_clean)) { goto done; } } } done: g_list_free(containers); g_hash_table_destroy(nodes); return active; } /*! * \internal * \brief Get maximum bundle resource instances per node * * \param[in] rsc Bundle resource to check * * \return Maximum number of \p rsc instances that can be active on one node */ unsigned int pe__bundle_max_per_node(const pcmk_resource_t *rsc) { pe__bundle_variant_data_t *bundle_data = NULL; get_bundle_variant_data(bundle_data, rsc); CRM_ASSERT(bundle_data->nreplicas_per_host >= 0); return (unsigned int) bundle_data->nreplicas_per_host; } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 041eeb2d0c..2f0c7d72e7 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,5160 +1,5160 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); // A (parsed) resource action history entry struct action_history { pcmk_resource_t *rsc; // Resource that history is for pcmk_node_t *node; // Node that history is for xmlNode *xml; // History entry XML // Parsed from entry XML const char *id; // XML ID of history entry const char *key; // Operation key of action const char *task; // Action name const char *exit_reason; // Exit reason given for result guint interval_ms; // Action interval int call_id; // Call ID of action int expected_exit_status; // Expected exit status of action int exit_status; // Actual exit status of action int execution_status; // Execution status of action }; /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the * flag is stringified more readably in log messages. */ #define set_config_flag(scheduler, option, flag) do { \ GHashTable *config_hash = (scheduler)->priv->options; \ const char *scf_value = pcmk__cluster_option(config_hash, (option)); \ \ if (scf_value != NULL) { \ if (crm_is_true(scf_value)) { \ (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } else { \ (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } \ } \ } while(0) static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum pcmk__on_fail *failed); static void determine_remote_online_status(pcmk_scheduler_t *scheduler, pcmk_node_t *this_node); static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler); static void determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, pcmk_scheduler_t *scheduler); static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler); /*! * \internal * \brief Check whether a node is a dangling guest node * * \param[in] node Node to check * * \return true if \p node had a Pacemaker Remote connection resource with a * launcher that was removed from the CIB, otherwise false. */ static bool is_dangling_guest_node(pcmk_node_t *node) { return pcmk__is_pacemaker_remote_node(node) && (node->priv->remote != NULL) && (node->priv->remote->priv->launcher == NULL) && pcmk_is_set(node->priv->remote->flags, pcmk__rsc_removed_launched); } /*! * \brief Schedule a fence action for a node * * \param[in,out] scheduler Scheduler data * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed * \param[in] priority_delay Whether to consider * \c PCMK_OPT_PRIORITY_FENCING_DELAY */ void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay) { CRM_CHECK(node, return); if (pcmk__is_guest_or_bundle_node(node)) { // Fence a guest or bundle node by marking its launcher as failed pcmk_resource_t *rsc = node->priv->remote->priv->launcher; if (!pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", pcmk__node_name(node), reason, rsc->id); } else { pcmk__sched_warn(scheduler, "Guest node %s will be fenced " "(by recovering its guest resource %s): %s", pcmk__node_name(node), rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ pcmk__set_node_flags(node, pcmk__node_remote_reset); pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", pcmk__node_name(node), reason); pcmk__set_rsc_flags(node->priv->remote, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); } else if (pcmk__is_remote_node(node)) { pcmk_resource_t *rsc = node->priv->remote; if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", pcmk__node_name(node), reason); } else if (!pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)) { pcmk__set_node_flags(node, pcmk__node_remote_reset); pcmk__sched_warn(scheduler, "Remote node %s %s: %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean", reason); } else { pcmk__sched_warn(scheduler, "Cluster node %s %s: %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \ "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \ "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \ "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \ "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \ "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \ "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler) { xmlXPathObjectPtr result = NULL; if (!pcmk_is_set(scheduler->flags, flag)) { result = xpath_search(scheduler->input, xpath); if (result && (numXpathResults(result) > 0)) { pcmk__set_scheduler_flags(scheduler, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) { const char *value = NULL; GHashTable *config_hash = pcmk__strkey_table(free, free); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .now = scheduler->priv->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; scheduler->priv->options = config_hash; pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data, config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, scheduler); pcmk__validate_cluster_options(config_hash); set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES, pcmk__sched_probe_resources); if (!pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) { crm_info("Startup probes: disabled (dangerous)"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_info("Watchdog-based self-fencing will be performed via SBD if " "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " is nonzero"); pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING, scheduler); value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT); pcmk_parse_interval_spec(value, &(scheduler->priv->fence_timeout_ms)); crm_debug("Default fencing action timeout: %s", pcmk__readable_interval(scheduler->priv->fence_timeout_ms)); set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED, pcmk__sched_fencing_enabled); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { crm_debug("STONITH of failed nodes is enabled"); } else { crm_debug("STONITH of failed nodes is disabled"); } scheduler->priv->fence_action = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_ACTION); if (!strcmp(scheduler->priv->fence_action, PCMK__ACTION_POWEROFF)) { pcmk__warn_once(pcmk__wo_poweroff, "Support for " PCMK_OPT_STONITH_ACTION " of " "'" PCMK__ACTION_POWEROFF "' is deprecated and will be " "removed in a future release " "(use '" PCMK_ACTION_OFF "' instead)"); scheduler->priv->fence_action = PCMK_ACTION_OFF; } crm_trace("STONITH will %s nodes", scheduler->priv->fence_action); set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING, pcmk__sched_concurrent_fencing); if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) { crm_debug("Concurrent fencing is enabled"); } else { crm_debug("Concurrent fencing is disabled"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY); if (value) { pcmk_parse_interval_spec(value, &(scheduler->priv->priority_fencing_ms)); crm_trace("Priority fencing delay is %s", pcmk__readable_interval(scheduler->priv->priority_fencing_ms)); } set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES, pcmk__sched_stop_all); crm_debug("Stop all active resources: %s", pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all)); set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER, pcmk__sched_symmetric_cluster); if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY); if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_ignore; } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_freeze; } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_demote; } else if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)) { if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { int do_panic = 0; crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC, &do_panic); if (do_panic || pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) { scheduler->no_quorum_policy = pcmk_no_quorum_fence; } else { crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop': cluster has never had quorum"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop' because fencing is disabled"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { scheduler->no_quorum_policy = pcmk_no_quorum_stop; } switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case pcmk_no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case pcmk_no_quorum_demote: crm_debug("On loss of quorum: " "Demote promotable resources and stop other resources"); break; case pcmk_no_quorum_fence: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case pcmk_no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES, pcmk__sched_stop_removed_resources); if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) { crm_trace("Orphan resources are stopped"); } else { crm_trace("Orphan resources are ignored"); } set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS, pcmk__sched_cancel_removed_actions); if (pcmk_is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) { crm_trace("Orphan resource actions are stopped"); } else { crm_trace("Orphan resource actions are ignored"); } value = pcmk__cluster_option(config_hash, PCMK__OPT_REMOVE_AFTER_STOP); if (value != NULL) { if (crm_is_true(value)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_remove_after_stop); pcmk__warn_once(pcmk__wo_remove_after, "Support for the " PCMK__OPT_REMOVE_AFTER_STOP " cluster property is deprecated and will be " "removed in a future release"); } else { pcmk__clear_scheduler_flags(scheduler, pcmk__sched_remove_after_stop); } } set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE, pcmk__sched_in_maintenance); crm_trace("Maintenance mode: %s", pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance)); set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL, pcmk__sched_start_failure_fatal); if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) { crm_trace("Start failures are always fatal"); } else { crm_trace("Start failures are handled by failcount"); } if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING, pcmk__sched_startup_fencing); } if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pcmk__warn_once(pcmk__wo_blind, "Blind faith: not fencing unseen nodes"); } pe__unpack_node_health_scores(scheduler); scheduler->priv->placement_strategy = pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY); crm_trace("Placement strategy: %s", scheduler->priv->placement_strategy); set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK, pcmk__sched_shutdown_lock); if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT); pcmk_parse_interval_spec(value, &(scheduler->priv->shutdown_lock_ms)); crm_trace("Resources will be locked to nodes that were cleanly " "shut down (locks expire after %s)", pcmk__readable_interval(scheduler->priv->shutdown_lock_ms)); } else { crm_trace("Resources will not be locked to nodes that were cleanly " "shut down"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT); pcmk_parse_interval_spec(value, &(scheduler->priv->node_pending_ms)); if (scheduler->priv->node_pending_ms == 0U) { crm_trace("Do not fence pending nodes"); } else { crm_trace("Fence pending nodes after %s", pcmk__readable_interval(scheduler->priv->node_pending_ms)); } return TRUE; } pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler) { pcmk_node_t *new_node = NULL; if (pcmk_find_node(scheduler, uname) != NULL) { pcmk__config_warn("More than one node entry has name '%s'", uname); } new_node = calloc(1, sizeof(pcmk_node_t)); if (new_node == NULL) { pcmk__sched_err(scheduler, "Could not allocate memory for node %s", uname); return NULL; } new_node->assign = calloc(1, sizeof(struct pcmk__node_assignment)); new_node->details = calloc(1, sizeof(struct pcmk__node_details)); new_node->priv = calloc(1, sizeof(pcmk__node_private_t)); if ((new_node->assign == NULL) || (new_node->details == NULL) || (new_node->priv == NULL)) { free(new_node->assign); free(new_node->details); free(new_node->priv); free(new_node); pcmk__sched_err(scheduler, "Could not allocate memory for node %s", uname); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->assign->score = char2score(score); new_node->priv->id = id; new_node->priv->name = uname; new_node->priv->flags = pcmk__node_probes_allowed; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->priv->scheduler = scheduler; if (pcmk__str_eq(type, PCMK_VALUE_MEMBER, pcmk__str_null_matches|pcmk__str_casei)) { new_node->priv->variant = pcmk__node_variant_cluster; } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) { new_node->priv->variant = pcmk__node_variant_remote; pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes); } else { /* @COMPAT 'ping' is the default for backward compatibility, but it * should be changed to 'member' at a compatibility break */ if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) { pcmk__config_warn("Node %s has unrecognized type '%s', " "assuming '" PCMK__VALUE_PING "'", pcmk__s(uname, "without name"), type); } pcmk__warn_once(pcmk__wo_ping_node, "Support for nodes of type '" PCMK__VALUE_PING "' " "(such as %s) is deprecated and will be removed in a " "future release", pcmk__s(uname, "unnamed node")); new_node->priv->variant = pcmk__node_variant_ping; } new_node->priv->attrs = pcmk__strkey_table(free, free); if (pcmk__is_pacemaker_remote_node(new_node)) { pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote"); } else { pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster"); } new_node->priv->utilization = pcmk__strkey_table(free, free); new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests); scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node, pe__cmp_node_name); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = pcmk__xe_id(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL); attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) { if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) { continue; } for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL); attr != NULL; attr = pcmk__xe_next(attr)) { const char *value = crm_element_value(attr, PCMK_XA_VALUE); const char *name = crm_element_value(attr, PCMK_XA_NAME); if (name == NULL) { // Sanity continue; } if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) { remote_name = value; } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) { remote_server = value; } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) { remote_port = value; } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) { connect_timeout = value; } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) { remote_allow_migrate = value; } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (pe_find_resource(data->priv->resources, remote_name) != NULL) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node) { if ((new_node->priv->variant == pcmk__node_variant_remote) && (new_node->priv->remote == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } } gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; pcmk_node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) { new_node = NULL; id = crm_element_value(xml_obj, PCMK_XA_ID); uname = crm_element_value(xml_obj, PCMK_XA_UNAME); type = crm_element_value(xml_obj, PCMK_XA_TYPE); score = crm_element_value(xml_obj, PCMK_XA_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { pcmk__config_err("Ignoring <" PCMK_XE_NODE "> entry in configuration without id"); continue; } new_node = pe_create_node(id, uname, type, score, scheduler); if (new_node == NULL) { return FALSE; } handle_startup_fencing(scheduler, new_node); add_node_attrs(xml_obj, new_node, FALSE, scheduler); crm_trace("Done with node %s", crm_element_value(xml_obj, PCMK_XA_UNAME)); } } if ((scheduler->priv->local_node_name != NULL) && (pcmk_find_node(scheduler, scheduler->priv->local_node_name) == NULL)) { crm_info("Creating a fake local node"); pe_create_node(scheduler->priv->local_node_name, scheduler->priv->local_node_name, NULL, 0, scheduler); } return TRUE; } static void unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) { const char *launcher_id = NULL; if (rsc->priv->children != NULL) { g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher, scheduler); return; } launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER); if ((launcher_id != NULL) && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) { pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources, launcher_id); if (launcher != NULL) { rsc->priv->launcher = launcher; launcher->priv->launched = g_list_append(launcher->priv->launched, rsc); pcmk__rsc_trace(rsc, "Resource %s's launcher is %s", rsc->id, launcher_id); } else { pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s", rsc->id, launcher_id); } } } gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = pcmk__xe_id(xml_obj); /* The pcmk_find_node() check ensures we don't iterate over an * expanded node that has already been added to the node list */ if (new_node_id && (pcmk_find_node(scheduler, new_node_id) == NULL)) { crm_trace("Found remote node %s defined by resource %s", new_node_id, pcmk__xe_id(xml_obj)); pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, NULL, scheduler); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). */ if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, scheduler); if (new_node_id && (pcmk_find_node(scheduler, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s", new_node_id, pcmk__xe_id(xml_obj)); pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, NULL, scheduler); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. */ if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL); xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, scheduler); if (new_node_id && (pcmk_find_node(scheduler, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, pcmk__xe_id(xml_obj2), pcmk__xe_id(xml_obj)); pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, NULL, scheduler); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the scheduler calculations. */ static void link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc) { pcmk_node_t *remote_node = NULL; if (!pcmk_is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) { return; } if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pcmk_find_node(scheduler, new_rsc->id); CRM_CHECK(remote_node != NULL, return); pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s", new_rsc->id, pcmk__node_name(remote_node)); remote_node->priv->remote = new_rsc; if (new_rsc->priv->launcher == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(scheduler, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. */ pcmk__insert_dup(remote_node->priv->attrs, CRM_ATTR_KIND, "container"); } } /*! * \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] scheduler Scheduler data * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when pe__unpack_resource() calls resource_location() */ gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; GList *gIter = NULL; scheduler->priv->templates = pcmk__strkey_table(free, pcmk__free_idref); for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { pcmk_resource_t *new_rsc = NULL; const char *id = pcmk__xe_id(xml_obj); if (pcmk__str_empty(id)) { pcmk__config_err("Ignoring <%s> resource without ID", xml_obj->name); continue; } if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) { if (g_hash_table_lookup_extended(scheduler->priv->templates, id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ pcmk__insert_dup(scheduler->priv->templates, id, NULL); } continue; } crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id); if (pe__unpack_resource(xml_obj, &new_rsc, NULL, scheduler) == pcmk_rc_ok) { scheduler->priv->resources = g_list_append(scheduler->priv->resources, new_rsc); pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { pcmk__config_err("Ignoring <%s> resource '%s' " "because configuration is invalid", xml_obj->name, id); } } for (gIter = scheduler->priv->resources; gIter != NULL; gIter = gIter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data; unpack_launcher(rsc, scheduler); link_rsc2remotenode(scheduler, rsc); } scheduler->priv->resources = g_list_sort(scheduler->priv->resources, pe__cmp_rsc_priority); if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { /* Ignore */ } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled) && !pcmk_is_set(scheduler->flags, pcmk__sched_have_fencing)) { pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined"); pcmk__config_err("Either configure some or disable STONITH with the " PCMK_OPT_STONITH_ENABLED " option"); pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } /*! * \internal * \brief Parse configuration XML for fencing topology information * * \param[in] xml_fencing_topology Top of fencing topology configuration XML * \param[in,out] scheduler Scheduler data * * \return void */ void pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; int id = 0; for (xml_obj = pcmk__xe_first_child(xml_fencing_topology, PCMK_XE_FENCING_LEVEL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next_same(xml_obj)) { crm_element_value_int(xml_obj, PCMK_XA_INDEX, &id); // Ensure an ID was given if (pcmk__str_empty(pcmk__xe_id(xml_obj))) { pcmk__config_warn("Ignoring registration for topology level without ID"); continue; } // Ensure level ID is in allowed range if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) { pcmk__config_warn("Ignoring topology registration with invalid level %d", id); continue; } } } gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler) { xmlNode *xml_tag = NULL; scheduler->priv->tags = pcmk__strkey_table(free, pcmk__free_idref); for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL); xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = pcmk__xe_id(xml_tag); if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) { continue; } if (tag_id == NULL) { pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID, (const char *) xml_tag->name); continue; } for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL); xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) { const char *obj_ref = pcmk__xe_id(xml_obj_ref); if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) { continue; } if (obj_ref == NULL) { pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID, xml_obj_ref->name, tag_id); continue; } pcmk__add_idref(scheduler->priv->tags, tag_id, obj_ref); } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; pcmk__ticket_t *ticket = NULL; ticket_id = pcmk__xe_id(xml_ticket); if (pcmk__str_empty(ticket_id)) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, scheduler); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = pcmk__xml_attr_value(xIter); if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) { continue; } pcmk__insert_dup(ticket->state, prop_name, prop_value); } granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED); if (granted && crm_is_true(granted)) { pcmk__set_ticket_flags(ticket, pcmk__ticket_granted); crm_info("We have ticket '%s'", ticket->id); } else { pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted); crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED); if (last_granted) { long long last_granted_ll; pcmk__scan_ll(last_granted, &last_granted_ll, 0LL); ticket->last_granted = (time_t) last_granted_ll; } standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY); if (standby && crm_is_true(standby)) { pcmk__set_ticket_flags(ticket, pcmk__ticket_standby); if (pcmk_is_set(ticket->flags, pcmk__ticket_granted)) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby); } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) { continue; } unpack_ticket_state(xml_obj, scheduler); } return TRUE; } static void unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *discovery = NULL; const xmlNode *attrs = NULL; pcmk_resource_t *rsc = NULL; int maint = 0; if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) { return; } if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) { return; } crm_trace("Processing Pacemaker Remote node %s", pcmk__node_name(this_node)); pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE), &maint, 0); if (maint) { pcmk__set_node_flags(this_node, pcmk__node_remote_maint); } else { pcmk__clear_node_flags(this_node, pcmk__node_remote_maint); } rsc = this_node->priv->remote; if (!pcmk_is_set(this_node->priv->flags, pcmk__node_remote_reset)) { this_node->details->unclean = FALSE; pcmk__set_node_flags(this_node, pcmk__node_seen); } attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL, NULL); add_node_attrs(attrs, this_node, TRUE, scheduler); if (pe__shutdown_requested(this_node)) { crm_info("%s is shutting down", pcmk__node_name(this_node)); this_node->details->shutdown = TRUE; } if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL, pcmk__rsc_node_current))) { crm_info("%s is in standby mode", pcmk__node_name(this_node)); pcmk__set_node_flags(this_node, pcmk__node_standby); } if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL, pcmk__rsc_node_current)) || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed))) { crm_info("%s is in maintenance mode", pcmk__node_name(this_node)); this_node->details->maintenance = TRUE; } discovery = pcmk__node_attr(this_node, PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED, NULL, pcmk__rsc_node_current); if ((discovery != NULL) && !crm_is_true(discovery)) { pcmk__warn_once(pcmk__wo_rdisc_enabled, "Support for the " PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED " node attribute is deprecated and will be removed" " (and behave as 'true') in a future release."); if (pcmk__is_remote_node(this_node) && !pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { pcmk__config_warn("Ignoring " PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED " attribute on Pacemaker Remote node %s" " because fencing is disabled", pcmk__node_name(this_node)); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("%s has resource discovery disabled", pcmk__node_name(this_node)); pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed); } } } /*! * \internal * \brief Unpack a cluster node's transient attributes * * \param[in] state CIB node state XML * \param[in,out] node Cluster node whose attributes are being unpacked * \param[in,out] scheduler Scheduler data */ static void unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node, pcmk_scheduler_t *scheduler) { const char *discovery = NULL; const xmlNode *attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL, NULL); add_node_attrs(attrs, node, TRUE, scheduler); if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL, pcmk__rsc_node_current))) { crm_info("%s is in standby mode", pcmk__node_name(node)); pcmk__set_node_flags(node, pcmk__node_standby); } if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL, pcmk__rsc_node_current))) { crm_info("%s is in maintenance mode", pcmk__node_name(node)); node->details->maintenance = TRUE; } discovery = pcmk__node_attr(node, PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED, NULL, pcmk__rsc_node_current); if ((discovery != NULL) && !crm_is_true(discovery)) { pcmk__config_warn("Ignoring " PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED " attribute for %s because disabling resource" " discovery is not allowed for cluster nodes", pcmk__node_name(node)); } } /*! * \internal * \brief Unpack a node state entry (first pass) * * Unpack one node state entry from status. This unpacks information from the * \C PCMK__XE_NODE_STATE element itself and node attributes inside it, but not * the resource history inside it. Multiple passes through the status are needed * to fully unpack everything. * * \param[in] state CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *id = NULL; const char *uname = NULL; pcmk_node_t *this_node = NULL; id = crm_element_value(state, PCMK_XA_ID); if (id == NULL) { pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without " PCMK_XA_ID); crm_log_xml_info(state, "missing-id"); return; } uname = crm_element_value(state, PCMK_XA_UNAME); if (uname == NULL) { - /* If a joining peer makes the cluster acquire the quorum from corosync - * meanwhile it has not joined CPG membership of pacemaker-controld yet, - * it's possible that the created PCMK__XE_NODE_STATE entry doesn't have - * a PCMK_XA_UNAME yet. We should recognize the node as `pending` and - * wait for it to join CPG. + /* If a joining peer makes the cluster acquire the quorum from Corosync + * but has not joined the controller CPG membership yet, it's possible + * that the created PCMK__XE_NODE_STATE entry doesn't have a + * PCMK_XA_UNAME yet. Recognize the node as pending and wait for it to + * join CPG. */ crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" " "without " PCMK_XA_UNAME, id); } this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { crm_notice("Ignoring recorded state for removed node with name %s and " PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id); return; } if (pcmk__is_pacemaker_remote_node(this_node)) { int remote_fenced = 0; /* We can't determine the online status of Pacemaker Remote nodes until * after all resource history has been unpacked. In this first pass, we * do need to mark whether the node has been fenced, as this plays a * role during unpacking cluster node resource state. */ pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED), &remote_fenced, 0); if (remote_fenced) { pcmk__set_node_flags(this_node, pcmk__node_remote_fenced); } else { pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced); } return; } unpack_transient_attributes(state, this_node, scheduler); /* Provisionally mark this cluster node as clean. We have at least seen it * in the current cluster's lifetime. */ this_node->details->unclean = FALSE; pcmk__set_node_flags(this_node, pcmk__node_seen); crm_trace("Determining online status of cluster node %s (id %s)", pcmk__node_name(this_node), id); determine_online_status(state, this_node, scheduler); if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate) && this_node->details->online && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) { /* Everything else should flow from this automatically * (at least until the scheduler becomes able to migrate off * healthy resources) */ pe_fence_node(scheduler, this_node, "cluster does not have quorum", FALSE); } } /*! * \internal * \brief Unpack nodes' resource history as much as possible * * Unpack as many nodes' resource history as possible in one pass through the * status. We need to process Pacemaker Remote nodes' connections/containers * before unpacking their history; the connection/container history will be * in another node's history, so it might take multiple passes to unpack * everything. * * \param[in] status CIB XML status section * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen * \param[in,out] scheduler Scheduler data * * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done, * or EAGAIN if more unpacking remains to be done) */ static int unpack_node_history(const xmlNode *status, bool fence, pcmk_scheduler_t *scheduler) { int rc = pcmk_rc_ok; // Loop through all PCMK__XE_NODE_STATE entries in CIB status for (const xmlNode *state = pcmk__xe_first_child(status, PCMK__XE_NODE_STATE, NULL, NULL); state != NULL; state = pcmk__xe_next_same(state)) { const char *id = pcmk__xe_id(state); const char *uname = crm_element_value(state, PCMK_XA_UNAME); pcmk_node_t *this_node = NULL; if ((id == NULL) || (uname == NULL)) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history from malformed " PCMK__XE_NODE_STATE " without id and/or uname"); continue; } this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history for node %s because " "no longer in configuration", id); continue; } if (pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) { crm_trace("Not unpacking resource history for node %s because " "already unpacked", id); continue; } if (fence) { // We're processing all remaining nodes } else if (pcmk__is_guest_or_bundle_node(this_node)) { /* We can unpack a guest node's history only after we've unpacked * other resource history to the point that we know that the node's * connection and containing resource are both up. */ const pcmk_resource_t *remote = this_node->priv->remote; const pcmk_resource_t *launcher = remote->priv->launcher; if ((remote->priv->orig_role != pcmk_role_started) || (launcher->priv->orig_role != pcmk_role_started)) { crm_trace("Not unpacking resource history for guest node %s " "because launcher and connection are not known to " "be up", id); continue; } } else if (pcmk__is_remote_node(this_node)) { /* We can unpack a remote node's history only after we've unpacked * other resource history to the point that we know that the node's * connection is up, with the exception of when shutdown locks are * in use. */ pcmk_resource_t *rsc = this_node->priv->remote; if ((rsc == NULL) || (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock) && (rsc->priv->orig_role != pcmk_role_started))) { crm_trace("Not unpacking resource history for remote node %s " "because connection is not known to be up", id); continue; } /* If fencing and shutdown locks are disabled and we're not processing * unseen nodes, then we don't want to unpack offline nodes until online * nodes have been unpacked. This allows us to number active clone * instances first. */ } else if (!pcmk_any_flags_set(scheduler->flags, pcmk__sched_fencing_enabled |pcmk__sched_shutdown_lock) && !this_node->details->online) { crm_trace("Not unpacking resource history for offline " "cluster node %s", id); continue; } if (pcmk__is_pacemaker_remote_node(this_node)) { determine_remote_online_status(scheduler, this_node); unpack_handle_remote_attrs(this_node, state, scheduler); } crm_trace("Unpacking resource history for %snode %s", (fence? "unseen " : ""), id); pcmk__set_node_flags(this_node, pcmk__node_unpacked); unpack_node_lrm(this_node, state, scheduler); rc = EAGAIN; // Other node histories might depend on this one } return rc; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler) { xmlNode *state = NULL; crm_trace("Beginning unpack"); if (scheduler->priv->ticket_constraints == NULL) { scheduler->priv->ticket_constraints = pcmk__strkey_table(free, destroy_ticket); } for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL; state = pcmk__xe_next(state)) { if (pcmk__xe_is(state, PCMK_XE_TICKETS)) { unpack_tickets_state((xmlNode *) state, scheduler); } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) { unpack_node_state(state, scheduler); } } while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) { crm_trace("Another pass through node resource histories is needed"); } // Now catch any nodes we didn't see unpack_node_history(status, pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled), scheduler); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (scheduler->priv->stop_needed != NULL) { for (GList *item = scheduler->priv->stop_needed; item != NULL; item = item->next) { pcmk_resource_t *container = item->data; pcmk_node_t *node = pcmk__current_node(container); if (node) { stop_action(container, node, FALSE); } } g_list_free(scheduler->priv->stop_needed); scheduler->priv->stop_needed = NULL; } /* Now that we know status of all Pacemaker Remote connections and nodes, * we can stop connections for node shutdowns, and check the online status * of remote/guest nodes that didn't have any node history to unpack. */ for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *this_node = gIter->data; if (!pcmk__is_pacemaker_remote_node(this_node)) { continue; } if (this_node->details->shutdown && (this_node->priv->remote != NULL)) { pe__set_next_role(this_node->priv->remote, pcmk_role_stopped, "remote shutdown"); } if (!pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) { determine_remote_online_status(scheduler, this_node); } } return TRUE; } /*! * \internal * \brief Unpack node's time when it became a member at the cluster layer * * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry * \param[in,out] scheduler Scheduler data * * \return Epoch time when node became a cluster member * (or scheduler effective time for legacy entries) if a member, * 0 if not a member, or -1 if no valid information available */ static long long unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler) { const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM); int member = 0; if (member_time == NULL) { return -1LL; } else if (crm_str_to_boolean(member_time, &member) == 1) { /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was * recorded as a boolean for a DC < 2.1.7, or the node is pending * shutdown and has left the CPG, in which case it was set to 1 to avoid * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT. * * We return the effective time for in_ccm=1 because what's important to * avoid fencing is that effective time minus this value is less than * the pending node timeout. */ return member? (long long) get_effective_time(scheduler) : 0LL; } else { long long when_member = 0LL; if ((pcmk__scan_ll(member_time, &when_member, 0LL) != pcmk_rc_ok) || (when_member < 0LL)) { crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM " in " PCMK__XE_NODE_STATE " entry", member_time); return -1LL; } return when_member; } } /*! * \internal * \brief Unpack node's time when it became online in process group * * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry * * \return Epoch time when node became online in process group (or 0 if not * online, or 1 for legacy online entries) */ static long long unpack_node_online(const xmlNode *node_state) { const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD); // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline" if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE, pcmk__str_casei|pcmk__str_null_matches)) { return 0LL; } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) { return 1LL; } else { long long when_online = 0LL; if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok) || (when_online < 0)) { crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in " PCMK__XE_NODE_STATE " entry, assuming offline", peer_time); return 0LL; } return when_online; } } /*! * \internal * \brief Unpack node attribute for user-requested fencing * * \param[in] node Node to check * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry in CIB status * * \return \c true if fencing has been requested for \p node, otherwise \c false */ static bool unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state) { long long value = 0LL; int value_i = 0; const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE, NULL, pcmk__rsc_node_current); // Value may be boolean or an epoch time if (crm_str_to_boolean(value_s, &value_i) == 1) { return (value_i != 0); } if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) { return (value > 0); } crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE "node attribute for %s", value_s, pcmk__node_name(node)); return false; } static gboolean determine_online_status_no_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, pcmk_node_t *this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, PCMK__XA_JOIN); const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED); long long when_member = unpack_node_member(node_state, scheduler); long long when_online = unpack_node_online(node_state); if (when_member <= 0) { crm_trace("Node %s is %sdown", pcmk__node_name(this_node), ((when_member < 0)? "presumed " : "")); } else if (when_online > 0) { if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { online = TRUE; } else { crm_debug("Node %s is not ready to run resources: %s", pcmk__node_name(this_node), join); } } else if (!pcmk_is_set(this_node->priv->flags, pcmk__node_expected_up)) { crm_trace("Node %s controller is down: " "member@%lld online@%lld join=%s expected=%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } else { /* mark it unclean */ pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE); crm_info("Node %s member@%lld online@%lld join=%s expected=%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } return online; } /*! * \internal * \brief Check whether a node has taken too long to join controller group * * \param[in,out] scheduler Scheduler data * \param[in] node Node to check * \param[in] when_member Epoch time when node became a cluster member * \param[in] when_online Epoch time when node joined controller group * * \return true if node has been pending (on the way up) longer than * \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false * \note This will also update the cluster's recheck time if appropriate. */ static inline bool pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, long long when_member, long long when_online) { if ((scheduler->priv->node_pending_ms > 0U) && (when_member > 0) && (when_online <= 0)) { // There is a timeout on pending nodes, and node is pending time_t timeout = when_member + (scheduler->priv->node_pending_ms / 1000U); if (get_effective_time(node->priv->scheduler) >= timeout) { return true; // Node has timed out } // Node is pending, but still has time pe__update_recheck_time(timeout, scheduler, "pending node timeout"); } return false; } static bool determine_online_status_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, pcmk_node_t *this_node) { bool termination_requested = unpack_node_terminate(this_node, node_state); const char *join = crm_element_value(node_state, PCMK__XA_JOIN); const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED); long long when_member = unpack_node_member(node_state, scheduler); long long when_online = unpack_node_online(node_state); /* - PCMK__XA_JOIN ::= member|down|pending|banned - PCMK_XA_EXPECTED ::= member|down @COMPAT with entries recorded for DCs < 2.1.7 - PCMK__XA_IN_CCM ::= true|false - PCMK_XA_CRMD ::= online|offline Since crm_feature_set 3.18.0 (pacemaker-2.1.7): - PCMK__XA_IN_CCM ::= |0 Since when node has been a cluster member. A value 0 of means the node is not a cluster member. - PCMK_XA_CRMD ::= |0 Since when peer has been online in CPG. A value 0 means the peer is offline in CPG. */ crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, ""), (termination_requested? " (termination requested)" : "")); if (this_node->details->shutdown) { crm_debug("%s is shutting down", pcmk__node_name(this_node)); /* Slightly different criteria since we can't shut down a dead peer */ return (when_online > 0); } if (when_member < 0) { pe_fence_node(scheduler, this_node, "peer has not been seen by the cluster", FALSE); return false; } if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) { pe_fence_node(scheduler, this_node, "peer failed Pacemaker membership criteria", FALSE); } else if (termination_requested) { if ((when_member <= 0) && (when_online <= 0) && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) { crm_info("%s was fenced as requested", pcmk__node_name(this_node)); return false; } pe_fence_node(scheduler, this_node, "fencing was requested", false); } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_null_matches)) { if (pending_too_long(scheduler, this_node, when_member, when_online)) { pe_fence_node(scheduler, this_node, "peer pending timed out on joining the process group", FALSE); } else if ((when_member > 0) || (when_online > 0)) { crm_info("- %s is not ready to run resources", pcmk__node_name(this_node)); pcmk__set_node_flags(this_node, pcmk__node_standby); this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", pcmk__node_name(this_node)); } } else if (when_member <= 0) { // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes pe_fence_node(scheduler, this_node, "peer is no longer part of the cluster", TRUE); } else if (when_online <= 0) { pe_fence_node(scheduler, this_node, "peer process is no longer available", FALSE); /* Everything is running at this point, now check join state */ } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) { crm_info("%s is active", pcmk__node_name(this_node)); } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) { crm_info("%s is not ready to run resources", pcmk__node_name(this_node)); pcmk__set_node_flags(this_node, pcmk__node_standby); this_node->details->pending = TRUE; } else { pe_fence_node(scheduler, this_node, "peer was in an unknown state", FALSE); } return (when_member > 0); } static void determine_remote_online_status(pcmk_scheduler_t *scheduler, pcmk_node_t *this_node) { pcmk_resource_t *rsc = this_node->priv->remote; pcmk_resource_t *launcher = NULL; pcmk_node_t *host = NULL; const char *node_type = "Remote"; if (rsc == NULL) { /* This is a leftover node state entry for a former Pacemaker Remote * node whose connection resource was removed. Consider it offline. */ crm_trace("Pacemaker Remote node %s is considered OFFLINE because " "its connection resource has been removed from the CIB", this_node->priv->id); this_node->details->online = FALSE; return; } launcher = rsc->priv->launcher; if (launcher != NULL) { node_type = "Guest"; if (pcmk__list_of_1(rsc->priv->active_nodes)) { host = rsc->priv->active_nodes->data; } } /* If the resource is currently started, mark it online. */ if (rsc->priv->orig_role == pcmk_role_started) { this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if ((rsc->priv->orig_role == pcmk_role_started) && (rsc->priv->next_role == pcmk_role_stopped)) { crm_trace("%s node %s shutting down because connection resource is stopping", node_type, this_node->priv->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if ((launcher != NULL) && pcmk_is_set(launcher->flags, pcmk__rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->priv->id); this_node->details->online = FALSE; pcmk__set_node_flags(this_node, pcmk__node_remote_reset); } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", node_type, this_node->priv->id); this_node->details->online = FALSE; } else if ((rsc->priv->orig_role == pcmk_role_stopped) || ((launcher != NULL) && (launcher->priv->orig_role == pcmk_role_stopped))) { crm_trace("%s node %s OFFLINE because its resource is stopped", node_type, this_node->priv->id); this_node->details->online = FALSE; pcmk__clear_node_flags(this_node, pcmk__node_remote_reset); } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->priv->id); this_node->details->online = FALSE; pcmk__set_node_flags(this_node, pcmk__node_remote_reset); } else { crm_trace("%s node %s is %s", node_type, this_node->priv->id, this_node->details->online? "ONLINE" : "OFFLINE"); } } static void determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, pcmk_scheduler_t *scheduler) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED); CRM_CHECK(this_node != NULL, return); this_node->details->shutdown = FALSE; if (pe__shutdown_requested(this_node)) { this_node->details->shutdown = TRUE; } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { pcmk__set_node_flags(this_node, pcmk__node_expected_up); } if (this_node->priv->variant == pcmk__node_variant_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { online = determine_online_status_no_fencing(scheduler, node_state, this_node); } else { online = determine_online_status_fencing(scheduler, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->assign->score = -PCMK_SCORE_INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->assign->score = -PCMK_SCORE_INFINITY; } if (this_node->priv->variant == pcmk__node_variant_ping) { crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node)); } else if (this_node->details->unclean) { pcmk__sched_warn(scheduler, "%s is unclean", pcmk__node_name(this_node)); } else if (!this_node->details->online) { crm_trace("%s is offline", pcmk__node_name(this_node)); } else if (this_node->details->shutdown) { crm_info("%s is shutting down", pcmk__node_name(this_node)); } else if (this_node->details->pending) { crm_info("%s is pending", pcmk__node_name(this_node)); } else if (pcmk_is_set(this_node->priv->flags, pcmk__node_standby)) { crm_info("%s is in standby", pcmk__node_name(this_node)); } else if (this_node->details->maintenance) { crm_info("%s is in maintenance", pcmk__node_name(this_node)); } else { crm_info("%s is online", pcmk__node_name(this_node)); } } /*! * \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!pcmk__str_empty(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char)); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static pcmk_resource_t * create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE); pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none); crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { pcmk_node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pcmk_find_node(scheduler, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, NULL, scheduler); } link_rsc2remotenode(scheduler, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) { // This removed resource needs to be mapped to a launcher crm_trace("Launched resource %s was removed from the configuration", rsc_id); pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched); } pcmk__set_rsc_flags(rsc, pcmk__rsc_removed); scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history * * \param[in,out] parent Clone resource that orphan will be added to * \param[in] rsc_id Orphan's resource ID * \param[in] node Where orphan is active (for logging only) * \param[in,out] scheduler Scheduler data * * \return Newly added orphaned instance of \p parent */ static pcmk_resource_t * create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pcmk_resource_t *top = pe__create_clone_child(parent, scheduler); pcmk_resource_t *orphan = NULL; // find_rsc() because we might be a cloned group orphan = top->priv->fns->find_rsc(top, rsc_id, NULL, pcmk_rsc_match_clone_only); pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, pcmk__node_name(node)); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX * instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX * instances are already active). * * \param[in,out] scheduler Scheduler data * \param[in] node Node on which to check for instance * \param[in,out] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (no instance) */ static pcmk_resource_t * find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, pcmk_resource_t *parent, const char *rsc_id) { GList *rIter = NULL; pcmk_resource_t *rsc = NULL; pcmk_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(pcmk__is_anonymous_clone(parent)); // Check for active (or partially active, for cloned groups) instance pcmk__rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, pcmk__node_name(node), parent->id); for (rIter = parent->priv->children; (rIter != NULL) && (rsc == NULL); rIter = rIter->next) { GList *locations = NULL; pcmk_resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and * (3) when we re-run calculations on the same scheduler data as part of * a simulation. */ child->priv->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. */ CRM_LOG_ASSERT(locations->next == NULL); if (pcmk__same_node((pcmk_node_t *) locations->data, node)) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL, pcmk_rsc_match_clone_only); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true * to false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->priv->active_nodes != NULL) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, pcmk__node_name(node)); skip_inactive = TRUE; rsc = NULL; } else { pcmk__rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && !pcmk_is_set(child->flags, pcmk__rsc_blocked)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->priv->fns->find_rsc(child, rsc_id, NULL, pcmk_rsc_match_clone_only); /* ... but don't use it if it was already associated with a * pending action on another node */ if (inactive_instance != NULL) { const pcmk_node_t *pending_node = NULL; pending_node = inactive_instance->priv->pending_node; if ((pending_node != NULL) && !pcmk__same_node(pending_node, node)) { inactive_instance = NULL; } } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pcmk__rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we * don't want to consume a valid instance number for unclean nodes. Such * instances may appear to be active according to the history, but should be * considered inactive, so we can start an instance elsewhere. Treat such * instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pcmk__is_guest_or_bundle_node(node) && !pe__is_universal_clone(parent, scheduler)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler); pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static pcmk_resource_t * unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, const char *rsc_id) { pcmk_resource_t *rsc = NULL; pcmk_resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(scheduler->priv->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0, * we create a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources, clone0_id); if (clone0 && !pcmk_is_set(clone0->flags, pcmk__rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) { crm_trace("Resource history for %s is orphaned " "because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pcmk__is_anonymous_clone(parent)) { if (pcmk__is_bundled(parent)) { rsc = pe__find_bundle_replica(parent->priv->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(scheduler, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none) && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) { pcmk__str_update(&(rsc->priv->history_id), rsc_id); pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, pcmk__node_name(node), rsc->id, pcmk_is_set(rsc->flags, pcmk__rsc_removed)? " (ORPHAN)" : ""); } return rsc; } static pcmk_resource_t * process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, pcmk__node_name(node)); rsc = create_fake_resource(rsc_id, rsc_entry, scheduler); if (rsc == NULL) { return NULL; } if (!pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) { pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id); resource_location(rsc, NULL, -PCMK_SCORE_INFINITY, "__orphan_do_not_run__", scheduler); } return rsc; } static void process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__on_fail on_fail) { pcmk_node_t *tmpnode = NULL; char *reason = NULL; enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore; pcmk_scheduler_t *scheduler = NULL; bool known_active = false; CRM_ASSERT(rsc); scheduler = rsc->priv->scheduler; known_active = (rsc->priv->orig_role > pcmk_role_stopped); pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, pcmk_role_text(rsc->priv->orig_role), pcmk__node_name(node), pcmk__on_fail_text(on_fail)); /* process current state */ if (rsc->priv->orig_role != pcmk_role_unknown) { pcmk_resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->priv->probed_nodes, node->priv->id) == NULL) { pcmk_node_t *n = pe__copy_node(node); pcmk__rsc_trace(rsc, "%s (%s in history) known on %s", rsc->id, pcmk__s(rsc->priv->history_id, "the same"), pcmk__node_name(n)); g_hash_table_insert(iter->priv->probed_nodes, (gpointer) n->priv->id, n); } if (pcmk_is_set(iter->flags, pcmk__rsc_unique)) { break; } iter = iter->priv->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if (known_active && !node->details->online && !node->details->maintenance && pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pcmk__is_guest_or_bundle_node(node)) { pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); should_fence = TRUE; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { if (pcmk__is_remote_node(node) && (node->priv->remote != NULL) && !pcmk_is_set(node->priv->remote->flags, pcmk__rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. */ pcmk__clear_node_flags(node, pcmk__node_seen); reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(scheduler, node, reason, FALSE); } free(reason); } /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */ save_on_fail = on_fail; if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = pcmk__on_fail_ignore; } switch (on_fail) { case pcmk__on_fail_ignore: /* nothing to do */ break; case pcmk__on_fail_demote: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed); demote_action(rsc, node, FALSE); break; case pcmk__on_fail_fence_node: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(scheduler, node, reason, FALSE); free(reason); break; case pcmk__on_fail_standby_node: pcmk__set_node_flags(node, pcmk__node_standby|pcmk__node_fail_standby); break; case pcmk__on_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed); pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked); break; case pcmk__on_fail_ban: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -PCMK_SCORE_INFINITY, "__action_migration_auto__", scheduler); break; case pcmk__on_fail_stop: pe__set_next_role(rsc, pcmk_role_stopped, PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP); break; case pcmk__on_fail_restart: if (known_active) { pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); stop_action(rsc, node, FALSE); } break; case pcmk__on_fail_restart_container: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. */ scheduler->priv->stop_needed = g_list_prepend(scheduler->priv->stop_needed, rsc->priv->launcher); } else if (rsc->priv->launcher != NULL) { stop_action(rsc->priv->launcher, node, FALSE); } else if (known_active) { stop_action(rsc, node, FALSE); } break; case pcmk__on_fail_reset_remote: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { tmpnode = NULL; if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { tmpnode = pcmk_find_node(scheduler, rsc->id); } if (pcmk__is_remote_node(tmpnode) && !pcmk_is_set(tmpnode->priv->flags, pcmk__node_remote_fenced)) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(scheduler, tmpnode, "remote connection is unrecoverable", FALSE); } } /* require the stop action regardless if fencing is occurring or not. */ if (known_active) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->priv->remote_reconnect_ms > 0U) { pe__set_next_role(rsc, pcmk_role_stopped, "remote reset"); } break; } /* Ensure a remote connection failure forces an unclean Pacemaker Remote * node to be fenced. By marking the node as seen, the failure will result * in a fencing operation regardless if we're going to attempt to reconnect * in this transition. */ if (pcmk_all_flags_set(rsc->flags, pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) { tmpnode = pcmk_find_node(scheduler, rsc->id); if (tmpnode && tmpnode->details->unclean) { pcmk__set_node_flags(tmpnode, pcmk__node_seen); } } if (known_active) { if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { crm_notice("Removed resource %s is active on %s and will be " "stopped when possible", rsc->id, pcmk__node_name(node)); } else { crm_notice("Removed resource %s must be stopped manually on %s " "because " PCMK_OPT_STOP_ORPHAN_RESOURCES " is set to false", rsc->id, pcmk__node_name(node)); } } native_add_running(rsc, node, scheduler, (save_on_fail != pcmk__on_fail_ignore)); switch (on_fail) { case pcmk__on_fail_ignore: break; case pcmk__on_fail_demote: case pcmk__on_fail_block: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed); break; default: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); break; } } else if ((rsc->priv->history_id != NULL) && (strchr(rsc->priv->history_id, ':') != NULL)) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)", rsc->priv->history_id, rsc->id); free(rsc->priv->history_id); rsc->priv->history_id = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, PCMK_ACTION_STOP, FALSE); GList *gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { pcmk_action_t *stop = (pcmk_action_t *) gIter->data; pcmk__set_action_flags(stop, pcmk__action_optional); } g_list_free(possible_matches); } /* A successful stop after migrate_to on the migration source doesn't make * the partially migrated resource stopped on the migration target. */ if ((rsc->priv->orig_role == pcmk_role_stopped) && (rsc->priv->active_nodes != NULL) && (rsc->priv->partial_migration_target != NULL) && pcmk__same_node(rsc->priv->partial_migration_source, node)) { rsc->priv->orig_role = pcmk_role_started; } } /* create active recurring operations as optional */ static void process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc, int start_index, int stop_index, GList *sorted_op_list, pcmk_scheduler_t *scheduler) { int counter = -1; const char *task = NULL; const char *status = NULL; GList *gIter = sorted_op_list; CRM_ASSERT(rsc); pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = pcmk__xe_id(rsc_op); counter++; if (node->details->online == FALSE) { pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline", rsc->id, pcmk__node_name(node)); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active", id, pcmk__node_name(node)); continue; } else if (counter < start_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d", id, pcmk__node_name(node), counter); continue; } crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms); if (interval_ms == 0) { pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring", id, pcmk__node_name(node)); continue; } status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS); if (pcmk__str_eq(status, "-1", pcmk__str_casei)) { pcmk__rsc_trace(rsc, "Skipping %s on %s: status", id, pcmk__node_name(node)); continue; } task = crm_element_value(rsc_op, PCMK_XA_OPERATION); /* create the action */ key = pcmk__op_key(rsc->id, task, interval_ms); pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node)); custom_action(rsc, key, task, node, TRUE, scheduler); } } void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { const xmlNode *rsc_op = (const xmlNode *) iter->data; counter++; task = crm_element_value(rsc_op, PCMK_XA_OPERATION); status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS); if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei) && pcmk__str_eq(status, "0", pcmk__str_casei)) { *stop_index = counter; } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MIGRATE_FROM, NULL)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE); if (pcmk__strcase_any_of(rc, "0", "8", NULL)) { implied_monitor_start = counter; } } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, NULL)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } // If resource history entry has shutdown lock, remember lock node and time static void unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { time_t lock_time = 0; // When lock started (i.e. node shutdown time) if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK, &lock_time) == pcmk_ok) && (lock_time != 0)) { if ((scheduler->priv->shutdown_lock_ms > 0U) && (get_effective_time(scheduler) > (lock_time + (scheduler->priv->shutdown_lock_ms / 1000U)))) { pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired", rsc->id, pcmk__node_name(node)); pe__clear_resource_history(rsc, node); } else { rsc->priv->lock_node = node; rsc->priv->lock_time = lock_time; } } } /*! * \internal * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status * * \param[in,out] node Node whose status is being unpacked * \param[in] rsc_entry \c PCMK__XE_LRM_RESOURCE XML being unpacked * \param[in,out] scheduler Scheduler data * * \return Resource corresponding to the entry, or NULL if no operation history */ static pcmk_resource_t * unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource, pcmk_scheduler_t *scheduler) { GList *gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = pcmk_role_unknown; const char *rsc_id = pcmk__xe_id(lrm_resource); pcmk_resource_t *rsc = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum pcmk__on_fail on_fail = pcmk__on_fail_ignore; enum rsc_role_e saved_role = pcmk_role_unknown; if (rsc_id == NULL) { pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE " entry: No " PCMK_XA_ID); crm_log_xml_info(lrm_resource, "missing-id"); return NULL; } crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s", rsc_id, pcmk__node_name(node)); /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort * them */ for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) { op_list = g_list_prepend(op_list, rsc_op); } if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } } /* find the resource */ rsc = unpack_find_resource(scheduler, node, rsc_id); if (rsc == NULL) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } else { rsc = process_orphan_resource(lrm_resource, node, scheduler); } } CRM_ASSERT(rsc != NULL); // Check whether the resource is "shutdown-locked" to this node if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { unpack_shutdown_lock(lrm_resource, rsc, node, scheduler); } /* process operations */ saved_role = rsc->priv->orig_role; rsc->priv->orig_role = pcmk_role_unknown; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, scheduler); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail); if (get_target_role(rsc, &req_role)) { if ((rsc->priv->next_role == pcmk_role_unknown) || (req_role < rsc->priv->next_role)) { pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE); } else if (req_role > rsc->priv->next_role) { pcmk__rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, pcmk_role_text(rsc->priv->next_role), pcmk_role_text(req_role)); } } if (saved_role > rsc->priv->orig_role) { rsc->priv->orig_role = saved_role; } return rsc; } static void handle_removed_launched_resources(const xmlNode *lrm_rsc_list, pcmk_scheduler_t *scheduler) { for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { pcmk_resource_t *rsc; pcmk_resource_t *launcher = NULL; const char *rsc_id; const char *launcher_id = NULL; if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) { continue; } launcher_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER); rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID); if ((launcher_id == NULL) || (rsc_id == NULL)) { continue; } launcher = pe_find_resource(scheduler->priv->resources, launcher_id); if (launcher == NULL) { continue; } rsc = pe_find_resource(scheduler->priv->resources, rsc_id); if ((rsc == NULL) || (rsc->priv->launcher != NULL) || !pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) { continue; } pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s", rsc->id, launcher_id); rsc->priv->launcher = launcher; launcher->priv->launched = g_list_append(launcher->priv->launched, rsc); } } /*! * \internal * \brief Unpack one node's lrm status section * * \param[in,out] node Node whose status is being unpacked * \param[in] xml CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler) { bool found_removed_launched_resource = false; // Drill down to PCMK__XE_LRM_RESOURCES section xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL); if (xml == NULL) { return; } xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL); if (xml == NULL) { return; } // Unpack each PCMK__XE_LRM_RESOURCE entry for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCE, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) { pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler); if ((rsc != NULL) && pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) { found_removed_launched_resource = true; } } /* Now that all resource state has been unpacked for this node, map any * removed launched resources to their launchers. */ if (found_removed_launched_resource) { handle_removed_launched_resources(xml, scheduler); } } static void set_active(pcmk_resource_t *rsc) { const pcmk_resource_t *top = pe__const_top_resource(rsc, false); if (top && pcmk_is_set(top->flags, pcmk__rsc_promotable)) { rsc->priv->orig_role = pcmk_role_unpromoted; } else { rsc->priv->orig_role = pcmk_role_started; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { pcmk_node_t *node = value; int *score = user_data; node->assign->score = *score; } #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \ "/" PCMK__XE_LRM_RESOURCES \ "/" PCMK__XE_LRM_RESOURCE #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, int target_rc, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']" SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'", NULL); /* Need to check against transition_magic too? */ if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) { pcmk__g_strcat(xpath, " and @" PCMK__META_MIGRATE_TARGET "='", source, "']", NULL); } else if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) { pcmk__g_strcat(xpath, " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']", NULL); } else { g_string_append_c(xpath, ']'); } xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); if (xml && target_rc >= 0) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_EXEC_ERROR; crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc); crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status); if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) { return NULL; } } return xml; } static xmlNode * find_lrm_resource(const char *rsc_id, const char *node_name, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']", NULL); xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); return xml; } /*! * \internal * \brief Check whether a resource has no completed action history on a node * * \param[in,out] rsc Resource to check * \param[in] node_name Node to check * * \return true if \p rsc_id is unknown on \p node_name, otherwise false */ static bool unknown_on_node(pcmk_resource_t *rsc, const char *node_name) { bool result = false; xmlXPathObjectPtr search; char *xpath = NULL; xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']" SUB_XPATH_LRM_RSC_OP "[@" PCMK__XA_RC_CODE "!='%d']", node_name, rsc->id, PCMK_OCF_UNKNOWN); search = xpath_search(rsc->priv->scheduler->input, xpath); result = (numXpathResults(search) == 0); freeXpathObject(search); free(xpath); return result; } /*! * \internal * \brief Check whether a probe/monitor indicating the resource was not running * on a node happened after some event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that monitor is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a monitor happened after event, false otherwise */ static bool monitor_not_running_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, pcmk_scheduler_t *scheduler) { /* Any probe/monitor operation on the node indicating it was not running * there */ xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name, NULL, PCMK_OCF_NOT_RUNNING, scheduler); return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0); } /*! * \internal * \brief Check whether any non-monitor operation on a node happened after some * event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that non-monitor is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool non_monitor_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, pcmk_scheduler_t *scheduler) { xmlNode *lrm_resource = NULL; lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler); if (lrm_resource == NULL) { return false; } for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL, NULL); op != NULL; op = pcmk__xe_next_same(op)) { const char * task = NULL; if (op == xml_op) { continue; } task = crm_element_value(op, PCMK_XA_OPERATION); if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL) && pe__is_newer_op(op, xml_op) > 0) { return true; } } return false; } /*! * \internal * \brief Check whether the resource has newer state on a node after a migration * attempt * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] migrate_to Any migrate_to event that is being compared to * \param[in] migrate_from Any migrate_from event that is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool newer_state_after_migrate(const char *rsc_id, const char *node_name, const xmlNode *migrate_to, const xmlNode *migrate_from, pcmk_scheduler_t *scheduler) { const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to; const char *source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE); /* It's preferred to compare to the migrate event on the same node if * existing, since call ids are more reliable. */ if ((xml_op != migrate_to) && (migrate_to != NULL) && pcmk__str_eq(node_name, source, pcmk__str_casei)) { xml_op = migrate_to; } /* If there's any newer non-monitor operation on the node, or any newer * probe/monitor operation on the node indicating it was not running there, * the migration events potentially no longer matter for the node. */ return non_monitor_after(rsc_id, node_name, xml_op, scheduler) || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler); } /*! * \internal * \brief Parse migration source and target node names from history entry * * \param[in] entry Resource history entry for a migration action * \param[in] source_node If not NULL, source must match this node * \param[in] target_node If not NULL, target must match this node * \param[out] source_name Where to store migration source node name * \param[out] target_name Where to store migration target node name * * \return Standard Pacemaker return code */ static int get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node, const pcmk_node_t *target_node, const char **source_name, const char **target_name) { *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE); *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET); if ((*source_name == NULL) || (*target_name == NULL)) { pcmk__config_err("Ignoring resource history entry %s without " PCMK__META_MIGRATE_SOURCE " and " PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry)); return pcmk_rc_unpack_error; } if ((source_node != NULL) && !pcmk__str_eq(*source_name, source_node->priv->name, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource history entry %s because " PCMK__META_MIGRATE_SOURCE "='%s' does not match %s", pcmk__xe_id(entry), *source_name, pcmk__node_name(source_node)); return pcmk_rc_unpack_error; } if ((target_node != NULL) && !pcmk__str_eq(*target_name, target_node->priv->name, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource history entry %s because " PCMK__META_MIGRATE_TARGET "='%s' does not match %s", pcmk__xe_id(entry), *target_name, pcmk__node_name(target_node)); return pcmk_rc_unpack_error; } return pcmk_rc_ok; } /* * \internal * \brief Add a migration source to a resource's list of dangling migrations * * If the migrate_to and migrate_from actions in a live migration both * succeeded, but there is no stop on the source, the migration is considered * "dangling." Add the source to the resource's dangling migration list, which * will be used to schedule a stop on the source without affecting the target. * * \param[in,out] rsc Resource involved in migration * \param[in] node Migration source */ static void add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node) { pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s", rsc->id, pcmk__node_name(node)); rsc->priv->orig_role = pcmk_role_stopped; rsc->priv->dangling_migration_sources = g_list_prepend(rsc->priv->dangling_migration_sources, (gpointer) node); } /*! * \internal * \brief Update resource role etc. after a successful migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_success(struct action_history *history) { /* A complete migration sequence is: * 1. migrate_to on source node (which succeeded if we get to this function) * 2. migrate_from on target node * 3. stop on source node * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from succeeded but no stop has happened, the * migration is considered to be "dangling". * * If a successful migrate_to and stop have happened on the source node, we * still need to check for a partial migration, due to scenarios (easier to * produce with batch-limit=1) like: * * - A resource is migrating from node1 to node2, and a migrate_to is * initiated for it on node1. * * - node2 goes into standby mode while the migrate_to is pending, which * aborts the transition. * * - Upon completion of the migrate_to, a new transition schedules a stop * on both nodes and a start on node1. * * - If the new transition is aborted for any reason while the resource is * stopping on node1, the transition after that stop completes will see * the migrate_to and stop on the source, but it's still a partial * migration, and the resource must be stopped on node2 because it is * potentially active there due to the migrate_to. * * We also need to take into account that either node's history may be * cleared at any point in the migration process. */ int from_rc = PCMK_OCF_OK; int from_status = PCMK_EXEC_PENDING; pcmk_node_t *target_node = NULL; xmlNode *migrate_from = NULL; const char *source = NULL; const char *target = NULL; bool source_newer_op = false; bool target_newer_state = false; bool active_on_target = false; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } // Check for newer state on the source source_newer_op = non_monitor_after(history->rsc->id, source, history->xml, scheduler); // Check for a migrate_from action from this source on the target migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, -1, scheduler); if (migrate_from != NULL) { if (source_newer_op) { /* There's a newer non-monitor operation on the source and a * migrate_from on the target, so this migrate_to is irrelevant to * the resource's state. */ return; } crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc); crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status); } /* If the resource has newer state on both the source and target after the * migration events, this migrate_to is irrelevant to the resource's state. */ target_newer_state = newer_state_after_migrate(history->rsc->id, target, history->xml, migrate_from, scheduler); if (source_newer_op && target_newer_state) { return; } /* Check for dangling migration (migrate_from succeeded but stop not done). * We know there's no stop because we already returned if the target has a * migrate_from and the source has any newer non-monitor operation. */ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { add_dangling_migration(history->rsc, history->node); return; } /* Without newer state, this migrate_to implies the resource is active. * (Clones are not allowed to migrate, so role can't be promoted.) */ history->rsc->priv->orig_role = pcmk_role_started; target_node = pcmk_find_node(scheduler, target); active_on_target = !target_newer_state && (target_node != NULL) && target_node->details->online; if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target if (active_on_target) { native_add_running(history->rsc, target_node, scheduler, TRUE); } else { // Mark resource as failed, require recovery, and prevent migration pcmk__set_rsc_flags(history->rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable); } return; } // The migrate_from is pending, complete but erased, or to be scheduled /* If there is no history at all for the resource on an online target, then * it was likely cleaned. Just return, and we'll schedule a probe. Once we * have the probe result, it will be reflected in target_newer_state. */ if ((target_node != NULL) && target_node->details->online && unknown_on_node(history->rsc, target)) { return; } if (active_on_target) { pcmk_node_t *source_node = pcmk_find_node(scheduler, source); native_add_running(history->rsc, target_node, scheduler, FALSE); if ((source_node != NULL) && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ history->rsc->priv->partial_migration_target = target_node; history->rsc->priv->partial_migration_source = source_node; } } else if (!source_newer_op) { // Mark resource as failed, require recovery, and prevent migration pcmk__set_rsc_flags(history->rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_failure(struct action_history *history) { xmlNode *target_migrate_from = NULL; const char *source = NULL; const char *target = NULL; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->priv->orig_role = pcmk_role_started; // Check for migrate_from on the target target_migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, PCMK_OCF_OK, scheduler); if (/* If the resource state is unknown on the target, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, target) /* If the resource has newer state on the target after the migration * events, this migrate_to no longer matters for the target. */ && !newer_state_after_migrate(history->rsc->id, target, history->xml, target_migrate_from, scheduler)) { /* The resource has no newer state on the target, so assume it's still * active there. * (if it is up). */ pcmk_node_t *target_node = pcmk_find_node(scheduler, target); if (target_node && target_node->details->online) { native_add_running(history->rsc, target_node, scheduler, FALSE); } } else if (!non_monitor_after(history->rsc->id, source, history->xml, scheduler)) { /* We know the resource has newer state on the target, but this * migrate_to still matters for the source as long as there's no newer * non-monitor operation there. */ // Mark node as having dangling migration so we can force a stop later history->rsc->priv->dangling_migration_sources = g_list_prepend(history->rsc->priv->dangling_migration_sources, (gpointer) history->node); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_from action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_from_failure(struct action_history *history) { xmlNode *source_migrate_to = NULL; const char *source = NULL; const char *target = NULL; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; // Get source and target node names from XML if (get_migration_node_names(history->xml, NULL, history->node, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->priv->orig_role = pcmk_role_started; // Check for a migrate_to on the source source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO, source, target, PCMK_OCF_OK, scheduler); if (/* If the resource state is unknown on the source, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, source) /* If the resource has newer state on the source after the migration * events, this migrate_from no longer matters for the source. */ && !newer_state_after_migrate(history->rsc->id, source, source_migrate_to, history->xml, scheduler)) { /* The resource has no newer state on the source, so assume it's still * active there (if it is up). */ pcmk_node_t *source_node = pcmk_find_node(scheduler, source); if (source_node && source_node->details->online) { native_add_running(history->rsc, source_node, scheduler, TRUE); } } } /*! * \internal * \brief Add an action to cluster's list of failed actions * * \param[in,out] history Parsed action result history */ static void record_failed_op(struct action_history *history) { const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; if (!(history->node->details->online)) { return; } for (const xmlNode *xIter = scheduler->priv->failed->children; xIter != NULL; xIter = xIter->next) { const char *key = pcmk__xe_history_key(xIter); const char *uname = crm_element_value(xIter, PCMK_XA_UNAME); if (pcmk__str_eq(history->key, key, pcmk__str_none) && pcmk__str_eq(uname, history->node->priv->name, pcmk__str_casei)) { crm_trace("Skipping duplicate entry %s on %s", history->key, pcmk__node_name(history->node)); return; } } crm_trace("Adding entry for %s on %s to failed action list", history->key, pcmk__node_name(history->node)); crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name); crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id); pcmk__xml_copy(scheduler->priv->failed, history->xml); } static char * last_change_str(const xmlNode *xml_op) { time_t when; char *result = NULL; if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &when) == pcmk_ok) { char *when_s = pcmk__epoch2str(&when, 0); const char *p = strchr(when_s, ' '); // Skip day of week to make message shorter if ((p != NULL) && (*(++p) != '\0')) { result = pcmk__str_copy(p); } free(when_s); } if (result == NULL) { result = pcmk__str_copy("unknown_time"); } return result; } /*! * \internal * \brief Ban a resource (or its clone if an anonymous instance) from all nodes * * \param[in,out] rsc Resource to ban */ static void ban_from_all_nodes(pcmk_resource_t *rsc) { int score = -PCMK_SCORE_INFINITY; const pcmk_scheduler_t *scheduler = rsc->priv->scheduler; if (rsc->priv->parent != NULL) { pcmk_resource_t *parent = uber_parent(rsc); if (pcmk__is_anonymous_clone(parent)) { /* For anonymous clones, if an operation with * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the * entire clone must stop. */ rsc = parent; } } // Ban the resource from all nodes crm_notice("%s will not be started under current conditions", rsc->id); if (rsc->priv->allowed_nodes != NULL) { g_hash_table_destroy(rsc->priv->allowed_nodes); } rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes); g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score); } /*! * \internal * \brief Get configured failure handling and role after failure for an action * * \param[in,out] history Unpacked action history entry * \param[out] on_fail Where to set configured failure handling * \param[out] fail_role Where to set to role after failure */ static void unpack_failure_handling(struct action_history *history, enum pcmk__on_fail *on_fail, enum rsc_role_e *fail_role) { xmlNode *config = pcmk__find_action_config(history->rsc, history->task, history->interval_ms, true); GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node, history->task, history->interval_ms, config); const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL); *on_fail = pcmk__parse_on_fail(history->rsc, history->task, history->interval_ms, on_fail_str); *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail, meta); g_hash_table_destroy(meta); } /*! * \internal * \brief Update resource role, failure handling, etc., after a failed action * * \param[in,out] history Parsed action result history * \param[in] config_on_fail Action failure handling from configuration * \param[in] fail_role Resource's role after failure of this action * \param[out] last_failure This will be set to the history XML * \param[in,out] on_fail Actual handling of action result */ static void unpack_rsc_op_failure(struct action_history *history, enum pcmk__on_fail config_on_fail, enum rsc_role_e fail_role, xmlNode **last_failure, enum pcmk__on_fail *on_fail) { bool is_probe = false; char *last_change_s = NULL; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; *last_failure = history->xml; is_probe = pcmk_xe_is_probe(history->xml); last_change_s = last_change_str(history->xml); if (!pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster) && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? "probe" : history->task), history->rsc->id, pcmk__node_name(history->node), last_change_s, history->exit_status, history->id); } else { pcmk__sched_warn(scheduler, "Unexpected result (%s%s%s) was recorded for %s of " "%s on %s at %s " QB_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? "probe" : history->task), history->rsc->id, pcmk__node_name(history->node), last_change_s, history->exit_status, history->id); if (is_probe && (history->exit_status != PCMK_OCF_OK) && (history->exit_status != PCMK_OCF_NOT_RUNNING) && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the " PCMK_XA_RESOURCE_DISCOVERY " option for location " "constraints", history->rsc->id, pcmk__node_name(history->node)); } record_failed_op(history); } free(last_change_s); if (*on_fail < config_on_fail) { pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s", pcmk__on_fail_text(*on_fail), pcmk__on_fail_text(config_on_fail), history->key); *on_fail = config_on_fail; } if (strcmp(history->task, PCMK_ACTION_STOP) == 0) { resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY, "__stop_fail__", scheduler); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) { unpack_migrate_to_failure(history); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) { unpack_migrate_from_failure(history); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->priv->orig_role = pcmk_role_promoted; } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) { if (config_on_fail == pcmk__on_fail_block) { history->rsc->priv->orig_role = pcmk_role_promoted; pe__set_next_role(history->rsc, pcmk_role_stopped, "demote with " PCMK_META_ON_FAIL "=block"); } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) { history->rsc->priv->orig_role = pcmk_role_stopped; } else { /* Staying in the promoted role would put the scheduler and * controller into a loop. Setting the role to unpromoted is not * dangerous because the resource will be stopped as part of * recovery, and any promotion will be ordered after that stop. */ history->rsc->priv->orig_role = pcmk_role_unpromoted; } } if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { /* leave stopped */ pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id); history->rsc->priv->orig_role = pcmk_role_stopped; } else if (history->rsc->priv->orig_role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id); set_active(history->rsc); } pcmk__rsc_trace(history->rsc, "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s", history->rsc->id, pcmk_role_text(history->rsc->priv->orig_role), pcmk__btoa(history->node->details->unclean), pcmk__on_fail_text(config_on_fail), pcmk_role_text(fail_role)); if ((fail_role != pcmk_role_started) && (history->rsc->priv->next_role < fail_role)) { pe__set_next_role(history->rsc, fail_role, "failure"); } if (fail_role == pcmk_role_stopped) { ban_from_all_nodes(history->rsc); } } /*! * \internal * \brief Block a resource with a failed action if it cannot be recovered * * If resource action is a failed stop and fencing is not possible, mark the * resource as unmanaged and blocked, since recovery cannot be done. * * \param[in,out] history Parsed action history entry */ static void block_if_unrecoverable(struct action_history *history) { char *last_change_s = NULL; if (strcmp(history->task, PCMK_ACTION_STOP) != 0) { return; // All actions besides stop are always recoverable } if (pe_can_fence(history->node->priv->scheduler, history->node)) { return; // Failed stops are recoverable via fencing } last_change_s = last_change_str(history->xml); pcmk__sched_err(history->node->priv->scheduler, "No further recovery can be attempted for %s " "because %s on %s failed (%s%s%s) at %s " QB_XS " rc=%d id=%s", history->rsc->id, history->task, pcmk__node_name(history->node), services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), last_change_s, history->exit_status, history->id); free(last_change_s); pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed); pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked); } /*! * \internal * \brief Update action history's execution status and why * * \param[in,out] history Parsed action history entry * \param[out] why Where to store reason for update * \param[in] value New value * \param[in] reason Description of why value was changed */ static inline void remap_because(struct action_history *history, const char **why, int value, const char *reason) { if (history->execution_status != value) { history->execution_status = value; *why = reason; } } /*! * \internal * \brief Remap informational monitor results and operation status * * For the monitor results, certain OCF codes are for providing extended information * to the user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by Pacemaker. * * For operation status, the action result can be used to determine an appropriate * status for the purposes of responding to the action. The status provided by the * executor is not directly usable since the executor does not know what was expected. * * \param[in,out] history Parsed action history entry * \param[in,out] on_fail What should be done about the result * \param[in] expired Whether result is expired * * \note If the result is remapped and the node is not shutting down or failed, * the operation will be recorded in the scheduler data's list of failed * operations to highlight it for the user. * * \note This may update the resource's current and next role. */ static void remap_operation(struct action_history *history, enum pcmk__on_fail *on_fail, bool expired) { bool is_probe = false; int orig_exit_status = history->exit_status; int orig_exec_status = history->execution_status; const char *why = NULL; const char *task = history->task; // Remap degraded results to their successful counterparts history->exit_status = pcmk__effective_rc(history->exit_status); if (history->exit_status != orig_exit_status) { why = "degraded result"; if (!expired && (!history->node->details->shutdown || history->node->details->online)) { record_failed_op(history); } } if (!pcmk__is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml) && ((history->execution_status != PCMK_EXEC_DONE) || (history->exit_status != PCMK_OCF_NOT_RUNNING))) { history->execution_status = PCMK_EXEC_DONE; history->exit_status = PCMK_OCF_NOT_RUNNING; why = "equivalent probe result"; } /* If the executor reported an execution status of anything but done or * error, consider that final. But for done or error, we know better whether * it should be treated as a failure or not, because we know the expected * result. */ switch (history->execution_status) { case PCMK_EXEC_DONE: case PCMK_EXEC_ERROR: break; // These should be treated as node-fatal case PCMK_EXEC_NO_FENCE_DEVICE: case PCMK_EXEC_NO_SECRETS: remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "node-fatal error"); goto remap_done; default: goto remap_done; } is_probe = pcmk_xe_is_probe(history->xml); if (is_probe) { task = "probe"; } if (history->expected_exit_status < 0) { /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the * expected exit status in the transition key, which (along with the * similar case of a corrupted transition key in the CIB) will be * reported to this function as -1. Pacemaker 2.0+ does not support * rolling upgrades from those versions or processing of saved CIB files * from those versions, so we do not need to care much about this case. */ remap_because(history, &why, PCMK_EXEC_ERROR, "obsolete history format"); pcmk__config_warn("Expected result not found for %s on %s " "(corrupt or obsolete CIB?)", history->key, pcmk__node_name(history->node)); } else if (history->exit_status == history->expected_exit_status) { remap_because(history, &why, PCMK_EXEC_DONE, "expected result"); } else { remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result"); pcmk__rsc_debug(history->rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", history->key, pcmk__node_name(history->node), history->expected_exit_status, services_ocf_exitcode_str(history->expected_exit_status), history->exit_status, services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, "")); } switch (history->exit_status) { case PCMK_OCF_OK: if (is_probe && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) { char *last_change_s = last_change_str(history->xml); remap_because(history, &why, PCMK_EXEC_DONE, "probe"); pcmk__rsc_info(history->rsc, "Probe found %s active on %s at %s", history->rsc->id, pcmk__node_name(history->node), last_change_s); free(last_change_s); } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || (history->expected_exit_status == history->exit_status) || !pcmk_is_set(history->rsc->flags, pcmk__rsc_managed)) { /* For probes, recurring monitors for the Stopped role, and * unmanaged resources, "not running" is not considered a * failure. */ remap_because(history, &why, PCMK_EXEC_DONE, "exit status"); history->rsc->priv->orig_role = pcmk_role_stopped; *on_fail = pcmk__on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "not running"); } break; case PCMK_OCF_RUNNING_PROMOTED: if (is_probe && (history->exit_status != history->expected_exit_status)) { char *last_change_s = last_change_str(history->xml); remap_because(history, &why, PCMK_EXEC_DONE, "probe"); pcmk__rsc_info(history->rsc, "Probe found %s active and promoted on %s at %s", history->rsc->id, pcmk__node_name(history->node), last_change_s); free(last_change_s); } if (!expired || (history->exit_status == history->expected_exit_status)) { history->rsc->priv->orig_role = pcmk_role_promoted; } break; case PCMK_OCF_FAILED_PROMOTED: if (!expired) { history->rsc->priv->orig_role = pcmk_role_promoted; } remap_because(history, &why, PCMK_EXEC_ERROR, "exit status"); break; case PCMK_OCF_NOT_CONFIGURED: remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); break; case PCMK_OCF_UNIMPLEMENT_FEATURE: { guint interval_ms = 0; crm_element_value_ms(history->xml, PCMK_META_INTERVAL, &interval_ms); if (interval_ms == 0) { if (!expired) { block_if_unrecoverable(history); } remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); } else { remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED, "exit status"); } } break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: if (!expired) { block_if_unrecoverable(history); } remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); break; default: if (history->execution_status == PCMK_EXEC_DONE) { char *last_change_s = last_change_str(history->xml); crm_info("Treating unknown exit status %d from %s of %s " "on %s at %s as failure", history->exit_status, task, history->rsc->id, pcmk__node_name(history->node), last_change_s); remap_because(history, &why, PCMK_EXEC_ERROR, "unknown exit status"); free(last_change_s); } break; } remap_done: if (why != NULL) { pcmk__rsc_trace(history->rsc, "Remapped %s result from [%s: %s] to [%s: %s] " "because of %s", history->key, pcmk_exec_status_str(orig_exec_status), crm_exit_str(orig_exit_status), pcmk_exec_status_str(history->execution_status), crm_exit_str(history->exit_status), why); } } // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(const xmlNode *xml_op, const char *task, pcmk_resource_t *rsc, pcmk_node_t *node) { if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. */ pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure, rsc->priv->scheduler); } else { pcmk__op_digest_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->priv->scheduler); switch (digest_data->rc) { case pcmk__digest_unknown: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", rsc->id, pcmk__xe_history_key(xml_op), node->priv->id); break; case pcmk__digest_match: break; default: return TRUE; } } } return FALSE; } // Order action after fencing of remote node, given connection rsc static void order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn, pcmk_scheduler_t *scheduler) { pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id); if (remote_node) { pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, FALSE, scheduler); order_actions(fence, action, pcmk__ar_first_implies_then); } } static bool should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task, guint interval_ms, bool is_last_failure) { /* Clearing failures of recurring monitors has special concerns. The * executor reports only changes in the monitor result, so if the * monitor is still active and still getting the same failure result, * that will go undetected after the failure is cleared. * * Also, the operation history will have the time when the recurring * monitor result changed to the given code, not the time when the * result last happened. * * @TODO We probably should clear such failures only when the failure * timeout has passed since the last occurrence of the failed result. * However we don't record that information. We could maybe approximate * that by clearing only if there is a more recent successful monitor or * stop result, but we don't even have that information at this point * since we are still unpacking the resource's operation history. * * This is especially important for remote connection resources with a * reconnect interval, so in that case, we skip clearing failures * if the remote node hasn't been fenced. */ if ((rsc->priv->remote_reconnect_ms > 0U) && pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_fencing_enabled) && (interval_ms != 0) && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler, rsc->id); if (remote_node && !pcmk_is_set(remote_node->priv->flags, pcmk__node_remote_fenced)) { if (is_last_failure) { crm_info("Waiting to clear monitor failure for remote node %s" " until fencing has occurred", rsc->id); } return TRUE; } } return FALSE; } /*! * \internal * \brief Check operation age and schedule failure clearing when appropriate * * This function has two distinct purposes. The first is to check whether an * operation history entry is expired (i.e. the resource has a failure timeout, * the entry is older than the timeout, and the resource either has no fail * count or its fail count is entirely older than the timeout). The second is to * schedule fail count clearing when appropriate (i.e. the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). * * \param[in,out] history Parsed action result history * * \return true if operation history entry is expired, otherwise false */ static bool check_operation_expiry(struct action_history *history) { bool expired = false; bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0"); time_t last_run = 0; int unexpired_fail_count = 0; const char *clear_reason = NULL; const guint expiration_sec = history->rsc->priv->failure_expiration_ms / 1000; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) { pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not expired: " "Not Installed does not expire", history->id, pcmk__node_name(history->node)); return false; // "Not installed" must always be cleared manually } if ((expiration_sec > 0) && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE, &last_run) == 0)) { /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a * timestamp */ time_t now = get_effective_time(scheduler); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? if ((now >= (last_run + expiration_sec)) && !should_ignore_failure_timeout(history->rsc, history->task, history->interval_ms, is_last_failure)) { expired = true; } // Does the resource as a whole have an unexpired fail count? unexpired_fail_count = pe_get_failcount(history->node, history->rsc, &last_failure, pcmk__fc_effective, history->xml); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d " "expiration=%s last-failure@%lld", history->id, (long long) last_run, (expired? "" : "not "), (long long) now, unexpired_fail_count, pcmk__readable_interval(expiration_sec * 1000), (long long) last_failure); last_failure += expiration_sec + 1; if (unexpired_fail_count && (now < last_failure)) { pe__update_recheck_time(last_failure, scheduler, "fail count expiration"); } } if (expired) { if (pe_get_failcount(history->node, history->rsc, NULL, pcmk__fc_default, history->xml)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. */ pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Unexpired fail count", history->id, pcmk__node_name(history->node)); expired = false; } } else if (is_last_failure && (history->rsc->priv->remote_reconnect_ms > 0U)) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. */ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure && should_clear_for_param_change(history->xml, history->task, history->rsc, history->node)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { pcmk_action_t *clear_op = NULL; // Schedule clearing of the fail count clear_op = pe__clear_failcount(history->rsc, history->node, clear_reason, scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled) && (history->rsc->priv->remote_reconnect_ms > 0)) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_history() is done). */ crm_info("Clearing %s failure will wait until any scheduled " "fencing of %s completes", history->task, history->rsc->id); order_after_remote_fencing(clear_op, history->rsc, scheduler); } } if (expired && (history->interval_ms == 0) && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { switch (history->exit_status) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Don't expire probes that return these values pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Probe result", history->id, pcmk__node_name(history->node)); expired = false; break; } } return expired; } int pe__target_rc_from_xml(const xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } /*! * \internal * \brief Update a resource's state for an action result * * \param[in,out] history Parsed action history entry * \param[in] exit_status Exit status to base new state on * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void update_resource_state(struct action_history *history, int exit_status, const xmlNode *last_failure, enum pcmk__on_fail *on_fail) { bool clear_past_failure = false; if ((exit_status == PCMK_OCF_NOT_INSTALLED) || (!pcmk__is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml))) { history->rsc->priv->orig_role = pcmk_role_stopped; } else if (exit_status == PCMK_OCF_NOT_RUNNING) { clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { if ((last_failure != NULL) && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure), pcmk__str_none)) { clear_past_failure = true; } if (history->rsc->priv->orig_role < pcmk_role_started) { set_active(history->rsc); } } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_stopped; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_promoted; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE, pcmk__str_none)) { if (*on_fail == pcmk__on_fail_demote) { /* Demote clears an error only if * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE */ clear_past_failure = true; } history->rsc->priv->orig_role = pcmk_role_unpromoted; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { unpack_migrate_to_success(history); } else if (history->rsc->priv->orig_role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "%s active on %s", history->rsc->id, pcmk__node_name(history->node)); set_active(history->rsc); } if (!clear_past_failure) { return; } switch (*on_fail) { case pcmk__on_fail_stop: case pcmk__on_fail_ban: case pcmk__on_fail_standby_node: case pcmk__on_fail_fence_node: pcmk__rsc_trace(history->rsc, "%s (%s) is not cleared by a completed %s", history->rsc->id, pcmk__on_fail_text(*on_fail), history->task); break; case pcmk__on_fail_block: case pcmk__on_fail_ignore: case pcmk__on_fail_demote: case pcmk__on_fail_restart: case pcmk__on_fail_restart_container: *on_fail = pcmk__on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures"); break; case pcmk__on_fail_reset_remote: if (history->rsc->priv->remote_reconnect_ms == 0U) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = pcmk__on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures and reset remote"); } break; } } /*! * \internal * \brief Check whether a given history entry matters for resource state * * \param[in] history Parsed action history entry * * \return true if action can affect resource state, otherwise false */ static inline bool can_affect_state(struct action_history *history) { #if 0 /* @COMPAT It might be better to parse only actions we know we're interested * in, rather than exclude a couple we don't. However that would be a * behavioral change that should be done at a major or minor series release. * Currently, unknown operations can affect whether a resource is considered * active and/or failed. */ return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, "asyncmon", NULL); #else return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY, PCMK_ACTION_META_DATA, NULL); #endif } /*! * \internal * \brief Unpack execution/exit status and exit reason from a history entry * * \param[in,out] history Action history entry to unpack * * \return Standard Pacemaker return code */ static int unpack_action_result(struct action_history *history) { if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS, &(history->execution_status)) < 0) || (history->execution_status < PCMK_EXEC_PENDING) || (history->execution_status > PCMK_EXEC_MAX) || (history->execution_status == PCMK_EXEC_CANCELLED)) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " PCMK__XA_OP_STATUS " '%s'", history->id, history->rsc->id, pcmk__node_name(history->node), pcmk__s(crm_element_value(history->xml, PCMK__XA_OP_STATUS), "")); return pcmk_rc_unpack_error; } if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE, &(history->exit_status)) < 0) || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) { #if 0 /* @COMPAT We should ignore malformed entries, but since that would * change behavior, it should be done at a major or minor series * release. */ pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " PCMK__XA_RC_CODE " '%s'", history->id, history->rsc->id, pcmk__node_name(history->node), pcmk__s(crm_element_value(history->xml, PCMK__XA_RC_CODE), "")); return pcmk_rc_unpack_error; #else history->exit_status = CRM_EX_ERROR; #endif } history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON); return pcmk_rc_ok; } /*! * \internal * \brief Process an action history entry whose result expired * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the * entry needs no further processing) */ static int process_expired_result(struct action_history *history, int orig_exit_status) { if (!pcmk__is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml) && (orig_exit_status != history->expected_exit_status)) { if (history->rsc->priv->orig_role <= pcmk_role_stopped) { history->rsc->priv->orig_role = pcmk_role_unknown; } crm_trace("Ignoring resource history entry %s for probe of %s on %s: " "Masked failure expired", history->id, history->rsc->id, pcmk__node_name(history->node)); return pcmk_rc_ok; } if (history->exit_status == history->expected_exit_status) { return pcmk_rc_undetermined; // Only failures expire } if (history->interval_ms == 0) { crm_notice("Ignoring resource history entry %s for %s of %s on %s: " "Expired failure", history->id, history->task, history->rsc->id, pcmk__node_name(history->node)); return pcmk_rc_ok; } if (history->node->details->online && !history->node->details->unclean) { /* Reschedule the recurring action. schedule_cancel() won't work at * this stage, so as a hacky workaround, forcibly change the restart * digest so pcmk__check_action_config() does what we want later. * * @TODO We should skip this if there is a newer successful monitor. * Also, this causes rescheduling only if the history entry * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure * scheduler regression test doesn't, but that may not be a * realistic scenario in production). */ crm_notice("Rescheduling %s-interval %s of %s on %s " "after failure expired", pcmk__readable_interval(history->interval_ms), history->task, history->rsc->id, pcmk__node_name(history->node)); crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST, "calculated-failure-timeout"); return pcmk_rc_ok; } return pcmk_rc_undetermined; } /*! * \internal * \brief Process a masked probe failure * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void mask_probe_failure(struct action_history *history, int orig_exit_status, const xmlNode *last_failure, enum pcmk__on_fail *on_fail) { pcmk_resource_t *ban_rsc = history->rsc; if (!pcmk_is_set(history->rsc->flags, pcmk__rsc_unique)) { ban_rsc = uber_parent(history->rsc); } crm_notice("Treating probe result '%s' for %s on %s as 'not running'", services_ocf_exitcode_str(orig_exit_status), history->rsc->id, pcmk__node_name(history->node)); update_resource_state(history, history->expected_exit_status, last_failure, on_fail); crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name); record_failed_op(history); resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY, "masked-probe-failure", ban_rsc->priv->scheduler); } /*! * \internal Check whether a given failure is for a given pending action * * \param[in] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known * * \return true if \p last_failure is failure of pending action in \p history, * otherwise false * \note Both \p history and \p last_failure must come from the same * \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be * the same. */ static bool failure_is_newer(const struct action_history *history, const xmlNode *last_failure) { guint failure_interval_ms = 0U; long long failure_change = 0LL; long long this_change = 0LL; if (last_failure == NULL) { return false; // Resource has no last_failure entry } if (!pcmk__str_eq(history->task, crm_element_value(last_failure, PCMK_XA_OPERATION), pcmk__str_none)) { return false; // last_failure is for different action } if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL, &failure_interval_ms) != pcmk_ok) || (history->interval_ms != failure_interval_ms)) { return false; // last_failure is for action with different interval } if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE), &this_change, 0LL) != pcmk_rc_ok) || (pcmk__scan_ll(crm_element_value(last_failure, PCMK_XA_LAST_RC_CHANGE), &failure_change, 0LL) != pcmk_rc_ok) || (failure_change < this_change)) { return false; // Failure is not known to be newer } return true; } /*! * \internal * \brief Update a resource's role etc. for a pending action * * \param[in,out] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known */ static void process_pending_action(struct action_history *history, const xmlNode *last_failure) { /* For recurring monitors, a failure is recorded only in RSC_last_failure_0, * and there might be a RSC_monitor_INTERVAL entry with the last successful * or pending result. * * If last_failure contains the failure of the pending recurring monitor * we're processing here, and is newer, the action is no longer pending. * (Pending results have call ID -1, which sorts last, so the last failure * if any should be known.) */ if (failure_is_newer(history, last_failure)) { return; } if (strcmp(history->task, PCMK_ACTION_START) == 0) { pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending); set_active(history->rsc); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->priv->orig_role = pcmk_role_promoted; } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) && history->node->details->unclean) { /* A migrate_to action is pending on a unclean source, so force a stop * on the target. */ const char *migrate_target = NULL; pcmk_node_t *target = NULL; migrate_target = crm_element_value(history->xml, PCMK__META_MIGRATE_TARGET); target = pcmk_find_node(history->rsc->priv->scheduler, migrate_target); if (target != NULL) { stop_action(history->rsc, target, FALSE); } } if (history->rsc->priv->pending_action != NULL) { /* There should never be multiple pending actions, but as a failsafe, * just remember the first one processed for display purposes. */ return; } if (pcmk_is_probe(history->task, history->interval_ms)) { /* Pending probes are currently never displayed, even if pending * operations are requested. If we ever want to change that, * enable the below and the corresponding part of * native.c:native_pending_action(). */ #if 0 history->rsc->private->pending_action = strdup("probe"); history->rsc->private->pending_node = history->node; #endif } else { history->rsc->priv->pending_action = strdup(history->task); history->rsc->priv->pending_node = history->node; } } static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum pcmk__on_fail *on_fail) { int old_rc = 0; bool expired = false; pcmk_resource_t *parent = rsc; enum rsc_role_e fail_role = pcmk_role_unknown; enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart; struct action_history history = { .rsc = rsc, .node = node, .xml = xml_op, .execution_status = PCMK_EXEC_UNKNOWN, }; CRM_CHECK(rsc && node && xml_op, return); history.id = pcmk__xe_id(xml_op); if (history.id == NULL) { pcmk__config_err("Ignoring resource history entry for %s on %s " "without ID", rsc->id, pcmk__node_name(node)); return; } // Task and interval history.task = crm_element_value(xml_op, PCMK_XA_OPERATION); if (history.task == NULL) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "without " PCMK_XA_OPERATION, history.id, rsc->id, pcmk__node_name(node)); return; } crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms)); if (!can_affect_state(&history)) { pcmk__rsc_trace(rsc, "Ignoring resource history entry %s for %s on %s " "with irrelevant action '%s'", history.id, rsc->id, pcmk__node_name(node), history.task); return; } if (unpack_action_result(&history) != pcmk_rc_ok) { return; // Error already logged } history.expected_exit_status = pe__target_rc_from_xml(xml_op); history.key = pcmk__xe_history_key(xml_op); crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id)); pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", history.id, history.task, history.call_id, pcmk__node_name(node), pcmk_exec_status_str(history.execution_status), crm_exit_str(history.exit_status)); if (node->details->unclean) { pcmk__rsc_trace(rsc, "%s is running on %s, which is unclean (further action " "depends on value of stop's on-fail attribute)", rsc->id, pcmk__node_name(node)); } expired = check_operation_expiry(&history); old_rc = history.exit_status; remap_operation(&history, on_fail, expired); if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) { goto done; } if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { mask_probe_failure(&history, old_rc, *last_failure, on_fail); goto done; } if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) { parent = uber_parent(rsc); } switch (history.execution_status) { case PCMK_EXEC_PENDING: process_pending_action(&history, *last_failure); goto done; case PCMK_EXEC_DONE: update_resource_state(&history, history.exit_status, *last_failure, on_fail); goto done; case PCMK_EXEC_NOT_INSTALLED: unpack_failure_handling(&history, &failure_strategy, &fail_role); if (failure_strategy == pcmk__on_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " QB_XS " status=%d rc=%d id=%s", history.task, rsc->id, pcmk__node_name(node), history.execution_status, history.exit_status, history.id); /* Also for printing it as "FAILED" by marking it as * pcmk__rsc_failed later */ *on_fail = pcmk__on_fail_ban; } resource_location(parent, node, -PCMK_SCORE_INFINITY, "hard-error", rsc->priv->scheduler); unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); goto done; case PCMK_EXEC_NOT_CONNECTED: if (pcmk__is_pacemaker_remote_node(node) && pcmk_is_set(node->priv->remote->flags, pcmk__rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. */ pcmk__set_rsc_flags(node->priv->remote, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); } break; // Not done, do error handling case PCMK_EXEC_ERROR: case PCMK_EXEC_ERROR_HARD: case PCMK_EXEC_ERROR_FATAL: case PCMK_EXEC_TIMEOUT: case PCMK_EXEC_NOT_SUPPORTED: case PCMK_EXEC_INVALID: break; // Not done, do error handling default: // No other value should be possible at this point break; } unpack_failure_handling(&history, &failure_strategy, &fail_role); if ((failure_strategy == pcmk__on_fail_ignore) || ((failure_strategy == pcmk__on_fail_restart_container) && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) { char *last_change_s = last_change_str(xml_op); crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded " QB_XS " %s", history.task, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), rsc->id, pcmk__node_name(node), last_change_s, history.id); free(last_change_s); update_resource_state(&history, history.expected_exit_status, *last_failure, on_fail); crm_xml_add(xml_op, PCMK_XA_UNAME, node->priv->name); pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure); record_failed_op(&history); if ((failure_strategy == pcmk__on_fail_restart_container) && (*on_fail <= pcmk__on_fail_restart)) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); if (history.execution_status == PCMK_EXEC_ERROR_HARD) { uint8_t log_level = LOG_ERR; if (history.exit_status == PCMK_OCF_NOT_INSTALLED) { log_level = LOG_NOTICE; } do_crm_log(log_level, "Preventing %s from restarting on %s because " "of hard failure (%s%s%s) " QB_XS " %s", parent->id, pcmk__node_name(node), services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, node, -PCMK_SCORE_INFINITY, "hard-error", rsc->priv->scheduler); } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) { pcmk__sched_err(rsc->priv->scheduler, "Preventing %s from restarting anywhere because " "of fatal failure (%s%s%s) " QB_XS " %s", parent->id, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, NULL, -PCMK_SCORE_INFINITY, "fatal-error", rsc->priv->scheduler); } } done: pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", rsc->id, pcmk__node_name(node), history.id, pcmk_role_text(rsc->priv->orig_role), pcmk_role_text(rsc->priv->next_role)); } /*! * \internal * \brief Insert a node attribute with value into a \c GHashTable * * \param[in,out] key Key to insert (either freed or owned by * \p user_data upon return) * \param[in] value Value to insert (owned by \p user_data upon return) * \param[in] user_data \c GHashTable to insert into */ static gboolean insert_attr(gpointer key, gpointer value, gpointer user_data) { GHashTable *table = user_data; g_hash_table_insert(table, key, value); return TRUE; } static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler) { const char *cluster_name = NULL; const char *dc_id = crm_element_value(scheduler->input, PCMK_XA_DC_UUID); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .now = scheduler->priv->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; pcmk__insert_dup(node->priv->attrs, CRM_ATTR_UNAME, node->priv->name); pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id); if ((scheduler->dc_node == NULL) && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) { scheduler->dc_node = node; pcmk__insert_dup(node->priv->attrs, CRM_ATTR_IS_DC, PCMK_VALUE_TRUE); } else if (!pcmk__same_node(node, scheduler->dc_node)) { pcmk__insert_dup(node->priv->attrs, CRM_ATTR_IS_DC, PCMK_VALUE_FALSE); } cluster_name = g_hash_table_lookup(scheduler->priv->options, PCMK_OPT_CLUSTER_NAME); if (cluster_name) { pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME, cluster_name); } if (overwrite) { /* @TODO Try to reorder some unpacking so that we don't need the * overwrite argument or to unpack into a temporary table */ GHashTable *unpacked = pcmk__strkey_table(free, free); pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data, unpacked, NULL, scheduler); g_hash_table_foreach_steal(unpacked, insert_attr, node->priv->attrs); g_hash_table_destroy(unpacked); } else { pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data, node->priv->attrs, NULL, scheduler); } pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data, node->priv->utilization, NULL, scheduler); if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL, pcmk__rsc_node_current) == NULL) { const char *site_name = pcmk__node_attr(node, "site-name", NULL, pcmk__rsc_node_current); if (site_name) { pcmk__insert_dup(node->priv->attrs, CRM_ATTR_SITE_NAME, site_name); } else if (cluster_name) { /* Default to cluster-name if unset */ pcmk__insert_dup(node->priv->attrs, CRM_ATTR_SITE_NAME, cluster_name); } } } static GList * extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GList *gIter = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) { crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc); crm_xml_add(rsc_op, PCMK_XA_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler) { GList *output = NULL; GList *intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS, NULL, NULL); pcmk_node_t *this_node = NULL; xmlNode *node_state = NULL; CRM_CHECK(status != NULL, return NULL); for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL); node_state != NULL; node_state = pcmk__xe_next(node_state)) { if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) { const char *uname = crm_element_value(node_state, PCMK_XA_UNAME); if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) { continue; } this_node = pcmk_find_node(scheduler, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pcmk__is_pacemaker_remote_node(this_node)) { determine_remote_online_status(scheduler, this_node); } else { determine_online_status(node_state, this_node, scheduler); } if (this_node->details->online || pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL, NULL); tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL, NULL); for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL); lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) { if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) { const char *rsc_id = crm_element_value(lrm_rsc, PCMK_XA_ID); if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/python/pacemaker/_cts/patterns.py b/python/pacemaker/_cts/patterns.py index 03a405c708..4950e95686 100644 --- a/python/pacemaker/_cts/patterns.py +++ b/python/pacemaker/_cts/patterns.py @@ -1,396 +1,397 @@ """Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS).""" __all__ = ["PatternSelector"] __copyright__ = "Copyright 2008-2024 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+)" import argparse from pacemaker.buildoptions import BuildOptions class BasePatterns: """ The base class for holding a stack-specific set of command and log file/stdout patterns. Stack-specific classes need to be built on top of this one. """ def __init__(self): """Create a new BasePatterns instance which holds a very minimal set of basic patterns.""" self._bad_news = [] self._components = {} self._name = "crm-base" self._ignore = [ "avoid confusing Valgrind", # Logging bug in some versions of libvirtd r"libvirtd.*: internal error: Failed to parse PCI config address", # pcs can log this when node is fenced, but fencing is OK in some # tests (and we will catch it in pacemaker logs when not OK) r"pcs.daemon:No response from: .* request: get_configs, error:", # This is overbroad, but there's no way to say that only certain # transition errors are acceptable. We have to rely on causes of a # transition error logging their own error message, which should # always be the case. r"pacemaker-schedulerd.* Calculated transition .*/pe-error", ] self._commands = { "StatusCmd": "crmadmin -t 60 -S %s 2>/dev/null", "CibQuery": "cibadmin -Ql", "CibAddXml": "cibadmin --modify -c --xml-text %s", "CibDelXpath": "cibadmin --delete --xpath %s", "RscRunning": BuildOptions.DAEMON_DIR + "/cts-exec-helper -R -r %s", "CIBfile": "%s:" + BuildOptions.CIB_DIR + "/cib.xml", "TmpDir": "/tmp", "BreakCommCmd": "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd": "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", "MaintenanceModeOn": "cibadmin --modify -c --xml-text ''", "MaintenanceModeOff": "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", "StandbyCmd": "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd": "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null", } self._search = { "Pat:DC_IDLE": r"pacemaker-controld.*State transition.*-> S_IDLE", # This won't work if we have multiple partitions "Pat:Local_started": r"%s\W.*controller successfully started", "Pat:NonDC_started": r"%s\W.*State transition.*-> S_NOT_DC", "Pat:DC_started": r"%s\W.*State transition.*-> S_IDLE", "Pat:We_stopped": r"%s\W.*OVERRIDE THIS PATTERN", "Pat:They_stopped": r"%s\W.*LOST:.* %s ", "Pat:They_dead": r"node %s.*: is dead", "Pat:They_up": r"%s %s\W.*OVERRIDE THIS PATTERN", "Pat:TransitionComplete": "Transition status: Complete: complete", "Pat:Fencing_start": r"Requesting peer fencing .* targeting %s", "Pat:Fencing_ok": r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK", "Pat:Fencing_recover": r"pacemaker-schedulerd.*: Recover\s+%s", "Pat:Fencing_active": r"stonith resource .* is active on 2 nodes (attempting recovery)", "Pat:Fencing_probe": r"pacemaker-controld.* Result of probe operation for %s on .*: Error", "Pat:RscOpOK": r"pacemaker-controld.*:\s+Result of %s operation for %s.*: (0 \()?ok", "Pat:RscOpFail": r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of %s ", "Pat:CloneOpFail": r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of (%s|%s) ", "Pat:RscRemoteOpOK": r"pacemaker-controld.*:\s+Result of %s operation for %s on %s: (0 \()?ok", "Pat:NodeFenced": r"pacemaker-controld.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK", } def get_component(self, key): """ Return the patterns for a single component as a list, given by key. This is typically the name of some subprogram (pacemaker-based, pacemaker-fenced, etc.) or various special purpose keys. If key is unknown, return an empty list. """ if key in self._components: return self._components[key] print("Unknown component '%s' for %s" % (key, self._name)) return [] def get_patterns(self, key): """ Return various patterns supported by this object, given by key. Depending on the key, this could either be a list or a hash. If key is unknown, return None. """ if key == "BadNews": return self._bad_news if key == "BadNewsIgnore": return self._ignore if key == "Commands": return self._commands if key == "Search": return self._search if key == "Components": return self._components print("Unknown pattern '%s' for %s" % (key, self._name)) return None def __getitem__(self, key): if key == "Name": return self._name if key in self._commands: return self._commands[key] if key in self._search: return self._search[key] print("Unknown template '%s' for %s" % (key, self._name)) return None class Corosync2Patterns(BasePatterns): """Patterns for Corosync version 2 cluster manager class.""" def __init__(self): BasePatterns.__init__(self) self._name = "crm-corosync" self._commands.update({ "StartCmd": "service corosync start && service pacemaker start", "StopCmd": "service pacemaker stop; [ ! -e /usr/sbin/pacemaker-remoted ] || service pacemaker_remote stop; service corosync stop", "EpochCmd": "crm_node -e", "QuorumCmd": "crm_node -q", "PartitionCmd": "crm_node -p", }) self._search.update({ # Close enough ... "Corosync Cluster Engine exiting normally" isn't # printed reliably. "Pat:We_stopped": r"%s\W.*Unloading all Corosync service engines", "Pat:They_stopped": r"%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost", "Pat:They_dead": r"pacemaker-controld.*Node %s(\[|\s).*state is now lost", "Pat:They_up": r"\W%s\W.*pacemaker-controld.*Node %s state is now member", "Pat:ChildExit": r"\[[0-9]+\] exited with status [0-9]+ \(", # "with signal 9" == pcmk_child_exit(), "$" == check_active_before_startup_processes() "Pat:ChildKilled": r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated( with signal 9|$)", - "Pat:ChildRespawn": r"%s\W.*pacemakerd.*Respawning %s subdaemon after unexpected exit", + "Pat:ChildRespawn": r"%s\W.*pacemakerd.*Respawning subdaemon %s after unexpected exit", "Pat:InfraUp": r"%s\W.*corosync.*Initializing transport", "Pat:PacemakerUp": r"%s\W.*pacemakerd.*Starting Pacemaker", }) self._ignore += [ r"crm_mon:", r"crmadmin:", r"update_trace_data", r"async_notify:.*strange, client not found", r"Parse error: Ignoring unknown option .*nodename", r"error.*: Operation 'reboot' .* using FencingFail returned ", r"getinfo response error: 1$", r"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE", r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* (failed|closed)", ] self._bad_news = [ r"[^(]error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r"(pacemakerd|pacemaker-execd|pacemaker-controld):.*, exiting", r"schedulerd.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", - r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)", + r"pacemakerd.*\[[0-9]+\] terminated( with signal|$)", + r"pacemakerd.*\[[0-9]+\] .* will now be killed", r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)", r"rsyslogd.* lost .* due to rate-limiting", r"Peer is not part of our cluster", r"We appear to be in an election loop", r"Unknown node -> we will not deliver message", r"(Blackbox dump requested|Problem detected)", r"pacemakerd.*Could not connect to Cluster Configuration Database API", r"Receiving messages from a node we think is dead", r"share the same cluster nodeid", r"share the same name", r"pacemaker-controld:.*Transition failed: terminated", r"Local CIB .* differs from .*:", r"warn.*:\s*Continuing but .* will NOT be used", r"warn.*:\s*Cluster configuration file .* is corrupt", r"Election storm", r"stalled the FSA with pending inputs", ] self._components["common-ignore"] = [ r"Pending action:", r"resource( was|s were) active at shutdown", r"pending LRM operations at shutdown", r"Lost connection to the CIB manager", r"pacemaker-controld.*:\s*Action A_RECOVER .* not supported", r"pacemaker-controld.*:\s*Exiting now due to errors", r".*:\s*Requesting fencing \([^)]+\) targeting node ", r"(Blackbox dump requested|Problem detected)", ] self._components["corosync-ignore"] = [ r"Could not connect to Corosync CFG: CS_ERR_LIBRARY", r"error:.*Connection to the CPG API failed: Library error", r"\[[0-9]+\] exited with status [0-9]+ \(", r"\[[0-9]+\] terminated with signal 15", r"pacemaker-based.*error:.*Corosync connection lost", r"pacemaker-fenced.*error:.*Corosync connection terminated", r"pacemaker-controld.*State transition .* S_RECOVERY", r"pacemaker-controld.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state", r"pacemaker-controld.*error:.*Could not recover from internal error", r"error:.*Connection to cib_(shm|rw).* (failed|closed)", r"error:.*cib_(shm|rw) IPC provider disconnected while waiting", r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"error: Lost fencer connection", ] self._components["corosync"] = [ # We expect each daemon to lose its cluster connection. # However, if the CIB manager loses its connection first, # it's possible for another daemon to lose that connection and # exit before losing the cluster connection. r"pacemakerd.*:\s*warning:.*Lost connection to cluster layer", r"pacemaker-attrd.*:\s*(crit|error):.*Lost connection to (Corosync process group|the CIB manager)", r"pacemaker-based.*:\s*(crit|error):.*Lost connection to cluster layer", r"pacemaker-controld.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"pacemaker-fenced.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"schedulerd.*Scheduling node .* for fencing", r"pacemaker-controld.*:\s*Peer .* was terminated \(.*\) by .* on behalf of .*:\s*OK", ] self._components["pacemaker-based"] = [ r"pacemakerd.* pacemaker-attrd\[[0-9]+\] exited with status 102", r"pacemakerd.* pacemaker-controld\[[0-9]+\] exited with status 1", - r"pacemakerd.* Respawning pacemaker-attrd subdaemon after unexpected exit", - r"pacemakerd.* Respawning pacemaker-based subdaemon after unexpected exit", - r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit", - r"pacemakerd.* Respawning pacemaker-fenced subdaemon after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-attrd after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-based after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-fenced after unexpected exit", r"pacemaker-.* Connection to cib_.* (failed|closed)", r"pacemaker-attrd.*:.*Lost connection to the CIB manager", r"pacemaker-controld.*:.*Lost connection to the CIB manager", r"pacemaker-controld.*I_ERROR.*handle_cib_disconnect", r"pacemaker-controld.* State transition .* S_RECOVERY", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", ] self._components["pacemaker-based-ignore"] = [ r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", r"pacemaker-controld.*:Could not connect to attrd: Connection refused", ] self._components["pacemaker-execd"] = [ r"pacemaker-controld.*Lost connection to local executor", r"pacemaker-controld.*I_ERROR.*lrm_connection_destroy", r"pacemaker-controld.*State transition .* S_RECOVERY", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", r"pacemakerd.*pacemaker-controld\[[0-9]+\] exited with status 1", - r"pacemakerd.* Respawning pacemaker-execd subdaemon after unexpected exit", - r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-execd after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit", ] self._components["pacemaker-execd-ignore"] = [ r"pacemaker-(attrd|controld).*Connection to lrmd.* (failed|closed)", r"pacemaker-(attrd|controld).*Could not execute alert", ] self._components["pacemaker-controld"] = [ r"State transition .* -> S_IDLE", ] self._components["pacemaker-controld-ignore"] = [] self._components["pacemaker-attrd"] = [] self._components["pacemaker-attrd-ignore"] = [] self._components["pacemaker-schedulerd"] = [ r"State transition .* S_RECOVERY", - r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit", + r"pacemakerd.* Respawning subdaemon pacemaker-controld after unexpected exit", r"pacemaker-controld\[[0-9]+\] exited with status 1 \(", r"pacemaker-controld.*Lost connection to the scheduler", r"pacemaker-controld.*I_ERROR.*save_cib_contents", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", ] self._components["pacemaker-schedulerd-ignore"] = [ r"Connection to pengine.* (failed|closed)", ] self._components["pacemaker-fenced"] = [ r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"Lost fencer connection", r"pacemaker-controld.*Fencer successfully connected", ] self._components["pacemaker-fenced-ignore"] = [ r"(error|warning):.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"error:.*Lost fencer connection", r"error:.*Fencer connection failed \(will retry\)", r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", ] self._components["pacemaker-fenced-ignore"].extend(self._components["common-ignore"]) patternVariants = { "crm-base": BasePatterns, "crm-corosync": Corosync2Patterns } class PatternSelector: """Choose from among several Pattern objects and return the information from that object.""" def __init__(self, name="crm-corosync"): """ Create a new PatternSelector object. Instantiate whatever class is given by name. Defaults to Corosync2Patterns for "crm-corosync" or None. While other objects could be supported in the future, only this and the base object are supported at this time. """ self._name = name # If no name was given, use the default. Otherwise, look up the appropriate # class in patternVariants, instantiate it, and use that. if not name: self._base = Corosync2Patterns() else: self._base = patternVariants[name]() def get_patterns(self, kind): """Call get_patterns on the previously instantiated pattern object.""" return self._base.get_patterns(kind) def get_template(self, key): """ Return a single pattern from the previously instantiated pattern object. If no pattern exists for the given key, return None. """ return self._base[key] def get_component(self, kind): """Call get_component on the previously instantiated pattern object.""" return self._base.get_component(kind) def __getitem__(self, key): """Return the pattern for the given key, or None if it does not exist.""" return self.get_template(key) # PYTHONPATH=python python python/pacemaker/_cts/patterns.py -k crm-corosync -t StartCmd if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("-k", "--kind", metavar="KIND") parser.add_argument("-t", "--template", metavar="TEMPLATE") args = parser.parse_args() print(PatternSelector(args.kind)[args.template]) diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c index 5316101609..bee452cfd4 100644 --- a/tools/attrd_updater.c +++ b/tools/attrd_updater.c @@ -1,524 +1,524 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "query and update Pacemaker node attributes" static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; GError *error = NULL; bool printed_values = false; struct { char command; gchar *attr_dampen; gchar *attr_name; gchar *attr_pattern; gchar *attr_node; gchar *attr_set; char *attr_value; uint32_t attr_options; gboolean query_all; gboolean quiet; } options = { .attr_options = pcmk__node_attr_none, .command = 'Q', }; static gboolean command_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&options.attr_value, optarg); if (pcmk__str_any_of(option_name, "--update-both", "-B", NULL)) { options.command = 'B'; } else if (pcmk__str_any_of(option_name, "--delete", "-D", NULL)) { options.command = 'D'; } else if (pcmk__str_any_of(option_name, "--query", "-Q", NULL)) { options.command = 'Q'; } else if (pcmk__str_any_of(option_name, "--refresh", "-R", NULL)) { options.command = 'R'; } else if (pcmk__str_any_of(option_name, "--update", "-U", "-v", NULL)) { options.command = 'U'; } else if (pcmk__str_any_of(option_name, "--update-delay", "-Y", NULL)) { options.command = 'Y'; } return TRUE; } static gboolean private_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_private); return TRUE; } static gboolean section_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (pcmk__str_any_of(optarg, PCMK_XE_NODES, "forever", NULL)) { pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_perm); } else if (pcmk__str_any_of(optarg, PCMK_XE_STATUS, "reboot", NULL)) { pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_perm); } else { g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, "Unknown value for --lifetime: %s", optarg); return FALSE; } return TRUE; } static gboolean attr_set_type_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "-z", "--utilization", NULL)) { pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_utilization); } return TRUE; } static gboolean wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (pcmk__str_eq(optarg, "no", pcmk__str_none)) { pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); return TRUE; } else if (pcmk__str_eq(optarg, PCMK__VALUE_LOCAL, pcmk__str_none)) { pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); return TRUE; } else if (pcmk__str_eq(optarg, PCMK__VALUE_CLUSTER, pcmk__str_none)) { pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_cluster); return TRUE; } else { g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--wait= must be one of 'no', 'local', 'cluster'"); return FALSE; } } #define INDENT " " static GOptionEntry required_entries[] = { { "name", 'n', 0, G_OPTION_ARG_STRING, &options.attr_name, "The attribute's name", "NAME" }, { "pattern", 'P', 0, G_OPTION_ARG_STRING, &options.attr_pattern, "Operate on all attributes matching this pattern\n" INDENT "(with -B, -D, -U, or -Y)", "PATTERN" }, { NULL } }; static GOptionEntry command_entries[] = { { "update", 'U', 0, G_OPTION_ARG_CALLBACK, command_cb, "Update attribute's value. Required: -n/--name or -P/--pattern.\n" INDENT "Optional: -d/--delay (if specified, the delay will be used if\n" INDENT "the attribute needs to be created, but ignored if the\n" INDENT "attribute already exists), -s/--set, -p/--private, -W/--wait,\n" INDENT "-z/--utilization.", "VALUE" }, { "update-both", 'B', 0, G_OPTION_ARG_CALLBACK, command_cb, - "Update attribute's value and time to wait (dampening) in\n" - INDENT "pacemaker-attrd. If this causes the value or dampening to change,\n" + "Update attribute's value and time to wait (dampening) in the\n" + INDENT "attribute manager. If this changes the value or dampening,\n" INDENT "the attribute will also be written to the cluster configuration,\n" INDENT "so be aware that repeatedly changing the dampening reduces its\n" INDENT "effectiveness.\n" INDENT "Requires -d/--delay", "VALUE" }, { "update-delay", 'Y', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, - "Update attribute's dampening in pacemaker-attrd. If this causes\n" - INDENT "the dampening to change, the attribute will also be written\n" + "Update attribute's dampening in the attribute manager. If this\n" + INDENT "changes the dampening, the attribute will also be written\n" INDENT "to the cluster configuration, so be aware that repeatedly\n" INDENT "changing the dampening reduces its effectiveness.\n" INDENT "Requires -d/--delay", NULL }, { "query", 'Q', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, - "Query the attribute's value from pacemaker-attrd", + "Query the attribute's value from the attribute manager", NULL }, { "delete", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, - "Unset attribute from pacemaker-attrd. At the moment, there is no way\n" - INDENT "to remove an attribute. This option will instead set its value\n" - INDENT "to the empty string.", + "Unset attribute from the attribute manager. At the moment, there is no\n" + INDENT "way to remove an attribute. This option will instead set its\n" + INDENT "value to the empty string.", NULL }, { "refresh", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, - "(Advanced) Force the pacemaker-attrd daemon to resend all current\n" + "(Advanced) Force the attribute manager to resend all current\n" INDENT "values to the CIB", NULL }, { NULL } }; static GOptionEntry addl_entries[] = { { "delay", 'd', 0, G_OPTION_ARG_STRING, &options.attr_dampen, "The time to wait (dampening) in seconds for further changes\n" INDENT "before sending to the CIB", "SECONDS" }, { "set", 's', 0, G_OPTION_ARG_STRING, &options.attr_set, "(Advanced) The attribute set in which to place the value", "SET" }, { "node", 'N', 0, G_OPTION_ARG_STRING, &options.attr_node, "Set the attribute for the named node (instead of the local one)", "NODE" }, { "all", 'A', 0, G_OPTION_ARG_NONE, &options.query_all, "Show values of the attribute for all nodes (query only)", NULL }, { "lifetime", 'l', 0, G_OPTION_ARG_CALLBACK, section_cb, "(Not yet implemented) Lifetime of the node attribute (silently\n" INDENT "ignored by cluster)", "SECTION" }, { "private", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, private_cb, "If this creates a new attribute, never write the attribute to CIB", NULL }, { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, "Wait for some event to occur before returning. Values are 'no' (wait\n" INDENT "only for the attribute daemon to acknowledge the request),\n" INDENT "'local' (wait until the change has propagated to where a local\n" INDENT "query will return the request value, or the value set by a\n" INDENT "later request), or 'cluster' (wait until the change has propagated\n" INDENT "to where a query anywhere on the cluster will return the requested\n" INDENT "value, or the value set by a later request). Default is 'no'.", "UNTIL" }, { "utilization", 'z', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attr_set_type_cb, "When creating a new attribute, create it as a node utilization attribute\n" INDENT "instead of an instance attribute. If the attribute already exists,\n" INDENT "its existing type (utilization vs. instance) will be used regardless.\n" INDENT "(with -B, -U, -Y)", NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { { "quiet", 'q', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.quiet, NULL, NULL }, { "update", 'v', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, command_cb, NULL, NULL }, { "section", 'S', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, section_cb, NULL, NULL }, { NULL } }; static int send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all); static int send_attrd_update(char command, const char *attr_node, const char *attr_name, const char *attr_value, const char *attr_set, const char *attr_dampen, uint32_t attr_options); static bool pattern_used_correctly(void) { /* --pattern can only be used with: * -B (update-both), -D (delete), -U (update), or -Y (update-delay) */ return options.command == 'B' || options.command == 'D' || options.command == 'U' || options.command == 'Y'; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_arg_group(context, "required", "Required Arguments:", "Show required arguments", required_entries); pcmk__add_arg_group(context, "command", "Command:", "Show command options (mutually exclusive)", command_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; pcmk__output_t *out = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); GOptionContext *context = build_arg_context(args, &output_group); gchar **processed_args = pcmk__cmdline_preproc(argv, "dlnsvBNUS"); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } pcmk__cli_init_logging("attrd_updater", args->verbosity); rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } if (options.attr_pattern) { if (options.attr_name) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error: --name and --pattern cannot be used at the same time"); goto done; } if (!pattern_used_correctly()) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error: pattern can only be used with delete or update"); goto done; } g_free(options.attr_name); options.attr_name = options.attr_pattern; options.attr_options |= pcmk__node_attr_pattern; } if (options.command != 'R' && options.attr_name == NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Command requires --name or --pattern argument"); goto done; } else if ((options.command == 'B'|| options.command == 'Y') && options.attr_dampen == NULL) { out->info(out, "Warning: '%c' command given without required --delay", options.command); } pcmk__register_lib_messages(out); if (options.command == 'Q') { int rc = send_attrd_query(out, options.attr_name, options.attr_node, options.query_all); exit_code = pcmk_rc2exitc(rc); } else { /* @TODO We don't know whether the specified node is a Pacemaker Remote * node or not, so we can't set pcmk__node_attr_remote when appropriate. - * However, it's not a big problem, because pacemaker-attrd will learn - * and remember a node's "remoteness". + * However, it's not a big problem, because the attribute manager will + * learn and remember a node's "remoteness". */ int rc = send_attrd_update(options.command, options.attr_node, options.attr_name, options.attr_value, options.attr_set, options.attr_dampen, options.attr_options); exit_code = pcmk_rc2exitc(rc); } done: g_strfreev(processed_args); pcmk__free_arg_context(context); g_free(options.attr_dampen); g_free(options.attr_name); g_free(options.attr_node); g_free(options.attr_set); free(options.attr_value); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } /*! - * \brief Print the attribute values in a pacemaker-attrd XML query reply + * \brief Print the attribute values in an attribute manager XML query reply * * \param[in,out] out Output object * \param[in] reply List of attribute name/value pairs * * \return true if any values were printed */ static void print_attrd_values(pcmk__output_t *out, const GList *reply) { for (const GList *iter = reply; iter != NULL; iter = iter->next) { const pcmk__attrd_query_pair_t *pair = iter->data; out->message(out, "attribute", NULL, NULL, pair->name, pair->value, pair->node, false, false); printed_values = true; } } static void attrd_event_cb(pcmk_ipc_api_t *attrd_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { pcmk__output_t *out = (pcmk__output_t *) user_data; pcmk__attrd_api_reply_t *reply = event_data; if (event_type != pcmk_ipc_event_reply || status != CRM_EX_OK) { return; } /* Print the values from the reply. */ if (reply->reply_type == pcmk__attrd_reply_query) { print_attrd_values(out, reply->data.pairs); } } /*! - * \brief Submit a query to pacemaker-attrd and print reply + * \brief Submit a query to the attribute manager and print reply * * \param[in,out] out Output object * \param[in] attr_name Name of attribute to be affected by request * \param[in] attr_node Name of host to query for (or NULL for localhost) * \param[in] query_all If TRUE, ignore attr_node and query all nodes * * \return Standard Pacemaker return code */ static int send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all) { uint32_t options = pcmk__node_attr_none; pcmk_ipc_api_t *attrd_api = NULL; int rc = pcmk_rc_ok; // Create attrd IPC object rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, "Could not connect to attrd: %s", pcmk_rc_str(rc)); return ENOTCONN; } pcmk_register_ipc_callback(attrd_api, attrd_event_cb, out); // Connect to attrd (without main loop) rc = pcmk__connect_ipc(attrd_api, pcmk_ipc_dispatch_sync, 5); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, "Could not connect to %s: %s", pcmk_ipc_name(attrd_api, true), pcmk_rc_str(rc)); pcmk_free_ipc_api(attrd_api); return rc; } /* Decide which node(s) to query */ if (query_all == TRUE) { options |= pcmk__node_attr_query_all; } rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, options); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: %s (%d)", attr_name, pcmk_rc_str(rc), rc); } else if (!printed_values) { rc = pcmk_rc_schema_validation; g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: attribute does not exist", attr_name); } pcmk_disconnect_ipc(attrd_api); pcmk_free_ipc_api(attrd_api); return rc; } static int send_attrd_update(char command, const char *attr_node, const char *attr_name, const char *attr_value, const char *attr_set, const char *attr_dampen, uint32_t attr_options) { int rc = pcmk_rc_ok; switch (command) { case 'B': rc = pcmk__attrd_api_update(NULL, attr_node, attr_name, attr_value, attr_dampen, attr_set, NULL, attr_options | pcmk__node_attr_value | pcmk__node_attr_delay); break; case 'D': rc = pcmk__attrd_api_delete(NULL, attr_node, attr_name, attr_options); break; case 'R': rc = pcmk__attrd_api_refresh(NULL, attr_node); break; case 'U': rc = pcmk__attrd_api_update(NULL, attr_node, attr_name, attr_value, attr_dampen, attr_set, NULL, attr_options | pcmk__node_attr_value); break; case 'Y': rc = pcmk__attrd_api_update(NULL, attr_node, attr_name, NULL, attr_dampen, attr_set, NULL, attr_options | pcmk__node_attr_delay); break; } if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, "Could not update %s=%s: %s (%d)", attr_name, attr_value, pcmk_rc_str(rc), rc); } return rc; } diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c index 7f45a2ef24..dbc4cd41d5 100644 --- a/tools/crm_attribute.c +++ b/tools/crm_attribute.c @@ -1,989 +1,989 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "crm_attribute - query and update Pacemaker cluster options and node attributes" enum attr_cmd { attr_cmd_none, attr_cmd_delete, attr_cmd_list, attr_cmd_query, attr_cmd_update, }; GError *error = NULL; crm_exit_t exit_code = CRM_EX_OK; uint64_t cib_opts = cib_sync_call; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; struct { enum attr_cmd command; gchar *attr_default; gchar *attr_id; gchar *attr_name; uint32_t attr_options; gchar *attr_pattern; char *attr_value; char *dest_node; gchar *dest_uname; gboolean inhibit; gchar *set_name; char *set_type; gchar *type; char *opt_list; gboolean all; bool promotion_score; gboolean score_update; } options = { .command = attr_cmd_query, }; #define INDENT " " static gboolean list_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.command = attr_cmd_list; pcmk__str_update(&options.opt_list, optarg); return TRUE; } static gboolean delete_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.command = attr_cmd_delete; pcmk__str_update(&options.attr_value, NULL); return TRUE; } static gboolean attr_name_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.promotion_score = false; if (options.attr_name != NULL) { g_free(options.attr_name); } options.attr_name = g_strdup(optarg); return TRUE; } static gboolean promotion_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { char *score_name = NULL; options.promotion_score = true; if (options.attr_name) { g_free(options.attr_name); } score_name = pcmk_promotion_score_name(optarg); if (score_name != NULL) { options.attr_name = g_strdup(score_name); free(score_name); } else { options.attr_name = NULL; } return TRUE; } static gboolean update_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.command = attr_cmd_update; pcmk__str_update(&options.attr_value, optarg); return TRUE; } static gboolean utilization_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.type) { g_free(options.type); } options.type = g_strdup(PCMK_XE_NODES); pcmk__str_update(&options.set_type, PCMK_XE_UTILIZATION); return TRUE; } static gboolean value_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.command = attr_cmd_query; pcmk__str_update(&options.attr_value, NULL); return TRUE; } static gboolean wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (pcmk__str_eq(optarg, "no", pcmk__str_none)) { pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); return TRUE; } else if (pcmk__str_eq(optarg, PCMK__VALUE_LOCAL, pcmk__str_none)) { pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); return TRUE; } else if (pcmk__str_eq(optarg, PCMK__VALUE_CLUSTER, pcmk__str_none)) { pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_cluster); return TRUE; } else { g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--wait= must be one of 'no', 'local', 'cluster'"); return FALSE; } } static GOptionEntry selecting_entries[] = { { "all", 'a', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.all, "With -L/--list-options, include advanced and deprecated options in the\n" INDENT "output. This is always treated as true when --output-as=xml is\n" INDENT "specified.", NULL, }, { "id", 'i', 0, G_OPTION_ARG_STRING, &options.attr_id, "(Advanced) Operate on instance of specified attribute with this\n" INDENT "XML ID", "XML_ID" }, { "name", 'n', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, attr_name_cb, "Operate on attribute or option with this name. For queries, this\n" INDENT "is optional, in which case all matching attributes will be\n" INDENT "returned.", "NAME" }, { "pattern", 'P', 0, G_OPTION_ARG_STRING, &options.attr_pattern, "Operate on all attributes matching this pattern\n" INDENT "(with -v, -D, or -G)", "PATTERN" }, { "promotion", 'p', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, promotion_cb, "Operate on node attribute used as promotion score for specified\n" INDENT "resource, or resource given in OCF_RESOURCE_INSTANCE environment\n" INDENT "variable if none is specified; this also defaults -l/--lifetime\n" INDENT "to reboot (normally invoked from an OCF resource agent)", "RESOURCE" }, { "set-name", 's', 0, G_OPTION_ARG_STRING, &options.set_name, "(Advanced) Operate on instance of specified attribute that is\n" INDENT "within set with this XML ID", "NAME" }, { NULL } }; static GOptionEntry command_entries[] = { { "list-options", 'L', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, list_cb, "List all available options of the given type.\n" INDENT "Allowed values: " PCMK__VALUE_CLUSTER, "TYPE" }, { "delete", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, delete_cb, "Delete the attribute/option (with -n or -P)", NULL }, { "query", 'G', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, value_cb, "Query the current value of the attribute/option.\n" INDENT "See also: -n, -P", NULL }, { "update", 'v', 0, G_OPTION_ARG_CALLBACK, update_cb, "Update the value of the attribute/option (with -n or -P)", "VALUE" }, { NULL } }; static GOptionEntry addl_entries[] = { { "default", 'd', 0, G_OPTION_ARG_STRING, &options.attr_default, "(Advanced) Default value to display if none is found in configuration", "VALUE" }, { "lifetime", 'l', 0, G_OPTION_ARG_STRING, &options.type, "Lifetime of the node attribute.\n" INDENT "Valid values: reboot, forever", "LIFETIME" }, { "node", 'N', 0, G_OPTION_ARG_STRING, &options.dest_uname, "Set a node attribute for named node (instead of a cluster option).\n" INDENT "See also: -l", "NODE" }, { "type", 't', 0, G_OPTION_ARG_STRING, &options.type, "Which part of the configuration to update/delete/query the option in.\n" INDENT "Valid values: crm_config, rsc_defaults, op_defaults, tickets", "SECTION" }, { "score", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.score_update, "Treat new attribute values as atomic score updates where possible\n" INDENT "(with --update/-v, when running against a CIB file or updating\n" INDENT "an attribute outside the " PCMK_XE_STATUS " section; enabled\n" INDENT "by default if --promotion/-p is specified)\n\n" INDENT "This currently happens by default and cannot be disabled, but\n" INDENT "this default behavior is deprecated and will be removed in a\n" INDENT "future release (exception: this will remain the default with\n" INDENT "--promotion/-p). Set this flag if this behavior is desired.\n\n" INDENT "This option takes effect when updating XML attributes. For an\n" INDENT "attribute named \"name\", if the new value is \"name++\" or\n" INDENT "\"name+=X\" for some score X, the new value is set as follows:\n" INDENT " * If attribute \"name\" is not already set to some value in\n" INDENT " the element being updated, the new value is set as a literal\n" INDENT " string.\n" INDENT " * If the new value is \"name++\", then the attribute is set to\n" INDENT " its existing value (parsed as a score) plus 1.\n" INDENT " * If the new value is \"name+=X\" for some score X, then the\n" INDENT " attribute is set to its existing value plus X, where the\n" INDENT " existing value and X are parsed and added as scores.\n\n" INDENT "Scores are integer values capped at INFINITY and -INFINITY.\n" INDENT "Refer to Pacemaker Explained and to the char2score() function\n" INDENT "for more details on scores, including how they're parsed and\n" INDENT "added.", NULL }, { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, "Wait for some event to occur before returning. Values are 'no' (wait\n" INDENT "only for the attribute daemon to acknowledge the request),\n" INDENT "'local' (wait until the change has propagated to where a local\n" INDENT "query will return the request value, or the value set by a\n" INDENT "later request), or 'cluster' (wait until the change has propagated\n" INDENT "to where a query anywhere on the cluster will return the requested\n" INDENT "value, or the value set by a later request). Default is 'no'.\n" INDENT "(with -N, and one of -D or -u)", "UNTIL" }, { "utilization", 'z', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, utilization_cb, "Set an utilization attribute for the node.", NULL }, { "inhibit-policy-engine", '!', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.inhibit, NULL, NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { { "attr-id", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_STRING, &options.attr_id, NULL, NULL }, { "attr-name", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, attr_name_cb, NULL, NULL }, { "attr-value", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, update_cb, NULL, NULL }, { "delete-attr", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, delete_cb, NULL, NULL }, { "get-value", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, value_cb, NULL, NULL }, { "node-uname", 'U', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_STRING, &options.dest_uname, NULL, NULL }, { NULL } }; static void get_node_name_from_local(void) { struct utsname hostinfo; g_free(options.dest_uname); if (uname(&hostinfo) == 0) { options.dest_uname = g_strdup(hostinfo.nodename); } else { options.dest_uname = NULL; } } static int send_attrd_update(enum attr_cmd command, const char *attr_node, const char *attr_name, const char *attr_value, const char *attr_set, const char *attr_dampen, uint32_t attr_options) { int rc = pcmk_rc_ok; uint32_t opts = attr_options; switch (command) { case attr_cmd_delete: rc = pcmk__attrd_api_delete(NULL, attr_node, attr_name, opts); break; case attr_cmd_update: rc = pcmk__attrd_api_update(NULL, attr_node, attr_name, attr_value, NULL, attr_set, NULL, opts | pcmk__node_attr_value); break; default: break; } if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, "Could not update %s=%s: %s (%d)", attr_name, attr_value, pcmk_rc_str(rc), rc); } return rc; } struct delete_data_s { pcmk__output_t *out; cib_t *cib; }; static int delete_attr_on_node(xmlNode *child, void *userdata) { struct delete_data_s *dd = (struct delete_data_s *) userdata; const char *attr_name = crm_element_value(child, PCMK_XA_NAME); int rc = pcmk_rc_ok; if (!pcmk__str_eq(attr_name, options.attr_pattern, pcmk__str_regex)) { return pcmk_rc_ok; } rc = cib__delete_node_attr(dd->out, dd->cib, cib_opts, options.type, options.dest_node, options.set_type, options.set_name, options.attr_id, attr_name, options.attr_value, NULL); if (rc == ENXIO) { rc = pcmk_rc_ok; } return rc; } static void command_list(pcmk__output_t *out) { if (pcmk__str_eq(options.opt_list, PCMK__VALUE_CLUSTER, pcmk__str_none)) { exit_code = pcmk_rc2exitc(pcmk__list_cluster_options(out, options.all)); } else { // @TODO Improve usage messages to reduce duplication exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "Invalid --list-options value '%s'. Allowed values: " PCMK__VALUE_CLUSTER, pcmk__s(options.opt_list, "(BUG: none)")); } } static int command_delete(pcmk__output_t *out, cib_t *cib) { int rc = pcmk_rc_ok; xmlNode *result = NULL; bool use_pattern = options.attr_pattern != NULL; /* See the comment in command_query regarding xpath and regular expressions. */ if (use_pattern) { struct delete_data_s dd = { out, cib }; rc = cib__get_node_attrs(out, cib, options.type, options.dest_node, options.set_type, options.set_name, NULL, NULL, NULL, &result); if (rc != pcmk_rc_ok) { goto done_deleting; } rc = pcmk__xe_foreach_child(result, NULL, delete_attr_on_node, &dd); } else { rc = cib__delete_node_attr(out, cib, cib_opts, options.type, options.dest_node, options.set_type, options.set_name, options.attr_id, options.attr_name, options.attr_value, NULL); } done_deleting: pcmk__xml_free(result); if (rc == ENXIO) { /* Nothing to delete... * which means it's not there... * which is what the admin wanted */ rc = pcmk_rc_ok; } return rc; } struct update_data_s { pcmk__output_t *out; cib_t *cib; int is_remote_node; }; static int update_attr_on_node(xmlNode *child, void *userdata) { struct update_data_s *ud = (struct update_data_s *) userdata; const char *attr_name = crm_element_value(child, PCMK_XA_NAME); if (!pcmk__str_eq(attr_name, options.attr_pattern, pcmk__str_regex)) { return pcmk_rc_ok; } return cib__update_node_attr(ud->out, ud->cib, cib_opts, options.type, options.dest_node, options.set_type, options.set_name, options.attr_id, attr_name, options.attr_value, NULL, ud->is_remote_node? PCMK_VALUE_REMOTE : NULL); } static int command_update(pcmk__output_t *out, cib_t *cib, int is_remote_node) { int rc = pcmk_rc_ok; xmlNode *result = NULL; bool use_pattern = options.attr_pattern != NULL; /* @COMPAT When we drop default support for expansion in crm_attribute, * guard with `if (options.score_update)` */ cib__set_call_options(cib_opts, crm_system_name, cib_score_update); /* See the comment in command_query regarding xpath and regular expressions. */ if (use_pattern) { struct update_data_s ud = { out, cib, is_remote_node }; rc = cib__get_node_attrs(out, cib, options.type, options.dest_node, options.set_type, options.set_name, NULL, NULL, NULL, &result); if (rc != pcmk_rc_ok) { goto done_updating; } rc = pcmk__xe_foreach_child(result, NULL, update_attr_on_node, &ud); } else { rc = cib__update_node_attr(out, cib, cib_opts, options.type, options.dest_node, options.set_type, options.set_name, options.attr_id, options.attr_name, options.attr_value, NULL, is_remote_node? PCMK_VALUE_REMOTE : NULL); } done_updating: pcmk__xml_free(result); return rc; } struct output_data_s { pcmk__output_t *out; bool use_pattern; bool did_output; }; static int output_one_attribute(xmlNode *node, void *userdata) { struct output_data_s *od = (struct output_data_s *) userdata; const char *name = crm_element_value(node, PCMK_XA_NAME); const char *value = crm_element_value(node, PCMK_XA_VALUE); const char *type = options.type; const char *attr_id = options.attr_id; if (od->use_pattern && !pcmk__str_eq(name, options.attr_pattern, pcmk__str_regex)) { return pcmk_rc_ok; } od->out->message(od->out, "attribute", type, attr_id, name, value, NULL, od->out->quiet, true); od->did_output = true; crm_info("Read %s='%s' %s%s", pcmk__s(name, ""), pcmk__s(value, ""), options.set_name ? "in " : "", options.set_name ? options.set_name : ""); return pcmk_rc_ok; } static int command_query(pcmk__output_t *out, cib_t *cib) { int rc = pcmk_rc_ok; xmlNode *result = NULL; bool use_pattern = options.attr_pattern != NULL; /* libxml2 doesn't support regular expressions in xpath queries (which is how * cib__get_node_attrs -> find_attr finds attributes). So instead, we'll just * find all the attributes for a given node here by passing NULL for attr_id * and attr_name, and then later see if they match the given pattern. */ if (use_pattern) { rc = cib__get_node_attrs(out, cib, options.type, options.dest_node, options.set_type, options.set_name, NULL, NULL, NULL, &result); } else { rc = cib__get_node_attrs(out, cib, options.type, options.dest_node, options.set_type, options.set_name, options.attr_id, options.attr_name, NULL, &result); } if (rc == ENXIO && options.attr_default) { /* Make static analysis happy */ const char *type = options.type; const char *attr_id = options.attr_id; const char *attr_name = options.attr_name; const char *attr_default = options.attr_default; out->message(out, "attribute", type, attr_id, attr_name, attr_default, NULL, out->quiet, true); rc = pcmk_rc_ok; } else if (rc != pcmk_rc_ok) { // Don't do anything. } else if (result->children != NULL) { struct output_data_s od = { out, use_pattern, false }; pcmk__xe_foreach_child(result, NULL, output_one_attribute, &od); if (!od.did_output) { rc = ENXIO; } } else { struct output_data_s od = { out, use_pattern, false }; output_one_attribute(result, &od); } pcmk__xml_free(result); return rc; } static void set_type(void) { if (options.type == NULL) { if (options.promotion_score) { // Updating a promotion score node attribute options.type = g_strdup(PCMK_XE_STATUS); } else if (options.dest_uname != NULL) { // Updating some other node attribute options.type = g_strdup(PCMK_XE_NODES); } else { // Updating cluster options options.type = g_strdup(PCMK_XE_CRM_CONFIG); } } else if (pcmk__str_eq(options.type, "reboot", pcmk__str_casei)) { options.type = g_strdup(PCMK_XE_STATUS); } else if (pcmk__str_eq(options.type, "forever", pcmk__str_casei)) { options.type = g_strdup(PCMK_XE_NODES); } } static bool use_attrd(void) { /* Only go through the attribute manager for transient attributes, and * then only if we're not using a file as the CIB. */ return pcmk__str_eq(options.type, PCMK_XE_STATUS, pcmk__str_casei) && getenv("CIB_file") == NULL && getenv("CIB_shadow") == NULL; } static bool try_ipc_update(void) { return use_attrd() && ((options.command == attr_cmd_delete) || (options.command == attr_cmd_update)); } static bool pattern_used_correctly(void) { /* --pattern can only be used with: * -G (query), -v (update), or -D (delete) */ switch (options.command) { case attr_cmd_delete: case attr_cmd_query: case attr_cmd_update: return true; default: return false; } } static bool delete_used_correctly(void) { return (options.command != attr_cmd_delete) || (options.attr_name != NULL) || (options.attr_pattern != NULL); } static bool update_used_correctly(void) { return (options.command != attr_cmd_update) || (options.attr_name != NULL) || (options.attr_pattern != NULL); } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Print only the value on stdout", NULL }, { "quiet", 'Q', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &(args->quiet), NULL, NULL }, { NULL } }; const char *description = "Examples:\n\n" "Add new node attribute called 'location' with the value of 'office' for host 'myhost':\n\n" "\tcrm_attribute --node myhost --name location --update office\n\n" "Query the value of the 'location' node attribute for host 'myhost':\n\n" "\tcrm_attribute --node myhost --name location --query\n\n" "Change the value of the 'location' node attribute for host 'myhost':\n\n" "\tcrm_attribute --node myhost --name location --update backoffice\n\n" "Delete the 'location' node attribute for host 'myhost':\n\n" "\tcrm_attribute --node myhost --name location --delete\n\n" "Query the value of the '" PCMK_OPT_CLUSTER_DELAY "' cluster option:\n\n" "\tcrm_attribute --type crm_config --name " PCMK_OPT_CLUSTER_DELAY " --query\n\n" "Query value of the '" PCMK_OPT_CLUSTER_DELAY "' cluster option and print only the value:\n\n" "\tcrm_attribute --type crm_config --name " PCMK_OPT_CLUSTER_DELAY " --query --quiet\n\n"; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "selections", "Selecting attributes:", "Show selecting options", selecting_entries); pcmk__add_arg_group(context, "command", "Commands:", "Show command options", command_entries); pcmk__add_arg_group(context, "additional", "Additional options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } int main(int argc, char **argv) { cib_t *the_cib = NULL; int is_remote_node = 0; int rc = pcmk_rc_ok; pcmk__output_t *out = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "NPUdilnpstv"); GOptionContext *context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } pcmk__cli_init_logging("crm_attribute", args->verbosity); rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } pcmk__register_lib_messages(out); if (args->version) { out->version(out, false); goto done; } out->quiet = args->quiet; if (options.command == attr_cmd_list) { command_list(out); goto done; } if (options.promotion_score && options.attr_name == NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "-p/--promotion must be called from an OCF resource agent " "or with a resource ID specified"); goto done; } if (options.inhibit) { crm_warn("Inhibiting notifications for this update"); cib__set_call_options(cib_opts, crm_system_name, cib_inhibit_notify); } the_cib = cib_new(); rc = cib__signon_attempts(the_cib, cib_command, 5); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not connect to the CIB: %s", pcmk_rc_str(rc)); goto done; } set_type(); // Use default node if not given (except for cluster options and tickets) if (!pcmk__strcase_any_of(options.type, PCMK_XE_CRM_CONFIG, PCMK_XE_TICKETS, NULL)) { /* If we are being called from a resource agent via the cluster, * the correct local node name will be passed as an environment * variable. Otherwise, we have to ask the cluster. */ const char *target = pcmk__node_attr_target(options.dest_uname); if (target != NULL) { /* If options.dest_uname is "auto" or "localhost", then * pcmk__node_attr_target() may return it, depending on environment * variables. In that case, attribute lookups will fail for "auto" * (unless there's a node named "auto"). attrd maps "localhost" to * the true local node name for queries. * * @TODO * * Investigate whether "localhost" is mapped to a real node name * for non-query commands. If not, possibly modify it so that it * is. * * Map "auto" to "localhost" (probably). */ if (target != (const char *) options.dest_uname) { g_free(options.dest_uname); options.dest_uname = g_strdup(target); } } else if (getenv("CIB_file") != NULL && options.dest_uname == NULL) { get_node_name_from_local(); } if (options.dest_uname == NULL) { char *node_name = NULL; rc = pcmk__query_node_name(out, 0, &node_name, 0); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); free(node_name); goto done; } options.dest_uname = g_strdup(node_name); free(node_name); } rc = query_node_uuid(the_cib, options.dest_uname, &options.dest_node, &is_remote_node); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not map name=%s to a UUID", options.dest_uname); goto done; } } if (!delete_used_correctly()) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error: must specify attribute name or pattern to delete"); goto done; } if (!update_used_correctly()) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error: must specify attribute name or pattern to update"); goto done; } if (options.attr_pattern) { if (options.attr_name) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error: --name and --pattern cannot be used at the same time"); goto done; } if (!pattern_used_correctly()) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error: pattern can only be used with delete, query, or update"); goto done; } g_free(options.attr_name); options.attr_name = options.attr_pattern; options.attr_options |= pcmk__node_attr_pattern; } if (is_remote_node) { options.attr_options |= pcmk__node_attr_remote; } if (pcmk__str_eq(options.set_type, PCMK_XE_UTILIZATION, pcmk__str_none)) { options.attr_options |= pcmk__node_attr_utilization; } if (try_ipc_update() && (send_attrd_update(options.command, options.dest_uname, options.attr_name, options.attr_value, options.set_name, NULL, options.attr_options) == pcmk_rc_ok)) { const char *update = options.attr_value; if (options.command == attr_cmd_delete) { update = ""; } - crm_info("Update %s=%s sent via pacemaker-attrd", + crm_info("Update %s=%s sent to the attribute manager", options.attr_name, update); } else if (options.command == attr_cmd_delete) { rc = command_delete(out, the_cib); } else if (options.command == attr_cmd_update) { rc = command_update(out, the_cib, is_remote_node); } else { rc = command_query(out, the_cib); } if (rc == ENOTUNIQ) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Please choose from one of the matches below and supply the 'id' with --attr-id"); } else if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error performing operation: %s", pcmk_rc_str(rc)); } done: g_strfreev(processed_args); pcmk__free_arg_context(context); free(options.attr_default); g_free(options.attr_id); g_free(options.attr_name); free(options.attr_value); free(options.dest_node); g_free(options.dest_uname); g_free(options.set_name); free(options.set_type); g_free(options.type); cib__clean_up_connection(&the_cib); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); return crm_exit(exit_code); }