diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c index 279a6225f7..e630f97e2b 100644 --- a/daemons/controld/controld_cib.c +++ b/daemons/controld/controld_cib.c @@ -1,1069 +1,1069 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* sleep */ #include #include #include #include #include #include // Call ID of the most recent in-progress CIB resource update (or 0 if none) static int pending_rsc_update = 0; /*! * \internal * \brief Respond to a dropped CIB connection * * \param[in] user_data CIB connection that dropped */ static void handle_cib_disconnect(gpointer user_data) { CRM_LOG_ASSERT(user_data == controld_globals.cib_conn); controld_trigger_fsa(); controld_globals.cib_conn->state = cib_disconnected; if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { // @TODO This should trigger a reconnect, not a shutdown crm_crit("Lost connection to the CIB manager, shutting down"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_CIB_CONNECTED); } else { // Expected crm_info("Disconnected from the CIB manager"); } } static void do_cib_updated(const char *event, xmlNode * msg) { const xmlNode *patchset = NULL; const char *client_name = NULL; crm_debug("Received CIB diff notification: DC=%s", pcmk__btoa(AM_I_DC)); if (cib__get_notify_patchset(msg, &patchset) != pcmk_rc_ok) { return; } if (cib__element_in_patchset(patchset, PCMK_XE_ALERTS) || cib__element_in_patchset(patchset, PCMK_XE_CRM_CONFIG)) { controld_trigger_config(); } if (!AM_I_DC) { // We're not in control of the join sequence return; } client_name = crm_element_value(msg, F_CIB_CLIENTNAME); if (!cib__client_triggers_refresh(client_name)) { // The CIB is still accurate return; } if (cib__element_in_patchset(patchset, PCMK_XE_NODES) || cib__element_in_patchset(patchset, PCMK_XE_STATUS)) { /* An unsafe client modified the PCMK_XE_NODES or PCMK_XE_STATUS * section. Ensure the node list is up-to-date, and start the join * process again so we get everyone's current resource history. */ if (client_name == NULL) { client_name = crm_element_value(msg, F_CIB_CLIENTID); } crm_notice("Populating nodes and starting an election after %s event " "triggered by %s", event, pcmk__s(client_name, "(unidentified client)")); populate_cib_nodes(node_update_quick|node_update_all, __func__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } void controld_disconnect_cib_manager(void) { cib_t *cib_conn = controld_globals.cib_conn; CRM_ASSERT(cib_conn != NULL); crm_debug("Disconnecting from the CIB manager"); controld_clear_fsa_input_flags(R_CIB_CONNECTED); cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated); cib_free_callbacks(cib_conn); if (cib_conn->state != cib_disconnected) { cib_conn->cmds->set_secondary(cib_conn, cib_scope_local|cib_discard_reply); cib_conn->cmds->signoff(cib_conn); } } /* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static int cib_retries = 0; cib_t *cib_conn = controld_globals.cib_conn; void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; int rc = pcmk_ok; CRM_ASSERT(cib_conn != NULL); if (pcmk_is_set(action, A_CIB_STOP)) { if ((cib_conn->state != cib_disconnected) && (pending_rsc_update != 0)) { crm_info("Waiting for resource update %d to complete", pending_rsc_update); crmd_fsa_stall(FALSE); return; } controld_disconnect_cib_manager(); } if (!pcmk_is_set(action, A_CIB_START)) { return; } if (cur_state == S_STOPPING) { crm_err("Ignoring request to connect to the CIB manager after " "shutdown"); return; } rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); if (rc != pcmk_ok) { // A short wait that usually avoids stalling the FSA sleep(1); rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); } if (rc != pcmk_ok) { crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); } else if (cib_conn->cmds->set_connection_dnotify(cib_conn, dnotify_fn) != pcmk_ok) { crm_err("Could not set dnotify callback"); } else if (cib_conn->cmds->add_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, update_cb) != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); } else { controld_set_fsa_input_flags(R_CIB_CONNECTED); cib_retries = 0; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { cib_retries++; if (cib_retries < 30) { crm_warn("Couldn't complete CIB registration %d times... " "pause and retry", cib_retries); controld_start_wait_timer(); crmd_fsa_stall(FALSE); } else { crm_err("Could not complete CIB registration %d times... " "hard error", cib_retries); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } } #define MIN_CIB_OP_TIMEOUT (30) /*! * \internal * \brief Get the timeout (in seconds) that should be used with CIB operations * * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout * environment variable, or 10 seconds times one more than the number of * nodes in the cluster. */ unsigned int cib_op_timeout(void) { // @COMPAT: Drop env_timeout at 3.0.0 static int env_timeout = -1; unsigned int calculated_timeout = 0; if (env_timeout == -1) { const char *env = pcmk__env_option(PCMK__ENV_CIB_TIMEOUT); pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT); crm_trace("Minimum CIB op timeout: %ds (environment: %s)", env_timeout, (env? env : "none")); } calculated_timeout = 1 + crm_active_peers(); if (crm_remote_peer_cache) { calculated_timeout += g_hash_table_size(crm_remote_peer_cache); } calculated_timeout *= 10; calculated_timeout = QB_MAX(calculated_timeout, env_timeout); crm_trace("Calculated timeout: %us", calculated_timeout); if (controld_globals.cib_conn) { controld_globals.cib_conn->call_timeout = calculated_timeout; } return calculated_timeout; } /*! * \internal * \brief Get CIB call options to use local scope if primary is unavailable * * \return CIB call options */ int crmd_cib_smart_opt(void) { int call_opt = cib_none; if ((controld_globals.fsa_state == S_ELECTION) || (controld_globals.fsa_state == S_PENDING)) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(controld_globals.fsa_state)); cib__set_call_options(call_opt, "update", cib_scope_local); } return call_opt; } static void cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *desc = user_data; if (rc == 0) { crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); } else { crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", desc, call_id, pcmk_strerror(rc), rc); } } // Searches for various portions of PCMK__XE_NODE_STATE to delete // Match a particular node's PCMK__XE_NODE_STATE (takes node name 1x) #define XPATH_NODE_STATE "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" // Node's lrm section (name 1x) -#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM +#define XPATH_NODE_LRM XPATH_NODE_STATE "/" PCMK__XE_LRM /* Node's lrm_rsc_op entries and lrm_resource entries without unexpired lock * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x) */ #define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \ "|" XPATH_NODE_STATE \ "//" XML_LRM_TAG_RESOURCE \ "[not(@" PCMK_OPT_SHUTDOWN_LOCK ") " \ "or " PCMK_OPT_SHUTDOWN_LOCK "<%lld]" // Node's transient_attributes section (name 1x) #define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS // Everything under PCMK__XE_NODE_STATE (name 1x) #define XPATH_NODE_ALL XPATH_NODE_STATE "/*" /* Unlocked history + transient attributes * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x, name 1x) */ #define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS /*! * \internal * \brief Get the XPath and description of a node state section to be deleted * * \param[in] uname Desired node * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to be deleted * \param[out] xpath Where to store XPath of \p section * \param[out] desc If not \c NULL, where to store description of \p section */ void controld_node_state_deletion_strings(const char *uname, enum controld_section_e section, char **xpath, char **desc) { const char *desc_pre = NULL; // Shutdown locks that started before this time are expired long long expire = (long long) time(NULL) - controld_globals.shutdown_lock_limit; switch (section) { case controld_section_lrm: *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); desc_pre = "resource history"; break; case controld_section_lrm_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, uname, uname, expire); desc_pre = "resource history (other than shutdown locks)"; break; case controld_section_attrs: *xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); desc_pre = "transient attributes"; break; case controld_section_all: *xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); desc_pre = "all state"; break; case controld_section_all_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, uname, uname, expire, uname); desc_pre = "all state (other than shutdown locks)"; break; default: // We called this function incorrectly CRM_ASSERT(false); break; } if (desc != NULL) { *desc = crm_strdup_printf("%s for node %s", desc_pre, uname); } } /*! * \internal * \brief Delete subsection of a node's CIB \c PCMK__XE_NODE_STATE * * \param[in] uname Desired node * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to delete * \param[in] options CIB call options to use */ void controld_delete_node_state(const char *uname, enum controld_section_e section, int options) { cib_t *cib = controld_globals.cib_conn; char *xpath = NULL; char *desc = NULL; int cib_rc = pcmk_ok; CRM_ASSERT((uname != NULL) && (cib != NULL)); controld_node_state_deletion_strings(uname, section, &xpath, &desc); cib__set_call_options(options, "node state deletion", cib_xpath|cib_multiple); cib_rc = cib->cmds->remove(cib, xpath, NULL, options); fsa_register_cib_callback(cib_rc, desc, cib_delete_callback); crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", desc, cib_rc, xpath); // CIB library handles freeing desc free(xpath); } // Takes node name and resource ID -#define XPATH_RESOURCE_HISTORY "//" PCMK__XE_NODE_STATE \ - "[@" PCMK_XA_UNAME "='%s']/" \ - XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ - "/" XML_LRM_TAG_RESOURCE \ +#define XPATH_RESOURCE_HISTORY "//" PCMK__XE_NODE_STATE \ + "[@" PCMK_XA_UNAME "='%s']/" \ + PCMK__XE_LRM "/" XML_LRM_TAG_RESOURCES \ + "/" XML_LRM_TAG_RESOURCE \ "[@" PCMK_XA_ID "='%s']" // @TODO could add "and @PCMK_OPT_SHUTDOWN_LOCK" to limit to locks /*! * \internal * \brief Clear resource history from CIB for a given resource and node * * \param[in] rsc_id ID of resource to be cleared * \param[in] node Node whose resource history should be cleared * \param[in] user_name ACL user name to use * \param[in] call_options CIB call options * * \return Standard Pacemaker return code */ int controld_delete_resource_history(const char *rsc_id, const char *node, const char *user_name, int call_options) { char *desc = NULL; char *xpath = NULL; int rc = pcmk_rc_ok; cib_t *cib = controld_globals.cib_conn; CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); if (cib == NULL) { crm_err("Unable to clear %s: no CIB connection", desc); free(desc); return ENOTCONN; } // Ask CIB to delete the entry xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); cib->cmds->set_user(cib, user_name); rc = cib->cmds->remove(cib, xpath, NULL, call_options|cib_xpath); cib->cmds->set_user(cib, NULL); if (rc < 0) { rc = pcmk_legacy2rc(rc); crm_err("Could not delete resource status of %s on %s%s%s: %s " CRM_XS " rc=%d", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : ""), pcmk_rc_str(rc), rc); free(desc); free(xpath); return rc; } if (pcmk_is_set(call_options, cib_sync_call)) { if (pcmk_is_set(call_options, cib_dryrun)) { crm_debug("Deletion of %s would succeed", desc); } else { crm_debug("Deletion of %s succeeded", desc); } free(desc); } else { crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s", desc, rc, xpath); fsa_register_cib_callback(rc, desc, cib_delete_callback); // CIB library handles freeing desc } free(xpath); return pcmk_rc_ok; } /*! * \internal * \brief Build XML and string of parameters meeting some criteria, for digest * * \param[in] op Executor event with parameter table to use * \param[in] metadata Parsed meta-data for executed resource agent * \param[in] param_type Flag used for selection criteria * \param[out] result Will be set to newly created XML with selected * parameters as attributes * * \return Newly allocated space-separated string of parameter names * \note Selection criteria varies by param_type: for the restart digest, we * want parameters that are *not* marked reloadable (OCF 1.1) or that * *are* marked unique (pre-1.1), for both string and XML results; for the * secure digest, we want parameters that *are* marked private for the * string, but parameters that are *not* marked private for the XML. * \note It is the caller's responsibility to free the string return value with * \p g_string_free() and the XML result with \p free_xml(). */ static GString * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, enum ra_param_flags_e param_type, xmlNode **result) { GString *list = NULL; *result = create_xml_node(NULL, PCMK_XE_PARAMETERS); /* Consider all parameters only except private ones to be consistent with * what scheduler does with calculate_secure_digest(). */ if (param_type == ra_param_private && compare_version(controld_globals.dc_version, "3.16.0") >= 0) { g_hash_table_foreach(op->params, hash2field, *result); pcmk__filter_op_for_digest(*result); } for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept_for_list = false; bool accept_for_xml = false; switch (param_type) { case ra_param_reloadable: accept_for_list = !pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_unique: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_private: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = !accept_for_list; break; } if (accept_for_list) { crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); if (list == NULL) { // We will later search for " WORD ", so start list with a space pcmk__add_word(&list, 256, " "); } pcmk__add_word(&list, 0, param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (accept_for_xml) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(*result, param->rap_name, v); } } else { crm_trace("Removing attr %s from the xml result", param->rap_name); xml_remove_prop(*result, param->rap_name); } } if (list != NULL) { // We will later search for " WORD ", so end list with a space pcmk__add_word(&list, 0, " "); } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) { /* Add parameters not marked reloadable to the PCMK__XA_OP_FORCE_RESTART * list */ list = build_parameter_list(op, metadata, ra_param_reloadable, &restart); } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) { /* @COMPAT pre-OCF-1.1 resource agents * * Before OCF 1.1, Pacemaker abused "unique=0" to indicate * reloadability. Add any parameters with unique="1" to the * PCMK__XA_OP_FORCE_RESTART list. */ list = build_parameter_list(op, metadata, ra_param_unique, &restart); } else { // Resource does not support agent reloads return; } digest = calculate_operation_digest(restart, version); /* Add PCMK__XA_OP_FORCE_RESTART and PCMK__XA_OP_RESTART_DIGEST to indicate * the resource supports reload, no matter if it actually supports any * reloadable parameters */ crm_xml_add(update, PCMK__XA_OP_FORCE_RESTART, (list == NULL)? "" : (const char *) list->str); crm_xml_add(update, PCMK__XA_OP_RESTART_DIGEST, digest); if ((list != NULL) && (list->len > 0)) { crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); } else { crm_trace("%s: %s", op->rsc_id, digest); } if (list != NULL) { g_string_free(list, TRUE); } free_xml(restart); free(digest); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* To keep PCMK__XA_OP_SECURE_PARAMS short, we want it to contain the secure * parameters but PCMK__XA_OP_SECURE_DIGEST to be based on the insecure ones */ list = build_parameter_list(op, metadata, ra_param_private, &secure); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, PCMK__XA_OP_SECURE_PARAMS, (const char *) list->str); crm_xml_add(update, PCMK__XA_OP_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); g_string_free(list, TRUE); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); } /*! * \internal * \brief Create XML for a resource history entry * * \param[in] func Function name of caller * \param[in,out] parent XML to add entry to * \param[in] rsc Affected resource * \param[in,out] op Action to add an entry for (or NULL to do nothing) * \param[in] node_name Node where action occurred */ void controld_add_resource_history_xml_as(const char *func, xmlNode *parent, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *node_name) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return; } target_rc = rsc_op_expected_rc(op); caller_version = g_hash_table_lookup(op->params, PCMK_XA_CRM_FEATURE_SET); CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, controld_globals.our_nodename, func); if (xml_op == NULL) { return; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return; } /* Ideally the metadata is cached, and the agent is just a fallback. * * @TODO Go through all callers and ensure they get metadata asynchronously * first. */ metadata = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_agent |controld_metadata_from_cache); if (metadata == NULL) { return; } crm_trace("Including additional digests for %s:%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return; } /*! * \internal * \brief Record an action as pending in the CIB, if appropriate * * \param[in] node_name Node where the action is pending * \param[in] rsc Resource that action is for * \param[in,out] op Pending action * * \return true if action was recorded in CIB, otherwise false */ bool controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL), return false); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return false; } // Check action's PCMK_META_RECORD_PENDING meta-attribute (defaults to true) record_pending = crm_meta_value(op->params, PCMK_META_RECORD_PENDING); if ((record_pending != NULL) && !crm_is_true(record_pending)) { return false; } op->call_id = -1; op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); crm_debug("Recording pending %s-interval %s for %s on %s in the CIB", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, node_name); controld_update_resource_history(node_name, rsc, op, 0); return true; } static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource history update completed (call=%d rc=%d)", call_id, rc); break; default: if (call_id > 0) { crm_warn("Resource history update %d failed: %s " CRM_XS " rc=%d", call_id, pcmk_strerror(rc), rc); } else { crm_warn("Resource history update failed: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } if (call_id == pending_rsc_update) { pending_rsc_update = 0; controld_trigger_fsa(); } } /* Only successful stops, and probes that found the resource inactive, get locks * recorded in the history. This ensures the resource stays locked to the node * until it is active there again after the node comes back up. */ static bool should_preserve_lock(lrmd_event_data_t *op) { if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { return false; } if (!strcmp(op->op_type, PCMK_ACTION_STOP) && (op->rc == PCMK_OCF_OK)) { return true; } if (!strcmp(op->op_type, PCMK_ACTION_MONITOR) && (op->rc == PCMK_OCF_NOT_RUNNING)) { return true; } return false; } /*! * \internal * \brief Request a CIB update * * \param[in] section Section of CIB to update * \param[in] data New XML of CIB section to update * \param[in] options CIB call options * \param[in] callback If not \c NULL, set this as the operation callback * * \return Standard Pacemaker return code * * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is * stored in \p pending_rsc_update on success. */ int controld_update_cib(const char *section, xmlNode *data, int options, void (*callback)(xmlNode *, int, int, xmlNode *, void *)) { cib_t *cib = controld_globals.cib_conn; int cib_rc = -ENOTCONN; CRM_ASSERT(data != NULL); if (cib != NULL) { cib_rc = cib->cmds->modify(cib, section, data, options); if (cib_rc >= 0) { crm_debug("Submitted CIB update %d for %s section", cib_rc, section); } } if (callback == NULL) { if (cib_rc < 0) { crm_err("Failed to update CIB %s section: %s", section, pcmk_rc_str(pcmk_legacy2rc(cib_rc))); } } else { if ((cib_rc >= 0) && (callback == cib_rsc_callback)) { /* Checking for a particular callback is a little hacky, but it * didn't seem worth adding an output argument for cib_rc for just * one use case. */ pending_rsc_update = cib_rc; } fsa_register_cib_callback(cib_rc, NULL, callback); } return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc); } /*! * \internal * \brief Update resource history entry in CIB * * \param[in] node_name Node where action occurred * \param[in] rsc Resource that action is for * \param[in,out] op Action to record * \param[in] lock_time If nonzero, when resource was locked to node * * \note On success, the CIB update's call ID will be stored in * pending_rsc_update. */ void controld_update_resource_history(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time) { xmlNode *update = NULL; xmlNode *xml = NULL; int call_opt = crmd_cib_smart_opt(); const char *node_id = NULL; const char *container = NULL; CRM_CHECK((node_name != NULL) && (op != NULL), return); if (rsc == NULL) { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); return; } // update = create_xml_node(NULL, PCMK_XE_STATUS); // xml = create_xml_node(update, PCMK__XE_NODE_STATE); if (pcmk__str_eq(node_name, controld_globals.our_nodename, pcmk__str_casei)) { node_id = controld_globals.our_uuid; } else { node_id = node_name; pcmk__xe_set_bool_attr(xml, XML_NODE_IS_REMOTE, true); } crm_xml_add(xml, PCMK_XA_ID, node_id); crm_xml_add(xml, PCMK_XA_UNAME, node_name); crm_xml_add(xml, PCMK_XA_CRM_DEBUG_ORIGIN, __func__); // - xml = create_xml_node(xml, XML_CIB_TAG_LRM); + xml = create_xml_node(xml, PCMK__XE_LRM); crm_xml_add(xml, PCMK_XA_ID, node_id); // xml = create_xml_node(xml, XML_LRM_TAG_RESOURCES); // xml = create_xml_node(xml, XML_LRM_TAG_RESOURCE); crm_xml_add(xml, PCMK_XA_ID, op->rsc_id); crm_xml_add(xml, PCMK_XA_CLASS, rsc->standard); crm_xml_add(xml, PCMK_XA_PROVIDER, rsc->provider); crm_xml_add(xml, PCMK_XA_TYPE, rsc->type); if (lock_time != 0) { /* Actions on a locked resource should either preserve the lock by * recording it with the action result, or clear it. */ if (!should_preserve_lock(op)) { lock_time = 0; } crm_xml_add_ll(xml, PCMK_OPT_SHUTDOWN_LOCK, (long long) lock_time); } if (op->params != NULL) { container = g_hash_table_lookup(op->params, CRM_META "_" PCMK__META_CONTAINER); if (container != NULL) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(xml, PCMK__META_CONTAINER, container); } } // (possibly more than one) controld_add_resource_history_xml(xml, rsc, op, node_name); /* Update CIB asynchronously. Even if it fails, the resource state should be * discovered during the next election. Worst case, the node is wrongly * fenced for running a resource it isn't. */ crm_log_xml_trace(update, __func__); controld_update_cib(PCMK_XE_STATUS, update, call_opt, cib_rsc_callback); free_xml(update); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] op Operation whose history should be deleted */ void controld_delete_action_history(const lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, PCMK__XA_CALL_ID, op->call_id); crm_xml_add(xml_top, PCMK__XA_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, PCMK_XA_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, PCMK_XE_STATUS, xml_top, cib_none); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" \ - "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ + "/" PCMK__XE_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" PCMK_XA_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY "[@" PCMK_XA_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" PCMK_XA_ID "='%s' and @" PCMK__XA_CALL_ID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" PCMK_XA_ID "='%s' and @" PCMK__XA_OPERATION_KEY "='%s']" /*! * \internal * \brief Delete a last_failure resource history entry from the CIB * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] action If specified, delete only if this was failed action * \param[in] interval_ms If \p action is specified, it has this interval */ void controld_cib_delete_last_failure(const char *rsc_id, const char *node, const char *action, guint interval_ms) { char *xpath = NULL; char *last_failure_key = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL), return); // Generate XPath to match desired entry last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0); if (action == NULL) { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, last_failure_key); } else { char *action_key = pcmk__op_key(rsc_id, action, interval_ms); xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id, last_failure_key, action_key); free(action_key); } free(last_failure_key); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } /*! * \internal * \brief Delete resource history entry from the CIB, given operation key * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] call_id If specified, delete entry only if it has this call ID */ void controld_delete_action_history_by_key(const char *rsc_id, const char *node, const char *key, int call_id) { char *xpath = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return); if (call_id > 0) { xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key, call_id); } else { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key); } controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 464e3fce5a..bd3cfbdece 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2447 +1,2447 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_rsc_info_t, etc. #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static void lrm_connection_destroy(void) { if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { crm_crit("Lost connection to local executor"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_LRM_CONNECTED); } } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none) && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); controld_delete_resource_history(op->rsc_id, lrm_state->node_name, NULL, crmd_cib_smart_opt()); return; } if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.standard = strdup(rsc->standard); pcmk__str_update(&entry->rsc.provider, rsc->provider); } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_EXEC_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, PCMK_ACTION_MONITOR, NULL)) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = pcmk__strkey_table(free, free); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if ((entry->recurring_op_list != NULL) && !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR, pcmk__str_casei)) { crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input, const char *rsc_id, const lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } static inline const char * op_node_name(lrmd_event_data_t *op) { return pcmk__s(op->remote_nodename, controld_globals.our_nodename); } void lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: if (op->remote_nodename == NULL) { /* If this is the local executor IPC connection, set the right * bits in the controller when the connection goes down. */ lrm_connection_destroy(); } break; case lrmd_event_exec_complete: { lrm_state_t *lrm_state = lrm_state_find(op_node_name(op)); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL, NULL); } break; default: break; } } static void try_local_executor_connect(long long action, fsa_data_t *msg_data, lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; crm_debug("Connecting to the local executor"); // If we can connect, great rc = controld_connect_local_executor(lrm_state); if (rc == pcmk_rc_ok) { controld_set_fsa_input_flags(R_LRM_CONNECTED); crm_info("Connection to the local executor established"); return; } // Otherwise, if we can try again, set a timer to do so if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the local executor %d time%s " "(%d max): %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS, pcmk_rc_str(rc)); controld_start_wait_timer(); crmd_fsa_stall(FALSE); return; } // Otherwise give up crm_err("Failed to connect to the executor the max allowed " "%d time%s: %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), pcmk_rc_str(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if (controld_globals.our_nodename == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(controld_globals.our_nodename); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } controld_clear_fsa_input_flags(R_LRM_CONNECTED); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); } if (action & A_LRM_CONNECT) { try_local_executor_connect(action, msg_data, lrm_state); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; active_op_t *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) { guint removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->active_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, pcmk__plural_s(removed), when, nremaining); } } if (lrm_state->active_ops != NULL) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, pcmk__plural_s(counter), when); if ((cur_state == S_TERMINATE) || !pcmk_is_set(controld_globals.fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->active_ops != NULL) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->active_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if ((entry->last->rc == PCMK_OCF_OK) && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP, pcmk__str_casei)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO, pcmk__str_casei)) { // A stricter check is too complex ... leave that to the scheduler return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, PCMK_XA_ID, entry->id); crm_xml_add(xml_rsc, PCMK_XA_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, PCMK_XA_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { static const char *name = CRM_META "_" PCMK__META_CONTAINER; const char *container = g_hash_table_lookup(entry->last->params, name); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, PCMK__META_CONTAINER, container); } } controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name); controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name); } } return FALSE; } xmlNode * controld_query_executor_state(void) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename); if (!lrm_state) { crm_err("Could not find executor state for node %s", controld_globals.our_nodename); return NULL; } peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, node_update_cluster|node_update_peer, NULL, __func__); if (xml_state == NULL) { return NULL; } - xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); + xml_data = create_xml_node(xml_state, PCMK__XE_LRM); crm_xml_add(xml_data, PCMK_XA_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } /*! * \internal * \brief Map standard Pacemaker return code to operation status and OCF code * * \param[out] event Executor event whose status and return code should be set * \param[in] rc Standard Pacemaker return code */ void controld_rc2event(lrmd_event_data_t *event, int rc) { /* This is called for cleanup requests from controller peers/clients, not * for resource actions, so no exit reason is needed. */ switch (rc) { case pcmk_rc_ok: lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); break; case EACCES: lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV, PCMK_EXEC_ERROR, NULL); break; default: lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, NULL); break; } } /*! * \internal * \brief Trigger a new transition after CIB status was deleted * * If a CIB status delete was not expected (as part of the transition graph), * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" * cluster property. * * \param[in] from_sys IPC name that requested the delete * \param[in] rsc_id Resource whose status was deleted (for logging only) */ void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_none, PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, NULL, NULL); free(now_s); } } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, PCMK__XA_SRC); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE); controld_rc2event(op, pcmk_legacy2rc(rc)); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; active_op_t *pending = value; if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } static void delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, GHashTableIter *rsc_iter, int rc, const char *user_name, bool from_cib) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_iter) { g_hash_table_iter_remove(rsc_iter); } else { g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); } if (from_cib) { controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, user_name, crmd_cib_smart_opt()); } g_hash_table_foreach_remove(lrm_state->active_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } if (lrm_state->resource_history != NULL) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; active_op_t *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->active_ops, key); if (pending) { if (remove && !pcmk_is_set(pending->flags, active_op_remove)) { controld_set_active_op_flags(pending, active_op_remove); crm_debug("Scheduling %s for removal", key); } if (pcmk_is_set(pending->flags, active_op_cancelled)) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } controld_set_active_op_flags(pending, active_op_cancelled); } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the active operations list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from active operations will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; active_op_t *op = value; if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->active_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in,out] lrm_state Executor connection state to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource if not already * \param[out] rsc_info Where to store information obtained from executor * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = ID(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, PCMK__XA_LONG_ID); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, PCMK_XA_CLASS); const char *provider = crm_element_value(rsc_xml, PCMK_XA_PROVIDER); const char *type = crm_element_value(rsc_xml, PCMK_XA_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " CRM_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc, GHashTableIter *iter, const char *sys, const char *user, ha_msg_input_t *request, bool unregister, bool from_cib) { int rc = pcmk_ok; crm_info("Removing resource %s from executor for %s%s%s", id, sys, (user? " as " : ""), (user? user : "")); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource %s deleted from executor", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' from executor is pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, PCMK_XA_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Could not delete '%s' from executor for %s%s%s: %s " CRM_XS " rc=%d", id, sys, (user? " as " : ""), (user? user : ""), pcmk_strerror(rc), rc); } delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode, const char *exit_reason) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, op_exitcode, op_status, exit_reason); } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ bool unregister = true; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { unregister = false; if (reprobe_all_nodes) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state != NULL) { /* If reprobing all nodes, be sure to reprobe the remote * node before clearing its connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE, reprobe_all_nodes); } } } /* Don't delete from the CIB, since we'll delete the whole node's LRM * state from the CIB soon */ delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, user_name, NULL, unregister, false); } /* Now delete the copy in the CIB */ controld_delete_node_state(lrm_state->node_name, controld_section_lrm, cib_scope_local); // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false) update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } /*! * \internal * \brief Fail a requested action without actually executing it * * For an action that can't be executed, process it similarly to an actual * execution result, with specified error status (except for notify actions, * which will always be treated as successful). * * \param[in,out] lrm_state Executor connection that action is for * \param[in] action Action XML from request * \param[in] rc Desired return code to use * \param[in] op_status Desired operation status to use * \param[in] exit_reason Human-friendly detail, if error */ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action, int op_status, enum ocf_exitcode rc, const char *exit_reason) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, PCMK_XA_OPERATION); const char *target_node = crm_element_value(action, PCMK__META_ON_NODE); xmlNode *xml_rsc = find_xml_node(action, PCMK_XE_PRIMITIVE, TRUE); if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, PCMK__XA_OPERATION_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", ID(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL); } else { fake_op_status(lrm_state, op, op_status, rc, exit_reason); } crm_info("Faking " PCMK__OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); // Process the result as if it came from the LRM process_lrm_event(lrm_state, op, NULL, action); lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation (replacing \p NULL with local node * name) * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(const xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, PCMK__META_ON_NODE); } if (target == NULL) { target = controld_globals.our_nodename; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(xml, PCMK_XE_PRIMITIVE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; if (user_name && !pcmk__is_privileged(user_name)) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_INSUFFICIENT_PRIV, "Unprivileged user cannot fail resources"); controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR, "Simulated failure"); process_lrm_event(lrm_state, op, NULL, xml); op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR, "Cannot fail unknown resource"); } controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } static void handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node, reprobe_all_nodes); if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, controld_globals.our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; guint interval_ms = 0; gboolean in_progress = FALSE; xmlNode *params = find_xml_node(input->xml, PCMK__XE_ATTRIBUTES, TRUE); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(PCMK_XA_OPERATION); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(PCMK_META_INTERVAL); if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) { free(meta_key); return FALSE; } free(meta_key); op_key = pcmk__op_key(rsc->id, op_task, interval_ms); meta_key = crm_meta_name(PCMK__XA_CALL_ID); call_id = crm_element_value(params, meta_key); free(meta_key); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); pcmk__scan_min_int(call_id, &call, 0); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } controld_delete_action_history_by_key(rsc->id, lrm_state->node_name, op_key, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ if (lrm_state->active_ops != NULL) { g_hash_table_remove(lrm_state->active_ops, op_id); } free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, PCMK_XA_CRM_FEATURE_SET); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { bool unregister = true; int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, user_name, cib_dryrun|cib_sync_call); if (cib_rc != pcmk_rc_ok) { lrmd_event_data_t *op = NULL; op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE); /* These are resource clean-ups, not actions, so no exit reason is * needed. */ lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = false; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, user_name, input, unregister, true); } // User data for asynchronous metadata execution struct metadata_cb_data { lrmd_rsc_info_t *rsc; // Copy of resource information xmlNode *input_xml; // Copy of FSA input XML }; static struct metadata_cb_data * new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml) { struct metadata_cb_data *data = NULL; data = calloc(1, sizeof(struct metadata_cb_data)); CRM_ASSERT(data != NULL); data->input_xml = copy_xml(input_xml); data->rsc = lrmd_copy_rsc_info(rsc); return data; } static void free_metadata_cb_data(struct metadata_cb_data *data) { lrmd_free_rsc_info(data->rsc); free_xml(data->input_xml); free(data); } /*! * \internal * \brief Execute an action after metadata has been retrieved * * \param[in] pid Ignored * \param[in] result Result of metadata action * \param[in] user_data Metadata callback data */ static void metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) { struct metadata_cb_data *data = (struct metadata_cb_data *) user_data; struct ra_metadata_s *md = NULL; lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml)); if ((lrm_state != NULL) && pcmk__result_ok(result)) { md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, result->action_stdout); } if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); } free_metadata_cb_data(data); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = lrm_op_target(input->xml); gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; // Message routed to the local node is targeting a specific, non-local node is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename, pcmk__str_casei); lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Local node has no connection to remote"); return; } CRM_ASSERT(lrm_state != NULL); user_name = pcmk__update_acl_user(input->msg, PCMK__XA_CRM_USER, NULL); crm_op = crm_element_value(input->msg, PCMK__XA_CRM_TASK); from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM); if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { from_host = crm_element_value(input->msg, PCMK__XA_SRC); } if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { crm_rsc_delete = TRUE; // from crm_resource } operation = PCMK_ACTION_DELETE; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, PCMK_XA_OPERATION); } CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return); crm_trace("'%s' execution request from %s as %s user", pcmk__s(crm_op, operation), pcmk__s(from_sys, "unknown subsystem"), pcmk__s(user_name, "current")); if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) { /* @COMPAT This can only be sent by crm_resource --refresh on a * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely * unlikely. It previously would cause the controller to re-write its * resource history to the CIB. Just ignore it. */ crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node"); // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, PCMK_VALUE_TRUE, user_name, is_remote_node); } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none) || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { const char *raw_target = NULL; if (input->xml != NULL) { // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes raw_target = crm_element_value(input->xml, PCMK__META_ON_NODE); } handle_reprobe_op(lrm_state, from_sys, from_host, user_name, is_remote_node, (raw_target == NULL)); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, PCMK_XE_PRIMITIVE, TRUE); gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK(xml_rsc && ID(xml_rsc), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Not connected to remote executor"); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_notice("Not registering resource '%s' for a %s event " CRM_XS " get-rc=%d (%s) transition-key=%s", ID(xml_rsc), operation, rc, pcmk_strerror(rc), ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name, true); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_NOT_CONFIGURED, // fatal error "Invalid resource definition"); return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " CRM_XS " rc=%d", ID(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_INVALID_PARAM, // hard error "Could not register resource with executor"); return; } if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else { struct ra_metadata_s *md = NULL; /* Getting metadata from cache is OK except for start actions -- * always refresh from the agent for those, in case the resource * agent was updated. * * @TODO Only refresh metadata for starts if the agent actually * changed (using something like inotify, or a hash or modification * time of the agent executable). */ if (strcmp(operation, PCMK_ACTION_START) != 0) { md = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); } if ((md == NULL) && crm_op_needs_metadata(rsc->standard, operation)) { /* Most likely, we'll need the agent metadata to record the * pending operation and the operation result. Get it now rather * than wait until then, so the metadata action doesn't eat into * the real action's timeout. * * @TODO Metadata is retrieved via direct execution of the * agent, which has a couple of related issues: the executor * should execute agents, not the controller; and metadata for * Pacemaker Remote nodes should be collected on those nodes, * not locally. */ struct metadata_cb_data *data = NULL; data = new_metadata_cb_data(rsc, input->xml); crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously", rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), ((rsc->provider == NULL)? "" : rsc->provider), rsc->type); (void) lrmd__metadata_async(rsc, metadata_complete, (void *) data); } else { do_lrm_rsc_op(lrm_state, rsc, input->xml, md); } } lrmd_free_rsc_info(rsc); } else { crm_err("Invalid execution request: unknown command '%s' (bug?)", crm_op); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; GHashTable *params = NULL; xmlNode *primitive = NULL; const char *class = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id && operation); op = lrmd_new_event(rsc_id, operation, 0); op->type = lrmd_event_exec_complete; op->timeout = 0; op->start_delay = 0; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); if (rsc_op == NULL) { CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = pcmk__strkey_table(free, free); g_hash_table_insert(op->params, strdup(PCMK_XA_CRM_FEATURE_SET), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_" PCMK__META_OP_TARGET_RC); op_delay = crm_meta_value(params, PCMK_META_START_DELAY); pcmk__scan_min_int(op_delay, &op->start_delay, 0); op_timeout = crm_meta_value(params, PCMK_META_TIMEOUT); pcmk__scan_min_int(op_timeout, &op->timeout, 0); if (pcmk__guint_from_hash(params, CRM_META "_" PCMK_META_INTERVAL, 0, &(op->interval_ms)) != pcmk_rc_ok) { op->interval_ms = 0; } /* Use pcmk_monitor_timeout instead of meta timeout for stonith recurring monitor, if set */ primitive = find_xml_node(rsc_op, PCMK_XE_PRIMITIVE, FALSE); class = crm_element_value(primitive, PCMK_XA_CLASS); if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) && pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei) && (op->interval_ms > 0)) { op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (op_timeout != NULL) { op->timeout = crm_get_msec(op_timeout); } } if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = pcmk__strkey_table(free, free); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, PCMK__XA_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval_ms != 0) { if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP, NULL)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } /*! * \internal * \brief Send a (synthesized) event result * * Reply with a synthesized event result directly, as opposed to going through * the executor. * * \param[in] to_host Host to send result to * \param[in] to_sys IPC name to send result (NULL for transition engine) * \param[in] rsc Type information about resource the result is for * \param[in,out] op Event with result to send * \param[in] rsc_id ID of resource the result is for */ void controld_ack_event_directly(const char *to_host, const char *to_sys, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = pcmk__get_node(0, controld_globals.our_nodename, NULL, pcmk__node_search_cluster); update = create_node_state_update(peer, node_update_none, NULL, __func__); - iter = create_xml_node(update, XML_CIB_TAG_LRM); + iter = create_xml_node(update, PCMK__XE_LRM); crm_xml_add(iter, PCMK_XA_ID, controld_globals.our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, PCMK_XA_ID, op->rsc_id); controld_add_resource_history_xml(iter, rsc, op, controld_globals.our_nodename); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "[direct ACK]"); crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, PCMK_XA_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } controld_set_fsa_input_flags(R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; active_op_t *op = value; if ((op->interval_ms != 0) && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; active_op_t *op = value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } /*! * \internal * \brief Check whether recurring actions should be cancelled before an action * * \param[in] rsc_id Resource that action is for * \param[in] action Action being performed * \param[in] interval_ms Operation interval of \p action (in milliseconds) * * \return true if recurring actions should be cancelled, otherwise false */ static bool should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms) { if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0) && (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) { /* Don't stop monitoring a migrating Pacemaker Remote connection * resource until the entire migration has completed. We must detect if * the connection is unexpectedly severed, even during a migration. */ return false; } // Cancel recurring actions before changing resource state return (interval_ms == 0) && !pcmk__str_any_of(action, PCMK_ACTION_MONITOR, PCMK_ACTION_NOTIFY, NULL); } /*! * \internal * \brief Check whether an action should not be performed at this time * * \param[in] operation Action to be performed * * \return Readable description of why action should not be performed, * or NULL if it should be performed */ static const char * should_nack_action(const char *action) { if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN) && pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); return "Not attempting start due to shutdown in progress"; } switch (controld_globals.fsa_state) { case S_NOT_DC: case S_POLICY_ENGINE: // Recalculating case S_TRANSITION_ENGINE: break; default: if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) { return "Controller cannot attempt actions at this time"; } break; } return NULL; } static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md) { int rc; int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; const char *operation = NULL; const char *nack_reason = NULL; CRM_CHECK((rsc != NULL) && (msg != NULL), return); operation = crm_element_value(msg, PCMK_XA_OPERATION); CRM_CHECK(!pcmk__str_empty(operation), return); transition = crm_element_value(msg, PCMK__XA_TRANSITION_KEY); if (pcmk__str_empty(transition)) { crm_log_xml_err(msg, "Missing transition number"); } if (lrm_state == NULL) { // This shouldn't be possible, but provide a failsafe just in case crm_err("Cannot execute %s of %s: No executor connection " CRM_XS " transition_key=%s", operation, rsc->id, pcmk__s(transition, "")); synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID, PCMK_OCF_UNKNOWN_ERROR, "No executor connection"); return; } if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs * will schedule reload-agent actions only. In either case, we need * to map that to whatever the resource agent actually supports. * Default to the OCF 1.1 name. */ if ((md != NULL) && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { operation = PCMK_ACTION_RELOAD; } else { operation = PCMK_ACTION_RELOAD_AGENT; } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " PCMK__OP_FMT, removed, pcmk__plural_s(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_notice("Requesting local execution of %s operation for %s on %s " CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, pcmk__readable_action(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, pcmk__s(transition, ""), rsc->id, operation, op->interval_ms); nack_reason = should_nack_action(operation); if (nack_reason != NULL) { crm_notice("Discarding attempt to perform action %s on %s in state %s " "(shutdown=%s)", operation, rsc->id, fsa_state2string(controld_globals.fsa_state), pcmk__flag_text(controld_globals.fsa_input_register, R_SHUTDOWN)); lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID, nack_reason); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } controld_record_pending_op(lrm_state->node_name, rsc, op); op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, op->params, &call_id); if (rc == pcmk_rc_ok) { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); active_op_t *pending = NULL; pending = calloc(1, sizeof(active_op_t)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pcmk__str_update(&pending->user_data, op->user_data); if (crm_element_value_epoch(msg, PCMK_OPT_SHUTDOWN_LOCK, &(pending->lock_time)) != pcmk_ok) { pending->lock_time = 0; } g_hash_table_replace(lrm_state->active_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { int target_rc = PCMK_OCF_OK; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } else if (lrm_state_is_local(lrm_state)) { crm_err("Could not initiate %s action for resource %s locally: %s " CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { crm_err("Could not initiate %s action for resource %s remotely on %s: " "%s " CRM_XS " rc=%d", operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); } free(op_id); lrmd_free_event(op); } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* Replace newline escape pattern with actual newline (and a space so we * don't have to shuffle the rest of the buffer) */ pch[0] = '\n'; pch[1] = ' '; pch = strstr(pch, escaped_newline); } return ret; } static bool did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, const char * op_type, guint interval_ms) { rsc_history_t *entry = NULL; CRM_CHECK(lrm_state != NULL, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); CRM_CHECK(op_type != NULL, return FALSE); entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->failed == NULL) { return FALSE; } if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none) && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei) && entry->failed->interval_ms == interval_ms) { return TRUE; } return FALSE; } /*! * \internal * \brief Log the result of an executor action (actual or synthesized) * * \param[in] op Executor action to log result for * \param[in] op_key Operation key for action * \param[in] node_name Name of node action was performed on, if known * \param[in] confirmed Whether to log that graph action was confirmed */ static void log_executor_event(const lrmd_event_data_t *op, const char *op_key, const char *node_name, gboolean confirmed) { int log_level = LOG_ERR; GString *str = g_string_sized_new(100); // reasonable starting size pcmk__g_strcat(str, "Result of ", pcmk__readable_action(op->op_type, op->interval_ms), " operation for ", op->rsc_id, NULL); if (node_name != NULL) { pcmk__g_strcat(str, " on ", node_name, NULL); } switch (op->op_status) { case PCMK_EXEC_DONE: log_level = LOG_NOTICE; pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL); break; case PCMK_EXEC_TIMEOUT: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), " after ", pcmk__readable_interval(op->timeout), NULL); break; case PCMK_EXEC_CANCELLED: log_level = LOG_INFO; /* order of __attribute__ and Fall through comment is IMPORTANT! * do not change it without proper testing with both clang and gcc * in multiple versions. * the clang check allows to build with all versions of clang. * the has_c_attribute check is to workaround a bug in clang version * in rhel7. has_attribute would happily return "YES SIR WE GOT IT" * and fail the build the next line. */ #ifdef __clang__ #ifdef __has_c_attribute #if __has_attribute(fallthrough) __attribute__((fallthrough)); #endif #endif #endif // Fall through default: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), NULL); } if ((op->exit_reason != NULL) && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) { pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL); } g_string_append(str, " " CRM_XS); g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s", (confirmed? "" : "un"), op->call_id, op_key); if (op->op_status == PCMK_EXEC_DONE) { g_string_append_printf(str, " rc=%d", op->rc); } do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); /* The services library has already logged the output at info or debug * level, so just raise to notice if it looks like a failure. */ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output", op->rsc_id, op->op_type, op->interval_ms, node_name); crm_log_output(LOG_NOTICE, prefix, op->output); free(prefix); } } void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, active_op_t *pending, const xmlNode *action_xml) { char *op_id = NULL; char *op_key = NULL; gboolean remove = FALSE; gboolean removed = FALSE; bool need_direct_ack = FALSE; lrmd_rsc_info_t *rsc = NULL; const char *node_name = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(op->rsc_id != NULL, return); // Remap new status codes for older DCs if (compare_version(controld_globals.dc_version, "3.2.0") < 0) { switch (op->op_status) { case PCMK_EXEC_NOT_CONNECTED: lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED, PCMK_EXEC_ERROR, op->exit_reason); break; case PCMK_EXEC_INVALID: lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR, op->exit_reason); break; default: break; } } op_id = make_stop_id(op->rsc_id, op->call_id); op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); // Get resource info if available (from executor state or action XML) if (lrm_state) { rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if ((rsc == NULL) && action_xml) { xmlNode *xml = find_xml_node(action_xml, PCMK_XE_PRIMITIVE, TRUE); const char *standard = crm_element_value(xml, PCMK_XA_CLASS); const char *provider = crm_element_value(xml, PCMK_XA_PROVIDER); const char *type = crm_element_value(xml, PCMK_XA_TYPE); if (standard && type) { crm_info("%s agent information not cached, using %s%s%s:%s from action XML", op->rsc_id, standard, (provider? ":" : ""), (provider? provider : ""), type); rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); } else { crm_err("Can't process %s result because %s agent information not cached or in XML", op_key, op->rsc_id); } } // Get node name if available (from executor state or action XML) if (lrm_state) { node_name = lrm_state->node_name; } else if (action_xml) { node_name = crm_element_value(action_xml, PCMK__META_ON_NODE); } if(pending == NULL) { remove = TRUE; if (lrm_state) { pending = g_hash_table_lookup(lrm_state->active_ops, op_id); } } if (op->op_status == PCMK_EXEC_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_EXEC_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_EXEC_CANCELLED) { /* We might not record the result, so directly acknowledge it to the * originator instead, so it doesn't time out waiting for the result * (especially important if part of a transition). */ need_direct_ack = TRUE; if (controld_action_is_recordable(op->op_type)) { if (node_name && rsc) { // We should record the result, and happily, we can time_t lock_time = (pending == NULL)? 0 : pending->lock_time; controld_update_resource_history(node_name, rsc, op, lock_time); need_direct_ack = FALSE; } else if (op->rsc_deleted) { /* We shouldn't record the result (likely the resource was * refreshed, cleaned, or removed while this operation was * in flight). */ crm_notice("Not recording %s result in CIB because " "resource information was removed since it was initiated", op_key); } else { /* This shouldn't be possible; the executor didn't consider the * resource deleted, but we couldn't find resource or node * information. */ crm_err("Unable to record %s result in CIB: %s", op_key, (node_name? "No resource information" : "No node name")); } } } else if (op->interval_ms == 0) { /* A non-recurring operation was cancelled. Most likely, the * never-initiated action was removed from the executor's pending * operations list upon resource removal. */ need_direct_ack = TRUE; } else if (pending == NULL) { /* This recurring operation was cancelled, but was not pending. No * transition actions are waiting on it, nothing needs to be done. */ } else if (op->user_data == NULL) { /* This recurring operation was cancelled and pending, but we don't * have a transition key. This should never happen. */ crm_err("Recurring operation %s was cancelled without transition information", op_key); } else if (pcmk_is_set(pending->flags, active_op_remove)) { /* This recurring operation was cancelled (by us) and pending, and we * have been waiting for it to finish. */ if (lrm_state) { controld_delete_action_history(op); } /* Directly acknowledge failed recurring actions here. The above call to * controld_delete_action_history() will not erase any corresponding * last_failure entry, which means that the DC won't confirm the * cancellation via process_op_deletion(), and the transition would * otherwise wait for the action timer to pop. */ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms)) { need_direct_ack = TRUE; } } else if (op->rsc_deleted) { /* This recurring operation was cancelled (but not by us, and the * executor does not have resource information, likely due to resource * cleanup, refresh, or removal) and pending. */ crm_debug("Recurring op %s was cancelled due to resource deletion", op_key); need_direct_ack = TRUE; } else { /* This recurring operation was cancelled (but not by us, likely by the * executor before stopping the resource) and pending. We don't need to * do anything special. */ } if (need_direct_ack) { controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if (lrm_state && ((op->interval_ms == 0) || (op->op_status == PCMK_EXEC_CANCELLED))) { gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id); if (op->interval_ms != 0) { removed = TRUE; } else if (found) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->active_ops)); } } log_executor_event(op, op_key, node_name, removed); if (lrm_state) { if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA, pcmk__str_casei)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (rsc && (op->rc == PCMK_OCF_OK)) { char *metadata = unescape_newlines(op->output); controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata); free(metadata); } } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); if (lrm_state) { delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL, true); } } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ controld_trigger_fsa(); if (lrm_state && rsc) { update_history_cache(lrm_state, rsc, op); } lrmd_free_rsc_info(rsc); free(op_key); free(op_id); } diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c index 7459c9fdb3..c43afb61ad 100644 --- a/daemons/controld/controld_te_actions.c +++ b/daemons/controld/controld_te_actions.c @@ -1,760 +1,760 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // lrmd_event_data_t, lrmd_free_event() #include #include #include #include #include static GHashTable *te_targets = NULL; void send_rsc_command(pcmk__graph_action_t *action); static void te_update_job_count(pcmk__graph_action_t *action, int offset); static void te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action) { action->timer = g_timeout_add(action->timeout + graph->network_delay, action_timer_callback, (void *) action); CRM_ASSERT(action->timer != 0); } /*! * \internal * \brief Execute a graph pseudo-action * * \param[in,out] graph Transition graph being executed * \param[in,out] pseudo Pseudo-action to execute * * \return Standard Pacemaker return code */ static int execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo) { const char *task = crm_element_value(pseudo->xml, PCMK_XA_OPERATION); /* send to peers as well? */ if (pcmk__str_eq(task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_casei)) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *cmd = NULL; if (pcmk__str_eq(controld_globals.our_nodename, node->uname, pcmk__str_casei)) { continue; } cmd = create_request(task, pseudo->xml, node->uname, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); send_cluster_message(node, crm_msg_crmd, cmd, FALSE); free_xml(cmd); } remote_ra_process_maintenance_nodes(pseudo->xml); } else { /* Check action for Pacemaker Remote node side effects */ remote_ra_process_pseudo(pseudo->xml); } crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id, crm_element_value(pseudo->xml, PCMK__XA_OPERATION_KEY)); te_action_confirmed(pseudo, graph); return pcmk_rc_ok; } static int get_target_rc(pcmk__graph_action_t *action) { int exit_status; pcmk__scan_min_int(crm_meta_value(action->params, PCMK__META_OP_TARGET_RC), &exit_status, 0); return exit_status; } /*! * \internal * \brief Execute a cluster action from a transition graph * * \param[in,out] graph Transition graph being executed * \param[in,out] action Cluster action to execute * * \return Standard Pacemaker return code */ static int execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { char *counter = NULL; xmlNode *cmd = NULL; gboolean is_local = FALSE; const char *id = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; id = ID(action->xml); CRM_CHECK(!pcmk__str_empty(id), return EPROTO); task = crm_element_value(action->xml, PCMK_XA_OPERATION); CRM_CHECK(!pcmk__str_empty(task), return EPROTO); on_node = crm_element_value(action->xml, PCMK__META_ON_NODE); CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown); router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE); if (router_node == NULL) { router_node = on_node; if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { const char *mode = crm_element_value(action->xml, PCMK__XA_MODE); if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) { router_node = controld_globals.our_nodename; } } } if (pcmk__str_eq(router_node, controld_globals.our_nodename, pcmk__str_casei)) { is_local = TRUE; } value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_info("Handling controller request '%s' (%s on %s)%s%s", id, task, on_node, (is_local? " locally" : ""), (no_wait? " without waiting" : "")); if (is_local && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { /* defer until everything else completes */ crm_info("Controller request '%s' is a local shutdown", id); graph->completion_action = pcmk__graph_shutdown; graph->abort_reason = "local shutdown"; te_action_confirmed(action, graph); return pcmk_rc_ok; } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { crm_node_t *peer = pcmk__get_node(0, router_node, NULL, pcmk__node_search_cluster); pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); } cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = pcmk__transition_key(controld_globals.transition_graph->id, action->id, get_target_rc(action), controld_globals.te_uuid); crm_xml_add(cmd, PCMK__XA_TRANSITION_KEY, counter); rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, pcmk__node_search_cluster), crm_msg_crmd, cmd, TRUE); free(counter); free_xml(cmd); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return ECOMM; } else if (no_wait) { te_action_confirmed(action, graph); } else { if (action->timeout <= 0) { crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead", action->id, task, on_node, action->timeout, graph->network_delay); action->timeout = (int) graph->network_delay; } te_start_action_timer(graph, action); } return pcmk_rc_ok; } /*! * \internal * \brief Synthesize an executor event for a resource action timeout * * \param[in] action Resource action that timed out * \param[in] target_rc Expected result of action that timed out * * Synthesize an executor event for a resource action timeout. (If the executor * gets a timeout while waiting for a resource action to complete, that will be * reported via the usual callback. This timeout means we didn't hear from the * executor itself or the controller that relayed the action to the executor.) * * \return Newly created executor event for result of \p action * \note The caller is responsible for freeing the return value using * lrmd_free_event(). */ static lrmd_event_data_t * synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc) { lrmd_event_data_t *op = NULL; const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *reason = NULL; char *dynamic_reason = NULL; if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) { reason = "Local executor did not return result in time"; } else { const char *router_node = NULL; router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE); if (router_node == NULL) { router_node = target; } dynamic_reason = crm_strdup_printf("Controller on %s did not return " "result in time", router_node); reason = dynamic_reason; } op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT, PCMK_OCF_UNKNOWN_ERROR, reason); op->call_id = -1; op->user_data = pcmk__transition_key(controld_globals.transition_graph->id, action->id, target_rc, controld_globals.te_uuid); free(dynamic_reason); return op; } static void controld_record_action_event(pcmk__graph_action_t *action, lrmd_event_data_t *op) { cib_t *cib_conn = controld_globals.cib_conn; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *action_rsc = NULL; int rc = pcmk_ok; const char *rsc_id = NULL; const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *task_uuid = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY); const char *target_uuid = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID); int target_rc = get_target_rc(action); action_rsc = find_xml_node(action->xml, PCMK_XE_PRIMITIVE, TRUE); if (action_rsc == NULL) { return; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return); /* update the CIB */ state = create_xml_node(NULL, PCMK__XE_NODE_STATE); crm_xml_add(state, PCMK_XA_ID, target_uuid); crm_xml_add(state, PCMK_XA_UNAME, target); - rsc = create_xml_node(state, XML_CIB_TAG_LRM); + rsc = create_xml_node(state, PCMK__XE_LRM); crm_xml_add(rsc, PCMK_XA_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, PCMK_XA_ID, rsc_id); crm_copy_xml_element(action_rsc, rsc, PCMK_XA_TYPE); crm_copy_xml_element(action_rsc, rsc, PCMK_XA_CLASS); crm_copy_xml_element(action_rsc, rsc, PCMK_XA_PROVIDER); pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target, __func__); rc = cib_conn->cmds->modify(cib_conn, PCMK_XE_STATUS, state, cib_scope_local); fsa_register_cib_callback(rc, NULL, cib_action_updated); free_xml(state); crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)", rc, action->id, task_uuid, target); pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); } void controld_record_action_timeout(pcmk__graph_action_t *action) { lrmd_event_data_t *op = NULL; const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *task_uuid = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY); int target_rc = get_target_rc(action); crm_warn("%s %d: %s on %s timed out", action->xml->name, action->id, task_uuid, target); op = synthesize_timeout_event(action, target_rc); controld_record_action_event(action, op); lrmd_free_event(op); } /*! * \internal * \brief Execute a resource action from a transition graph * * \param[in,out] graph Transition graph being executed * \param[in,out] action Resource action to execute * * \return Standard Pacemaker return code */ static int execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; gboolean is_local = FALSE; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed); on_node = crm_element_value(action->xml, PCMK__META_ON_NODE); CRM_CHECK(!pcmk__str_empty(on_node), crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), pcmk__s(task, "without task")); return pcmk_rc_node_unknown); rsc_op = action->xml; task = crm_element_value(rsc_op, PCMK_XA_OPERATION); task_uuid = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY); router_node = crm_element_value(rsc_op, PCMK__XA_ROUTER_NODE); if (!router_node) { router_node = on_node; } counter = pcmk__transition_key(controld_globals.transition_graph->id, action->id, get_target_rc(action), controld_globals.te_uuid); crm_xml_add(rsc_op, PCMK__XA_TRANSITION_KEY, counter); if (pcmk__str_eq(router_node, controld_globals.our_nodename, pcmk__str_casei)) { is_local = TRUE; } value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d", task, task_uuid, (is_local? " locally" : ""), on_node, (no_wait? " without waiting" : ""), action->id); cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); if (is_local) { /* shortcut local resource commands */ ha_msg_input_t data = { .msg = cmd, .xml = rsc_op, }; fsa_data_t msg = { .id = 0, .data = &data, .data_type = fsa_dt_ha_msg, .fsa_input = I_NULL, .fsa_cause = C_FSA_INTERNAL, .actions = A_LRM_INVOKE, .origin = __func__, }; do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state, I_NULL, &msg); } else { rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, pcmk__node_search_cluster), crm_msg_lrmd, cmd, TRUE); } free(counter); free_xml(cmd); pcmk__set_graph_action_flags(action, pcmk__graph_action_executed); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return ECOMM; } else if (no_wait) { /* Just mark confirmed. Don't bump the job count only to immediately * decrement it. */ crm_info("Action %d confirmed - no wait", action->id); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(controld_globals.transition_graph, action); trigger_graph(); } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.", action->id, task, task_uuid, on_node, action->timeout); } else { if (action->timeout <= 0) { crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead", action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); action->timeout = (int) graph->network_delay; } te_update_job_count(action, 1); te_start_action_timer(graph, action); } return pcmk_rc_ok; } struct te_peer_s { char *name; int jobs; int migrate_jobs; }; static void te_peer_free(gpointer p) { struct te_peer_s *peer = p; free(peer->name); free(peer); } void te_reset_job_counts(void) { GHashTableIter iter; struct te_peer_s *peer = NULL; if(te_targets == NULL) { te_targets = pcmk__strkey_table(NULL, te_peer_free); } g_hash_table_iter_init(&iter, te_targets); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) { peer->jobs = 0; peer->migrate_jobs = 0; } } static void te_update_job_count_on(const char *target, int offset, bool migrate) { struct te_peer_s *r = NULL; if(target == NULL || te_targets == NULL) { return; } r = g_hash_table_lookup(te_targets, target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } r->jobs += offset; if(migrate) { r->migrate_jobs += offset; } crm_trace("jobs[%s] = %d", target, r->jobs); } static void te_update_job_count(pcmk__graph_action_t *action, int offset) { const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION); const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE); if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) { /* No limit on these */ return; } /* if we have a router node, this means the action is performing * on a remote node. For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE); if ((target == NULL) && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { const char *t1 = crm_meta_value(action->params, PCMK__META_MIGRATE_SOURCE); const char *t2 = crm_meta_value(action->params, PCMK__META_MIGRATE_TARGET); te_update_job_count_on(t1, offset, TRUE); te_update_job_count_on(t2, offset, TRUE); return; } else if (target == NULL) { target = crm_element_value(action->xml, PCMK__META_ON_NODE); } te_update_job_count_on(target, offset, FALSE); } /*! * \internal * \brief Check whether a graph action is allowed to be executed on a node * * \param[in] graph Transition graph being executed * \param[in] action Graph action being executed * \param[in] target Name of node where action should be executed * * \return true if action is allowed, otherwise false */ static bool allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action, const char *target) { int limit = 0; struct te_peer_s *r = NULL; const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION); const char *id = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY); if(target == NULL) { /* No limit on these */ return true; } else if(te_targets == NULL) { return false; } r = g_hash_table_lookup(te_targets, target); limit = throttle_get_job_limit(target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } if(limit <= r->jobs) { crm_trace("Peer %s is over their job limit of %d (%d): deferring %s", target, limit, r->jobs, id); return false; } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) { if (pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s", target, graph->migration_limit, r->migrate_jobs, id); return false; } } crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit); return true; } /*! * \internal * \brief Check whether a graph action is allowed to be executed * * \param[in] graph Transition graph being executed * \param[in] action Graph action being executed * * \return true if action is allowed, otherwise false */ static bool graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *target = NULL; const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION); if (action->type != pcmk__rsc_graph_action) { /* No limit on these */ return true; } /* if we have a router node, this means the action is performing * on a remote node. For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE); if ((target == NULL) && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { target = crm_meta_value(action->params, PCMK__META_MIGRATE_SOURCE); if (!allowed_on_node(graph, action, target)) { return false; } target = crm_meta_value(action->params, PCMK__META_MIGRATE_TARGET); } else if (target == NULL) { target = crm_element_value(action->xml, PCMK__META_ON_NODE); } return allowed_on_node(graph, action, target); } /*! * \brief Confirm a graph action (and optionally update graph) * * \param[in,out] action Action to confirm * \param[in,out] graph Update and trigger this graph (if non-NULL) */ void te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph) { if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { if ((action->type == pcmk__rsc_graph_action) && (crm_element_value(action->xml, PCMK__META_ON_NODE) != NULL)) { te_update_job_count(action, -1); } pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); } if (graph) { pcmk__update_graph(graph, action); trigger_graph(); } } static pcmk__graph_functions_t te_graph_fns = { execute_pseudo_action, execute_rsc_action, execute_cluster_action, controld_execute_fence_action, graph_action_allowed, }; /* * \internal * \brief Register the transitioner's graph functions with \p libpacemaker */ void controld_register_graph_functions(void) { pcmk__set_graph_functions(&te_graph_fns); } void notify_crmd(pcmk__graph_t *graph) { const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(controld_globals.fsa_state)); CRM_CHECK(graph->complete, graph->complete = true); switch (graph->completion_action) { case pcmk__graph_wait: type = "stop"; if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case pcmk__graph_done: type = "done"; if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case pcmk__graph_restart: type = "restart"; if (controld_globals.fsa_state == S_TRANSITION_ENGINE) { if (controld_get_period_transition_timer() > 0) { controld_stop_transition_timer(); controld_start_transition_timer(); } else { event = I_PE_CALC; } } else if (controld_globals.fsa_state == S_POLICY_ENGINE) { controld_set_fsa_action_flags(A_PE_INVOKE); controld_trigger_fsa(); } break; case pcmk__graph_shutdown: type = "shutdown"; if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { crm_err("We didn't ask to be shut down, yet the scheduler is telling us to"); event = I_TERMINATE; } } crm_debug("Transition %d status: %s - %s", graph->id, type, pcmk__s(graph->abort_reason, "unspecified reason")); graph->abort_reason = NULL; graph->completion_action = pcmk__graph_done; if (event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } else { controld_trigger_fsa(); } } diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c index 98bcaacfdf..e094117289 100644 --- a/daemons/controld/controld_te_callbacks.c +++ b/daemons/controld/controld_te_callbacks.c @@ -1,697 +1,697 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include /* For ONLINESTATUS etc */ #include void te_update_confirm(const char *event, xmlNode * msg); #define RSC_OP_PREFIX "//" PCMK__XE_DIFF_ADDED "//" PCMK_XE_CIB \ "//" XML_LRM_TAG_RSC_OP "[@" PCMK_XA_ID "='" // An explicit PCMK_OPT_SHUTDOWN_LOCK of 0 means the lock has been cleared static bool shutdown_lock_cleared(xmlNode *lrm_resource) { time_t shutdown_lock = 0; return (crm_element_value_epoch(lrm_resource, PCMK_OPT_SHUTDOWN_LOCK, &shutdown_lock) == pcmk_ok) && (shutdown_lock == 0); } static void te_update_diff_v1(const char *event, xmlNode *diff) { int lpc, max; xmlXPathObject *xpathObj = NULL; GString *rsc_op_xpath = NULL; CRM_CHECK(diff != NULL, return); pcmk__output_set_log_level(controld_globals.logger_out, LOG_TRACE); controld_globals.logger_out->message(controld_globals.logger_out, "xml-patchset", diff); if (cib__config_changed_v1(NULL, NULL, &diff)) { abort_transition(INFINITY, pcmk__graph_restart, "Non-status change", diff); goto bail; /* configuration changed */ } /* Tickets Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" PCMK__XE_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, pcmk__graph_restart, "Ticket attribute: update", aborted); goto bail; } freeXpathObject(xpathObj); /* Tickets Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" PCMK__XE_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, pcmk__graph_restart, "Ticket attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); /* Transient Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" PCMK__XE_DIFF_REMOVED "//" XML_TAG_TRANSIENT_NODEATTRS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, pcmk__graph_restart, "Transient attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); // Check for lrm_resource entries xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" PCMK__XE_DIFF_ADDED "//" XML_LRM_TAG_RESOURCE); max = numXpathResults(xpathObj); /* * Updates by, or in response to, graph actions will never affect more than * one resource at a time, so such updates indicate an LRM refresh. In that * case, start a new transition rather than check each result individually, * which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((controld_globals.transition_graph->pending == 0) && (max > 1)) { crm_debug("Ignoring resource operation updates due to history refresh of %d resources", max); crm_log_xml_trace(diff, "lrm-refresh"); abort_transition(INFINITY, pcmk__graph_restart, "History refresh", NULL); goto bail; } if (max == 1) { xmlNode *lrm_resource = getXpathResult(xpathObj, 0); if (shutdown_lock_cleared(lrm_resource)) { // @TODO would be more efficient to abort once after transition done abort_transition(INFINITY, pcmk__graph_restart, "Shutdown lock cleared", lrm_resource); // Still process results, so we stop timers and update failcounts } } freeXpathObject(xpathObj); /* Process operation updates */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" PCMK__XE_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); if (max > 0) { int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } } freeXpathObject(xpathObj); /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//" PCMK__XE_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { const char *op_id = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; op_id = ID(match); if (rsc_op_xpath == NULL) { rsc_op_xpath = g_string_new(RSC_OP_PREFIX); } else { g_string_truncate(rsc_op_xpath, sizeof(RSC_OP_PREFIX) - 1); } pcmk__g_strcat(rsc_op_xpath, op_id, "']", NULL); op_match = xpath_search(diff, (const char *) rsc_op_xpath->str); if (numXpathResults(op_match) == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); pcmk__graph_action_t *cancelled = get_cancel_action(op_id, node); if (cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", (const char *) rsc_op_xpath->str, op_id, node); abort_transition(INFINITY, pcmk__graph_restart, "Resource op removal", match); freeXpathObject(op_match); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } freeXpathObject(op_match); } bail: freeXpathObject(xpathObj); if (rsc_op_xpath != NULL) { g_string_free(rsc_op_xpath, TRUE); } } static void process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) { for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL; rsc_op = pcmk__xml_next(rsc_op)) { process_graph_event(rsc_op, node); } if (shutdown_lock_cleared(lrm_resource)) { // @TODO would be more efficient to abort once after transition done abort_transition(INFINITY, pcmk__graph_restart, "Shutdown lock cleared", lrm_resource); } } static void process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) { xmlNode *rsc = NULL; if (xml == NULL) { return; } - if (pcmk__xe_is(xml, XML_CIB_TAG_LRM)) { + if (pcmk__xe_is(xml, PCMK__XE_LRM)) { xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); CRM_CHECK(xml != NULL, return); } CRM_CHECK(pcmk__xe_is(xml, XML_LRM_TAG_RESOURCES), return); /* * Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time, so such updates indicate an * LRM refresh. * * In that case, start a new transition rather than check each result * individually, which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((controld_globals.transition_graph->pending == 0) && (xml->children != NULL) && (xml->children->next != NULL)) { crm_log_xml_trace(change, "lrm-refresh"); abort_transition(INFINITY, pcmk__graph_restart, "History refresh", NULL); return; } for (rsc = pcmk__xml_first_child(xml); rsc != NULL; rsc = pcmk__xml_next(rsc)) { crm_trace("Processing %s", ID(rsc)); process_lrm_resource_diff(rsc, node); } } static char *extract_node_uuid(const char *xpath) { char *mutable_path = strdup(xpath); char *node_uuid = NULL; char *search = NULL; char *match = NULL; match = strstr(mutable_path, PCMK__XE_NODE_STATE "[@" PCMK_XA_ID "=\'"); if (match == NULL) { free(mutable_path); return NULL; } match += strlen(PCMK__XE_NODE_STATE "[@" PCMK_XA_ID "=\'"); search = strchr(match, '\''); if (search == NULL) { free(mutable_path); return NULL; } search[0] = 0; node_uuid = strdup(match); free(mutable_path); return node_uuid; } static void abort_unless_down(const char *xpath, const char *op, xmlNode *change, const char *reason) { char *node_uuid = NULL; pcmk__graph_action_t *down = NULL; if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) { abort_transition(INFINITY, pcmk__graph_restart, reason, change); return; } node_uuid = extract_node_uuid(xpath); if(node_uuid == NULL) { crm_err("Could not extract node ID from %s", xpath); abort_transition(INFINITY, pcmk__graph_restart, reason, change); return; } down = match_down_event(node_uuid); if (down == NULL) { crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath); abort_transition(INFINITY, pcmk__graph_restart, reason, change); } else { crm_trace("Expecting changes to %s (%s)", node_uuid, xpath); } free(node_uuid); } static void process_op_deletion(const char *xpath, xmlNode *change) { char *mutable_key = strdup(xpath); char *key; char *node_uuid; // Extract the part of xpath between last pair of single quotes key = strrchr(mutable_key, '\''); if (key != NULL) { *key = '\0'; key = strrchr(mutable_key, '\''); } if (key == NULL) { crm_warn("Ignoring malformed CIB update (resource deletion of %s)", xpath); free(mutable_key); return; } ++key; node_uuid = extract_node_uuid(xpath); if (confirm_cancel_action(key, node_uuid) == FALSE) { abort_transition(INFINITY, pcmk__graph_restart, "Resource operation removal", change); } free(mutable_key); free(node_uuid); } static void process_delete_diff(const char *xpath, const char *op, xmlNode *change) { if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) { process_op_deletion(xpath, change); - } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) { + } else if (strstr(xpath, "/" PCMK__XE_LRM "[")) { abort_unless_down(xpath, op, change, "Resource state removal"); } else if (strstr(xpath, "/" PCMK__XE_NODE_STATE "[")) { abort_unless_down(xpath, op, change, "Node state removal"); } else { crm_trace("Ignoring delete of %s", xpath); } } static void process_node_state_diff(xmlNode *state, xmlNode *change, const char *op, const char *xpath) { - xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); + xmlNode *lrm = first_named_child(state, PCMK__XE_LRM); process_resource_updates(ID(state), lrm, change, op, xpath); } static void process_status_diff(xmlNode *status, xmlNode *change, const char *op, const char *xpath) { for (xmlNode *state = pcmk__xml_first_child(status); state != NULL; state = pcmk__xml_next(state)) { process_node_state_diff(state, change, op, xpath); } } static void process_cib_diff(xmlNode *cib, xmlNode *change, const char *op, const char *xpath) { xmlNode *status = first_named_child(cib, PCMK_XE_STATUS); xmlNode *config = first_named_child(cib, PCMK_XE_CONFIGURATION); if (status) { process_status_diff(status, change, op, xpath); } if (config) { abort_transition(INFINITY, pcmk__graph_restart, "Non-status-only change", change); } } static void te_update_diff_v2(xmlNode *diff) { crm_log_xml_trace(diff, "Patch:Raw"); for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL; change = pcmk__xml_next(change)) { xmlNode *match = NULL; const char *name = NULL; const char *xpath = crm_element_value(change, PCMK_XA_PATH); // Possible ops: create, modify, delete, move const char *op = crm_element_value(change, PCMK_XA_OPERATION); // Ignore uninteresting updates if (op == NULL) { continue; } else if (xpath == NULL) { crm_trace("Ignoring %s change for version field", op); continue; } else if ((strcmp(op, "move") == 0) && (strstr(xpath, "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES) == NULL)) { /* We still need to consider moves within the resources section, * since they affect placement order. */ crm_trace("Ignoring move change at %s", xpath); continue; } // Find the result of create/modify ops if (strcmp(op, "create") == 0) { match = change->children; } else if (strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } else if (!pcmk__str_any_of(op, "delete", "move", NULL)) { crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", op, xpath); continue; } if (match) { if (match->type == XML_COMMENT_NODE) { crm_trace("Ignoring %s operation for comment at %s", op, xpath); continue; } name = (const char *)match->name; } crm_trace("Handling %s operation for %s%s%s", op, (xpath? xpath : "CIB"), (name? " matched by " : ""), (name? name : "")); if (strstr(xpath, "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION)) { abort_transition(INFINITY, pcmk__graph_restart, "Configuration change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS) || pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_none)) { abort_transition(INFINITY, pcmk__graph_restart, "Ticket attribute change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[") || pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS, pcmk__str_none)) { abort_unless_down(xpath, op, change, "Transient attribute change"); break; // Won't be packaged with operation results we may be waiting for } else if (strcmp(op, "delete") == 0) { process_delete_diff(xpath, op, change); } else if (name == NULL) { crm_warn("Ignoring malformed CIB update (%s at %s has no result)", op, xpath); } else if (strcmp(name, PCMK_XE_CIB) == 0) { process_cib_diff(match, change, op, xpath); } else if (strcmp(name, PCMK_XE_STATUS) == 0) { process_status_diff(match, change, op, xpath); } else if (strcmp(name, PCMK__XE_NODE_STATE) == 0) { process_node_state_diff(match, change, op, xpath); - } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) { + } else if (strcmp(name, PCMK__XE_LRM) == 0) { process_resource_updates(ID(match), match, change, op, xpath); } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { - char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); process_resource_updates(local_node, match, change, op, xpath); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { - char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); process_lrm_resource_diff(match, local_node); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { - char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); process_graph_event(match, local_node); free(local_node); } else { crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)", op, xpath, name); } } } void te_update_diff(const char *event, xmlNode * msg) { xmlNode *diff = NULL; const char *op = NULL; int rc = -EINVAL; int format = 1; int p_add[] = { 0, 0, 0 }; int p_del[] = { 0, 0, 0 }; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (controld_globals.transition_graph == NULL) { crm_trace("No graph"); return; } else if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } else if (controld_globals.transition_graph->complete && (controld_globals.fsa_state != S_IDLE) && (controld_globals.fsa_state != S_TRANSITION_ENGINE) && (controld_globals.fsa_state != S_POLICY_ENGINE)) { crm_trace("Filter state=%s (complete)", fsa_state2string(controld_globals.fsa_state)); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_patch_versions(diff, p_add, p_del); crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], fsa_state2string(controld_globals.fsa_state)); crm_element_value_int(diff, PCMK_XA_FORMAT, &format); switch (format) { case 1: te_update_diff_v1(event, diff); break; case 2: te_update_diff_v2(diff); break; default: crm_warn("Ignoring malformed CIB update (unknown patch format %d)", format); } controld_remove_all_outside_events(); } void process_te_message(xmlNode * msg, xmlNode * xml_data) { const char *value = NULL; xmlXPathObject *xpathObj = NULL; int nmatches = 0; CRM_CHECK(msg != NULL, return); // Transition requests must specify transition engine as subsystem value = crm_element_value(msg, PCMK__XA_CRM_SYS_TO); if (pcmk__str_empty(value) || !pcmk__str_eq(value, CRM_SYSTEM_TENGINE, pcmk__str_none)) { crm_info("Received invalid transition request: subsystem '%s' not '" CRM_SYSTEM_TENGINE "'", pcmk__s(value, "")); return; } // Only the lrm_invoke command is supported as a transition request value = crm_element_value(msg, PCMK__XA_CRM_TASK); if (!pcmk__str_eq(value, CRM_OP_INVOKE_LRM, pcmk__str_none)) { crm_info("Received invalid transition request: command '%s' not '" CRM_OP_INVOKE_LRM "'", pcmk__s(value, "")); return; } // Transition requests must be marked as coming from the executor value = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM); if (!pcmk__str_eq(value, CRM_SYSTEM_LRMD, pcmk__str_none)) { crm_info("Received invalid transition request: from '%s' not '" CRM_SYSTEM_LRMD "'", pcmk__s(value, "")); return; } crm_debug("Processing transition request with ref='%s' origin='%s'", pcmk__s(crm_element_value(msg, PCMK_XA_REFERENCE), ""), pcmk__s(crm_element_value(msg, PCMK__XA_SRC), "")); xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP); nmatches = numXpathResults(xpathObj); if (nmatches == 0) { crm_err("Received transition request with no results (bug?)"); } else { for (int lpc = 0; lpc < nmatches; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } } freeXpathObject(xpathObj); } void cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc < pcmk_ok) { crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc)); } } /*! * \brief Handle a timeout in node-to-node communication * * \param[in,out] data Pointer to graph action * * \return FALSE (indicating that source should be not be re-added) */ gboolean action_timer_callback(gpointer data) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) data; const char *task = NULL; const char *on_node = NULL; const char *via_node = NULL; CRM_CHECK(data != NULL, return FALSE); stop_te_timer(action); task = crm_element_value(action->xml, PCMK_XA_OPERATION); on_node = crm_element_value(action->xml, PCMK__META_ON_NODE); via_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE); if (controld_globals.transition_graph->complete) { crm_notice("Node %s did not send %s result (via %s) within %dms " "(ignoring because transition not in progress)", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), action->timeout); } else { /* fail the action */ crm_err("Node %s did not send %s result (via %s) within %dms " "(action timeout plus " PCMK_OPT_CLUSTER_DELAY ")", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), (action->timeout + controld_globals.transition_graph->network_delay)); pcmk__log_graph_action(LOG_ERR, action); pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); te_action_confirmed(action, controld_globals.transition_graph); abort_transition(INFINITY, pcmk__graph_restart, "Action lost", NULL); // Record timeout in the CIB if appropriate if ((action->type == pcmk__rsc_graph_action) && controld_action_is_recordable(task)) { controld_record_action_timeout(action); } } return FALSE; } diff --git a/include/crm_internal.h b/include/crm_internal.h index 5c078742d6..c65a84d974 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -1,212 +1,213 @@ /* * Copyright 2006-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRM_INTERNAL__H # define CRM_INTERNAL__H # ifndef PCMK__CONFIG_H # define PCMK__CONFIG_H # include # endif # include /* Our minimum glib dependency is 2.42. Define that as both the minimum and * maximum glib APIs that are allowed (i.e. APIs that were already deprecated * in 2.42, and APIs introduced after 2.42, cannot be used by Pacemaker code). */ #define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_42 #define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_42 # include # include # include /* Public API headers can guard including deprecated API headers with this * symbol, thus preventing internal code (which includes this header) from using * deprecated APIs, while still allowing external code to use them by default. */ #define PCMK_ALLOW_DEPRECATED 0 # include # include # include # include # include # include # include # include # include # include #define N_(String) (String) #ifdef ENABLE_NLS # define _(String) gettext(String) #else # define _(String) (String) #endif /* * XML element names used only by internal code */ #define PCMK__XE_ATTRIBUTES "attributes" +#define PCMK__XE_LRM "lrm" #define PCMK__XE_NODE_STATE "node_state" #define PCMK__XE_PARAM "param" #define PCMK__XE_PING_RESPONSE "ping_response" // @COMPAT Deprecated since 2.1.7 #define PCMK__XE_DIFF_ADDED "diff-added" // @COMPAT Deprecated since 2.1.7 #define PCMK__XE_DIFF_REMOVED "diff-removed" /* @COMPAT Deprecated since 2.0.0; alias for with PCMK_META_PROMOTABLE * set to "true" */ #define PCMK__XE_PROMOTABLE_LEGACY "master" /* * XML attribute names used only by internal code */ #define PCMK__XA_ATTR_DAMPENING "attr_dampening" #define PCMK__XA_ATTR_FORCE "attrd_is_force_write" #define PCMK__XA_ATTR_INTERVAL "attr_clear_interval" #define PCMK__XA_ATTR_IS_PRIVATE "attr_is_private" #define PCMK__XA_ATTR_IS_REMOTE "attr_is_remote" #define PCMK__XA_ATTR_NAME "attr_name" #define PCMK__XA_ATTR_NODE_ID "attr_host_id" #define PCMK__XA_ATTR_NODE_NAME "attr_host" #define PCMK__XA_ATTR_OPERATION "attr_clear_operation" #define PCMK__XA_ATTR_PATTERN "attr_regex" #define PCMK__XA_ATTR_RESOURCE "attr_resource" #define PCMK__XA_ATTR_SECTION "attr_section" #define PCMK__XA_ATTR_SET "attr_set" #define PCMK__XA_ATTR_SET_TYPE "attr_set_type" #define PCMK__XA_ATTR_SYNC_POINT "attr_sync_point" #define PCMK__XA_ATTR_USER "attr_user" #define PCMK__XA_ATTR_UUID "attr_key" #define PCMK__XA_ATTR_VALUE "attr_value" #define PCMK__XA_ATTR_VERSION "attr_version" #define PCMK__XA_ATTR_WRITER "attr_writer" #define PCMK__XA_CALL_ID "call-id" #define PCMK__XA_CONFIG_ERRORS "config-errors" #define PCMK__XA_CONFIG_WARNINGS "config-warnings" #define PCMK__XA_CONFIRM "confirm" #define PCMK__XA_CONN_HOST "connection_host" #define PCMK__XA_CRMD "crmd" #define PCMK__XA_CRMD_STATE "crmd_state" #define PCMK__XA_CRM_HOST_TO "crm_host_to" #define PCMK__XA_CRM_LIMIT_MAX "crm-limit-max" #define PCMK__XA_CRM_LIMIT_MODE "crm-limit-mode" #define PCMK__XA_CRM_SUBSYSTEM "crm_subsystem" #define PCMK__XA_CRM_SYS_FROM "crm_sys_from" #define PCMK__XA_CRM_SYS_TO "crm_sys_to" #define PCMK__XA_CRM_TASK "crm_task" #define PCMK__XA_CRM_TGRAPH_IN "crm-tgraph-in" #define PCMK__XA_CRM_USER "crm_user" #define PCMK__XA_DC_LEAVING "dc-leaving" #define PCMK__XA_DIGEST "digest" #define PCMK__XA_ELECTION_AGE_SEC "election-age-sec" #define PCMK__XA_ELECTION_AGE_NANO_SEC "election-age-nano-sec" #define PCMK__XA_ELECTION_ID "election-id" #define PCMK__XA_ELECTION_OWNER "election-owner" #define PCMK__XA_EXPECTED "expected" #define PCMK__XA_FILE "file" #define PCMK__XA_GRAPH_ERRORS "graph-errors" #define PCMK__XA_GRAPH_WARNINGS "graph-warnings" #define PCMK__XA_IN_CCM "in_ccm" #define PCMK__XA_JOIN "join" #define PCMK__XA_JOIN_ID "join_id" #define PCMK__XA_LONG_ID "long-id" #define PCMK__XA_MODE "mode" #define PCMK__XA_NODE_START_STATE "node_start_state" #define PCMK__XA_NODE_STATE "node_state" #define PCMK__XA_OPERATION_KEY "operation_key" #define PCMK__XA_OP_DIGEST "op-digest" #define PCMK__XA_OP_FORCE_RESTART "op-force-restart" #define PCMK__XA_OP_RESTART_DIGEST "op-restart-digest" #define PCMK__XA_OP_SECURE_DIGEST "op-secure-digest" #define PCMK__XA_OP_SECURE_PARAMS "op-secure-params" #define PCMK__XA_OP_STATUS "op-status" #define PCMK__XA_PACEMAKERD_STATE "pacemakerd_state" #define PCMK__XA_PRIORITY "priority" #define PCMK__XA_RC_CODE "rc-code" #define PCMK__XA_REAP "reap" /* Actions to be executed on Pacemaker Remote nodes are routed through the * controller on the cluster node hosting the remote connection. That cluster * node is considered the router node for the action. */ #define PCMK__XA_ROUTER_NODE "router_node" #define PCMK__XA_RSC_ID "rsc-id" #define PCMK__XA_SCHEMA "schema" #define PCMK__XA_SCHEMAS "schemas" #define PCMK__XA_SRC "src" #define PCMK__XA_SUBT "subt" // subtype #define PCMK__XA_T "t" // type #define PCMK__XA_TASK "task" #define PCMK__XA_TRANSITION_KEY "transition-key" #define PCMK__XA_TRANSITION_MAGIC "transition-magic" #define PCMK__XA_UPTIME "uptime" // @COMPAT Deprecated since 2.1.5 #define PCMK__XA_FIRST_INSTANCE "first-instance" // @COMPAT Deprecated since 1.1.12 #define PCMK__XA_REF "ref" // @COMPAT Deprecated since 2.1.6 #define PCMK__XA_REPLACE "replace" // @COMPAT Deprecated since 2.1.5 #define PCMK__XA_RSC_INSTANCE "rsc-instance" // @COMPAT Deprecated since 1.1.12 #define PCMK__XA_TAG "tag" // @COMPAT Deprecated since 2.1.5 #define PCMK__XA_THEN_INSTANCE "then-instance" // @COMPAT Deprecated since 2.1.5 #define PCMK__XA_WITH_RSC_INSTANCE "with-rsc-instance" /* * IPC service names that are only used internally */ # define PCMK__SERVER_BASED_RO "cib_ro" # define PCMK__SERVER_BASED_RW "cib_rw" # define PCMK__SERVER_BASED_SHM "cib_shm" /* * IPC commands that can be sent to Pacemaker daemons */ #define PCMK__ATTRD_CMD_PEER_REMOVE "peer-remove" #define PCMK__ATTRD_CMD_UPDATE "update" #define PCMK__ATTRD_CMD_UPDATE_BOTH "update-both" #define PCMK__ATTRD_CMD_UPDATE_DELAY "update-delay" #define PCMK__ATTRD_CMD_QUERY "query" #define PCMK__ATTRD_CMD_REFRESH "refresh" #define PCMK__ATTRD_CMD_FLUSH "flush" #define PCMK__ATTRD_CMD_SYNC "sync" #define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response" #define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure" #define PCMK__ATTRD_CMD_CONFIRM "confirm" #define PCMK__CONTROLD_CMD_NODES "list-nodes" #endif /* CRM_INTERNAL__H */ diff --git a/lib/common/tests/xpath/pcmk__xpath_node_id_test.c b/lib/common/tests/xpath/pcmk__xpath_node_id_test.c index cd88da8c9d..e50d6b019d 100644 --- a/lib/common/tests/xpath/pcmk__xpath_node_id_test.c +++ b/lib/common/tests/xpath/pcmk__xpath_node_id_test.c @@ -1,59 +1,60 @@ /* * Copyright 2021-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include static void empty_input(void **state) { - assert_null(pcmk__xpath_node_id(NULL, "lrm")); - assert_null(pcmk__xpath_node_id("", "lrm")); + assert_null(pcmk__xpath_node_id(NULL, PCMK__XE_LRM)); + assert_null(pcmk__xpath_node_id("", PCMK__XE_LRM)); assert_null(pcmk__xpath_node_id("/blah/blah", NULL)); assert_null(pcmk__xpath_node_id("/blah/blah", "")); assert_null(pcmk__xpath_node_id(NULL, NULL)); } static void no_quotes(void **state) { - const char *xpath = "/some/xpath/lrm[@" PCMK_XA_ID "=xyz]"; - pcmk__assert_asserts(pcmk__xpath_node_id(xpath, "lrm")); + const char *xpath = "/some/xpath/" PCMK__XE_LRM "[@" PCMK_XA_ID "=xyz]"; + pcmk__assert_asserts(pcmk__xpath_node_id(xpath, PCMK__XE_LRM)); } static void not_present(void **state) { const char *xpath = "/some/xpath/string[@" PCMK_XA_ID "='xyz']"; - assert_null(pcmk__xpath_node_id(xpath, "lrm")); + assert_null(pcmk__xpath_node_id(xpath, PCMK__XE_LRM)); - xpath = "/some/xpath/containing[@" PCMK_XA_ID "='lrm']"; - assert_null(pcmk__xpath_node_id(xpath, "lrm")); + xpath = "/some/xpath/containing[@" PCMK_XA_ID "='" PCMK__XE_LRM "']"; + assert_null(pcmk__xpath_node_id(xpath, PCMK__XE_LRM)); } static void present(void **state) { char *s = NULL; - const char *xpath = "/some/xpath/containing/lrm[@" PCMK_XA_ID "='xyz']"; + const char *xpath = "/some/xpath/containing" + "/" PCMK__XE_LRM "[@" PCMK_XA_ID "='xyz']"; - s = pcmk__xpath_node_id(xpath, "lrm"); + s = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); assert_int_equal(strcmp(s, "xyz"), 0); free(s); - xpath = "/some/other/lrm[@" PCMK_XA_ID "='xyz']/xpath"; - s = pcmk__xpath_node_id(xpath, "lrm"); + xpath = "/some/other/" PCMK__XE_LRM "[@" PCMK_XA_ID "='xyz']/xpath"; + s = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); assert_int_equal(strcmp(s, "xyz"), 0); free(s); } PCMK__UNIT_TEST(NULL, NULL, cmocka_unit_test(empty_input), cmocka_unit_test(no_quotes), cmocka_unit_test(not_present), cmocka_unit_test(present)) diff --git a/lib/pacemaker/pcmk_injections.c b/lib/pacemaker/pcmk_injections.c index fbf1ba379c..0a48efb138 100644 --- a/lib/pacemaker/pcmk_injections.c +++ b/lib/pacemaker/pcmk_injections.c @@ -1,795 +1,795 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // lrmd_event_data_t, etc. #include #include #include #include "libpacemaker_private.h" bool pcmk__simulate_node_config = false; #define XPATH_NODE_CONFIG "//" PCMK_XE_NODE "[@" PCMK_XA_UNAME "='%s']" #define XPATH_NODE_STATE "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" #define XPATH_NODE_STATE_BY_ID "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_ID "='%s']" #define XPATH_RSC_HISTORY XPATH_NODE_STATE \ "//" XML_LRM_TAG_RESOURCE "[@" PCMK_XA_ID "='%s']" /*! * \internal * \brief Inject a fictitious transient node attribute into scheduler input * * \param[in,out] out Output object for displaying error messages * \param[in,out] cib_node \c PCMK__XE_NODE_STATE XML to inject attribute into * \param[in] name Transient node attribute name to inject * \param[in] value Transient node attribute value to inject */ static void inject_transient_attr(pcmk__output_t *out, xmlNode *cib_node, const char *name, const char *value) { xmlNode *attrs = NULL; xmlNode *instance_attrs = NULL; const char *node_uuid = ID(cib_node); out->message(out, "inject-attr", name, value, cib_node); attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS); if (attrs == NULL) { attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(attrs, PCMK_XA_ID, node_uuid); } instance_attrs = first_named_child(attrs, PCMK_XE_INSTANCE_ATTRIBUTES); if (instance_attrs == NULL) { instance_attrs = create_xml_node(attrs, PCMK_XE_INSTANCE_ATTRIBUTES); crm_xml_add(instance_attrs, PCMK_XA_ID, node_uuid); } crm_create_nvpair_xml(instance_attrs, NULL, name, value); } /*! * \internal * \brief Inject a fictitious fail count into a scheduler input * * \param[in,out] out Output object for displaying error messages * \param[in,out] cib_node Node state XML to inject into * \param[in] resource ID of resource for fail count to inject * \param[in] task Action name for fail count to inject * \param[in] interval_ms Action interval (in milliseconds) for fail count * \param[in] rc Action result for fail count to inject (if 0, or * 7 when interval_ms is 0, inject nothing) */ void pcmk__inject_failcount(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *task, guint interval_ms, int rc) { if (rc == 0) { return; } else if ((rc == 7) && (interval_ms == 0)) { return; } else { char *name = NULL; char *now = pcmk__ttoa(time(NULL)); name = pcmk__failcount_name(resource, task, interval_ms); inject_transient_attr(out, cib_node, name, "value++"); free(name); name = pcmk__lastfailure_name(resource, task, interval_ms); inject_transient_attr(out, cib_node, name, now); free(name); free(now); } } /*! * \internal * \brief Create a CIB configuration entry for a fictitious node * * \param[in,out] cib_conn CIB object to use * \param[in] node Node name to use */ static void create_node_entry(cib_t *cib_conn, const char *node) { int rc = pcmk_ok; char *xpath = crm_strdup_printf(XPATH_NODE_CONFIG, node); rc = cib_conn->cmds->query(cib_conn, xpath, NULL, cib_xpath|cib_sync_call|cib_scope_local); if (rc == -ENXIO) { // Only add if not already existing xmlNode *cib_object = create_xml_node(NULL, PCMK_XE_NODE); crm_xml_add(cib_object, PCMK_XA_ID, node); // Use node name as ID crm_xml_add(cib_object, PCMK_XA_UNAME, node); cib_conn->cmds->create(cib_conn, PCMK_XE_NODES, cib_object, cib_sync_call|cib_scope_local); /* Not bothering with subsequent query to see if it exists, we'll bomb out later in the call to query_node_uuid()... */ free_xml(cib_object); } free(xpath); } /*! * \internal * \brief Synthesize a fake executor event for an action * * \param[in] cib_resource XML for any existing resource action history * \param[in] task Name of action to synthesize * \param[in] interval_ms Interval of action to synthesize * \param[in] outcome Result of action to synthesize * * \return Newly allocated executor event * \note It is the caller's responsibility to free the result with * lrmd_free_event(). */ static lrmd_event_data_t * create_op(const xmlNode *cib_resource, const char *task, guint interval_ms, int outcome) { lrmd_event_data_t *op = NULL; xmlNode *xop = NULL; op = lrmd_new_event(ID(cib_resource), task, interval_ms); lrmd__set_result(op, outcome, PCMK_EXEC_DONE, "Simulated action result"); op->params = NULL; // Not needed for simulation purposes op->t_run = (unsigned int) time(NULL); op->t_rcchange = op->t_run; // Use a call ID higher than any existing history entries op->call_id = 0; for (xop = pcmk__xe_first_child(cib_resource); xop != NULL; xop = pcmk__xe_next(xop)) { int tmp = 0; crm_element_value_int(xop, PCMK__XA_CALL_ID, &tmp); if (tmp > op->call_id) { op->call_id = tmp; } } op->call_id++; return op; } /*! * \internal * \brief Inject a fictitious resource history entry into a scheduler input * * \param[in,out] cib_resource Resource history XML to inject entry into * \param[in,out] op Action result to inject * \param[in] target_rc Expected result for action to inject * * \return XML of injected resource history entry */ xmlNode * pcmk__inject_action_result(xmlNode *cib_resource, lrmd_event_data_t *op, int target_rc) { return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET, target_rc, NULL, crm_system_name); } /*! * \internal * \brief Inject a fictitious node into a scheduler input * * \param[in,out] cib_conn Scheduler input CIB to inject node into * \param[in] node Name of node to inject * \param[in] uuid UUID of node to inject * * \return XML of \c PCMK__XE_NODE_STATE entry for new node * \note If the global pcmk__simulate_node_config has been set to true, a * node entry in the configuration section will be added, as well as a * node state entry in the status section. */ xmlNode * pcmk__inject_node(cib_t *cib_conn, const char *node, const char *uuid) { int rc = pcmk_ok; xmlNode *cib_object = NULL; char *xpath = crm_strdup_printf(XPATH_NODE_STATE, node); bool duplicate = false; char *found_uuid = NULL; if (pcmk__simulate_node_config) { create_node_entry(cib_conn, node); } rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath|cib_sync_call|cib_scope_local); if ((cib_object != NULL) && (ID(cib_object) == NULL)) { crm_err("Detected multiple " PCMK__XE_NODE_STATE " entries for " "xpath=%s, bailing", xpath); duplicate = true; goto done; } if (rc == -ENXIO) { if (uuid == NULL) { query_node_uuid(cib_conn, node, &found_uuid, NULL); } else { found_uuid = strdup(uuid); } if (found_uuid) { char *xpath_by_uuid = crm_strdup_printf(XPATH_NODE_STATE_BY_ID, found_uuid); /* It's possible that a PCMK__XE_NODE_STATE entry doesn't have a * PCMK_XA_UNAME yet */ rc = cib_conn->cmds->query(cib_conn, xpath_by_uuid, &cib_object, cib_xpath|cib_sync_call|cib_scope_local); if ((cib_object != NULL) && (ID(cib_object) == NULL)) { crm_err("Can't inject node state for %s because multiple " "state entries found for ID %s", node, found_uuid); duplicate = true; free(xpath_by_uuid); goto done; } else if (cib_object != NULL) { crm_xml_add(cib_object, PCMK_XA_UNAME, node); rc = cib_conn->cmds->modify(cib_conn, PCMK_XE_STATUS, cib_object, cib_sync_call|cib_scope_local); } free(xpath_by_uuid); } } if (rc == -ENXIO) { cib_object = create_xml_node(NULL, PCMK__XE_NODE_STATE); crm_xml_add(cib_object, PCMK_XA_ID, found_uuid); crm_xml_add(cib_object, PCMK_XA_UNAME, node); cib_conn->cmds->create(cib_conn, PCMK_XE_STATUS, cib_object, cib_sync_call|cib_scope_local); free_xml(cib_object); rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object, cib_xpath|cib_sync_call|cib_scope_local); crm_trace("Injecting node state for %s (rc=%d)", node, rc); } done: free(found_uuid); free(xpath); if (duplicate) { crm_log_xml_warn(cib_object, "Duplicates"); crm_exit(CRM_EX_SOFTWARE); return NULL; // not reached, but makes static analysis happy } CRM_ASSERT(rc == pcmk_ok); return cib_object; } /*! * \internal * \brief Inject a fictitious node state change into a scheduler input * * \param[in,out] cib_conn Scheduler input CIB to inject into * \param[in] node Name of node to inject change for * \param[in] up If true, change state to online, otherwise offline * * \return XML of changed (or added) node state entry */ xmlNode * pcmk__inject_node_state_change(cib_t *cib_conn, const char *node, bool up) { xmlNode *cib_node = pcmk__inject_node(cib_conn, node, NULL); if (up) { pcmk__xe_set_props(cib_node, PCMK__XA_IN_CCM, PCMK_VALUE_TRUE, PCMK__XA_CRMD, ONLINESTATUS, PCMK__XA_JOIN, CRMD_JOINSTATE_MEMBER, PCMK__XA_EXPECTED, CRMD_JOINSTATE_MEMBER, NULL); } else { pcmk__xe_set_props(cib_node, PCMK__XA_IN_CCM, PCMK_VALUE_FALSE, PCMK__XA_CRMD, OFFLINESTATUS, PCMK__XA_JOIN, CRMD_JOINSTATE_DOWN, PCMK__XA_EXPECTED, CRMD_JOINSTATE_DOWN, NULL); } crm_xml_add(cib_node, PCMK_XA_CRM_DEBUG_ORIGIN, crm_system_name); return cib_node; } /*! * \internal * \brief Check whether a node has history for a given resource * * \param[in,out] cib_node Node state XML to check * \param[in] resource Resource name to check for * * \return Resource's lrm_resource XML entry beneath \p cib_node if found, * otherwise NULL */ static xmlNode * find_resource_xml(xmlNode *cib_node, const char *resource) { const char *node = crm_element_value(cib_node, PCMK_XA_UNAME); char *xpath = crm_strdup_printf(XPATH_RSC_HISTORY, node, resource); xmlNode *match = get_xpath_object(xpath, cib_node, LOG_TRACE); free(xpath); return match; } /*! * \internal * \brief Inject a resource history element into a scheduler input * * \param[in,out] out Output object for displaying error messages * \param[in,out] cib_node Node state XML to inject resource history entry into * \param[in] resource ID (in configuration) of resource to inject * \param[in] lrm_name ID as used in history (could be clone instance) * \param[in] rclass Resource agent class of resource to inject * \param[in] rtype Resource agent type of resource to inject * \param[in] rprovider Resource agent provider of resource to inject * * \return XML of injected resource history element * \note If a history element already exists under either \p resource or * \p lrm_name, this will return it rather than injecting a new one. */ xmlNode * pcmk__inject_resource_history(pcmk__output_t *out, xmlNode *cib_node, const char *resource, const char *lrm_name, const char *rclass, const char *rtype, const char *rprovider) { xmlNode *lrm = NULL; xmlNode *container = NULL; xmlNode *cib_resource = NULL; cib_resource = find_resource_xml(cib_node, resource); if (cib_resource != NULL) { /* If an existing LRM history entry uses the resource name, * continue using it, even if lrm_name is different. */ return cib_resource; } // Check for history entry under preferred name if (strcmp(resource, lrm_name) != 0) { cib_resource = find_resource_xml(cib_node, lrm_name); if (cib_resource != NULL) { return cib_resource; } } if ((rclass == NULL) || (rtype == NULL)) { // @TODO query configuration for class, provider, type out->err(out, "Resource %s not found in the status section of %s " "(supply class and type to continue)", resource, ID(cib_node)); return NULL; } else if (!pcmk__strcase_any_of(rclass, PCMK_RESOURCE_CLASS_OCF, PCMK_RESOURCE_CLASS_STONITH, PCMK_RESOURCE_CLASS_SERVICE, PCMK_RESOURCE_CLASS_UPSTART, PCMK_RESOURCE_CLASS_SYSTEMD, PCMK_RESOURCE_CLASS_LSB, NULL)) { out->err(out, "Invalid class for %s: %s", resource, rclass); return NULL; } else if (pcmk_is_set(pcmk_get_ra_caps(rclass), pcmk_ra_cap_provider) && (rprovider == NULL)) { // @TODO query configuration for provider out->err(out, "Please specify the provider for resource %s", resource); return NULL; } crm_info("Injecting new resource %s into node state '%s'", lrm_name, ID(cib_node)); - lrm = first_named_child(cib_node, XML_CIB_TAG_LRM); + lrm = first_named_child(cib_node, PCMK__XE_LRM); if (lrm == NULL) { const char *node_uuid = ID(cib_node); - lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM); + lrm = create_xml_node(cib_node, PCMK__XE_LRM); crm_xml_add(lrm, PCMK_XA_ID, node_uuid); } container = first_named_child(lrm, XML_LRM_TAG_RESOURCES); if (container == NULL) { container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES); } cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE); // If we're creating a new entry, use the preferred name crm_xml_add(cib_resource, PCMK_XA_ID, lrm_name); crm_xml_add(cib_resource, PCMK_XA_CLASS, rclass); crm_xml_add(cib_resource, PCMK_XA_PROVIDER, rprovider); crm_xml_add(cib_resource, PCMK_XA_TYPE, rtype); return cib_resource; } static int find_ticket_state(pcmk__output_t *out, cib_t *the_cib, const char *ticket_id, xmlNode **ticket_state_xml) { int rc = pcmk_ok; xmlNode *xml_search = NULL; GString *xpath = g_string_sized_new(256); CRM_ASSERT(ticket_state_xml != NULL); *ticket_state_xml = NULL; g_string_append(xpath, "/" PCMK_XE_CIB "/" PCMK_XE_STATUS "/" XML_CIB_TAG_TICKETS); if (ticket_id) { pcmk__g_strcat(xpath, "/" XML_CIB_TAG_TICKET_STATE "[@" PCMK_XA_ID "=\"", ticket_id, "\"]", NULL); } rc = the_cib->cmds->query(the_cib, (const char *) xpath->str, &xml_search, cib_sync_call|cib_scope_local|cib_xpath); g_string_free(xpath, TRUE); if (rc != pcmk_ok) { return rc; } crm_log_xml_debug(xml_search, "Match"); if ((xml_search->children != NULL) && (ticket_id != NULL)) { out->err(out, "Multiple ticket_states match ticket_id=%s", ticket_id); } *ticket_state_xml = xml_search; return rc; } /*! * \internal * \brief Inject a ticket attribute into ticket state * * \param[in,out] out Output object for displaying error messages * \param[in] ticket_id Ticket whose state should be changed * \param[in] attr_name Ticket attribute name to inject * \param[in] attr_value Boolean value of ticket attribute to inject * \param[in,out] cib CIB object to use * * \return Standard Pacemaker return code */ static int set_ticket_state_attr(pcmk__output_t *out, const char *ticket_id, const char *attr_name, bool attr_value, cib_t *cib) { int rc = pcmk_rc_ok; xmlNode *xml_top = NULL; xmlNode *ticket_state_xml = NULL; // Check for an existing ticket state entry rc = find_ticket_state(out, cib, ticket_id, &ticket_state_xml); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { // Ticket state found, use it crm_debug("Injecting attribute into existing ticket state %s", ticket_id); xml_top = ticket_state_xml; } else if (rc == ENXIO) { // No ticket state, create it xmlNode *xml_obj = NULL; xml_top = create_xml_node(NULL, PCMK_XE_STATUS); xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS); ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE); crm_xml_add(ticket_state_xml, PCMK_XA_ID, ticket_id); } else { // Error return rc; } // Add the attribute to the ticket state pcmk__xe_set_bool_attr(ticket_state_xml, attr_name, attr_value); crm_log_xml_debug(xml_top, "Update"); // Commit the change to the CIB rc = cib->cmds->modify(cib, PCMK_XE_STATUS, xml_top, cib_sync_call|cib_scope_local); rc = pcmk_legacy2rc(rc); free_xml(xml_top); return rc; } /*! * \internal * \brief Inject a fictitious action into the cluster * * \param[in,out] out Output object for displaying error messages * \param[in] spec Action specification to inject * \param[in,out] cib CIB object for scheduler input * \param[in] scheduler Scheduler data */ static void inject_action(pcmk__output_t *out, const char *spec, cib_t *cib, const pcmk_scheduler_t *scheduler) { int rc; int outcome = PCMK_OCF_OK; guint interval_ms = 0; char *key = NULL; char *node = NULL; char *task = NULL; char *resource = NULL; const char *rtype = NULL; const char *rclass = NULL; const char *rprovider = NULL; xmlNode *cib_op = NULL; xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; const pcmk_resource_t *rsc = NULL; lrmd_event_data_t *op = NULL; out->message(out, "inject-spec", spec); key = calloc(1, strlen(spec) + 1); node = calloc(1, strlen(spec) + 1); rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome); if (rc != 3) { out->err(out, "Invalid operation spec: %s. Only found %d fields", spec, rc); goto done; } parse_op_key(key, &resource, &task, &interval_ms); rsc = pe_find_resource(scheduler->resources, resource); if (rsc == NULL) { out->err(out, "Invalid resource name: %s", resource); goto done; } rclass = crm_element_value(rsc->xml, PCMK_XA_CLASS); rtype = crm_element_value(rsc->xml, PCMK_XA_TYPE); rprovider = crm_element_value(rsc->xml, PCMK_XA_PROVIDER); cib_node = pcmk__inject_node(cib, node, NULL); CRM_ASSERT(cib_node != NULL); pcmk__inject_failcount(out, cib_node, resource, task, interval_ms, outcome); cib_resource = pcmk__inject_resource_history(out, cib_node, resource, resource, rclass, rtype, rprovider); CRM_ASSERT(cib_resource != NULL); op = create_op(cib_resource, task, interval_ms, outcome); CRM_ASSERT(op != NULL); cib_op = pcmk__inject_action_result(cib_resource, op, 0); CRM_ASSERT(cib_op != NULL); lrmd_free_event(op); rc = cib->cmds->modify(cib, PCMK_XE_STATUS, cib_node, cib_sync_call|cib_scope_local); CRM_ASSERT(rc == pcmk_ok); done: free(task); free(node); free(key); } /*! * \internal * \brief Inject fictitious scheduler inputs * * \param[in,out] scheduler Scheduler data * \param[in,out] cib CIB object for scheduler input to modify * \param[in] injections Injections to apply */ void pcmk__inject_scheduler_input(pcmk_scheduler_t *scheduler, cib_t *cib, const pcmk_injections_t *injections) { int rc = pcmk_ok; const GList *iter = NULL; xmlNode *cib_node = NULL; pcmk__output_t *out = scheduler->priv; out->message(out, "inject-modify-config", injections->quorum, injections->watchdog); if (injections->quorum != NULL) { xmlNode *top = create_xml_node(NULL, PCMK_XE_CIB); /* crm_xml_add(top, PCMK_XA_DC_UUID, dc_uuid); */ crm_xml_add(top, PCMK_XA_HAVE_QUORUM, injections->quorum); rc = cib->cmds->modify(cib, NULL, top, cib_sync_call|cib_scope_local); CRM_ASSERT(rc == pcmk_ok); } if (injections->watchdog != NULL) { rc = cib__update_node_attr(out, cib, cib_sync_call|cib_scope_local, PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL, PCMK_OPT_HAVE_WATCHDOG, injections->watchdog, NULL, NULL); CRM_ASSERT(rc == pcmk_rc_ok); } for (iter = injections->node_up; iter != NULL; iter = iter->next) { const char *node = (const char *) iter->data; out->message(out, "inject-modify-node", "Online", node); cib_node = pcmk__inject_node_state_change(cib, node, true); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, PCMK_XE_STATUS, cib_node, cib_sync_call|cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (iter = injections->node_down; iter != NULL; iter = iter->next) { const char *node = (const char *) iter->data; char *xpath = NULL; out->message(out, "inject-modify-node", "Offline", node); cib_node = pcmk__inject_node_state_change(cib, node, false); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, PCMK_XE_STATUS, cib_node, cib_sync_call|cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); xpath = crm_strdup_printf("//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" - "/" XML_CIB_TAG_LRM, + "/" PCMK__XE_LRM, node); cib->cmds->remove(cib, xpath, NULL, cib_xpath|cib_sync_call|cib_scope_local); free(xpath); xpath = crm_strdup_printf("//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" "/" XML_TAG_TRANSIENT_NODEATTRS, node); cib->cmds->remove(cib, xpath, NULL, cib_xpath|cib_sync_call|cib_scope_local); free(xpath); } for (iter = injections->node_fail; iter != NULL; iter = iter->next) { const char *node = (const char *) iter->data; out->message(out, "inject-modify-node", "Failing", node); cib_node = pcmk__inject_node_state_change(cib, node, true); crm_xml_add(cib_node, PCMK__XA_IN_CCM, PCMK_VALUE_FALSE); CRM_ASSERT(cib_node != NULL); rc = cib->cmds->modify(cib, PCMK_XE_STATUS, cib_node, cib_sync_call|cib_scope_local); CRM_ASSERT(rc == pcmk_ok); free_xml(cib_node); } for (iter = injections->ticket_grant; iter != NULL; iter = iter->next) { const char *ticket_id = (const char *) iter->data; out->message(out, "inject-modify-ticket", "Granting", ticket_id); rc = set_ticket_state_attr(out, ticket_id, "granted", true, cib); CRM_ASSERT(rc == pcmk_rc_ok); } for (iter = injections->ticket_revoke; iter != NULL; iter = iter->next) { const char *ticket_id = (const char *) iter->data; out->message(out, "inject-modify-ticket", "Revoking", ticket_id); rc = set_ticket_state_attr(out, ticket_id, "granted", false, cib); CRM_ASSERT(rc == pcmk_rc_ok); } for (iter = injections->ticket_standby; iter != NULL; iter = iter->next) { const char *ticket_id = (const char *) iter->data; out->message(out, "inject-modify-ticket", "Standby", ticket_id); rc = set_ticket_state_attr(out, ticket_id, "standby", true, cib); CRM_ASSERT(rc == pcmk_rc_ok); } for (iter = injections->ticket_activate; iter != NULL; iter = iter->next) { const char *ticket_id = (const char *) iter->data; out->message(out, "inject-modify-ticket", "Activating", ticket_id); rc = set_ticket_state_attr(out, ticket_id, "standby", false, cib); CRM_ASSERT(rc == pcmk_rc_ok); } for (iter = injections->op_inject; iter != NULL; iter = iter->next) { inject_action(out, (const char *) iter->data, cib, scheduler); } if (!out->is_quiet(out)) { out->end_list(out); } } void pcmk_free_injections(pcmk_injections_t *injections) { if (injections == NULL) { return; } g_list_free_full(injections->node_up, g_free); g_list_free_full(injections->node_down, g_free); g_list_free_full(injections->node_fail, g_free); g_list_free_full(injections->op_fail, g_free); g_list_free_full(injections->op_inject, g_free); g_list_free_full(injections->ticket_grant, g_free); g_list_free_full(injections->ticket_revoke, g_free); g_list_free_full(injections->ticket_standby, g_free); g_list_free_full(injections->ticket_activate, g_free); free(injections->quorum); free(injections->watchdog); free(injections); } diff --git a/lib/pacemaker/pcmk_resource.c b/lib/pacemaker/pcmk_resource.c index 758672a562..e5a292280c 100644 --- a/lib/pacemaker/pcmk_resource.c +++ b/lib/pacemaker/pcmk_resource.c @@ -1,249 +1,249 @@ /* * Copyright 2021-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // Search path for resource operation history (takes node name and resource ID) -#define XPATH_OP_HISTORY "//" PCMK_XE_STATUS \ - "/" PCMK__XE_NODE_STATE \ - "[@" PCMK_XA_UNAME "='%s']" \ - "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ +#define XPATH_OP_HISTORY "//" PCMK_XE_STATUS \ + "/" PCMK__XE_NODE_STATE \ + "[@" PCMK_XA_UNAME "='%s']" \ + "/" PCMK__XE_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" PCMK_XA_ID "='%s']" static xmlNode * best_op(const pcmk_resource_t *rsc, const pcmk_node_t *node) { char *xpath = NULL; xmlNode *history = NULL; xmlNode *best = NULL; bool best_effective_op = false; guint best_interval = 0; bool best_failure = false; const char *best_digest = NULL; // Find node's resource history xpath = crm_strdup_printf(XPATH_OP_HISTORY, node->details->uname, rsc->id); history = get_xpath_object(xpath, rsc->cluster->input, LOG_NEVER); free(xpath); // Examine each history entry for (xmlNode *lrm_rsc_op = first_named_child(history, XML_LRM_TAG_RSC_OP); lrm_rsc_op != NULL; lrm_rsc_op = crm_next_same_xml(lrm_rsc_op)) { const char *digest = crm_element_value(lrm_rsc_op, PCMK__XA_OP_RESTART_DIGEST); guint interval_ms = 0; const char *task = crm_element_value(lrm_rsc_op, PCMK_XA_OPERATION); bool effective_op = false; bool failure = pcmk__ends_with(ID(lrm_rsc_op), "_last_failure_0"); crm_element_value_ms(lrm_rsc_op, PCMK_META_INTERVAL, &interval_ms); effective_op = interval_ms == 0 && pcmk__strcase_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_PROMOTE, PCMK_ACTION_MIGRATE_FROM, NULL); if (best == NULL) { goto is_best; } if (best_effective_op) { // Do not use an ineffective op if there's an effective one. if (!effective_op) { continue; } // Do not use an ineffective non-recurring op if there's a recurring one } else if (best_interval != 0 && !effective_op && interval_ms == 0) { continue; } // Do not use last failure if there's a successful one. if (!best_failure && failure) { continue; } // Do not use an op without a restart digest if there's one with. if (best_digest != NULL && digest == NULL) { continue; } // Do not use an older op if there's a newer one. if (pe__is_newer_op(best, lrm_rsc_op, true) > 0) { continue; } is_best: best = lrm_rsc_op; best_effective_op = effective_op; best_interval = interval_ms; best_failure = failure; best_digest = digest; } return best; } /*! * \internal * \brief Remove a resource * * \param[in,out] cib An open connection to the CIB * \param[in] cib_opts Options to use in the CIB operation call * \param[in] rsc_id Resource to remove * \param[in] rsc_type Type of the resource ("primitive", "group", etc.) * * \return Standard Pacemaker return code */ int pcmk__resource_delete(cib_t *cib, uint32_t cib_opts, const char *rsc_id, const char *rsc_type) { int rc = pcmk_rc_ok; xmlNode *msg_data = NULL; if (cib == NULL) { return ENOTCONN; } if (rsc_id == NULL || rsc_type == NULL) { return EINVAL; } msg_data = create_xml_node(NULL, rsc_type); crm_xml_add(msg_data, PCMK_XA_ID, rsc_id); rc = cib->cmds->remove(cib, PCMK_XE_RESOURCES, msg_data, cib_opts); rc = pcmk_legacy2rc(rc); free_xml(msg_data); return rc; } int pcmk_resource_delete(xmlNodePtr *xml, const char *rsc_id, const char *rsc_type) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; uint32_t cib_opts = cib_sync_call; cib_t *cib = NULL; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } cib = cib_new(); if (cib == NULL) { rc = pcmk_rc_cib_corrupt; goto done; } rc = cib->cmds->signon(cib, crm_system_name, cib_command); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { goto done; } rc = pcmk__resource_delete(cib, cib_opts, rsc_id, rsc_type); done: if (cib != NULL) { cib__clean_up_connection(&cib); } pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } /*! * \internal * \brief Calculate and output resource operation digests * * \param[in,out] out Output object * \param[in,out] rsc Resource to calculate digests for * \param[in] node Node whose operation history should be used * \param[in] overrides Hash table of configuration parameters to override * * \return Standard Pacemaker return code */ int pcmk__resource_digests(pcmk__output_t *out, pcmk_resource_t *rsc, const pcmk_node_t *node, GHashTable *overrides) { const char *task = NULL; xmlNode *xml_op = NULL; pcmk__op_digest_t *digests = NULL; guint interval_ms = 0; int rc = pcmk_rc_ok; if ((out == NULL) || (rsc == NULL) || (node == NULL)) { return EINVAL; } if (rsc->variant != pcmk_rsc_variant_primitive) { // Only primitives get operation digests return EOPNOTSUPP; } // Find XML of operation history to use xml_op = best_op(rsc, node); // Generate an operation key if (xml_op != NULL) { task = crm_element_value(xml_op, PCMK_XA_OPERATION); crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &interval_ms); } if (task == NULL) { // Assume start if no history is available task = PCMK_ACTION_START; interval_ms = 0; } // Calculate and show digests digests = pe__calculate_digests(rsc, task, &interval_ms, node, xml_op, overrides, true, rsc->cluster); rc = out->message(out, "digests", rsc, node, task, interval_ms, digests); pe__free_digests(digests); return rc; } // @COMPAT The scheduler parameter is unused and can be removed at the next break int pcmk_resource_digests(xmlNodePtr *xml, pcmk_resource_t *rsc, const pcmk_node_t *node, GHashTable *overrides, pcmk_scheduler_t *scheduler) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pcmk__register_lib_messages(out); rc = pcmk__resource_digests(out, rsc, node, overrides); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c index 52d44b8cf5..6f39b70d34 100644 --- a/lib/pacemaker/pcmk_sched_actions.c +++ b/lib/pacemaker/pcmk_sched_actions.c @@ -1,1933 +1,1933 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Get the action flags relevant to ordering constraints * * \param[in,out] action Action to check * \param[in] node Node that *other* action in the ordering is on * (used only for clone resource actions) * * \return Action flags that should be used for orderings */ static uint32_t action_flags_for_ordering(pcmk_action_t *action, const pcmk_node_t *node) { bool runnable = false; uint32_t flags; // For non-resource actions, return the action flags if (action->rsc == NULL) { return action->flags; } /* For non-clone resources, or a clone action not assigned to a node, * return the flags as determined by the resource method without a node * specified. */ flags = action->rsc->cmds->action_flags(action, NULL); if ((node == NULL) || !pe_rsc_is_clone(action->rsc)) { return flags; } /* Otherwise (i.e., for clone resource actions on a specific node), first * remember whether the non-node-specific action is runnable. */ runnable = pcmk_is_set(flags, pcmk_action_runnable); // Then recheck the resource method with the node flags = action->rsc->cmds->action_flags(action, node); /* For clones in ordering constraints, the node-specific "runnable" doesn't * matter, just the non-node-specific setting (i.e., is the action runnable * anywhere). * * This applies only to runnable, and only for ordering constraints. This * function shouldn't be used for other types of constraints without * changes. Not very satisfying, but it's logical and appears to work well. */ if (runnable && !pcmk_is_set(flags, pcmk_action_runnable)) { pcmk__set_raw_action_flags(flags, action->rsc->id, pcmk_action_runnable); } return flags; } /*! * \internal * \brief Get action UUID that should be used with a resource ordering * * When an action is ordered relative to an action for a collective resource * (clone, group, or bundle), it actually needs to be ordered after all * instances of the collective have completed the relevant action (for example, * given "start CLONE then start RSC", RSC must wait until all instances of * CLONE have started). Given the UUID and resource of the first action in an * ordering, this returns the UUID of the action that should actually be used * for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0"). * * \param[in] first_uuid UUID of first action in ordering * \param[in] first_rsc Resource of first action in ordering * * \return Newly allocated copy of UUID to use with ordering * \note It is the caller's responsibility to free the return value. */ static char * action_uuid_for_ordering(const char *first_uuid, const pcmk_resource_t *first_rsc) { guint interval_ms = 0; char *uuid = NULL; char *rid = NULL; char *first_task_str = NULL; enum action_tasks first_task = pcmk_action_unspecified; enum action_tasks remapped_task = pcmk_action_unspecified; // Only non-notify actions for collective resources need remapping if ((strstr(first_uuid, PCMK_ACTION_NOTIFY) != NULL) || (first_rsc->variant < pcmk_rsc_variant_group)) { goto done; } // Only non-recurring actions need remapping CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms)); if (interval_ms > 0) { goto done; } first_task = text2task(first_task_str); switch (first_task) { case pcmk_action_stop: case pcmk_action_start: case pcmk_action_notify: case pcmk_action_promote: case pcmk_action_demote: remapped_task = first_task + 1; break; case pcmk_action_stopped: case pcmk_action_started: case pcmk_action_notified: case pcmk_action_promoted: case pcmk_action_demoted: remapped_task = first_task; break; case pcmk_action_monitor: case pcmk_action_shutdown: case pcmk_action_fence: break; default: crm_err("Unknown action '%s' in ordering", first_task_str); break; } if (remapped_task != pcmk_action_unspecified) { /* If a clone or bundle has notifications enabled, the ordering will be * relative to when notifications have been sent for the remapped task. */ if (pcmk_is_set(first_rsc->flags, pcmk_rsc_notify) && (pe_rsc_is_clone(first_rsc) || pe_rsc_is_bundled(first_rsc))) { uuid = pcmk__notify_key(rid, "confirmed-post", task2text(remapped_task)); } else { uuid = pcmk__op_key(rid, task2text(remapped_task), 0); } pcmk__rsc_trace(first_rsc, "Remapped action UUID %s to %s for ordering purposes", first_uuid, uuid); } done: if (uuid == NULL) { uuid = strdup(first_uuid); CRM_ASSERT(uuid != NULL); } free(first_task_str); free(rid); return uuid; } /*! * \internal * \brief Get actual action that should be used with an ordering * * When an action is ordered relative to an action for a collective resource * (clone, group, or bundle), it actually needs to be ordered after all * instances of the collective have completed the relevant action (for example, * given "start CLONE then start RSC", RSC must wait until all instances of * CLONE have started). Given the first action in an ordering, this returns the * the action that should actually be used for ordering (for example, the * started action instead of the start action). * * \param[in] action First action in an ordering * * \return Actual action that should be used for the ordering */ static pcmk_action_t * action_for_ordering(pcmk_action_t *action) { pcmk_action_t *result = action; pcmk_resource_t *rsc = action->rsc; if ((rsc != NULL) && (rsc->variant >= pcmk_rsc_variant_group) && (action->uuid != NULL)) { char *uuid = action_uuid_for_ordering(action->uuid, rsc); result = find_first_action(rsc->actions, uuid, NULL, NULL); if (result == NULL) { crm_warn("Not remapping %s to %s because %s does not have " "remapped action", action->uuid, uuid, rsc->id); result = action; } free(uuid); } return result; } /*! * \internal * \brief Wrapper for update_ordered_actions() method for readability * * \param[in,out] rsc Resource to call method for * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this * node (only used when interleaving instances) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates * (may include pcmk_action_optional to affect only * mandatory actions, and pe_action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags indicating what was updated */ static inline uint32_t update(pcmk_resource_t *rsc, pcmk_action_t *first, pcmk_action_t *then, const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pcmk_scheduler_t *scheduler) { return rsc->cmds->update_ordered_actions(first, then, node, flags, filter, type, scheduler); } /*! * \internal * \brief Update flags for ordering's actions appropriately for ordering's flags * * \param[in,out] first First action in an ordering * \param[in,out] then Then action in an ordering * \param[in] first_flags Action flags for \p first for ordering purposes * \param[in] then_flags Action flags for \p then for ordering purposes * \param[in,out] order Action wrapper for \p first in ordering * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags */ static uint32_t update_action_for_ordering_flags(pcmk_action_t *first, pcmk_action_t *then, uint32_t first_flags, uint32_t then_flags, pcmk__related_action_t *order, pcmk_scheduler_t *scheduler) { uint32_t changed = pcmk__updated_none; /* The node will only be used for clones. If interleaved, node will be NULL, * otherwise the ordering scope will be limited to the node. Normally, the * whole 'then' clone should restart if 'first' is restarted, so then->node * is needed. */ pcmk_node_t *node = then->node; if (pcmk_is_set(order->type, pcmk__ar_first_implies_same_node_then)) { /* For unfencing, only instances of 'then' on the same node as 'first' * (the unfencing operation) should restart, so reset node to * first->node, at which point this case is handled like a normal * pcmk__ar_first_implies_then. */ pcmk__clear_relation_flags(order->type, pcmk__ar_first_implies_same_node_then); pcmk__set_relation_flags(order->type, pcmk__ar_first_implies_then); node = first->node; pcmk__rsc_trace(then->rsc, "%s then %s: mapped " "pcmk__ar_first_implies_same_node_then to " "pcmk__ar_first_implies_then on %s", first->uuid, then->uuid, pcmk__node_name(node)); } if (pcmk_is_set(order->type, pcmk__ar_first_implies_then)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags & pcmk_action_optional, pcmk_action_optional, pcmk__ar_first_implies_then, scheduler); } else if (!pcmk_is_set(first_flags, pcmk_action_optional) && pcmk_is_set(then->flags, pcmk_action_optional)) { pcmk__clear_action_flags(then, pcmk_action_optional); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_first_implies_then", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_intermediate_stop) && (then->rsc != NULL)) { enum pe_action_flags restart = pcmk_action_optional |pcmk_action_runnable; changed |= update(then->rsc, first, then, node, first_flags, restart, pcmk__ar_intermediate_stop, scheduler); pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_intermediate_stop", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_then_implies_first)) { if (first->rsc != NULL) { changed |= update(first->rsc, first, then, node, first_flags, pcmk_action_optional, pcmk__ar_then_implies_first, scheduler); } else if (!pcmk_is_set(first_flags, pcmk_action_optional) && pcmk_is_set(first->flags, pcmk_action_runnable)) { pcmk__clear_action_flags(first, pcmk_action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_first); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_then_implies_first", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_promoted_then_implies_first)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags & pcmk_action_optional, pcmk_action_optional, pcmk__ar_promoted_then_implies_first, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after " "pcmk__ar_promoted_then_implies_first", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_min_runnable)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk_action_runnable, pcmk__ar_min_runnable, scheduler); } else if (pcmk_is_set(first_flags, pcmk_action_runnable)) { // We have another runnable instance of "first" then->runnable_before++; /* Mark "then" as runnable if it requires a certain number of * "before" instances to be runnable, and they now are. */ if ((then->runnable_before >= then->required_runnable_before) && !pcmk_is_set(then->flags, pcmk_action_runnable)) { pcmk__set_action_flags(then, pcmk_action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_min_runnable", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_nested_remote_probe) && (then->rsc != NULL)) { if (!pcmk_is_set(first_flags, pcmk_action_runnable) && (first->rsc != NULL) && (first->rsc->running_on != NULL)) { pcmk__rsc_trace(then->rsc, "%s then %s: ignoring because first is stopping", first->uuid, then->uuid); order->type = (enum pe_ordering) pcmk__ar_none; } else { changed |= update(then->rsc, first, then, node, first_flags, pcmk_action_runnable, pcmk__ar_unrunnable_first_blocks, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_nested_remote_probe", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_unrunnable_first_blocks)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk_action_runnable, pcmk__ar_unrunnable_first_blocks, scheduler); } else if (!pcmk_is_set(first_flags, pcmk_action_runnable) && pcmk_is_set(then->flags, pcmk_action_runnable)) { pcmk__clear_action_flags(then, pcmk_action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_unrunnable_first_blocks", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_unmigratable_then_blocks)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk_action_optional, pcmk__ar_unmigratable_then_blocks, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after " "pcmk__ar_unmigratable_then_blocks", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_first_else_then)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk_action_optional, pcmk__ar_first_else_then, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_first_else_then", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_ordered)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk_action_runnable, pcmk__ar_ordered, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_ordered", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(order->type, pcmk__ar_asymmetric)) { if (then->rsc != NULL) { changed |= update(then->rsc, first, then, node, first_flags, pcmk_action_runnable, pcmk__ar_asymmetric, scheduler); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after pcmk__ar_asymmetric", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } if (pcmk_is_set(first->flags, pcmk_action_runnable) && pcmk_is_set(order->type, pcmk__ar_first_implies_then_graphed) && !pcmk_is_set(first_flags, pcmk_action_optional)) { pcmk__rsc_trace(then->rsc, "%s will be in graph because %s is required", then->uuid, first->uuid); pcmk__set_action_flags(then, pcmk_action_always_in_graph); // Don't bother marking 'then' as changed just for this } if (pcmk_is_set(order->type, pcmk__ar_then_implies_first_graphed) && !pcmk_is_set(then_flags, pcmk_action_optional)) { pcmk__rsc_trace(then->rsc, "%s will be in graph because %s is required", first->uuid, then->uuid); pcmk__set_action_flags(first, pcmk_action_always_in_graph); // Don't bother marking 'first' as changed just for this } if (pcmk_any_flags_set(order->type, pcmk__ar_first_implies_then |pcmk__ar_then_implies_first |pcmk__ar_intermediate_stop) && (first->rsc != NULL) && !pcmk_is_set(first->rsc->flags, pcmk_rsc_managed) && pcmk_is_set(first->rsc->flags, pcmk_rsc_blocked) && !pcmk_is_set(first->flags, pcmk_action_runnable) && pcmk__str_eq(first->task, PCMK_ACTION_STOP, pcmk__str_none)) { if (pcmk_is_set(then->flags, pcmk_action_runnable)) { pcmk__clear_action_flags(then, pcmk_action_runnable); pcmk__set_updated_flags(changed, first, pcmk__updated_then); } pcmk__rsc_trace(then->rsc, "%s then %s: %s after checking whether first " "is blocked, unmanaged, unrunnable stop", first->uuid, then->uuid, (changed? "changed" : "unchanged")); } return changed; } // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pcmk_action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pcmk_action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pcmk_action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->details->uname) /*! * \internal * \brief Update an action's flags for all orderings where it is "then" * * \param[in,out] then Action to update * \param[in,out] scheduler Scheduler data */ void pcmk__update_action_for_orderings(pcmk_action_t *then, pcmk_scheduler_t *scheduler) { GList *lpc = NULL; uint32_t changed = pcmk__updated_none; int last_flags = then->flags; pcmk__rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s", action_type_str(then->flags), then->uuid, action_optional_str(then->flags), action_runnable_str(then->flags), action_node_str(then)); if (pcmk_is_set(then->flags, pcmk_action_min_runnable)) { /* Initialize current known "runnable before" actions. As * update_action_for_ordering_flags() is called for each of then's * before actions, this number will increment as runnable 'first' * actions are encountered. */ then->runnable_before = 0; if (then->required_runnable_before == 0) { /* @COMPAT This ordering constraint uses the deprecated * "require-all=false" attribute. Treat it like * PCMK_META_CLONE_MIN=1. */ then->required_runnable_before = 1; } /* The pcmk__ar_min_runnable clause of * update_action_for_ordering_flags() (called below) * will reset runnable if appropriate. */ pcmk__clear_action_flags(then, pcmk_action_runnable); } for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *other = lpc->data; pcmk_action_t *first = other->action; pcmk_node_t *then_node = then->node; pcmk_node_t *first_node = first->node; if ((first->rsc != NULL) && (first->rsc->variant == pcmk_rsc_variant_group) && pcmk__str_eq(first->task, PCMK_ACTION_START, pcmk__str_none)) { first_node = first->rsc->fns->location(first->rsc, NULL, FALSE); if (first_node != NULL) { pcmk__rsc_trace(first->rsc, "Found %s for 'first' %s", pcmk__node_name(first_node), first->uuid); } } if ((then->rsc != NULL) && (then->rsc->variant == pcmk_rsc_variant_group) && pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none)) { then_node = then->rsc->fns->location(then->rsc, NULL, FALSE); if (then_node != NULL) { pcmk__rsc_trace(then->rsc, "Found %s for 'then' %s", pcmk__node_name(then_node), then->uuid); } } // Disable constraint if it only applies when on same node, but isn't if (pcmk_is_set(other->type, pcmk__ar_if_on_same_node) && (first_node != NULL) && (then_node != NULL) && !pcmk__same_node(first_node, then_node)) { pcmk__rsc_trace(then->rsc, "Disabled ordering %s on %s then %s on %s: " "not same node", other->action->uuid, pcmk__node_name(first_node), then->uuid, pcmk__node_name(then_node)); other->type = (enum pe_ordering) pcmk__ar_none; continue; } pcmk__clear_updated_flags(changed, then, pcmk__updated_first); if ((first->rsc != NULL) && pcmk_is_set(other->type, pcmk__ar_then_cancels_first) && !pcmk_is_set(then->flags, pcmk_action_optional)) { /* 'then' is required, so we must abandon 'first' * (e.g. a required stop cancels any agent reload). */ pcmk__set_action_flags(other->action, pcmk_action_optional); if (!strcmp(first->task, PCMK_ACTION_RELOAD_AGENT)) { pcmk__clear_rsc_flags(first->rsc, pcmk_rsc_reload); } } if ((first->rsc != NULL) && (then->rsc != NULL) && (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) { first = action_for_ordering(first); } if (first != other->action) { pcmk__rsc_trace(then->rsc, "Ordering %s after %s instead of %s", then->uuid, first->uuid, other->action->uuid); } pcmk__rsc_trace(then->rsc, "%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s", first->uuid, first->flags, then->uuid, then->flags, other->type, action_node_str(first)); if (first == other->action) { /* 'first' was not remapped (e.g. from 'start' to 'running'), which * could mean it is a non-resource action, a primitive resource * action, or already expanded. */ uint32_t first_flags, then_flags; first_flags = action_flags_for_ordering(first, then_node); then_flags = action_flags_for_ordering(then, first_node); changed |= update_action_for_ordering_flags(first, then, first_flags, then_flags, other, scheduler); /* 'first' was for a complex resource (clone, group, etc), * create a new dependency if necessary */ } else if (order_actions(first, then, other->type)) { /* This was the first time 'first' and 'then' were associated, * start again to get the new actions_before list */ pcmk__set_updated_flags(changed, then, pcmk__updated_then); pcmk__rsc_trace(then->rsc, "Disabled ordering %s then %s in favor of %s " "then %s", other->action->uuid, then->uuid, first->uuid, then->uuid); other->type = (enum pe_ordering) pcmk__ar_none; } if (pcmk_is_set(changed, pcmk__updated_first)) { crm_trace("Re-processing %s and its 'after' actions " "because it changed", first->uuid); for (GList *lpc2 = first->actions_after; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__related_action_t *other = lpc2->data; pcmk__update_action_for_orderings(other->action, scheduler); } pcmk__update_action_for_orderings(first, scheduler); } } if (pcmk_is_set(then->flags, pcmk_action_min_runnable)) { if (last_flags == then->flags) { pcmk__clear_updated_flags(changed, then, pcmk__updated_then); } else { pcmk__set_updated_flags(changed, then, pcmk__updated_then); } } if (pcmk_is_set(changed, pcmk__updated_then)) { crm_trace("Re-processing %s and its 'after' actions because it changed", then->uuid); if (pcmk_is_set(last_flags, pcmk_action_runnable) && !pcmk_is_set(then->flags, pcmk_action_runnable)) { pcmk__block_colocation_dependents(then); } pcmk__update_action_for_orderings(then, scheduler); for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *other = lpc->data; pcmk__update_action_for_orderings(other->action, scheduler); } } } static inline bool is_primitive_action(const pcmk_action_t *action) { return (action != NULL) && (action->rsc != NULL) && (action->rsc->variant == pcmk_rsc_variant_primitive); } /*! * \internal * \brief Clear a single action flag and set reason text * * \param[in,out] action Action whose flag should be cleared * \param[in] flag Action flag that should be cleared * \param[in] reason Action that is the reason why flag is being cleared */ #define clear_action_flag_because(action, flag, reason) do { \ if (pcmk_is_set((action)->flags, (flag))) { \ pcmk__clear_action_flags(action, flag); \ if ((action)->rsc != (reason)->rsc) { \ char *reason_text = pe__action2reason((reason), (flag)); \ pe_action_set_reason((action), reason_text, false); \ free(reason_text); \ } \ } \ } while (0) /*! * \internal * \brief Update actions in an asymmetric ordering * * If the "first" action in an asymmetric ordering is unrunnable, make the * "second" action unrunnable as well, if appropriate. * * \param[in] first 'First' action in an asymmetric ordering * \param[in,out] then 'Then' action in an asymmetric ordering */ static void handle_asymmetric_ordering(const pcmk_action_t *first, pcmk_action_t *then) { /* Only resource actions after an unrunnable 'first' action need updates for * asymmetric ordering. */ if ((then->rsc == NULL) || pcmk_is_set(first->flags, pcmk_action_runnable)) { return; } // Certain optional 'then' actions are unaffected by unrunnable 'first' if (pcmk_is_set(then->flags, pcmk_action_optional)) { enum rsc_role_e then_rsc_role = then->rsc->fns->state(then->rsc, TRUE); if ((then_rsc_role == pcmk_role_stopped) && pcmk__str_eq(then->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* If 'then' should stop after 'first' but is already stopped, the * ordering is irrelevant. */ return; } else if ((then_rsc_role >= pcmk_role_started) && pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none) && pe__rsc_running_on_only(then->rsc, then->node)) { /* Similarly if 'then' should start after 'first' but is already * started on a single node. */ return; } } // 'First' can't run, so 'then' can't either clear_action_flag_because(then, pcmk_action_optional, first); clear_action_flag_because(then, pcmk_action_runnable, first); } /*! * \internal * \brief Set action bits appropriately when pe_restart_order is used * * \param[in,out] first 'First' action in an ordering with pe_restart_order * \param[in,out] then 'Then' action in an ordering with pe_restart_order * \param[in] filter What action flags to care about * * \note pe_restart_order is set for "stop resource before starting it" and * "stop later group member before stopping earlier group member" */ static void handle_restart_ordering(pcmk_action_t *first, pcmk_action_t *then, uint32_t filter) { const char *reason = NULL; CRM_ASSERT(is_primitive_action(first)); CRM_ASSERT(is_primitive_action(then)); // We need to update the action in two cases: // ... if 'then' is required if (pcmk_is_set(filter, pcmk_action_optional) && !pcmk_is_set(then->flags, pcmk_action_optional)) { reason = "restart"; } /* ... if 'then' is unrunnable action on same resource (if a resource * should restart but can't start, we still want to stop) */ if (pcmk_is_set(filter, pcmk_action_runnable) && !pcmk_is_set(then->flags, pcmk_action_runnable) && pcmk_is_set(then->rsc->flags, pcmk_rsc_managed) && (first->rsc == then->rsc)) { reason = "stop"; } if (reason == NULL) { return; } pcmk__rsc_trace(first->rsc, "Handling %s -> %s for %s", first->uuid, then->uuid, reason); // Make 'first' required if it is runnable if (pcmk_is_set(first->flags, pcmk_action_runnable)) { clear_action_flag_because(first, pcmk_action_optional, then); } // Make 'first' required if 'then' is required if (!pcmk_is_set(then->flags, pcmk_action_optional)) { clear_action_flag_because(first, pcmk_action_optional, then); } // Make 'first' unmigratable if 'then' is unmigratable if (!pcmk_is_set(then->flags, pcmk_action_migratable)) { clear_action_flag_because(first, pcmk_action_migratable, then); } // Make 'then' unrunnable if 'first' is required but unrunnable if (!pcmk_is_set(first->flags, pcmk_action_optional) && !pcmk_is_set(first->flags, pcmk_action_runnable)) { clear_action_flag_because(then, pcmk_action_runnable, first); } } /*! * \internal * \brief Update two actions according to an ordering between them * * Given information about an ordering of two actions, update the actions' flags * (and runnable_before members if appropriate) as appropriate for the ordering. * Effects may cascade to other orderings involving the actions as well. * * \param[in,out] first 'First' action in an ordering * \param[in,out] then 'Then' action in an ordering * \param[in] node If not NULL, limit scope of ordering to this node * (ignored) * \param[in] flags Action flags for \p first for ordering purposes * \param[in] filter Action flags to limit scope of certain updates (may * include pcmk_action_optional to affect only * mandatory actions, and pcmk_action_runnable to * affect only runnable actions) * \param[in] type Group of enum pcmk__action_relation_flags to apply * \param[in,out] scheduler Scheduler data * * \return Group of enum pcmk__updated flags indicating what was updated */ uint32_t pcmk__update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then, const pcmk_node_t *node, uint32_t flags, uint32_t filter, uint32_t type, pcmk_scheduler_t *scheduler) { uint32_t changed = pcmk__updated_none; uint32_t then_flags = 0U; uint32_t first_flags = 0U; CRM_ASSERT((first != NULL) && (then != NULL) && (scheduler != NULL)); then_flags = then->flags; first_flags = first->flags; if (pcmk_is_set(type, pcmk__ar_asymmetric)) { handle_asymmetric_ordering(first, then); } if (pcmk_is_set(type, pcmk__ar_then_implies_first) && !pcmk_is_set(then_flags, pcmk_action_optional)) { // Then is required, and implies first should be, too if (pcmk_is_set(filter, pcmk_action_optional) && !pcmk_is_set(flags, pcmk_action_optional) && pcmk_is_set(first_flags, pcmk_action_optional)) { clear_action_flag_because(first, pcmk_action_optional, then); } if (pcmk_is_set(flags, pcmk_action_migratable) && !pcmk_is_set(then->flags, pcmk_action_migratable)) { clear_action_flag_because(first, pcmk_action_migratable, then); } } if (pcmk_is_set(type, pcmk__ar_promoted_then_implies_first) && (then->rsc != NULL) && (then->rsc->role == pcmk_role_promoted) && pcmk_is_set(filter, pcmk_action_optional) && !pcmk_is_set(then->flags, pcmk_action_optional)) { clear_action_flag_because(first, pcmk_action_optional, then); if (pcmk_is_set(first->flags, pcmk_action_migratable) && !pcmk_is_set(then->flags, pcmk_action_migratable)) { clear_action_flag_because(first, pcmk_action_migratable, then); } } if (pcmk_is_set(type, pcmk__ar_unmigratable_then_blocks) && pcmk_is_set(filter, pcmk_action_optional)) { if (!pcmk_all_flags_set(then->flags, pcmk_action_migratable |pcmk_action_runnable)) { clear_action_flag_because(first, pcmk_action_runnable, then); } if (!pcmk_is_set(then->flags, pcmk_action_optional)) { clear_action_flag_because(first, pcmk_action_optional, then); } } if (pcmk_is_set(type, pcmk__ar_first_else_then) && pcmk_is_set(filter, pcmk_action_optional) && !pcmk_is_set(first->flags, pcmk_action_runnable)) { clear_action_flag_because(then, pcmk_action_migratable, first); pcmk__clear_action_flags(then, pcmk_action_pseudo); } if (pcmk_is_set(type, pcmk__ar_unrunnable_first_blocks) && pcmk_is_set(filter, pcmk_action_runnable) && pcmk_is_set(then->flags, pcmk_action_runnable) && !pcmk_is_set(flags, pcmk_action_runnable)) { clear_action_flag_because(then, pcmk_action_runnable, first); clear_action_flag_because(then, pcmk_action_migratable, first); } if (pcmk_is_set(type, pcmk__ar_first_implies_then) && pcmk_is_set(filter, pcmk_action_optional) && pcmk_is_set(then->flags, pcmk_action_optional) && !pcmk_is_set(flags, pcmk_action_optional) && !pcmk_is_set(first->flags, pcmk_action_migratable)) { clear_action_flag_because(then, pcmk_action_optional, first); } if (pcmk_is_set(type, pcmk__ar_intermediate_stop)) { handle_restart_ordering(first, then, filter); } if (then_flags != then->flags) { pcmk__set_updated_flags(changed, first, pcmk__updated_then); pcmk__rsc_trace(then->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'first' %s (%#.6x)", then->uuid, pcmk__node_name(then->node), then->flags, then_flags, first->uuid, first->flags); if ((then->rsc != NULL) && (then->rsc->parent != NULL)) { // Required to handle "X_stop then X_start" for cloned groups pcmk__update_action_for_orderings(then, scheduler); } } if (first_flags != first->flags) { pcmk__set_updated_flags(changed, first, pcmk__updated_first); pcmk__rsc_trace(first->rsc, "%s on %s: flags are now %#.6x (was %#.6x) " "because of 'then' %s (%#.6x)", first->uuid, pcmk__node_name(first->node), first->flags, first_flags, then->uuid, then->flags); } return changed; } /*! * \internal * \brief Trace-log an action (optionally with its dependent actions) * * \param[in] pre_text If not NULL, prefix the log with this plus ": " * \param[in] action Action to log * \param[in] details If true, recursively log dependent actions */ void pcmk__log_action(const char *pre_text, const pcmk_action_t *action, bool details) { const char *node_uname = NULL; const char *node_uuid = NULL; const char *desc = NULL; CRM_CHECK(action != NULL, return); if (!pcmk_is_set(action->flags, pcmk_action_pseudo)) { if (action->node != NULL) { node_uname = action->node->details->uname; node_uuid = action->node->details->id; } else { node_uname = ""; } } switch (text2task(action->task)) { case pcmk_action_fence: case pcmk_action_shutdown: if (pcmk_is_set(action->flags, pcmk_action_pseudo)) { desc = "Pseudo "; } else if (pcmk_is_set(action->flags, pcmk_action_optional)) { desc = "Optional "; } else if (!pcmk_is_set(action->flags, pcmk_action_runnable)) { desc = "!!Non-Startable!! "; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; default: if (pcmk_is_set(action->flags, pcmk_action_optional)) { desc = "Optional "; } else if (pcmk_is_set(action->flags, pcmk_action_pseudo)) { desc = "Pseudo "; } else if (!pcmk_is_set(action->flags, pcmk_action_runnable)) { desc = "!!Non-Startable!! "; } else { desc = "(Provisional) "; } crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s", ((pre_text == NULL)? "" : pre_text), ((pre_text == NULL)? "" : ": "), desc, action->id, action->uuid, (action->rsc? action->rsc->id : ""), (node_uname? "\ton " : ""), (node_uname? node_uname : ""), (node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""), (node_uuid? ")" : "")); break; } if (details) { const GList *iter = NULL; const pcmk__related_action_t *other = NULL; crm_trace("\t\t====== Preceding Actions"); for (iter = action->actions_before; iter != NULL; iter = iter->next) { other = (const pcmk__related_action_t *) iter->data; pcmk__log_action("\t\t", other->action, false); } crm_trace("\t\t====== Subsequent Actions"); for (iter = action->actions_after; iter != NULL; iter = iter->next) { other = (const pcmk__related_action_t *) iter->data; pcmk__log_action("\t\t", other->action, false); } crm_trace("\t\t====== End"); } else { crm_trace("\t\t(before=%d, after=%d)", g_list_length(action->actions_before), g_list_length(action->actions_after)); } } /*! * \internal * \brief Create a new shutdown action for a node * * \param[in,out] node Node being shut down * * \return Newly created shutdown action for \p node */ pcmk_action_t * pcmk__new_shutdown_action(pcmk_node_t *node) { char *shutdown_id = NULL; pcmk_action_t *shutdown_op = NULL; CRM_ASSERT(node != NULL); shutdown_id = crm_strdup_printf("%s-%s", PCMK_ACTION_DO_SHUTDOWN, node->details->uname); shutdown_op = custom_action(NULL, shutdown_id, PCMK_ACTION_DO_SHUTDOWN, node, FALSE, node->details->data_set); pcmk__order_stops_before_shutdown(node, shutdown_op); add_hash_param(shutdown_op->meta, PCMK__META_OP_NO_WAIT, PCMK_VALUE_TRUE); return shutdown_op; } /*! * \internal * \brief Calculate and add an operation digest to XML * * Calculate an operation digest, which enables us to later determine when a * restart is needed due to the resource's parameters being changed, and add it * to given XML. * * \param[in] op Operation result from executor * \param[in,out] update XML to add digest to */ static void add_op_digest_to_xml(const lrmd_event_data_t *op, xmlNode *update) { char *digest = NULL; xmlNode *args_xml = NULL; if (op->params == NULL) { return; } args_xml = create_xml_node(NULL, PCMK_XE_PARAMETERS); g_hash_table_foreach(op->params, hash2field, args_xml); pcmk__filter_op_for_digest(args_xml); digest = calculate_operation_digest(args_xml, NULL); crm_xml_add(update, PCMK__XA_OP_DIGEST, digest); free_xml(args_xml); free(digest); } #define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" /*! * \internal * \brief Create XML for resource operation history update * * \param[in,out] parent Parent XML node to add to * \param[in,out] op Operation event data * \param[in] caller_version DC feature set * \param[in] target_rc Expected result of operation * \param[in] node Name of node on which operation was performed * \param[in] origin Arbitrary description of update source * * \return Newly created XML node for history update */ xmlNode * pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op, const char *caller_version, int target_rc, const char *node, const char *origin) { char *key = NULL; char *magic = NULL; char *op_id = NULL; char *op_id_additional = NULL; char *local_user_data = NULL; const char *exit_reason = NULL; xmlNode *xml_op = NULL; const char *task = NULL; CRM_CHECK(op != NULL, return NULL); crm_trace("Creating history XML for %s-interval %s action for %s on %s " "(DC version: %s, origin: %s)", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, ((node == NULL)? "no node" : node), caller_version, origin); task = op->op_type; /* Record a successful agent reload as a start, and a failed one as a * monitor, to make life easier for the scheduler when determining the * current state. * * @COMPAT We should check "reload" here only if the operation was for a * pre-OCF-1.1 resource agent, but we don't know that here, and we should * only ever get results for actions scheduled by us, so we can reasonably * assume any "reload" is actually a pre-1.1 agent reload. */ if (pcmk__str_any_of(task, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { if (op->op_status == PCMK_EXEC_DONE) { task = PCMK_ACTION_START; } else { task = PCMK_ACTION_MONITOR; } } key = pcmk__op_key(op->rsc_id, task, op->interval_ms); if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = crm_meta_value(op->params, "notify_type"); const char *n_task = crm_meta_value(op->params, "notify_operation"); CRM_LOG_ASSERT(n_type != NULL); CRM_LOG_ASSERT(n_task != NULL); op_id = pcmk__notify_key(op->rsc_id, n_type, n_task); if (op->op_status != PCMK_EXEC_PENDING) { /* Ignore notify errors. * * @TODO It might be better to keep the correct result here, and * ignore it in process_graph_event(). */ lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); } /* Migration history is preserved separately, which usually matters for * multiple nodes and is important for future cluster transitions. */ } else if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { op_id = strdup(key); } else if (did_rsc_op_fail(op, target_rc)) { op_id = pcmk__op_key(op->rsc_id, "last_failure", 0); if (op->interval_ms == 0) { /* Ensure 'last' gets updated, in case PCMK_META_RECORD_PENDING is * true */ op_id_additional = pcmk__op_key(op->rsc_id, "last", 0); } exit_reason = op->exit_reason; } else if (op->interval_ms > 0) { op_id = strdup(key); } else { op_id = pcmk__op_key(op->rsc_id, "last", 0); } again: xml_op = pcmk__xe_match(parent, XML_LRM_TAG_RSC_OP, PCMK_XA_ID, op_id); if (xml_op == NULL) { xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP); } if (op->user_data == NULL) { crm_debug("Generating fake transition key for: " PCMK__OP_FMT " %d from %s", op->rsc_id, op->op_type, op->interval_ms, op->call_id, origin); local_user_data = pcmk__transition_key(-1, op->call_id, target_rc, FAKE_TE_ID); op->user_data = local_user_data; } if (magic == NULL) { magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc, (const char *) op->user_data); } crm_xml_add(xml_op, PCMK_XA_ID, op_id); crm_xml_add(xml_op, PCMK__XA_OPERATION_KEY, key); crm_xml_add(xml_op, PCMK_XA_OPERATION, task); crm_xml_add(xml_op, PCMK_XA_CRM_DEBUG_ORIGIN, origin); crm_xml_add(xml_op, PCMK_XA_CRM_FEATURE_SET, caller_version); crm_xml_add(xml_op, PCMK__XA_TRANSITION_KEY, op->user_data); crm_xml_add(xml_op, PCMK__XA_TRANSITION_MAGIC, magic); crm_xml_add(xml_op, PCMK_XA_EXIT_REASON, pcmk__s(exit_reason, "")); crm_xml_add(xml_op, PCMK__META_ON_NODE, node); // For context during triage crm_xml_add_int(xml_op, PCMK__XA_CALL_ID, op->call_id); crm_xml_add_int(xml_op, PCMK__XA_RC_CODE, op->rc); crm_xml_add_int(xml_op, PCMK__XA_OP_STATUS, op->op_status); crm_xml_add_ms(xml_op, PCMK_META_INTERVAL, op->interval_ms); if (compare_version("2.1", caller_version) <= 0) { if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) { crm_trace("Timing data (" PCMK__OP_FMT "): last=%u change=%u exec=%u queue=%u", op->rsc_id, op->op_type, op->interval_ms, op->t_run, op->t_rcchange, op->exec_time, op->queue_time); if ((op->interval_ms != 0) && (op->t_rcchange != 0)) { // Recurring ops may have changed rc after initial run crm_xml_add_ll(xml_op, PCMK_XA_LAST_RC_CHANGE, (long long) op->t_rcchange); } else { crm_xml_add_ll(xml_op, PCMK_XA_LAST_RC_CHANGE, (long long) op->t_run); } crm_xml_add_int(xml_op, PCMK_XA_EXEC_TIME, op->exec_time); crm_xml_add_int(xml_op, PCMK_XA_QUEUE_TIME, op->queue_time); } } if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* Record PCMK__META_MIGRATE_SOURCE and PCMK__META_MIGRATE_TARGET always * for migrate ops. */ const char *name = PCMK__META_MIGRATE_SOURCE; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); name = PCMK__META_MIGRATE_TARGET; crm_xml_add(xml_op, name, crm_meta_value(op->params, name)); } add_op_digest_to_xml(op, xml_op); if (op_id_additional) { free(op_id); op_id = op_id_additional; op_id_additional = NULL; goto again; } if (local_user_data) { free(local_user_data); op->user_data = NULL; } free(magic); free(op_id); free(key); return xml_op; } /*! * \internal * \brief Check whether an action shutdown-locks a resource to a node * * If the PCMK_OPT_SHUTDOWN_LOCK cluster property is set, resources will not be * recovered on a different node if cleanly stopped, and may start only on that * same node. This function checks whether that applies to a given action, so * that the transition graph can be marked appropriately. * * \param[in] action Action to check * * \return true if \p action locks its resource to the action's node, * otherwise false */ bool pcmk__action_locks_rsc_to_node(const pcmk_action_t *action) { // Only resource actions taking place on resource's lock node are locked if ((action == NULL) || (action->rsc == NULL) || !pcmk__same_node(action->node, action->rsc->lock_node)) { return false; } /* During shutdown, only stops are locked (otherwise, another action such as * a demote would cause the controller to clear the lock) */ if (action->node->details->shutdown && (action->task != NULL) && (strcmp(action->task, PCMK_ACTION_STOP) != 0)) { return false; } return true; } /* lowest to highest */ static gint sort_action_id(gconstpointer a, gconstpointer b) { const pcmk__related_action_t *action_wrapper2 = a; const pcmk__related_action_t *action_wrapper1 = b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (action_wrapper1->action->id < action_wrapper2->action->id) { return 1; } if (action_wrapper1->action->id > action_wrapper2->action->id) { return -1; } return 0; } /*! * \internal * \brief Remove any duplicate action inputs, merging action flags * * \param[in,out] action Action whose inputs should be checked */ void pcmk__deduplicate_action_inputs(pcmk_action_t *action) { GList *item = NULL; GList *next = NULL; pcmk__related_action_t *last_input = NULL; action->actions_before = g_list_sort(action->actions_before, sort_action_id); for (item = action->actions_before; item != NULL; item = next) { pcmk__related_action_t *input = item->data; next = item->next; if ((last_input != NULL) && (input->action->id == last_input->action->id)) { crm_trace("Input %s (%d) duplicate skipped for action %s (%d)", input->action->uuid, input->action->id, action->uuid, action->id); /* For the purposes of scheduling, the ordering flags no longer * matter, but crm_simulate looks at certain ones when creating a * dot graph. Combining the flags is sufficient for that purpose. */ last_input->type |= input->type; if (input->state == pe_link_dumped) { last_input->state = pe_link_dumped; } free(item->data); action->actions_before = g_list_delete_link(action->actions_before, item); } else { last_input = input; input->state = pe_link_not_dumped; } } } /*! * \internal * \brief Output all scheduled actions * * \param[in,out] scheduler Scheduler data */ void pcmk__output_actions(pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv; // Output node (non-resource) actions for (GList *iter = scheduler->actions; iter != NULL; iter = iter->next) { char *node_name = NULL; char *task = NULL; pcmk_action_t *action = (pcmk_action_t *) iter->data; if (action->rsc != NULL) { continue; // Resource actions will be output later } else if (pcmk_is_set(action->flags, pcmk_action_optional)) { continue; // This action was not scheduled } if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { task = strdup("Shutdown"); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); task = crm_strdup_printf("Fence (%s)", op); } else { continue; // Don't display other node action types } if (pe__is_guest_node(action->node)) { const pcmk_resource_t *remote = action->node->details->remote_rsc; node_name = crm_strdup_printf("%s (resource: %s)", pcmk__node_name(action->node), remote->container->id); } else if (action->node != NULL) { node_name = crm_strdup_printf("%s", pcmk__node_name(action->node)); } out->message(out, "node-action", task, node_name, action->reason); free(node_name); free(task); } // Output resource actions for (GList *iter = scheduler->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; rsc->cmds->output_actions(rsc); } } /*! * \internal * \brief Get action name needed to compare digest for configuration changes * * \param[in] task Action name from history * \param[in] interval_ms Action interval (in milliseconds) * * \return Action name whose digest should be compared */ static const char * task_for_digest(const char *task, guint interval_ms) { /* Certain actions need to be compared against the parameters used to start * the resource. */ if ((interval_ms == 0) && pcmk__str_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_MIGRATE_FROM, PCMK_ACTION_PROMOTE, NULL)) { task = PCMK_ACTION_START; } return task; } /*! * \internal * \brief Check whether only sanitized parameters to an action changed * * When collecting CIB files for troubleshooting, crm_report will mask * sensitive resource parameters. If simulations were run using that, affected * resources would appear to need a restart, which would complicate * troubleshooting. To avoid that, we save a "secure digest" of non-sensitive * parameters. This function used that digest to check whether only masked * parameters are different. * * \param[in] xml_op Resource history entry with secure digest * \param[in] digest_data Operation digest information being compared * \param[in] scheduler Scheduler data * * \return true if only sanitized parameters changed, otherwise false */ static bool only_sanitized_changed(const xmlNode *xml_op, const pcmk__op_digest_t *digest_data, const pcmk_scheduler_t *scheduler) { const char *digest_secure = NULL; if (!pcmk_is_set(scheduler->flags, pcmk_sched_sanitized)) { // The scheduler is not being run as a simulation return false; } digest_secure = crm_element_value(xml_op, PCMK__XA_OP_SECURE_DIGEST); return (digest_data->rc != pcmk__digest_match) && (digest_secure != NULL) && (digest_data->digest_secure_calc != NULL) && (strcmp(digest_data->digest_secure_calc, digest_secure) == 0); } /*! * \internal * \brief Force a restart due to a configuration change * * \param[in,out] rsc Resource that action is for * \param[in] task Name of action whose configuration changed * \param[in] interval_ms Action interval (in milliseconds) * \param[in,out] node Node where resource should be restarted */ static void force_restart(pcmk_resource_t *rsc, const char *task, guint interval_ms, pcmk_node_t *node) { char *key = pcmk__op_key(rsc->id, task, interval_ms); pcmk_action_t *required = custom_action(rsc, key, task, NULL, FALSE, rsc->cluster); pe_action_set_reason(required, "resource definition change", true); trigger_unfencing(rsc, node, "Device parameters changed", NULL, rsc->cluster); } /*! * \internal * \brief Schedule a reload of a resource on a node * * \param[in,out] data Resource to reload * \param[in] user_data Where resource should be reloaded */ static void schedule_reload(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; const pcmk_node_t *node = user_data; pcmk_action_t *reload = NULL; // For collective resources, just call recursively for children if (rsc->variant > pcmk_rsc_variant_primitive) { g_list_foreach(rsc->children, schedule_reload, user_data); return; } // Skip the reload in certain situations if ((node == NULL) || !pcmk_is_set(rsc->flags, pcmk_rsc_managed) || pcmk_is_set(rsc->flags, pcmk_rsc_failed)) { pcmk__rsc_trace(rsc, "Skip reload of %s:%s%s %s", rsc->id, pcmk_is_set(rsc->flags, pcmk_rsc_managed)? "" : " unmanaged", pcmk_is_set(rsc->flags, pcmk_rsc_failed)? " failed" : "", (node == NULL)? "inactive" : node->details->uname); return; } /* If a resource's configuration changed while a start was pending, * force a full restart instead of a reload. */ if (pcmk_is_set(rsc->flags, pcmk_rsc_start_pending)) { pcmk__rsc_trace(rsc, "%s: preventing agent reload because start pending", rsc->id); custom_action(rsc, stop_key(rsc), PCMK_ACTION_STOP, node, FALSE, rsc->cluster); return; } // Schedule the reload pcmk__set_rsc_flags(rsc, pcmk_rsc_reload); reload = custom_action(rsc, reload_key(rsc), PCMK_ACTION_RELOAD_AGENT, node, FALSE, rsc->cluster); pe_action_set_reason(reload, "resource definition change", FALSE); // Set orderings so that a required stop or demote cancels the reload pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL, pcmk__ar_ordered|pcmk__ar_then_cancels_first, rsc->cluster); pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL, pcmk__ar_ordered|pcmk__ar_then_cancels_first, rsc->cluster); } /*! * \internal * \brief Handle any configuration change for an action * * Given an action from resource history, if the resource's configuration * changed since the action was done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, etc.). * * \param[in,out] rsc Resource that action is for * \param[in,out] node Node that action was on * \param[in] xml_op Action XML from resource history * * \return true if action configuration changed, otherwise false */ bool pcmk__check_action_config(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *xml_op) { guint interval_ms = 0; const char *task = NULL; const pcmk__op_digest_t *digest_data = NULL; CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL), return false); task = crm_element_value(xml_op, PCMK_XA_OPERATION); CRM_CHECK(task != NULL, return false); crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &interval_ms); // If this is a recurring action, check whether it has been orphaned if (interval_ms > 0) { if (pcmk__find_action_config(rsc, task, interval_ms, false) != NULL) { pcmk__rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); } else if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_cancel_removed_actions)) { pcmk__schedule_cancel(rsc, crm_element_value(xml_op, PCMK__XA_CALL_ID), task, interval_ms, node, "orphan"); return true; } else { pcmk__rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); return true; } } crm_trace("Checking %s-interval %s for %s on %s for configuration changes", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node)); task = task_for_digest(task, interval_ms); digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster); if (only_sanitized_changed(xml_op, digest_data, rsc->cluster)) { if (!pcmk__is_daemon && (rsc->cluster->priv != NULL)) { pcmk__output_t *out = rsc->cluster->priv; out->info(out, "Only 'private' parameters to %s-interval %s for %s " "on %s changed: %s", pcmk__readable_interval(interval_ms), task, rsc->id, pcmk__node_name(node), crm_element_value(xml_op, PCMK__XA_TRANSITION_MAGIC)); } return false; } switch (digest_data->rc) { case pcmk__digest_restart: crm_log_xml_debug(digest_data->params_restart, "params:restart"); force_restart(rsc, task, interval_ms, node); return true; case pcmk__digest_unknown: case pcmk__digest_mismatch: // Changes that can potentially be handled by an agent reload if (interval_ms > 0) { /* Recurring actions aren't reloaded per se, they are just * re-scheduled so the next run uses the new parameters. * The old instance will be cancelled automatically. */ crm_log_xml_debug(digest_data->params_all, "params:reschedule"); pcmk__reschedule_recurring(rsc, task, interval_ms, node); } else if (crm_element_value(xml_op, PCMK__XA_OP_RESTART_DIGEST) != NULL) { // Agent supports reload, so use it trigger_unfencing(rsc, node, "Device parameters changed (reload)", NULL, rsc->cluster); crm_log_xml_debug(digest_data->params_all, "params:reload"); schedule_reload((gpointer) rsc, (gpointer) node); } else { pcmk__rsc_trace(rsc, "Restarting %s " "because agent doesn't support reload", rsc->id); crm_log_xml_debug(digest_data->params_restart, "params:restart"); force_restart(rsc, task, interval_ms, node); } return true; default: break; } return false; } /*! * \internal * \brief Create a list of resource's action history entries, sorted by call ID * * \param[in] rsc_entry Resource's status XML * \param[out] start_index Where to store index of start-like action, if any * \param[out] stop_index Where to store index of stop action, if any */ static GList * rsc_history_as_list(const xmlNode *rsc_entry, int *start_index, int *stop_index) { GList *ops = NULL; for (xmlNode *rsc_op = first_named_child(rsc_entry, XML_LRM_TAG_RSC_OP); rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) { ops = g_list_prepend(ops, rsc_op); } ops = g_list_sort(ops, sort_op_by_callid); calculate_active_ops(ops, start_index, stop_index); return ops; } /*! * \internal * \brief Process a resource's action history from the CIB status * * Given a resource's action history, if the resource's configuration * changed since the actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in] rsc_entry Resource's status XML * \param[in,out] rsc Resource whose history is being processed * \param[in,out] node Node whose history is being processed */ static void process_rsc_history(const xmlNode *rsc_entry, pcmk_resource_t *rsc, pcmk_node_t *node) { int offset = -1; int stop_index = 0; int start_index = 0; GList *sorted_op_list = NULL; if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) { if (pe_rsc_is_anon_clone(pe__const_top_resource(rsc, false))) { pcmk__rsc_trace(rsc, "Skipping configuration check " "for orphaned clone instance %s", rsc->id); } else { pcmk__rsc_trace(rsc, "Skipping configuration check and scheduling " "clean-up for orphaned resource %s", rsc->id); pcmk__schedule_cleanup(rsc, node, false); } return; } if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) { if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) { pcmk__schedule_cleanup(rsc, node, false); } pcmk__rsc_trace(rsc, "Skipping configuration check for %s " "because no longer active on %s", rsc->id, pcmk__node_name(node)); return; } pcmk__rsc_trace(rsc, "Checking for configuration changes for %s on %s", rsc->id, pcmk__node_name(node)); if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) { pcmk__schedule_cleanup(rsc, node, false); } sorted_op_list = rsc_history_as_list(rsc_entry, &start_index, &stop_index); if (start_index < stop_index) { return; // Resource is stopped } for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { xmlNode *rsc_op = (xmlNode *) iter->data; const char *task = NULL; guint interval_ms = 0; if (++offset < start_index) { // Skip actions that happened before a start continue; } task = crm_element_value(rsc_op, PCMK_XA_OPERATION); crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms); if ((interval_ms > 0) && (pcmk_is_set(rsc->flags, pcmk_rsc_maintenance) || node->details->maintenance)) { // Maintenance mode cancels recurring operations pcmk__schedule_cancel(rsc, crm_element_value(rsc_op, PCMK__XA_CALL_ID), task, interval_ms, node, "maintenance mode"); } else if ((interval_ms > 0) || pcmk__strcase_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_PROMOTE, PCMK_ACTION_MIGRATE_FROM, NULL)) { /* If a resource operation failed, and the operation's definition * has changed, clear any fail count so they can be retried fresh. */ if (pe__bundle_needs_remote_name(rsc)) { /* We haven't assigned resources to nodes yet, so if the * REMOTE_CONTAINER_HACK is used, we may calculate the digest * based on the literal "#uname" value rather than the properly * substituted value. That would mistakenly make the action * definition appear to have been changed. Defer the check until * later in this case. */ pe__add_param_check(rsc_op, rsc, node, pcmk__check_active, rsc->cluster); } else if (pcmk__check_action_config(rsc, node, rsc_op) && (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL) != 0)) { pe__clear_failcount(rsc, node, "action definition changed", rsc->cluster); } } } g_list_free(sorted_op_list); } /*! * \internal * \brief Process a node's action history from the CIB status * * Given a node's resource history, if the resource's configuration changed * since the actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in,out] node Node whose history is being processed * \param[in] lrm_rscs Node's from CIB status XML */ static void process_node_history(pcmk_node_t *node, const xmlNode *lrm_rscs) { crm_trace("Processing node history for %s", pcmk__node_name(node)); for (const xmlNode *rsc_entry = first_named_child(lrm_rscs, XML_LRM_TAG_RESOURCE); rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { if (rsc_entry->children != NULL) { GList *result = pcmk__rscs_matching_id(ID(rsc_entry), node->details->data_set); for (GList *iter = result; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (rsc->variant == pcmk_rsc_variant_primitive) { process_rsc_history(rsc_entry, rsc, node); } } g_list_free(result); } } } // XPath to find a node's resource history #define XPATH_NODE_HISTORY "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE \ "[@" PCMK_XA_UNAME "='%s']" \ - "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES + "/" PCMK__XE_LRM "/" XML_LRM_TAG_RESOURCES /*! * \internal * \brief Process any resource configuration changes in the CIB status * * Go through all nodes' resource history, and if a resource's configuration * changed since its actions were done, schedule any actions needed (restart, * reload, unfencing, rescheduling recurring actions, clean-up, etc.). * (This also cancels recurring actions for maintenance mode, which is not * entirely related but convenient to do here.) * * \param[in,out] scheduler Scheduler data */ void pcmk__handle_rsc_config_changes(pcmk_scheduler_t *scheduler) { crm_trace("Check resource and action configuration for changes"); /* Rather than iterate through the status section, iterate through the nodes * and search for the appropriate status subsection for each. This skips * orphaned nodes and lets us eliminate some cases before searching the XML. */ for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; /* Don't bother checking actions for a node that can't run actions ... * unless it's in maintenance mode, in which case we still need to * cancel any existing recurring monitors. */ if (node->details->maintenance || pcmk__node_available(node, false, false)) { char *xpath = NULL; xmlNode *history = NULL; xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->details->uname); history = get_xpath_object(xpath, scheduler->input, LOG_NEVER); free(xpath); process_node_history(node, history); } } } diff --git a/lib/pacemaker/pcmk_simulate.c b/lib/pacemaker/pcmk_simulate.c index b4f5cd8645..f1ed5c7987 100644 --- a/lib/pacemaker/pcmk_simulate.c +++ b/lib/pacemaker/pcmk_simulate.c @@ -1,1005 +1,1005 @@ /* * Copyright 2021-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" static pcmk__output_t *out = NULL; static cib_t *fake_cib = NULL; static GList *fake_resource_list = NULL; static const GList *fake_op_fail_list = NULL; static void set_effective_date(pcmk_scheduler_t *scheduler, bool print_original, const char *use_date); /*! * \internal * \brief Create an action name for use in a dot graph * * \param[in] action Action to create name for * \param[in] verbose If true, add action ID to name * * \return Newly allocated string with action name * \note It is the caller's responsibility to free the result. */ static char * create_action_name(const pcmk_action_t *action, bool verbose) { char *action_name = NULL; const char *prefix = ""; const char *action_host = NULL; const char *clone_name = NULL; const char *task = action->task; if (action->node != NULL) { action_host = action->node->details->uname; } else if (!pcmk_is_set(action->flags, pcmk_action_pseudo)) { action_host = ""; } if (pcmk__str_eq(action->task, PCMK_ACTION_CANCEL, pcmk__str_none)) { prefix = "Cancel "; task = action->cancel_task; } if (action->rsc != NULL) { clone_name = action->rsc->clone_name; } if (clone_name != NULL) { char *key = NULL; guint interval_ms = 0; if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY, PCMK_ACTION_NOTIFIED, NULL)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_key_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_key_operation"); CRM_ASSERT(n_type != NULL); CRM_ASSERT(n_task != NULL); key = pcmk__notify_key(clone_name, n_type, n_task); } else { key = pcmk__op_key(clone_name, task, interval_ms); } if (action_host != NULL) { action_name = crm_strdup_printf("%s%s %s", prefix, key, action_host); } else { action_name = crm_strdup_printf("%s%s", prefix, key); } free(key); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { const char *op = g_hash_table_lookup(action->meta, "stonith_action"); action_name = crm_strdup_printf("%s%s '%s' %s", prefix, action->task, op, action_host); } else if (action->rsc && action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->uuid, action_host); } else if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->task, action_host); } else { action_name = crm_strdup_printf("%s", action->uuid); } if (verbose) { char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id); free(action_name); action_name = with_id; } return action_name; } /*! * \internal * \brief Display the status of a cluster * * \param[in,out] scheduler Scheduler data * \param[in] show_opts How to modify display (as pcmk_show_opt_e flags) * \param[in] section_opts Sections to display (as pcmk_section_e flags) * \param[in] title What to use as list title * \param[in] print_spacer Whether to display a spacer first */ static void print_cluster_status(pcmk_scheduler_t *scheduler, uint32_t show_opts, uint32_t section_opts, const char *title, bool print_spacer) { pcmk__output_t *out = scheduler->priv; GList *all = NULL; crm_exit_t stonith_rc = 0; enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid; section_opts |= pcmk_section_nodes | pcmk_section_resources; show_opts |= pcmk_show_inactive_rscs | pcmk_show_failed_detail; all = g_list_prepend(all, (gpointer) "*"); PCMK__OUTPUT_SPACER_IF(out, print_spacer); out->begin_list(out, NULL, NULL, "%s", title); out->message(out, "cluster-status", scheduler, state, stonith_rc, NULL, false, section_opts, show_opts, NULL, all, all); out->end_list(out); g_list_free(all); } /*! * \internal * \brief Display a summary of all actions scheduled in a transition * * \param[in,out] scheduler Scheduler data (fully scheduled) * \param[in] print_spacer Whether to display a spacer first */ static void print_transition_summary(pcmk_scheduler_t *scheduler, bool print_spacer) { pcmk__output_t *out = scheduler->priv; PCMK__OUTPUT_SPACER_IF(out, print_spacer); out->begin_list(out, NULL, NULL, "Transition Summary"); pcmk__output_actions(scheduler); out->end_list(out); } /*! * \internal * \brief Reset scheduler input, output, date, and flags * * \param[in,out] scheduler Scheduler data * \param[in] input What to set as cluster input * \param[in] out What to set as cluster output object * \param[in] use_date What to set as cluster's current timestamp * \param[in] flags Group of enum pcmk_scheduler_flags to set */ static void reset(pcmk_scheduler_t *scheduler, xmlNodePtr input, pcmk__output_t *out, const char *use_date, unsigned int flags) { scheduler->input = input; scheduler->priv = out; set_effective_date(scheduler, true, use_date); if (pcmk_is_set(flags, pcmk_sim_sanitized)) { pcmk__set_scheduler_flags(scheduler, pcmk_sched_sanitized); } if (pcmk_is_set(flags, pcmk_sim_show_scores)) { pcmk__set_scheduler_flags(scheduler, pcmk_sched_output_scores); } if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { pcmk__set_scheduler_flags(scheduler, pcmk_sched_show_utilization); } } /*! * \brief Write out a file in dot(1) format describing the actions that will * be taken by the scheduler in response to an input CIB file. * * \param[in,out] scheduler Scheduler data * \param[in] dot_file The filename to write * \param[in] all_actions Write all actions, even those that are optional * or are on unmanaged resources * \param[in] verbose Add extra information, such as action IDs, to the * output * * \return Standard Pacemaker return code */ static int write_sim_dotfile(pcmk_scheduler_t *scheduler, const char *dot_file, bool all_actions, bool verbose) { GList *iter = NULL; FILE *dot_strm = fopen(dot_file, "w"); if (dot_strm == NULL) { return errno; } fprintf(dot_strm, " digraph \"g\" {\n"); for (iter = scheduler->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; const char *style = "dashed"; const char *font = "black"; const char *color = "black"; char *action_name = create_action_name(action, verbose); if (pcmk_is_set(action->flags, pcmk_action_pseudo)) { font = "orange"; } if (pcmk_is_set(action->flags, pcmk_action_added_to_graph)) { style = "bold"; color = "green"; } else if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pcmk_rsc_managed)) { color = "red"; font = "purple"; if (!all_actions) { goto do_not_write; } } else if (pcmk_is_set(action->flags, pcmk_action_optional)) { color = "blue"; if (!all_actions) { goto do_not_write; } } else { color = "red"; CRM_LOG_ASSERT(!pcmk_is_set(action->flags, pcmk_action_runnable)); } pcmk__set_action_flags(action, pcmk_action_added_to_graph); fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n", action_name, style, color, font); do_not_write: free(action_name); } for (iter = scheduler->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; for (GList *before_iter = action->actions_before; before_iter != NULL; before_iter = before_iter->next) { pcmk__related_action_t *before = before_iter->data; char *before_name = NULL; char *after_name = NULL; const char *style = "dashed"; bool optional = true; if (before->state == pe_link_dumped) { optional = false; style = "bold"; } else if ((uint32_t) before->type == pcmk__ar_none) { continue; } else if (pcmk_is_set(before->action->flags, pcmk_action_added_to_graph) && pcmk_is_set(action->flags, pcmk_action_added_to_graph) && (uint32_t) before->type != pcmk__ar_if_on_same_node_or_target) { optional = false; } if (all_actions || !optional) { before_name = create_action_name(before->action, verbose); after_name = create_action_name(action, verbose); fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", before_name, after_name, style); free(before_name); free(after_name); } } } fprintf(dot_strm, "}\n"); fflush(dot_strm); fclose(dot_strm); return pcmk_rc_ok; } /*! * \brief Profile the configuration updates and scheduler actions in a single * CIB file, printing the profiling timings. * * \note \p scheduler->priv must have been set to a valid \p pcmk__output_t * object before this function is called. * * \param[in] xml_file The CIB file to profile * \param[in] repeat Number of times to run * \param[in,out] scheduler Scheduler data * \param[in] use_date The date to set the cluster's time to (may be NULL) */ static void profile_file(const char *xml_file, long long repeat, pcmk_scheduler_t *scheduler, const char *use_date) { pcmk__output_t *out = scheduler->priv; xmlNode *cib_object = NULL; clock_t start = 0; clock_t end; unsigned long long scheduler_flags = pcmk_sched_no_compat; CRM_ASSERT(out != NULL); cib_object = filename2xml(xml_file); start = clock(); if (pcmk_find_cib_element(cib_object, PCMK_XE_STATUS) == NULL) { create_xml_node(cib_object, PCMK_XE_STATUS); } if (cli_config_update(&cib_object, NULL, FALSE) == FALSE) { free_xml(cib_object); return; } if (validate_xml(cib_object, NULL, FALSE) != TRUE) { free_xml(cib_object); return; } if (pcmk_is_set(scheduler->flags, pcmk_sched_output_scores)) { scheduler_flags |= pcmk_sched_output_scores; } if (pcmk_is_set(scheduler->flags, pcmk_sched_show_utilization)) { scheduler_flags |= pcmk_sched_show_utilization; } for (int i = 0; i < repeat; ++i) { xmlNode *input = (repeat == 1)? cib_object : copy_xml(cib_object); scheduler->input = input; set_effective_date(scheduler, false, use_date); pcmk__schedule_actions(input, scheduler_flags, scheduler); pe_reset_working_set(scheduler); } end = clock(); out->message(out, "profile", xml_file, start, end); } void pcmk__profile_dir(const char *dir, long long repeat, pcmk_scheduler_t *scheduler, const char *use_date) { pcmk__output_t *out = scheduler->priv; struct dirent **namelist; int file_num = scandir(dir, &namelist, 0, alphasort); CRM_ASSERT(out != NULL); if (file_num > 0) { struct stat prop; char buffer[FILENAME_MAX]; out->begin_list(out, NULL, NULL, "Timings"); while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (!pcmk__ends_with_ext(namelist[file_num]->d_name, ".xml")) { free(namelist[file_num]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { profile_file(buffer, repeat, scheduler, use_date); } free(namelist[file_num]); } free(namelist); out->end_list(out); } } /*! * \brief Set the date of the cluster, either to the value given by * \p use_date, or to the "execution-date" value in the CIB. * * \note \p scheduler->priv must have been set to a valid \p pcmk__output_t * object before this function is called. * * \param[in,out] scheduler Scheduler data * \param[in] print_original If \p true, the "execution-date" should * also be printed * \param[in] use_date The date to set the cluster's time to * (may be NULL) */ static void set_effective_date(pcmk_scheduler_t *scheduler, bool print_original, const char *use_date) { pcmk__output_t *out = scheduler->priv; time_t original_date = 0; CRM_ASSERT(out != NULL); crm_element_value_epoch(scheduler->input, "execution-date", &original_date); if (use_date) { scheduler->now = crm_time_new(use_date); out->info(out, "Setting effective cluster time: %s", use_date); crm_time_log(LOG_NOTICE, "Pretending 'now' is", scheduler->now, crm_time_log_date | crm_time_log_timeofday); } else if (original_date != 0) { scheduler->now = pcmk__copy_timet(original_date); if (print_original) { char *when = crm_time_as_string(scheduler->now, crm_time_log_date|crm_time_log_timeofday); out->info(out, "Using the original execution date of: %s", when); free(when); } } } /*! * \internal * \brief Simulate successfully executing a pseudo-action in a graph * * \param[in,out] graph Graph to update with pseudo-action result * \param[in,out] action Pseudo-action to simulate executing * * \return Standard Pacemaker return code */ static int simulate_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *node = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *task = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); out->message(out, "inject-pseudo-action", node, task); pcmk__update_graph(graph, action); return pcmk_rc_ok; } /*! * \internal * \brief Simulate executing a resource action in a graph * * \param[in,out] graph Graph to update with resource action result * \param[in,out] action Resource action to simulate executing * * \return Standard Pacemaker return code */ static int simulate_resource_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { int rc; lrmd_event_data_t *op = NULL; int target_outcome = PCMK_OCF_OK; const char *rtype = NULL; const char *rclass = NULL; const char *resource = NULL; const char *rprovider = NULL; const char *resource_config_name = NULL; const char *operation = crm_element_value(action->xml, PCMK_XA_OPERATION); const char *target_rc_s = crm_meta_value(action->params, PCMK__META_OP_TARGET_RC); xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; xmlNode *action_rsc = first_named_child(action->xml, PCMK_XE_PRIMITIVE); char *node = crm_element_value_copy(action->xml, PCMK__META_ON_NODE); char *uuid = NULL; const char *router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE); // Certain actions don't need to be displayed or history entries if (pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { crm_debug("No history injection for %s op on %s", operation, node); goto done; // Confirm action and update graph } if (action_rsc == NULL) { // Shouldn't be possible crm_log_xml_err(action->xml, "Bad"); free(node); return EPROTO; } /* A resource might be known by different names in the configuration and in * the action (for example, a clone instance). Grab the configuration name * (which is preferred when writing history), and if necessary, the instance * name. */ resource_config_name = crm_element_value(action_rsc, PCMK_XA_ID); if (resource_config_name == NULL) { // Shouldn't be possible crm_log_xml_err(action->xml, "No ID"); free(node); return EPROTO; } resource = resource_config_name; if (pe_find_resource(fake_resource_list, resource) == NULL) { const char *longname = crm_element_value(action_rsc, PCMK__XA_LONG_ID); if ((longname != NULL) && (pe_find_resource(fake_resource_list, longname) != NULL)) { resource = longname; } } // Certain actions need to be displayed but don't need history entries if (pcmk__strcase_any_of(operation, PCMK_ACTION_DELETE, PCMK_ACTION_META_DATA, NULL)) { out->message(out, "inject-rsc-action", resource, operation, node, (guint) 0); goto done; // Confirm action and update graph } rclass = crm_element_value(action_rsc, PCMK_XA_CLASS); rtype = crm_element_value(action_rsc, PCMK_XA_TYPE); rprovider = crm_element_value(action_rsc, PCMK_XA_PROVIDER); pcmk__scan_min_int(target_rc_s, &target_outcome, 0); CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call|cib_scope_local) == pcmk_ok); // Ensure the action node is in the CIB uuid = crm_element_value_copy(action->xml, PCMK__META_ON_NODE_UUID); cib_node = pcmk__inject_node(fake_cib, node, ((router_node == NULL)? uuid: node)); free(uuid); CRM_ASSERT(cib_node != NULL); // Add a history entry for the action cib_resource = pcmk__inject_resource_history(out, cib_node, resource, resource_config_name, rclass, rtype, rprovider); if (cib_resource == NULL) { crm_err("Could not simulate action %d history for resource %s", action->id, resource); free(node); free_xml(cib_node); return EINVAL; } // Simulate and display an executor event for the action result op = pcmk__event_from_graph_action(cib_resource, action, PCMK_EXEC_DONE, target_outcome, "User-injected result"); out->message(out, "inject-rsc-action", resource, op->op_type, node, op->interval_ms); // Check whether action is in a list of desired simulated failures for (const GList *iter = fake_op_fail_list; iter != NULL; iter = iter->next) { const char *spec = (const char *) iter->data; char *key = NULL; const char *match_name = NULL; // Allow user to specify anonymous clone with or without instance number key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = resource; } free(key); // If not found, try the resource's name in the configuration if ((match_name == NULL) && (strcmp(resource, resource_config_name) != 0)) { key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource_config_name, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = resource_config_name; } free(key); } if (match_name == NULL) { continue; // This failed action entry doesn't match } // ${match_name}_${task}_${interval_in_ms}@${node}=${rc} rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc); if (rc != 1) { out->err(out, "Invalid failed operation '%s' " "(result code must be integer)", spec); continue; // Keep checking other list entries } out->info(out, "Pretending action %d failed with rc=%d", action->id, op->rc); pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); graph->abort_priority = INFINITY; pcmk__inject_failcount(out, cib_node, match_name, op->op_type, op->interval_ms, op->rc); break; } pcmk__inject_action_result(cib_resource, op, target_outcome); lrmd_free_event(op); rc = fake_cib->cmds->modify(fake_cib, PCMK_XE_STATUS, cib_node, cib_sync_call|cib_scope_local); CRM_ASSERT(rc == pcmk_ok); done: free(node); free_xml(cib_node); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(graph, action); return pcmk_rc_ok; } /*! * \internal * \brief Simulate successfully executing a cluster action * * \param[in,out] graph Graph to update with action result * \param[in,out] action Cluster action to simulate * * \return Standard Pacemaker return code */ static int simulate_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *node = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION); xmlNode *rsc = first_named_child(action->xml, PCMK_XE_PRIMITIVE); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); out->message(out, "inject-cluster-action", node, task, rsc); pcmk__update_graph(graph, action); return pcmk_rc_ok; } /*! * \internal * \brief Simulate successfully executing a fencing action * * \param[in,out] graph Graph to update with action result * \param[in,out] action Fencing action to simulate * * \return Standard Pacemaker return code */ static int simulate_fencing_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *op = crm_meta_value(action->params, "stonith_action"); char *target = crm_element_value_copy(action->xml, PCMK__META_ON_NODE); out->message(out, "inject-fencing-action", target, op); if (!pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) { int rc = pcmk_ok; GString *xpath = g_string_sized_new(512); // Set node state to offline xmlNode *cib_node = pcmk__inject_node_state_change(fake_cib, target, false); CRM_ASSERT(cib_node != NULL); crm_xml_add(cib_node, PCMK_XA_CRM_DEBUG_ORIGIN, __func__); rc = fake_cib->cmds->replace(fake_cib, PCMK_XE_STATUS, cib_node, cib_sync_call|cib_scope_local); CRM_ASSERT(rc == pcmk_ok); // Simulate controller clearing node's resource history and attributes pcmk__g_strcat(xpath, "//" PCMK__XE_NODE_STATE - "[@" PCMK_XA_UNAME "='", target, "']/" XML_CIB_TAG_LRM, + "[@" PCMK_XA_UNAME "='", target, "']/" PCMK__XE_LRM, NULL); fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL, cib_xpath|cib_sync_call|cib_scope_local); g_string_truncate(xpath, 0); pcmk__g_strcat(xpath, "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='", target, "']" "/" XML_TAG_TRANSIENT_NODEATTRS, NULL); fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL, cib_xpath|cib_sync_call|cib_scope_local); free_xml(cib_node); g_string_free(xpath, TRUE); } pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(graph, action); free(target); return pcmk_rc_ok; } enum pcmk__graph_status pcmk__simulate_transition(pcmk_scheduler_t *scheduler, cib_t *cib, const GList *op_fail_list) { pcmk__graph_t *transition = NULL; enum pcmk__graph_status graph_rc; pcmk__graph_functions_t simulation_fns = { simulate_pseudo_action, simulate_resource_action, simulate_cluster_action, simulate_fencing_action, }; out = scheduler->priv; fake_cib = cib; fake_op_fail_list = op_fail_list; if (!out->is_quiet(out)) { out->begin_list(out, NULL, NULL, "Executing Cluster Transition"); } pcmk__set_graph_functions(&simulation_fns); transition = pcmk__unpack_graph(scheduler->graph, crm_system_name); pcmk__log_graph(LOG_DEBUG, transition); fake_resource_list = scheduler->resources; do { graph_rc = pcmk__execute_graph(transition); } while (graph_rc == pcmk__graph_active); fake_resource_list = NULL; if (graph_rc != pcmk__graph_complete) { out->err(out, "Transition failed: %s", pcmk__graph_status2text(graph_rc)); pcmk__log_graph(LOG_ERR, transition); out->err(out, "An invalid transition was produced"); } pcmk__free_graph(transition); if (!out->is_quiet(out)) { // If not quiet, we'll need the resulting CIB for later display xmlNode *cib_object = NULL; int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call|cib_scope_local); CRM_ASSERT(rc == pcmk_ok); pe_reset_working_set(scheduler); scheduler->input = cib_object; out->end_list(out); } return graph_rc; } int pcmk__simulate(pcmk_scheduler_t *scheduler, pcmk__output_t *out, const pcmk_injections_t *injections, unsigned int flags, uint32_t section_opts, const char *use_date, const char *input_file, const char *graph_file, const char *dot_file) { int printed = pcmk_rc_no_output; int rc = pcmk_rc_ok; xmlNodePtr input = NULL; cib_t *cib = NULL; rc = cib__signon_query(out, &cib, &input); if (rc != pcmk_rc_ok) { goto simulate_done; } reset(scheduler, input, out, use_date, flags); cluster_status(scheduler); if ((cib->variant == cib_native) && pcmk_is_set(section_opts, pcmk_section_times)) { if (pcmk__our_nodename == NULL) { // Currently used only in the times section pcmk__query_node_name(out, 0, &pcmk__our_nodename, 0); } scheduler->localhost = pcmk__our_nodename; } if (!out->is_quiet(out)) { const bool show_pending = pcmk_is_set(flags, pcmk_sim_show_pending); if (pcmk_is_set(scheduler->flags, pcmk_sched_in_maintenance)) { printed = out->message(out, "maint-mode", scheduler->flags); } if (scheduler->disabled_resources || scheduler->blocked_resources) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); printed = out->info(out, "%d of %d resource instances DISABLED and " "%d BLOCKED from further action due to failure", scheduler->disabled_resources, scheduler->ninstances, scheduler->blocked_resources); } /* Most formatted output headers use caps for each word, but this one * only has the first word capitalized for compatibility with pcs. */ print_cluster_status(scheduler, (show_pending? pcmk_show_pending : 0), section_opts, "Current cluster status", (printed == pcmk_rc_ok)); printed = pcmk_rc_ok; } // If the user requested any injections, handle them if ((injections->node_down != NULL) || (injections->node_fail != NULL) || (injections->node_up != NULL) || (injections->op_inject != NULL) || (injections->ticket_activate != NULL) || (injections->ticket_grant != NULL) || (injections->ticket_revoke != NULL) || (injections->ticket_standby != NULL) || (injections->watchdog != NULL)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); pcmk__inject_scheduler_input(scheduler, cib, injections); printed = pcmk_rc_ok; rc = cib->cmds->query(cib, NULL, &input, cib_sync_call); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); goto simulate_done; } cleanup_calculations(scheduler); reset(scheduler, input, out, use_date, flags); cluster_status(scheduler); } if (input_file != NULL) { rc = write_xml_file(input, input_file, FALSE); if (rc < 0) { rc = pcmk_legacy2rc(rc); goto simulate_done; } } if (pcmk_any_flags_set(flags, pcmk_sim_process | pcmk_sim_simulate)) { pcmk__output_t *logger_out = NULL; unsigned long long scheduler_flags = pcmk_sched_no_compat; if (pcmk_is_set(scheduler->flags, pcmk_sched_output_scores)) { scheduler_flags |= pcmk_sched_output_scores; } if (pcmk_is_set(scheduler->flags, pcmk_sched_show_utilization)) { scheduler_flags |= pcmk_sched_show_utilization; } if (pcmk_all_flags_set(scheduler->flags, pcmk_sched_output_scores |pcmk_sched_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Assignment Scores and Utilization Information"); printed = pcmk_rc_ok; } else if (pcmk_is_set(scheduler->flags, pcmk_sched_output_scores)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Assignment Scores"); printed = pcmk_rc_ok; } else if (pcmk_is_set(scheduler->flags, pcmk_sched_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Utilization Information"); printed = pcmk_rc_ok; } else { rc = pcmk__log_output_new(&logger_out); if (rc != pcmk_rc_ok) { goto simulate_done; } pe__register_messages(logger_out); pcmk__register_lib_messages(logger_out); scheduler->priv = logger_out; } pcmk__schedule_actions(input, scheduler_flags, scheduler); if (logger_out == NULL) { out->end_list(out); } else { logger_out->finish(logger_out, CRM_EX_OK, true, NULL); pcmk__output_free(logger_out); scheduler->priv = out; } input = NULL; /* Don't try and free it twice */ if (graph_file != NULL) { rc = write_xml_file(scheduler->graph, graph_file, FALSE); if (rc < 0) { rc = pcmk_rc_graph_error; goto simulate_done; } } if (dot_file != NULL) { rc = write_sim_dotfile(scheduler, dot_file, pcmk_is_set(flags, pcmk_sim_all_actions), pcmk_is_set(flags, pcmk_sim_verbose)); if (rc != pcmk_rc_ok) { rc = pcmk_rc_dot_error; goto simulate_done; } } if (!out->is_quiet(out)) { print_transition_summary(scheduler, printed == pcmk_rc_ok); } } rc = pcmk_rc_ok; if (!pcmk_is_set(flags, pcmk_sim_simulate)) { goto simulate_done; } PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); if (pcmk__simulate_transition(scheduler, cib, injections->op_fail) != pcmk__graph_complete) { rc = pcmk_rc_invalid_transition; } if (out->is_quiet(out)) { goto simulate_done; } set_effective_date(scheduler, true, use_date); if (pcmk_is_set(flags, pcmk_sim_show_scores)) { pcmk__set_scheduler_flags(scheduler, pcmk_sched_output_scores); } if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { pcmk__set_scheduler_flags(scheduler, pcmk_sched_show_utilization); } cluster_status(scheduler); print_cluster_status(scheduler, 0, section_opts, "Revised Cluster Status", true); simulate_done: cib__clean_up_connection(&cib); return rc; } int pcmk_simulate(xmlNodePtr *xml, pcmk_scheduler_t *scheduler, const pcmk_injections_t *injections, unsigned int flags, unsigned int section_opts, const char *use_date, const char *input_file, const char *graph_file, const char *dot_file) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pe__register_messages(out); pcmk__register_lib_messages(out); rc = pcmk__simulate(scheduler, out, injections, flags, section_opts, use_date, input_file, graph_file, dot_file); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c index 4afefb9ce5..622419cb69 100644 --- a/lib/pengine/pe_output.c +++ b/lib/pengine/pe_output.c @@ -1,3210 +1,3210 @@ /* * Copyright 2019-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include const char * pe__resource_description(const pcmk_resource_t *rsc, uint32_t show_opts) { const char * desc = NULL; // User-supplied description if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only|pcmk_show_description)) { desc = crm_element_value(rsc->xml, PCMK_XA_DESCRIPTION); } return desc; } /* Never display node attributes whose name starts with one of these prefixes */ #define FILTER_STR { PCMK__FAIL_COUNT_PREFIX, PCMK__LAST_FAILURE_PREFIX, \ "shutdown", PCMK_NODE_ATTR_TERMINATE, "standby", "#", \ NULL } static int compare_attribute(gconstpointer a, gconstpointer b) { int rc; rc = strcmp((const char *)a, (const char *)b); return rc; } /*! * \internal * \brief Determine whether extended information about an attribute should be added. * * \param[in] node Node that ran this resource * \param[in,out] rsc_list List of resources for this node * \param[in,out] scheduler Scheduler data * \param[in] attrname Attribute to find * \param[out] expected_score Expected value for this attribute * * \return true if extended information should be printed, false otherwise * \note Currently, extended information is only supported for ping/pingd * resources, for which a message will be printed if connectivity is lost * or degraded. */ static bool add_extra_info(const pcmk_node_t *node, GList *rsc_list, pcmk_scheduler_t *scheduler, const char *attrname, int *expected_score) { GList *gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, PCMK_XA_TYPE); const char *name = NULL; GHashTable *params = NULL; if (rsc->children != NULL) { if (add_extra_info(node, rsc->children, scheduler, attrname, expected_score)) { return true; } } if (!pcmk__strcase_any_of(type, "ping", "pingd", NULL)) { continue; } params = pe_rsc_params(rsc, node, scheduler); name = g_hash_table_lookup(params, PCMK_XA_NAME); if (name == NULL) { name = "pingd"; } /* To identify the resource with the attribute name. */ if (pcmk__str_eq(name, attrname, pcmk__str_casei)) { int host_list_num = 0; const char *hosts = g_hash_table_lookup(params, "host_list"); const char *multiplier = g_hash_table_lookup(params, "multiplier"); int multiplier_i; if (hosts) { char **host_list = g_strsplit(hosts, " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); } if ((multiplier == NULL) || (pcmk__scan_min_int(multiplier, &multiplier_i, INT_MIN) != pcmk_rc_ok)) { /* The ocf:pacemaker:ping resource agent defaults multiplier to * 1. The agent currently does not handle invalid text, but it * should, and this would be a reasonable choice ... */ multiplier_i = 1; } *expected_score = host_list_num * multiplier_i; return true; } } return false; } static GList * filter_attr_list(GList *attr_list, char *name) { int i; const char *filt_str[] = FILTER_STR; CRM_CHECK(name != NULL, return attr_list); /* filtering automatic attributes */ for (i = 0; filt_str[i] != NULL; i++) { if (g_str_has_prefix(name, filt_str[i])) { return attr_list; } } return g_list_insert_sorted(attr_list, name, compare_attribute); } static GList * get_operation_list(xmlNode *rsc_entry) { GList *op_list = NULL; xmlNode *rsc_op = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { const char *task = crm_element_value(rsc_op, PCMK_XA_OPERATION); const char *interval_ms_s = crm_element_value(rsc_op, PCMK_META_INTERVAL); const char *op_rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* Ignore notifies and some probes */ if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none) || (pcmk__str_eq(task, "probe", pcmk__str_none) && (op_rc_i == CRM_EX_NOT_RUNNING))) { continue; } if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_append(op_list, rsc_op); } } op_list = g_list_sort(op_list, sort_op_by_callid); return op_list; } static void add_dump_node(gpointer key, gpointer value, gpointer user_data) { xmlNodePtr node = user_data; pcmk_create_xml_text_node(node, (const char *) key, (const char *) value); } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; char *new_text = crm_strdup_printf("%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } #define XPATH_STACK "//" PCMK_XE_NVPAIR \ "[@" PCMK_XA_NAME "='" \ PCMK_OPT_CLUSTER_INFRASTRUCTURE "']" static const char * get_cluster_stack(pcmk_scheduler_t *scheduler) { xmlNode *stack = get_xpath_object(XPATH_STACK, scheduler->input, LOG_DEBUG); return (stack != NULL)? crm_element_value(stack, PCMK_XA_VALUE) : "unknown"; } static char * last_changed_string(const char *last_written, const char *user, const char *client, const char *origin) { if (last_written != NULL || user != NULL || client != NULL || origin != NULL) { return crm_strdup_printf("%s%s%s%s%s%s%s", last_written ? last_written : "", user ? " by " : "", user ? user : "", client ? " via " : "", client ? client : "", origin ? " on " : "", origin ? origin : ""); } else { return strdup(""); } } static char * op_history_string(xmlNode *xml_op, const char *task, const char *interval_ms_s, int rc, bool print_timing) { const char *call = crm_element_value(xml_op, PCMK__XA_CALL_ID); char *interval_str = NULL; char *buf = NULL; if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *pair = pcmk__format_nvpair("interval", interval_ms_s, "ms"); interval_str = crm_strdup_printf(" %s", pair); free(pair); } if (print_timing) { char *last_change_str = NULL; char *exec_str = NULL; char *queue_str = NULL; const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { char *epoch_str = pcmk__epoch2str(&epoch, 0); last_change_str = crm_strdup_printf(" %s=\"%s\"", PCMK_XA_LAST_RC_CHANGE, pcmk__s(epoch_str, "")); free(epoch_str); } value = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); if (value) { char *pair = pcmk__format_nvpair(PCMK_XA_EXEC_TIME, value, "ms"); exec_str = crm_strdup_printf(" %s", pair); free(pair); } value = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME); if (value) { char *pair = pcmk__format_nvpair(PCMK_XA_QUEUE_TIME, value, "ms"); queue_str = crm_strdup_printf(" %s", pair); free(pair); } buf = crm_strdup_printf("(%s) %s:%s%s%s%s rc=%d (%s)", call, task, interval_str ? interval_str : "", last_change_str ? last_change_str : "", exec_str ? exec_str : "", queue_str ? queue_str : "", rc, services_ocf_exitcode_str(rc)); if (last_change_str) { free(last_change_str); } if (exec_str) { free(exec_str); } if (queue_str) { free(queue_str); } } else { buf = crm_strdup_printf("(%s) %s%s%s", call, task, interval_str ? ":" : "", interval_str ? interval_str : ""); } if (interval_str) { free(interval_str); } return buf; } static char * resource_history_string(pcmk_resource_t *rsc, const char *rsc_id, bool all, int failcount, time_t last_failure) { char *buf = NULL; if (rsc == NULL) { buf = crm_strdup_printf("%s: orphan", rsc_id); } else if (all || failcount || last_failure > 0) { char *failcount_s = NULL; char *lastfail_s = NULL; if (failcount > 0) { failcount_s = crm_strdup_printf(" %s=%d", PCMK__FAIL_COUNT_PREFIX, failcount); } else { failcount_s = strdup(""); } if (last_failure > 0) { buf = pcmk__epoch2str(&last_failure, 0); lastfail_s = crm_strdup_printf(" %s='%s'", PCMK__LAST_FAILURE_PREFIX, buf); free(buf); } buf = crm_strdup_printf("%s: " PCMK_META_MIGRATION_THRESHOLD "=%d%s%s", rsc_id, rsc->migration_threshold, failcount_s, lastfail_s? lastfail_s : ""); free(failcount_s); free(lastfail_s); } else { buf = crm_strdup_printf("%s:", rsc_id); } return buf; } /*! * \internal * \brief Get a node's feature set for status display purposes * * \param[in] node Node to check * * \return String representation of feature set if the node is fully up (using * "<3.15.1" for older nodes that don't set the #feature-set attribute), * otherwise NULL */ static const char * get_node_feature_set(const pcmk_node_t *node) { if (node->details->online && node->details->expected_up && !pe__is_guest_or_remote_node(node)) { const char *feature_set = g_hash_table_lookup(node->details->attrs, CRM_ATTR_FEATURE_SET); /* The feature set attribute is present since 3.15.1. If it is missing, * then the node must be running an earlier version. */ return pcmk__s(feature_set, "<3.15.1"); } return NULL; } static bool is_mixed_version(pcmk_scheduler_t *scheduler) { const char *feature_set = NULL; for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = gIter->data; const char *node_feature_set = get_node_feature_set(node); if (node_feature_set != NULL) { if (feature_set == NULL) { feature_set = node_feature_set; } else if (strcmp(feature_set, node_feature_set) != 0) { return true; } } } return false; } static char * formatted_xml_buf(const pcmk_resource_t *rsc, bool raw) { if (raw) { return dump_xml_formatted(rsc->orig_xml ? rsc->orig_xml : rsc->xml); } else { return dump_xml_formatted(rsc->xml); } } #define XPATH_DC_VERSION "//" PCMK_XE_NVPAIR \ "[@" PCMK_XA_NAME "='" PCMK_OPT_DC_VERSION "']" PCMK__OUTPUT_ARGS("cluster-summary", "pcmk_scheduler_t *", "enum pcmk_pacemakerd_state", "uint32_t", "uint32_t") static int cluster_summary(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(scheduler); if (pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s, pcmkd_state); } if (pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object(XPATH_DC_VERSION, scheduler->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, PCMK_XA_VALUE) : NULL; const char *quorum = crm_element_value(scheduler->input, PCMK_XA_HAVE_QUORUM); char *dc_name = scheduler->dc_node? pe__node_display_name(scheduler->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; bool mixed_version = is_mixed_version(scheduler); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-dc", scheduler->dc_node, quorum, dc_version_s, dc_name, mixed_version); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(scheduler->input, PCMK_XA_CIB_LAST_WRITTEN); const char *user = crm_element_value(scheduler->input, PCMK_XA_UPDATE_USER); const char *client = crm_element_value(scheduler->input, PCMK_XA_UPDATE_CLIENT); const char *origin = crm_element_value(scheduler->input, PCMK_XA_UPDATE_ORIGIN); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-times", scheduler->localhost, last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(scheduler->nodes), scheduler->ninstances, scheduler->disabled_resources, scheduler->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-options", scheduler); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", scheduler->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } PCMK__OUTPUT_ARGS("cluster-summary", "pcmk_scheduler_t *", "enum pcmk_pacemakerd_state", "uint32_t", "uint32_t") static int cluster_summary_html(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(scheduler); if (pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s, pcmkd_state); } /* Always print DC if none, even if not requested */ if ((scheduler->dc_node == NULL) || pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object(XPATH_DC_VERSION, scheduler->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, PCMK_XA_VALUE) : NULL; const char *quorum = crm_element_value(scheduler->input, PCMK_XA_HAVE_QUORUM); char *dc_name = scheduler->dc_node? pe__node_display_name(scheduler->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; bool mixed_version = is_mixed_version(scheduler); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-dc", scheduler->dc_node, quorum, dc_version_s, dc_name, mixed_version); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(scheduler->input, PCMK_XA_CIB_LAST_WRITTEN); const char *user = crm_element_value(scheduler->input, PCMK_XA_UPDATE_USER); const char *client = crm_element_value(scheduler->input, PCMK_XA_UPDATE_CLIENT); const char *origin = crm_element_value(scheduler->input, PCMK_XA_UPDATE_ORIGIN); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-times", scheduler->localhost, last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(scheduler->nodes), scheduler->ninstances, scheduler->disabled_resources, scheduler->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { /* Kind of a hack - close the list we may have opened earlier in this * function so we can put all the options into their own list. We * only want to do this on HTML output, though. */ PCMK__OUTPUT_LIST_FOOTER(out, rc); out->begin_list(out, NULL, NULL, "Config Options"); out->message(out, "cluster-options", scheduler); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", scheduler->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } char * pe__node_display_name(pcmk_node_t *node, bool print_detail) { char *node_name; const char *node_host = NULL; const char *node_id = NULL; int name_len; CRM_ASSERT((node != NULL) && (node->details != NULL) && (node->details->uname != NULL)); /* Host is displayed only if this is a guest node and detail is requested */ if (print_detail && pe__is_guest_node(node)) { const pcmk_resource_t *container = node->details->remote_rsc->container; const pcmk_node_t *host_node = pcmk__current_node(container); if (host_node && host_node->details) { node_host = host_node->details->uname; } if (node_host == NULL) { node_host = ""; /* so we at least get "uname@" to indicate guest */ } } /* Node ID is displayed if different from uname and detail is requested */ if (print_detail && !pcmk__str_eq(node->details->uname, node->details->id, pcmk__str_casei)) { node_id = node->details->id; } /* Determine name length */ name_len = strlen(node->details->uname) + 1; if (node_host) { name_len += strlen(node_host) + 1; /* "@node_host" */ } if (node_id) { name_len += strlen(node_id) + 3; /* + " (node_id)" */ } /* Allocate and populate display name */ node_name = malloc(name_len); CRM_ASSERT(node_name != NULL); strcpy(node_name, node->details->uname); if (node_host) { strcat(node_name, "@"); strcat(node_name, node_host); } if (node_id) { strcat(node_name, " ("); strcat(node_name, node_id); strcat(node_name, ")"); } return node_name; } int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name , size_t pairs_count, ...) { xmlNodePtr xml_node = NULL; va_list args; CRM_ASSERT(tag_name != NULL); xml_node = pcmk__output_xml_peek_parent(out); CRM_ASSERT(xml_node != NULL); xml_node = create_xml_node(xml_node, tag_name); va_start(args, pairs_count); while(pairs_count--) { const char *param_name = va_arg(args, const char *); const char *param_value = va_arg(args, const char *); if (param_name && param_value) { crm_xml_add(xml_node, param_name, param_value); } }; va_end(args); if (is_list) { pcmk__output_xml_push_parent(out, xml_node); } return pcmk_rc_ok; } static const char * role_desc(enum rsc_role_e role) { if (role == pcmk_role_promoted) { #ifdef PCMK__COMPAT_2_0 return "as " PCMK__ROLE_PROMOTED_LEGACY " "; #else return "in " PCMK__ROLE_PROMOTED " role "; #endif } return ""; } PCMK__OUTPUT_ARGS("ban", "pcmk_node_t *", "pcmk__location_t *", "uint32_t") static int ban_html(pcmk__output_t *out, va_list args) { pcmk_node_t *pe_node = va_arg(args, pcmk_node_t *); pcmk__location_t *location = va_arg(args, pcmk__location_t *); uint32_t show_opts = va_arg(args, uint32_t); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); char *buf = crm_strdup_printf("%s\tprevents %s from running %son %s", location->id, location->rsc->id, role_desc(location->role_filter), node_name); pcmk__output_create_html_node(out, "li", NULL, NULL, buf); free(node_name); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pcmk_node_t *", "pcmk__location_t *", "uint32_t") static int ban_text(pcmk__output_t *out, va_list args) { pcmk_node_t *pe_node = va_arg(args, pcmk_node_t *); pcmk__location_t *location = va_arg(args, pcmk__location_t *); uint32_t show_opts = va_arg(args, uint32_t); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->list_item(out, NULL, "%s\tprevents %s from running %son %s", location->id, location->rsc->id, role_desc(location->role_filter), node_name); free(node_name); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pcmk_node_t *", "pcmk__location_t *", "uint32_t") static int ban_xml(pcmk__output_t *out, va_list args) { pcmk_node_t *pe_node = va_arg(args, pcmk_node_t *); pcmk__location_t *location = va_arg(args, pcmk__location_t *); uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t); const char *promoted_only = pcmk__btoa(location->role_filter == pcmk_role_promoted); char *weight_s = pcmk__itoa(pe_node->weight); pcmk__output_create_xml_node(out, "ban", PCMK_XA_ID, location->id, "resource", location->rsc->id, PCMK_XA_NODE, pe_node->details->uname, "weight", weight_s, "promoted-only", promoted_only, /* This is a deprecated alias for * promoted_only. Removing it will break * backward compatibility of the API schema, * which will require an API schema major * version bump. */ "master_only", promoted_only, NULL); free(weight_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban-list", "pcmk_scheduler_t *", "const char *", "GList *", "uint32_t", "bool") static int ban_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); const char *prefix = va_arg(args, const char *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); GList *gIter, *gIter2; int rc = pcmk_rc_no_output; /* Print each ban */ for (gIter = scheduler->placement_constraints; gIter != NULL; gIter = gIter->next) { pcmk__location_t *location = gIter->data; const pcmk_resource_t *rsc = location->rsc; if (prefix != NULL && !g_str_has_prefix(location->id, prefix)) { continue; } if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(pe__const_top_resource(rsc, false)), only_rsc, pcmk__str_star_matches)) { continue; } for (gIter2 = location->nodes; gIter2 != NULL; gIter2 = gIter2->next) { pcmk_node_t *node = (pcmk_node_t *) gIter2->data; if (node->weight < 0) { PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Negative Location Constraints"); out->message(out, "ban", node, location, show_opts); } } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_html(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "li", NULL); char *nnodes_str = crm_strdup_printf("%d node%s configured", nnodes, pcmk__plural_s(nnodes)); pcmk_create_html_node(nodes_node, "span", NULL, NULL, nnodes_str); free(nnodes_str); if (ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); s = crm_strdup_printf(", %d ", nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "BLOCKED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, " from further action due to failure)"); } else if (ndisabled && !nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, ")"); } else if (!ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "BLOCKED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, " from further action due to failure)"); } else { char *s = crm_strdup_printf("%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_text(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); out->list_item(out, NULL, "%d node%s configured", nnodes, pcmk__plural_s(nnodes)); if (ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED, %d BLOCKED from " "further action due to failure)", nresources, pcmk__plural_s(nresources), ndisabled, nblocked); } else if (ndisabled && !nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED)", nresources, pcmk__plural_s(nresources), ndisabled); } else if (!ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d BLOCKED from further action " "due to failure)", nresources, pcmk__plural_s(nresources), nblocked); } else { out->list_item(out, NULL, "%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_xml(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "nodes_configured", NULL); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "resources_configured", NULL); char *s = pcmk__itoa(nnodes); crm_xml_add(nodes_node, "number", s); free(s); s = pcmk__itoa(nresources); crm_xml_add(resources_node, "number", s); free(s); s = pcmk__itoa(ndisabled); crm_xml_add(resources_node, "disabled", s); free(s); s = pcmk__itoa(nblocked); crm_xml_add(resources_node, "blocked", s); free(s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pcmk_node_t *", "const char *", "const char *", "char *", "int") static int cluster_dc_html(pcmk__output_t *out, va_list args) { pcmk_node_t *dc = va_arg(args, pcmk_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); bool mixed_version = va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, "bold", "Current DC: "); if (dc) { char *buf = crm_strdup_printf("%s (version %s) -", dc_name, dc_version_s ? dc_version_s : "unknown"); pcmk_create_html_node(node, "span", NULL, NULL, buf); free(buf); if (mixed_version) { pcmk_create_html_node(node, "span", NULL, "warning", " MIXED-VERSION"); } pcmk_create_html_node(node, "span", NULL, NULL, " partition"); if (crm_is_true(quorum)) { pcmk_create_html_node(node, "span", NULL, NULL, " with"); } else { pcmk_create_html_node(node, "span", NULL, "warning", " WITHOUT"); } pcmk_create_html_node(node, "span", NULL, NULL, " quorum"); } else { pcmk_create_html_node(node, "span", NULL, "warning", "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pcmk_node_t *", "const char *", "const char *", "char *", "int") static int cluster_dc_text(pcmk__output_t *out, va_list args) { pcmk_node_t *dc = va_arg(args, pcmk_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); bool mixed_version = va_arg(args, int); if (dc) { out->list_item(out, "Current DC", "%s (version %s) - %spartition %s quorum", dc_name, dc_version_s ? dc_version_s : "unknown", mixed_version ? "MIXED-VERSION " : "", crm_is_true(quorum) ? "with" : "WITHOUT"); } else { out->list_item(out, "Current DC", "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pcmk_node_t *", "const char *", "const char *", "char *", "int") static int cluster_dc_xml(pcmk__output_t *out, va_list args) { pcmk_node_t *dc = va_arg(args, pcmk_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name G_GNUC_UNUSED = va_arg(args, char *); bool mixed_version = va_arg(args, int); if (dc) { pcmk__output_create_xml_node(out, "current_dc", "present", PCMK_VALUE_TRUE, PCMK_XA_VERSION, pcmk__s(dc_version_s, ""), PCMK_XA_NAME, dc->details->uname, PCMK_XA_ID, dc->details->id, "with_quorum", pcmk__btoa(crm_is_true(quorum)), "mixed_version", pcmk__btoa(mixed_version), NULL); } else { pcmk__output_create_xml_node(out, "current_dc", "present", PCMK_VALUE_FALSE, NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("maint-mode", "unsigned long long int") static int cluster_maint_mode_text(pcmk__output_t *out, va_list args) { unsigned long long flags = va_arg(args, unsigned long long); if (pcmk_is_set(flags, pcmk_sched_in_maintenance)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will not attempt to start, stop or recover services\n"); return pcmk_rc_ok; } else if (pcmk_is_set(flags, pcmk_sched_stop_all)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will keep all resources stopped\n"); return pcmk_rc_ok; } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *") static int cluster_options_html(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { out->list_item(out, NULL, "STONITH of failed nodes enabled"); } else { out->list_item(out, NULL, "STONITH of failed nodes disabled"); } if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) { out->list_item(out, NULL, "Cluster is symmetric"); } else { out->list_item(out, NULL, "Cluster is asymmetric"); } switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case pcmk_no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case pcmk_no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case pcmk_no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case pcmk_no_quorum_fence: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } if (pcmk_is_set(scheduler->flags, pcmk_sched_in_maintenance)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, NULL, "Resource management: "); pcmk_create_html_node(node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(node, "span", NULL, NULL, " (the cluster will not attempt to start, stop, or recover services)"); } else if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_all)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, NULL, "Resource management: "); pcmk_create_html_node(node, "span", NULL, "bold", "STOPPED"); pcmk_create_html_node(node, "span", NULL, NULL, " (the cluster will keep all resources stopped)"); } else { out->list_item(out, NULL, "Resource management: enabled"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *") static int cluster_options_log(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); if (pcmk_is_set(scheduler->flags, pcmk_sched_in_maintenance)) { return out->info(out, "Resource management is DISABLED. The cluster will not attempt to start, stop or recover services."); } else if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_all)) { return out->info(out, "Resource management is DISABLED. The cluster has stopped all resources."); } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *") static int cluster_options_text(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { out->list_item(out, NULL, "STONITH of failed nodes enabled"); } else { out->list_item(out, NULL, "STONITH of failed nodes disabled"); } if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) { out->list_item(out, NULL, "Cluster is symmetric"); } else { out->list_item(out, NULL, "Cluster is asymmetric"); } switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case pcmk_no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case pcmk_no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case pcmk_no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case pcmk_no_quorum_fence: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *") static int cluster_options_xml(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); const char *no_quorum_policy = NULL; char *stonith_timeout_str = pcmk__itoa(scheduler->stonith_timeout); char *priority_fencing_delay_str = pcmk__itoa(scheduler->priority_fencing_delay * 1000); switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: no_quorum_policy = "freeze"; break; case pcmk_no_quorum_stop: no_quorum_policy = "stop"; break; case pcmk_no_quorum_demote: no_quorum_policy = "demote"; break; case pcmk_no_quorum_ignore: no_quorum_policy = "ignore"; break; case pcmk_no_quorum_fence: no_quorum_policy = "suicide"; break; } pcmk__output_create_xml_node(out, "cluster_options", PCMK_OPT_STONITH_ENABLED, pcmk__flag_text(scheduler->flags, pcmk_sched_fencing_enabled), PCMK_OPT_SYMMETRIC_CLUSTER, pcmk__flag_text(scheduler->flags, pcmk_sched_symmetric_cluster), PCMK_OPT_NO_QUORUM_POLICY, no_quorum_policy, PCMK_OPT_MAINTENANCE_MODE, pcmk__flag_text(scheduler->flags, pcmk_sched_in_maintenance), PCMK_OPT_STOP_ALL_RESOURCES, pcmk__flag_text(scheduler->flags, pcmk_sched_stop_all), PCMK_OPT_STONITH_TIMEOUT "-ms", stonith_timeout_str, PCMK_OPT_PRIORITY_FENCING_DELAY "-ms", priority_fencing_delay_str, NULL); free(stonith_timeout_str); free(priority_fencing_delay_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state") static int cluster_stack_html(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, "bold", "Stack: "); pcmk_create_html_node(node, "span", NULL, NULL, stack_s); if (pcmkd_state != pcmk_pacemakerd_state_invalid) { pcmk_create_html_node(node, "span", NULL, NULL, " ("); pcmk_create_html_node(node, "span", NULL, NULL, pcmk__pcmkd_state_enum2friendly(pcmkd_state)); pcmk_create_html_node(node, "span", NULL, NULL, ")"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state") static int cluster_stack_text(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); if (pcmkd_state != pcmk_pacemakerd_state_invalid) { out->list_item(out, "Stack", "%s (%s)", stack_s, pcmk__pcmkd_state_enum2friendly(pcmkd_state)); } else { out->list_item(out, "Stack", "%s", stack_s); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state") static int cluster_stack_xml(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = NULL; if (pcmkd_state != pcmk_pacemakerd_state_invalid) { state_s = pcmk_pacemakerd_api_daemon_state_enum2text(pcmkd_state); } pcmk__output_create_xml_node(out, "stack", PCMK_XA_TYPE, stack_s, "pacemakerd-state", state_s, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *", "const char *") static int cluster_times_html(pcmk__output_t *out, va_list args) { const char *our_nodename = va_arg(args, const char *); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); xmlNodePtr updated_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr changed_node = pcmk__output_create_xml_node(out, "li", NULL); char *time_s = pcmk__epoch2str(NULL, 0); pcmk_create_html_node(updated_node, "span", NULL, "bold", "Last updated: "); pcmk_create_html_node(updated_node, "span", NULL, NULL, time_s); if (our_nodename != NULL) { pcmk_create_html_node(updated_node, "span", NULL, NULL, " on "); pcmk_create_html_node(updated_node, "span", NULL, NULL, our_nodename); } free(time_s); time_s = last_changed_string(last_written, user, client, origin); pcmk_create_html_node(changed_node, "span", NULL, "bold", "Last change: "); pcmk_create_html_node(changed_node, "span", NULL, NULL, time_s); free(time_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *", "const char *") static int cluster_times_xml(pcmk__output_t *out, va_list args) { const char *our_nodename = va_arg(args, const char *); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *time_s = pcmk__epoch2str(NULL, 0); pcmk__output_create_xml_node(out, "last_update", "time", time_s, PCMK_XA_ORIGIN, our_nodename, NULL); pcmk__output_create_xml_node(out, "last_change", "time", last_written ? last_written : "", "user", user ? user : "", "client", client ? client : "", PCMK_XA_ORIGIN, pcmk__s(origin, ""), NULL); free(time_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *", "const char *") static int cluster_times_text(pcmk__output_t *out, va_list args) { const char *our_nodename = va_arg(args, const char *); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *time_s = pcmk__epoch2str(NULL, 0); out->list_item(out, "Last updated", "%s%s%s", time_s, (our_nodename != NULL)? " on " : "", pcmk__s(our_nodename, "")); free(time_s); time_s = last_changed_string(last_written, user, client, origin); out->list_item(out, "Last change", " %s", time_s); free(time_s); return pcmk_rc_ok; } /*! * \internal * \brief Display a failed action in less-technical natural language * * \param[in,out] out Output object to use for display * \param[in] xml_op XML containing failed action * \param[in] op_key Operation key of failed action * \param[in] node_name Where failed action occurred * \param[in] rc OCF exit code of failed action * \param[in] status Execution status of failed action * \param[in] exit_reason Exit reason given for failed action * \param[in] exec_time String containing execution time in milliseconds */ static void failed_action_friendly(pcmk__output_t *out, const xmlNode *xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { char *rsc_id = NULL; char *task = NULL; guint interval_ms = 0; time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key) || !parse_op_key(op_key, &rsc_id, &task, &interval_ms)) { rsc_id = strdup("unknown resource"); task = strdup("unknown action"); interval_ms = 0; } CRM_ASSERT((rsc_id != NULL) && (task != NULL)); str = g_string_sized_new(256); // Should be sufficient for most messages pcmk__g_strcat(str, rsc_id, " ", NULL); if (interval_ms != 0) { pcmk__g_strcat(str, pcmk__readable_interval(interval_ms), "-interval ", NULL); } pcmk__g_strcat(str, pcmk__readable_action(task, interval_ms), " on ", node_name, NULL); if (status == PCMK_EXEC_DONE) { pcmk__g_strcat(str, " returned '", services_ocf_exitcode_str(rc), "'", NULL); if (!pcmk__str_empty(exit_reason)) { pcmk__g_strcat(str, " (", exit_reason, ")", NULL); } } else { pcmk__g_strcat(str, " could not be executed (", pcmk_exec_status_str(status), NULL); if (!pcmk__str_empty(exit_reason)) { pcmk__g_strcat(str, ": ", exit_reason, NULL); } g_string_append_c(str, ')'); } if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &last_change_epoch) == pcmk_ok) { char *s = pcmk__epoch2str(&last_change_epoch, 0); pcmk__g_strcat(str, " at ", s, NULL); free(s); } if (!pcmk__str_empty(exec_time)) { int exec_time_ms = 0; if ((pcmk__scan_min_int(exec_time, &exec_time_ms, 0) == pcmk_rc_ok) && (exec_time_ms > 0)) { pcmk__g_strcat(str, " after ", pcmk__readable_interval(exec_time_ms), NULL); } } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); free(rsc_id); free(task); } /*! * \internal * \brief Display a failed action with technical details * * \param[in,out] out Output object to use for display * \param[in] xml_op XML containing failed action * \param[in] op_key Operation key of failed action * \param[in] node_name Where failed action occurred * \param[in] rc OCF exit code of failed action * \param[in] status Execution status of failed action * \param[in] exit_reason Exit reason given for failed action * \param[in] exec_time String containing execution time in milliseconds */ static void failed_action_technical(pcmk__output_t *out, const xmlNode *xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID); const char *queue_time = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME); const char *exit_status = services_ocf_exitcode_str(rc); const char *lrm_status = pcmk_exec_status_str(status); time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key)) { op_key = "unknown operation"; } if (pcmk__str_empty(exit_status)) { exit_status = "unknown exit status"; } if (pcmk__str_empty(call_id)) { call_id = "unknown"; } str = g_string_sized_new(256); g_string_append_printf(str, "%s on %s '%s' (%d): call=%s, status='%s'", op_key, node_name, exit_status, rc, call_id, lrm_status); if (!pcmk__str_empty(exit_reason)) { pcmk__g_strcat(str, ", exitreason='", exit_reason, "'", NULL); } if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &last_change_epoch) == pcmk_ok) { char *last_change_str = pcmk__epoch2str(&last_change_epoch, 0); pcmk__g_strcat(str, ", " PCMK_XA_LAST_RC_CHANGE "=" "'", last_change_str, "'", NULL); free(last_change_str); } if (!pcmk__str_empty(queue_time)) { pcmk__g_strcat(str, ", queued=", queue_time, "ms", NULL); } if (!pcmk__str_empty(exec_time)) { pcmk__g_strcat(str, ", exec=", exec_time, "ms", NULL); } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); } PCMK__OUTPUT_ARGS("failed-action", "xmlNodePtr", "uint32_t") static int failed_action_default(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); uint32_t show_opts = va_arg(args, uint32_t); const char *op_key = pcmk__xe_history_key(xml_op); const char *node_name = crm_element_value(xml_op, PCMK_XA_UNAME); const char *exit_reason = crm_element_value(xml_op, PCMK_XA_EXIT_REASON); const char *exec_time = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); int rc; int status; pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_RC_CODE), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status, 0); if (pcmk__str_empty(node_name)) { node_name = "unknown node"; } if (pcmk_is_set(show_opts, pcmk_show_failed_detail)) { failed_action_technical(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } else { failed_action_friendly(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action", "xmlNodePtr", "uint32_t") static int failed_action_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t); const char *op_key = pcmk__xe_history_key(xml_op); const char *op_key_name = "op_key"; int rc; int status; const char *uname = crm_element_value(xml_op, PCMK_XA_UNAME); const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID); const char *exit_reason = crm_element_value(xml_op, PCMK_XA_EXIT_REASON); time_t epoch = 0; char *rc_s = NULL; char *reason_s = crm_xml_escape(exit_reason ? exit_reason : "none"); xmlNodePtr node = NULL; pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_RC_CODE), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status, 0); rc_s = pcmk__itoa(rc); if (crm_element_value(xml_op, PCMK__XA_OPERATION_KEY) == NULL) { op_key_name = PCMK_XA_ID; } node = pcmk__output_create_xml_node(out, "failure", op_key_name, op_key, PCMK_XA_NODE, uname, "exitstatus", services_ocf_exitcode_str(rc), "exitreason", pcmk__s(reason_s, ""), "exitcode", rc_s, "call", call_id, "status", pcmk_exec_status_str(status), NULL); free(rc_s); if ((crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { const char *queue_time = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME); guint interval_ms = 0; char *interval_ms_s = NULL; char *rc_change = pcmk__epoch2str(&epoch, crm_time_log_date |crm_time_log_timeofday |crm_time_log_with_timezone); crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &interval_ms); interval_ms_s = crm_strdup_printf("%u", interval_ms); pcmk__xe_set_props(node, PCMK_XA_LAST_RC_CHANGE, rc_change, "queued", queue_time, "exec", crm_element_value(xml_op, PCMK_XA_EXEC_TIME), "interval", interval_ms_s, "task", crm_element_value(xml_op, PCMK_XA_OPERATION), NULL); free(interval_ms_s); free(rc_change); } free(reason_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action-list", "pcmk_scheduler_t *", "GList *", "GList *", "uint32_t", "bool") static int failed_action_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); xmlNode *xml_op = NULL; int rc = pcmk_rc_no_output; if (xmlChildElementCount(scheduler->failed) == 0) { return rc; } for (xml_op = pcmk__xml_first_child(scheduler->failed); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { char *rsc = NULL; if (!pcmk__str_in_list(crm_element_value(xml_op, PCMK_XA_UNAME), only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } if (pcmk_xe_mask_probe_failure(xml_op)) { continue; } if (!parse_op_key(pcmk__xe_history_key(xml_op), &rsc, NULL, NULL)) { continue; } if (!pcmk__str_in_list(rsc, only_rsc, pcmk__str_star_matches)) { free(rsc); continue; } free(rsc); PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Failed Resource Actions"); out->message(out, "failed-action", xml_op, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } static void status_node(pcmk_node_t *node, xmlNodePtr parent, uint32_t show_opts) { int health = pe__node_health(node); // Cluster membership if (node->details->online) { pcmk_create_html_node(parent, "span", NULL, "online", " online"); } else { pcmk_create_html_node(parent, "span", NULL, "offline", " OFFLINE"); } // Standby mode if (node->details->standby_onfail && (node->details->running_rsc != NULL)) { pcmk_create_html_node(parent, "span", NULL, "standby", " (in standby due to on-fail," " with active resources)"); } else if (node->details->standby_onfail) { pcmk_create_html_node(parent, "span", NULL, "standby", " (in standby due to on-fail)"); } else if (node->details->standby && (node->details->running_rsc != NULL)) { pcmk_create_html_node(parent, "span", NULL, "standby", " (in standby, with active resources)"); } else if (node->details->standby) { pcmk_create_html_node(parent, "span", NULL, "standby", " (in standby)"); } // Maintenance mode if (node->details->maintenance) { pcmk_create_html_node(parent, "span", NULL, "maint", " (in maintenance mode)"); } // Node health if (health < 0) { pcmk_create_html_node(parent, "span", NULL, "health_red", " (health is RED)"); } else if (health == 0) { pcmk_create_html_node(parent, "span", NULL, "health_yellow", " (health is YELLOW)"); } // Feature set if (pcmk_is_set(show_opts, pcmk_show_feature_set)) { const char *feature_set = get_node_feature_set(node); if (feature_set != NULL) { char *buf = crm_strdup_printf(", feature set %s", feature_set); pcmk_create_html_node(parent, "span", NULL, NULL, buf); free(buf); } } } PCMK__OUTPUT_ARGS("node", "pcmk_node_t *", "uint32_t", "bool", "GList *", "GList *") static int node_html(pcmk__output_t *out, va_list args) { pcmk_node_t *node = va_arg(args, pcmk_node_t *); uint32_t show_opts = va_arg(args, uint32_t); bool full = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (full) { xmlNodePtr item_node; if (pcmk_all_flags_set(show_opts, pcmk_show_brief | pcmk_show_rscs_by_node)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); out->begin_list(out, NULL, NULL, "%s:", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, "Status:"); status_node(node, item_node, show_opts); if (rscs != NULL) { uint32_t new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); } pcmk__output_xml_pop_parent(out); out->end_list(out); } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc2 = NULL; int rc = pcmk_rc_no_output; out->begin_list(out, NULL, NULL, "%s:", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, "Status:"); status_node(node, item_node, show_opts); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) lpc2->data; PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources"); show_opts |= pcmk_show_rsc_only; out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); pcmk__output_xml_pop_parent(out); out->end_list(out); } else { char *buf = crm_strdup_printf("%s:", node_name); item_node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, "bold", buf); status_node(node, item_node, show_opts); free(buf); } } else { out->begin_list(out, NULL, NULL, "%s:", node_name); } free(node_name); return pcmk_rc_ok; } /*! * \internal * \brief Get a human-friendly textual description of a node's status * * \param[in] node Node to check * * \return String representation of node's status */ static const char * node_text_status(const pcmk_node_t *node) { if (node->details->unclean) { if (node->details->online) { return "UNCLEAN (online)"; } else if (node->details->pending) { return "UNCLEAN (pending)"; } else { return "UNCLEAN (offline)"; } } else if (node->details->pending) { return "pending"; } else if (node->details->standby_onfail && node->details->online) { return "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { if (node->details->running_rsc) { return "standby (with active resources)"; } else { return "standby"; } } else { return "OFFLINE (standby)"; } } else if (node->details->maintenance) { if (node->details->online) { return "maintenance"; } else { return "OFFLINE (maintenance)"; } } else if (node->details->online) { return "online"; } return "OFFLINE"; } PCMK__OUTPUT_ARGS("node", "pcmk_node_t *", "uint32_t", "bool", "GList *", "GList *") static int node_text(pcmk__output_t *out, va_list args) { pcmk_node_t *node = va_arg(args, pcmk_node_t *); uint32_t show_opts = va_arg(args, uint32_t); bool full = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); GString *str = g_string_sized_new(64); int health = pe__node_health(node); // Create a summary line with node type, name, and status if (pe__is_guest_node(node)) { g_string_append(str, "GuestNode"); } else if (pe__is_remote_node(node)) { g_string_append(str, "RemoteNode"); } else { g_string_append(str, "Node"); } pcmk__g_strcat(str, " ", node_name, ": ", node_text_status(node), NULL); if (health < 0) { g_string_append(str, " (health is RED)"); } else if (health == 0) { g_string_append(str, " (health is YELLOW)"); } if (pcmk_is_set(show_opts, pcmk_show_feature_set)) { const char *feature_set = get_node_feature_set(node); if (feature_set != NULL) { pcmk__g_strcat(str, ", feature set ", feature_set, NULL); } } /* If we're grouping by node, print its resources */ if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (pcmk_is_set(show_opts, pcmk_show_brief)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); if (rscs != NULL) { uint32_t new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "%s", str->str); out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); out->end_list(out); g_list_free(rscs); } } else { GList *gIter2 = NULL; out->begin_list(out, NULL, NULL, "%s", str->str); out->begin_list(out, NULL, NULL, "Resources"); for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) gIter2->data; show_opts |= pcmk_show_rsc_only; out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc, only_node, only_rsc); } out->end_list(out); out->end_list(out); } } else { out->list_item(out, NULL, "%s", str->str); } g_string_free(str, TRUE); free(node_name); } else { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->begin_list(out, NULL, NULL, "Node: %s", node_name); free(node_name); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node", "pcmk_node_t *", "uint32_t", "bool", "GList *", "GList *") static int node_xml(pcmk__output_t *out, va_list args) { pcmk_node_t *node = va_arg(args, pcmk_node_t *); uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t); bool full = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { const char *node_type = "unknown"; char *length_s = pcmk__itoa(g_list_length(node->details->running_rsc)); int health = pe__node_health(node); const char *health_s = NULL; const char *feature_set; switch (node->details->type) { case pcmk_node_variant_cluster: node_type = "member"; break; case pcmk_node_variant_remote: node_type = "remote"; break; case node_ping: node_type = "ping"; break; } if (health < 0) { health_s = "red"; } else if (health == 0) { health_s = "yellow"; } else { health_s = "green"; } feature_set = get_node_feature_set(node); pe__name_and_nvpairs_xml(out, true, PCMK_XE_NODE, 15, PCMK_XA_NAME, node->details->uname, PCMK_XA_ID, node->details->id, "online", pcmk__btoa(node->details->online), "standby", pcmk__btoa(node->details->standby), "standby_onfail", pcmk__btoa(node->details->standby_onfail), "maintenance", pcmk__btoa(node->details->maintenance), "pending", pcmk__btoa(node->details->pending), "unclean", pcmk__btoa(node->details->unclean), "health", health_s, "feature_set", feature_set, "shutdown", pcmk__btoa(node->details->shutdown), "expected_up", pcmk__btoa(node->details->expected_up), "is_dc", pcmk__btoa(node->details->is_dc), "resources_running", length_s, PCMK_XA_TYPE, node_type); if (pe__is_guest_node(node)) { xmlNodePtr xml_node = pcmk__output_xml_peek_parent(out); crm_xml_add(xml_node, "id_as_resource", node->details->remote_rsc->container->id); } if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc = NULL; for (lpc = node->details->running_rsc; lpc != NULL; lpc = lpc->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) lpc->data; show_opts |= pcmk_show_rsc_only; out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc, only_node, only_rsc); } } free(length_s); out->end_list(out); } else { pcmk__output_xml_create_parent(out, PCMK_XE_NODE, PCMK_XA_NAME, node->details->uname, NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int") static int node_attribute_text(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); bool add_extra = va_arg(args, int); int expected_score = va_arg(args, int); if (add_extra) { int v; if (value == NULL) { v = 0; } else { pcmk__scan_min_int(value, &v, INT_MIN); } if (v <= 0) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is lost", name, value); } else if (v < expected_score) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is degraded (Expected=%d)", name, value, expected_score); } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int") static int node_attribute_html(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); bool add_extra = va_arg(args, int); int expected_score = va_arg(args, int); if (add_extra) { int v; char *s = crm_strdup_printf("%s: %s", name, value); xmlNodePtr item_node = pcmk__output_create_xml_node(out, "li", NULL); if (value == NULL) { v = 0; } else { pcmk__scan_min_int(value, &v, INT_MIN); } pcmk_create_html_node(item_node, "span", NULL, NULL, s); free(s); if (v <= 0) { pcmk_create_html_node(item_node, "span", NULL, "bold", "(connectivity is lost)"); } else if (v < expected_score) { char *buf = crm_strdup_printf("(connectivity is degraded -- expected %d", expected_score); pcmk_create_html_node(item_node, "span", NULL, "bold", buf); free(buf); } } else { out->list_item(out, NULL, "%s: %s", name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pcmk_scheduler_t *", "xmlNodePtr") static int node_and_op(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pcmk_resource_t *rsc = NULL; gchar *node_str = NULL; char *last_change_str = NULL; const char *op_rsc = crm_element_value(xml_op, "resource"); int status; time_t last_change = 0; pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status, PCMK_EXEC_UNKNOWN); rsc = pe_find_resource(scheduler->resources, op_rsc); if (rsc) { const pcmk_node_t *node = pcmk__current_node(rsc); const char *target_role = g_hash_table_lookup(rsc->meta, PCMK_META_TARGET_ROLE); uint32_t show_opts = pcmk_show_rsc_only | pcmk_show_pending; if (node == NULL) { node = rsc->pending_node; } node_str = pcmk__native_output_string(rsc, rsc_printable_id(rsc), node, show_opts, target_role, false); } else { node_str = crm_strdup_printf("Unknown resource %s", op_rsc); } if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &last_change) == pcmk_ok) { const char *exec_time = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); last_change_str = crm_strdup_printf(", %s='%s', exec=%sms", PCMK_XA_LAST_RC_CHANGE, pcmk__trim(ctime(&last_change)), exec_time); } out->list_item(out, NULL, "%s: %s (node=%s, call=%s, rc=%s%s): %s", node_str, pcmk__xe_history_key(xml_op), crm_element_value(xml_op, PCMK_XA_UNAME), crm_element_value(xml_op, PCMK__XA_CALL_ID), crm_element_value(xml_op, PCMK__XA_RC_CODE), last_change_str ? last_change_str : "", pcmk_exec_status_str(status)); g_free(node_str); free(last_change_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pcmk_scheduler_t *", "xmlNodePtr") static int node_and_op_xml(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pcmk_resource_t *rsc = NULL; const char *uname = crm_element_value(xml_op, PCMK_XA_UNAME); const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID); const char *rc_s = crm_element_value(xml_op, PCMK__XA_RC_CODE); const char *op_rsc = crm_element_value(xml_op, "resource"); int status; time_t last_change = 0; xmlNode *node = NULL; pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status, PCMK_EXEC_UNKNOWN); node = pcmk__output_create_xml_node(out, PCMK_XE_OPERATION, PCMK_XA_OP, pcmk__xe_history_key(xml_op), PCMK_XA_NODE, uname, "call", call_id, "rc", rc_s, "status", pcmk_exec_status_str(status), NULL); rsc = pe_find_resource(scheduler->resources, op_rsc); if (rsc) { const char *class = crm_element_value(rsc->xml, PCMK_XA_CLASS); const char *provider = crm_element_value(rsc->xml, PCMK_XA_PROVIDER); const char *kind = crm_element_value(rsc->xml, PCMK_XA_TYPE); bool has_provider = pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider); char *agent_tuple = crm_strdup_printf("%s:%s:%s", class, (has_provider? provider : ""), kind); pcmk__xe_set_props(node, PCMK_XA_RSC, rsc_printable_id(rsc), "agent", agent_tuple, NULL); free(agent_tuple); } if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &last_change) == pcmk_ok) { const char *last_rc_change = pcmk__trim(ctime(&last_change)); const char *exec_time = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); pcmk__xe_set_props(node, PCMK_XA_LAST_RC_CHANGE, last_rc_change, PCMK_XA_EXEC_TIME, exec_time, NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int") static int node_attribute_xml(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); bool add_extra = va_arg(args, int); int expected_score = va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_ATTRIBUTE, PCMK_XA_NAME, name, PCMK_XA_VALUE, value, NULL); if (add_extra) { char *buf = pcmk__itoa(expected_score); crm_xml_add(node, "expected", buf); free(buf); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute-list", "pcmk_scheduler_t *", "uint32_t", "bool", "GList *", "GList *") static int node_attribute_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); int rc = pcmk_rc_no_output; /* Display each node's attributes */ for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = gIter->data; GList *attr_list = NULL; GHashTableIter iter; gpointer key; if (!node || !node->details || !node->details->online) { continue; } g_hash_table_iter_init(&iter, node->details->attrs); while (g_hash_table_iter_next (&iter, &key, NULL)) { attr_list = filter_attr_list(attr_list, key); } if (attr_list == NULL) { continue; } if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { g_list_free(attr_list); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node Attributes"); out->message(out, "node", node, show_opts, false, only_node, only_rsc); for (GList *aIter = attr_list; aIter != NULL; aIter = aIter->next) { const char *name = aIter->data; const char *value = NULL; int expected_score = 0; bool add_extra = false; value = pe_node_attribute_raw(node, name); add_extra = add_extra_info(node, node->details->running_rsc, scheduler, name, &expected_score); /* Print attribute name and value */ out->message(out, "node-attribute", name, value, add_extra, expected_score); } g_list_free(attr_list); out->end_list(out); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-capacity", "const pcmk_node_t *", "const char *") static int node_capacity(pcmk__output_t *out, va_list args) { const pcmk_node_t *node = va_arg(args, pcmk_node_t *); const char *comment = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s capacity:", comment, pcmk__node_name(node)); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-capacity", "const pcmk_node_t *", "const char *") static int node_capacity_xml(pcmk__output_t *out, va_list args) { const pcmk_node_t *node = va_arg(args, pcmk_node_t *); const char *uname = node->details->uname; const char *comment = va_arg(args, const char *); xmlNodePtr xml_node = pcmk__output_create_xml_node(out, "capacity", PCMK_XA_NODE, uname, "comment", comment, NULL); g_hash_table_foreach(node->details->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-history-list", "pcmk_scheduler_t *", "pcmk_node_t *", "xmlNodePtr", "GList *", "GList *", "uint32_t", "uint32_t") static int node_history_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); pcmk_node_t *node = va_arg(args, pcmk_node_t *); xmlNode *node_state = va_arg(args, xmlNode *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; int rc = pcmk_rc_no_output; - lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); + lrm_rsc = find_xml_node(node_state, PCMK__XE_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); /* Print history of each of the node's resources */ for (rsc_entry = first_named_child(lrm_rsc, XML_LRM_TAG_RESOURCE); rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID); pcmk_resource_t *rsc = pe_find_resource(scheduler->resources, rsc_id); const pcmk_resource_t *parent = pe__const_top_resource(rsc, false); /* We can't use is_filtered here to filter group resources. For is_filtered, * we have to decide whether to check the parent or not. If we check the * parent, all elements of a group will always be printed because that's how * is_filtered works for groups. If we do not check the parent, sometimes * this will filter everything out. * * For other resource types, is_filtered is okay. */ if (parent->variant == pcmk_rsc_variant_group) { if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(parent), only_rsc, pcmk__str_star_matches)) { continue; } } else { if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { continue; } } if (!pcmk_is_set(section_opts, pcmk_section_operations)) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pcmk__fc_default, NULL); if (failcount <= 0) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, false, only_node, only_rsc); } out->message(out, "resource-history", rsc, rsc_id, false, failcount, last_failure, false); } else { GList *op_list = get_operation_list(rsc_entry); pcmk_resource_t *rsc = NULL; if (op_list == NULL) { continue; } rsc = pe_find_resource(scheduler->resources, crm_element_value(rsc_entry, PCMK_XA_ID)); if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, false, only_node, only_rsc); } out->message(out, "resource-operation-list", scheduler, rsc, node, op_list, show_opts); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool") static int node_list_html(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer G_GNUC_UNUSED = va_arg(args, int); int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = (pcmk_node_t *) gIter->data; if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Node List"); out->message(out, "node", node, show_opts, true, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool") static int node_list_text(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); /* space-separated lists of node names */ GString *online_nodes = NULL; GString *online_remote_nodes = NULL; GString *online_guest_nodes = NULL; GString *offline_nodes = NULL; GString *offline_remote_nodes = NULL; int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = (pcmk_node_t *) gIter->data; char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { free(node_name); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node List"); // Determine whether to display node individually or in a list if (node->details->unclean || node->details->pending || (node->details->standby_onfail && node->details->online) || node->details->standby || node->details->maintenance || pcmk_is_set(show_opts, pcmk_show_rscs_by_node) || pcmk_is_set(show_opts, pcmk_show_feature_set) || (pe__node_health(node) <= 0)) { // Display node individually } else if (node->details->online) { // Display online node in a list if (pe__is_guest_node(node)) { pcmk__add_word(&online_guest_nodes, 1024, node_name); } else if (pe__is_remote_node(node)) { pcmk__add_word(&online_remote_nodes, 1024, node_name); } else { pcmk__add_word(&online_nodes, 1024, node_name); } free(node_name); continue; } else { // Display offline node in a list if (pe__is_remote_node(node)) { pcmk__add_word(&offline_remote_nodes, 1024, node_name); } else if (pe__is_guest_node(node)) { /* ignore offline guest nodes */ } else { pcmk__add_word(&offline_nodes, 1024, node_name); } free(node_name); continue; } /* If we get here, node is in bad state, or we're grouping by node */ out->message(out, "node", node, show_opts, true, only_node, only_rsc); free(node_name); } /* If we're not grouping by node, summarize nodes by status */ if (online_nodes != NULL) { out->list_item(out, "Online", "[ %s ]", (const char *) online_nodes->str); g_string_free(online_nodes, TRUE); } if (offline_nodes != NULL) { out->list_item(out, "OFFLINE", "[ %s ]", (const char *) offline_nodes->str); g_string_free(offline_nodes, TRUE); } if (online_remote_nodes) { out->list_item(out, "RemoteOnline", "[ %s ]", (const char *) online_remote_nodes->str); g_string_free(online_remote_nodes, TRUE); } if (offline_remote_nodes) { out->list_item(out, "RemoteOFFLINE", "[ %s ]", (const char *) offline_remote_nodes->str); g_string_free(offline_remote_nodes, TRUE); } if (online_guest_nodes != NULL) { out->list_item(out, "GuestOnline", "[ %s ]", (const char *) online_guest_nodes->str); g_string_free(online_guest_nodes, TRUE); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool") static int node_list_xml(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer G_GNUC_UNUSED = va_arg(args, int); /* PCMK_XE_NODES acts as the list's element name for CLI tools that force * --xml-simple-list. Otherwise PCMK_XE_NODES is the value of the list's * PCMK_XA_NAME attribute. */ out->begin_list(out, NULL, NULL, PCMK_XE_NODES); for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = (pcmk_node_t *) gIter->data; if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } out->message(out, "node", node, show_opts, true, only_node, only_rsc); } out->end_list(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-summary", "pcmk_scheduler_t *", "GList *", "GList *", "uint32_t", "uint32_t", "bool") static int node_summary(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); xmlNode *node_state = NULL; xmlNode *cib_status = pcmk_find_cib_element(scheduler->input, PCMK_XE_STATUS); int rc = pcmk_rc_no_output; if (xmlChildElementCount(cib_status) == 0) { return rc; } for (node_state = first_named_child(cib_status, PCMK__XE_NODE_STATE); node_state != NULL; node_state = crm_next_same_xml(node_state)) { pcmk_node_t *node = pe_find_node_id(scheduler->nodes, ID(node_state)); if (!node || !node->details || !node->details->online) { continue; } if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, pcmk_is_set(section_opts, pcmk_section_operations) ? "Operations" : "Migration Summary"); out->message(out, "node-history-list", scheduler, node, node_state, only_node, only_rsc, section_opts, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-weight", "const pcmk_resource_t *", "const char *", "const char *", "const char *") static int node_weight(pcmk__output_t *out, va_list args) { const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); const char *score = va_arg(args, const char *); if (rsc) { out->list_item(out, NULL, "%s: %s allocation score on %s: %s", prefix, rsc->id, uname, score); } else { out->list_item(out, NULL, "%s: %s = %s", prefix, uname, score); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-weight", "const pcmk_resource_t *", "const char *", "const char *", "const char *") static int node_weight_xml(pcmk__output_t *out, va_list args) { const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); const char *score = va_arg(args, const char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "node_weight", "function", prefix, PCMK_XA_NODE, uname, PCMK_XA_SCORE, score, NULL); if (rsc) { crm_xml_add(node, PCMK_XA_ID, rsc->id); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNodePtr", "const char *", "const char *", "int", "uint32_t") static int op_history_text(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); uint32_t show_opts = va_arg(args, uint32_t); char *buf = op_history_string(xml_op, task, interval_ms_s, rc, pcmk_is_set(show_opts, pcmk_show_timing)); out->list_item(out, NULL, "%s", buf); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNodePtr", "const char *", "const char *", "int", "uint32_t") static int op_history_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); uint32_t show_opts = va_arg(args, uint32_t); const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID); char *rc_s = pcmk__itoa(rc); xmlNodePtr node = pcmk__output_create_xml_node(out, "operation_history", "call", call_id, "task", task, "rc", rc_s, "rc_text", services_ocf_exitcode_str(rc), NULL); free(rc_s); if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *s = crm_strdup_printf("%sms", interval_ms_s); crm_xml_add(node, "interval", s); free(s); } if (pcmk_is_set(show_opts, pcmk_show_timing)) { const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { char *s = pcmk__epoch2str(&epoch, 0); crm_xml_add(node, PCMK_XA_LAST_RC_CHANGE, s); free(s); } value = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); if (value) { char *s = crm_strdup_printf("%sms", value); crm_xml_add(node, PCMK_XA_EXEC_TIME, s); free(s); } value = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME); if (value) { char *s = crm_strdup_printf("%sms", value); crm_xml_add(node, PCMK_XA_QUEUE_TIME, s); free(s); } } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pcmk_resource_t *", "pcmk_node_t *", "const char *") static int promotion_score(pcmk__output_t *out, va_list args) { pcmk_resource_t *child_rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *chosen = va_arg(args, pcmk_node_t *); const char *score = va_arg(args, const char *); out->list_item(out, NULL, "%s promotion score on %s: %s", child_rsc->id, chosen? chosen->details->uname : "none", score); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pcmk_resource_t *", "pcmk_node_t *", "const char *") static int promotion_score_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *child_rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *chosen = va_arg(args, pcmk_node_t *); const char *score = va_arg(args, const char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "promotion_score", PCMK_XA_ID, child_rsc->id, PCMK_XA_SCORE, score, NULL); if (chosen) { crm_xml_add(node, PCMK_XA_NODE, chosen->details->uname); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-config", "const pcmk_resource_t *", "bool") static int resource_config(pcmk__output_t *out, va_list args) { const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *); bool raw = va_arg(args, int); char *rsc_xml = formatted_xml_buf(rsc, raw); out->output_xml(out, "xml", rsc_xml); free(rsc_xml); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-config", "const pcmk_resource_t *", "bool") static int resource_config_text(pcmk__output_t *out, va_list args) { const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *); bool raw = va_arg(args, int); char *rsc_xml = formatted_xml_buf(rsc, raw); pcmk__formatted_printf(out, "Resource XML:\n"); out->output_xml(out, "xml", rsc_xml); free(rsc_xml); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-history", "pcmk_resource_t *", "const char *", "bool", "int", "time_t", "bool") static int resource_history_text(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); const char *rsc_id = va_arg(args, const char *); bool all = va_arg(args, int); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, time_t); bool as_header = va_arg(args, int); char *buf = resource_history_string(rsc, rsc_id, all, failcount, last_failure); if (as_header) { out->begin_list(out, NULL, NULL, "%s", buf); } else { out->list_item(out, NULL, "%s", buf); } free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-history", "pcmk_resource_t *", "const char *", "bool", "int", "time_t", "bool") static int resource_history_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); const char *rsc_id = va_arg(args, const char *); bool all = va_arg(args, int); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, time_t); bool as_header = va_arg(args, int); xmlNodePtr node = pcmk__output_xml_create_parent(out, "resource_history", PCMK_XA_ID, rsc_id, NULL); if (rsc == NULL) { pcmk__xe_set_bool_attr(node, "orphan", true); } else if (all || failcount || last_failure > 0) { char *migration_s = pcmk__itoa(rsc->migration_threshold); pcmk__xe_set_props(node, "orphan", PCMK_VALUE_FALSE, PCMK_META_MIGRATION_THRESHOLD, migration_s, NULL); free(migration_s); if (failcount > 0) { char *s = pcmk__itoa(failcount); crm_xml_add(node, PCMK__FAIL_COUNT_PREFIX, s); free(s); } if (last_failure > 0) { char *s = pcmk__epoch2str(&last_failure, 0); crm_xml_add(node, PCMK__LAST_FAILURE_PREFIX, s); free(s); } } if (!as_header) { pcmk__output_xml_pop_parent(out); } return pcmk_rc_ok; } static void print_resource_header(pcmk__output_t *out, uint32_t show_opts) { if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { /* Active resources have already been printed by node */ out->begin_list(out, NULL, NULL, "Inactive Resources"); } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { out->begin_list(out, NULL, NULL, "Full List of Resources"); } else { out->begin_list(out, NULL, NULL, "Active Resources"); } } PCMK__OUTPUT_ARGS("resource-list", "pcmk_scheduler_t *", "uint32_t", "bool", "GList *", "GList *", "bool") static int resource_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); uint32_t show_opts = va_arg(args, uint32_t); bool print_summary = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); bool print_spacer = va_arg(args, int); GList *rsc_iter; int rc = pcmk_rc_no_output; bool printed_header = false; /* If we already showed active resources by node, and * we're not showing inactive resources, we have nothing to do */ if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node) && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { return rc; } /* If we haven't already printed resources grouped by node, * and brief output was requested, print resource summary */ if (pcmk_is_set(show_opts, pcmk_show_brief) && !pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *rscs = pe__filter_rsc_list(scheduler->resources, only_rsc); PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; rc = pe__rscs_brief_output(out, rscs, show_opts); g_list_free(rscs); } /* For each resource, display it if appropriate */ for (rsc_iter = scheduler->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) rsc_iter->data; int x; /* Complex resources may have some sub-resources active and some inactive */ gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); /* Skip inactive orphans (deleted but still in CIB) */ if (pcmk_is_set(rsc->flags, pcmk_rsc_removed) && !is_active) { continue; /* Skip active resources if we already displayed them by node */ } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (is_active) { continue; } /* Skip primitives already counted in a brief summary */ } else if (pcmk_is_set(show_opts, pcmk_show_brief) && (rsc->variant == pcmk_rsc_variant_primitive)) { continue; /* Skip resources that aren't at least partially active, * unless we're displaying inactive resources */ } else if (!partially_active && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { continue; } else if (partially_active && !pe__rsc_running_on_any(rsc, only_node)) { continue; } if (!printed_header) { PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; } /* Print this resource */ x = out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc, only_node, only_rsc); if (x == pcmk_rc_ok) { rc = pcmk_rc_ok; } } if (print_summary && rc != pcmk_rc_ok) { if (!printed_header) { PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; } if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { out->list_item(out, NULL, "No inactive resources"); } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { out->list_item(out, NULL, "No resources"); } else { out->list_item(out, NULL, "No active resources"); } } if (printed_header) { out->end_list(out); } return rc; } PCMK__OUTPUT_ARGS("resource-operation-list", "pcmk_scheduler_t *", "pcmk_resource_t *", "pcmk_node_t *", "GList *", "uint32_t") static int resource_operation_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler G_GNUC_UNUSED = va_arg(args, pcmk_scheduler_t *); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *node = va_arg(args, pcmk_node_t *); GList *op_list = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); GList *gIter = NULL; int rc = pcmk_rc_no_output; /* Print each operation */ for (gIter = op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *task = crm_element_value(xml_op, PCMK_XA_OPERATION); const char *interval_ms_s = crm_element_value(xml_op, PCMK_META_INTERVAL); const char *op_rc = crm_element_value(xml_op, PCMK__XA_RC_CODE); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* If this is the first printed operation, print heading for resource */ if (rc == pcmk_rc_no_output) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pcmk__fc_default, NULL); out->message(out, "resource-history", rsc, rsc_printable_id(rsc), true, failcount, last_failure, true); rc = pcmk_rc_ok; } /* Print the operation */ out->message(out, "op-history", xml_op, task, interval_ms_s, op_rc_i, show_opts); } /* Free the list we created (no need to free the individual items) */ g_list_free(op_list); PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("resource-util", "pcmk_resource_t *", "pcmk_node_t *", "const char *") static int resource_util(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *node = va_arg(args, pcmk_node_t *); const char *fn = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s utilization on %s:", fn, rsc->id, pcmk__node_name(node)); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-util", "pcmk_resource_t *", "pcmk_node_t *", "const char *") static int resource_util_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *node = va_arg(args, pcmk_node_t *); const char *uname = node->details->uname; const char *fn = va_arg(args, const char *); xmlNodePtr xml_node = pcmk__output_create_xml_node(out, PCMK_XE_UTILIZATION, "resource", rsc->id, PCMK_XA_NODE, uname, "function", fn, NULL); g_hash_table_foreach(rsc->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pcmk_ticket_t *") static int ticket_html(pcmk__output_t *out, va_list args) { pcmk_ticket_t *ticket = va_arg(args, pcmk_ticket_t *); if (ticket->last_granted > -1) { char *epoch_str = pcmk__epoch2str(&(ticket->last_granted), 0); out->list_item(out, NULL, "%s:\t%s%s %s=\"%s\"", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : "", "last-granted", pcmk__s(epoch_str, "")); free(epoch_str); } else { out->list_item(out, NULL, "%s:\t%s%s", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : ""); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pcmk_ticket_t *") static int ticket_text(pcmk__output_t *out, va_list args) { pcmk_ticket_t *ticket = va_arg(args, pcmk_ticket_t *); if (ticket->last_granted > -1) { char *epoch_str = pcmk__epoch2str(&(ticket->last_granted), 0); out->list_item(out, ticket->id, "%s%s %s=\"%s\"", ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : "", "last-granted", pcmk__s(epoch_str, "")); free(epoch_str); } else { out->list_item(out, ticket->id, "%s%s", ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : ""); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pcmk_ticket_t *") static int ticket_xml(pcmk__output_t *out, va_list args) { pcmk_ticket_t *ticket = va_arg(args, pcmk_ticket_t *); xmlNodePtr node = NULL; node = pcmk__output_create_xml_node(out, "ticket", PCMK_XA_ID, ticket->id, "status", ticket->granted ? "granted" : "revoked", "standby", pcmk__btoa(ticket->standby), NULL); if (ticket->last_granted > -1) { char *buf = pcmk__epoch2str(&ticket->last_granted, 0); crm_xml_add(node, "last-granted", buf); free(buf); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-list", "pcmk_scheduler_t *", "bool") static int ticket_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); bool print_spacer = va_arg(args, int); GHashTableIter iter; gpointer key, value; if (g_hash_table_size(scheduler->tickets) == 0) { return pcmk_rc_no_output; } PCMK__OUTPUT_SPACER_IF(out, print_spacer); /* Print section heading */ out->begin_list(out, NULL, NULL, "Tickets"); /* Print each ticket */ g_hash_table_iter_init(&iter, scheduler->tickets); while (g_hash_table_iter_next(&iter, &key, &value)) { pcmk_ticket_t *ticket = (pcmk_ticket_t *) value; out->message(out, "ticket", ticket); } /* Close section */ out->end_list(out); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "ban", "default", ban_text }, { "ban", "html", ban_html }, { "ban", "xml", ban_xml }, { "ban-list", "default", ban_list }, { "bundle", "default", pe__bundle_text }, { "bundle", "xml", pe__bundle_xml }, { "bundle", "html", pe__bundle_html }, { "clone", "default", pe__clone_default }, { "clone", "xml", pe__clone_xml }, { "cluster-counts", "default", cluster_counts_text }, { "cluster-counts", "html", cluster_counts_html }, { "cluster-counts", "xml", cluster_counts_xml }, { "cluster-dc", "default", cluster_dc_text }, { "cluster-dc", "html", cluster_dc_html }, { "cluster-dc", "xml", cluster_dc_xml }, { "cluster-options", "default", cluster_options_text }, { "cluster-options", "html", cluster_options_html }, { "cluster-options", "log", cluster_options_log }, { "cluster-options", "xml", cluster_options_xml }, { "cluster-summary", "default", cluster_summary }, { "cluster-summary", "html", cluster_summary_html }, { "cluster-stack", "default", cluster_stack_text }, { "cluster-stack", "html", cluster_stack_html }, { "cluster-stack", "xml", cluster_stack_xml }, { "cluster-times", "default", cluster_times_text }, { "cluster-times", "html", cluster_times_html }, { "cluster-times", "xml", cluster_times_xml }, { "failed-action", "default", failed_action_default }, { "failed-action", "xml", failed_action_xml }, { "failed-action-list", "default", failed_action_list }, { "group", "default", pe__group_default}, { "group", "xml", pe__group_xml }, { "maint-mode", "text", cluster_maint_mode_text }, { "node", "default", node_text }, { "node", "html", node_html }, { "node", "xml", node_xml }, { "node-and-op", "default", node_and_op }, { "node-and-op", "xml", node_and_op_xml }, { "node-capacity", "default", node_capacity }, { "node-capacity", "xml", node_capacity_xml }, { "node-history-list", "default", node_history_list }, { "node-list", "default", node_list_text }, { "node-list", "html", node_list_html }, { "node-list", "xml", node_list_xml }, { "node-weight", "default", node_weight }, { "node-weight", "xml", node_weight_xml }, { "node-attribute", "default", node_attribute_text }, { "node-attribute", "html", node_attribute_html }, { "node-attribute", "xml", node_attribute_xml }, { "node-attribute-list", "default", node_attribute_list }, { "node-summary", "default", node_summary }, { "op-history", "default", op_history_text }, { "op-history", "xml", op_history_xml }, { "primitive", "default", pe__resource_text }, { "primitive", "xml", pe__resource_xml }, { "primitive", "html", pe__resource_html }, { "promotion-score", "default", promotion_score }, { "promotion-score", "xml", promotion_score_xml }, { "resource-config", "default", resource_config }, { "resource-config", "text", resource_config_text }, { "resource-history", "default", resource_history_text }, { "resource-history", "xml", resource_history_xml }, { "resource-list", "default", resource_list }, { "resource-operation-list", "default", resource_operation_list }, { "resource-util", "default", resource_util }, { "resource-util", "xml", resource_util_xml }, { "ticket", "default", ticket_text }, { "ticket", "html", ticket_html }, { "ticket", "xml", ticket_xml }, { "ticket-list", "default", ticket_list }, { NULL, NULL, NULL } }; void pe__register_messages(pcmk__output_t *out) { pcmk__register_messages(out, fmt_functions); } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 0c4d89ce23..ed2a0d22c2 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,5141 +1,5141 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); // A (parsed) resource action history entry struct action_history { pcmk_resource_t *rsc; // Resource that history is for pcmk_node_t *node; // Node that history is for xmlNode *xml; // History entry XML // Parsed from entry XML const char *id; // XML ID of history entry const char *key; // Operation key of action const char *task; // Action name const char *exit_reason; // Exit reason given for result guint interval_ms; // Action interval int call_id; // Call ID of action int expected_exit_status; // Expected exit status of action int exit_status; // Actual exit status of action int execution_status; // Execution status of action }; /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the * flag is stringified more readably in log messages. */ #define set_config_flag(scheduler, option, flag) do { \ const char *scf_value = pe_pref((scheduler)->config_hash, (option)); \ if (scf_value != NULL) { \ if (crm_is_true(scf_value)) { \ (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } else { \ (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } \ } \ } while(0) static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed); static void determine_remote_online_status(pcmk_scheduler_t *scheduler, pcmk_node_t *this_node); static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler); static void determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, pcmk_scheduler_t *scheduler); static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler); static gboolean is_dangling_guest_node(pcmk_node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (pe__is_guest_or_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && pcmk_is_set(node->details->remote_rsc->flags, pcmk_rsc_removed_filler)) { return TRUE; } return FALSE; } /*! * \brief Schedule a fence action for a node * * \param[in,out] scheduler Scheduler data * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed * \param[in] priority_delay Whether to consider * \c PCMK_OPT_PRIORITY_FENCING_DELAY */ void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (pe__is_guest_node(node)) { pcmk_resource_t *rsc = node->details->remote_rsc->container; if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) { if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", pcmk__node_name(node), reason, rsc->id); } else { pcmk__sched_warn("Guest node %s will be fenced " "(by recovering its guest resource %s): %s", pcmk__node_name(node), rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", pcmk__node_name(node), reason); pcmk__set_rsc_flags(node->details->remote_rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); } else if (pe__is_remote_node(node)) { pcmk_resource_t *rsc = node->details->remote_rsc; if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", pcmk__node_name(node), reason); } else if(node->details->remote_requires_reset == FALSE) { node->details->remote_requires_reset = TRUE; pcmk__sched_warn("Remote node %s %s: %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean", reason); } else { pcmk__sched_warn("Cluster node %s %s: %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \ "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \ "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \ "and @" PCMK_XA_VALUE "='" PCMK__VALUE_UNFENCING "']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \ "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \ "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \ "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler) { xmlXPathObjectPtr result = NULL; if (!pcmk_is_set(scheduler->flags, flag)) { result = xpath_search(scheduler->input, xpath); if (result && (numXpathResults(result) > 0)) { pcmk__set_scheduler_flags(scheduler, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) { const char *value = NULL; guint interval_ms = 0U; GHashTable *config_hash = pcmk__strkey_table(free, free); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = pcmk_role_unknown, .now = scheduler->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; scheduler->config_hash = config_hash; pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data, config_hash, CIB_OPTIONS_FIRST, FALSE, scheduler); verify_pe_options(scheduler->config_hash); set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES, pcmk_sched_probe_resources); if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(scheduler->config_hash, PCMK_OPT_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_info("Watchdog-based self-fencing will be performed via SBD if " "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " is nonzero"); pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_fencing); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING, scheduler); value = pe_pref(scheduler->config_hash, PCMK_OPT_STONITH_TIMEOUT); pcmk_parse_interval_spec(value, &interval_ms); if (interval_ms >= INT_MAX) { scheduler->stonith_timeout = INT_MAX; } else { scheduler->stonith_timeout = (int) interval_ms; } crm_debug("STONITH timeout: %d", scheduler->stonith_timeout); set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED, pcmk_sched_fencing_enabled); if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { crm_debug("STONITH of failed nodes is enabled"); } else { crm_debug("STONITH of failed nodes is disabled"); } scheduler->stonith_action = pe_pref(scheduler->config_hash, PCMK_OPT_STONITH_ACTION); if (!strcmp(scheduler->stonith_action, "poweroff")) { pcmk__warn_once(pcmk__wo_poweroff, "Support for " PCMK_OPT_STONITH_ACTION " of " "'poweroff' is deprecated and will be removed in a " "future release (use 'off' instead)"); scheduler->stonith_action = PCMK_ACTION_OFF; } crm_trace("STONITH will %s nodes", scheduler->stonith_action); set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING, pcmk_sched_concurrent_fencing); if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) { crm_debug("Concurrent fencing is enabled"); } else { crm_debug("Concurrent fencing is disabled"); } value = pe_pref(scheduler->config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY); if (value) { pcmk_parse_interval_spec(value, &interval_ms); scheduler->priority_fencing_delay = (int) (interval_ms / 1000); crm_trace("Priority fencing delay is %ds", scheduler->priority_fencing_delay); } set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES, pcmk_sched_stop_all); crm_debug("Stop all active resources: %s", pcmk__flag_text(scheduler->flags, pcmk_sched_stop_all)); set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER, pcmk_sched_symmetric_cluster); if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(scheduler->config_hash, PCMK_OPT_NO_QUORUM_POLICY); if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_ignore; } else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_freeze; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_demote; } else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) { if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { int do_panic = 0; crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC, &do_panic); if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) { scheduler->no_quorum_policy = pcmk_no_quorum_fence; } else { crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop': cluster has never had quorum"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop' because fencing is disabled"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { scheduler->no_quorum_policy = pcmk_no_quorum_stop; } switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case pcmk_no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case pcmk_no_quorum_demote: crm_debug("On loss of quorum: " "Demote promotable resources and stop other resources"); break; case pcmk_no_quorum_fence: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case pcmk_no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES, pcmk_sched_stop_removed_resources); if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) { crm_trace("Orphan resources are stopped"); } else { crm_trace("Orphan resources are ignored"); } set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS, pcmk_sched_cancel_removed_actions); if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) { crm_trace("Orphan resource actions are stopped"); } else { crm_trace("Orphan resource actions are ignored"); } value = pe_pref(scheduler->config_hash, PCMK__OPT_REMOVE_AFTER_STOP); if (value != NULL) { if (crm_is_true(value)) { pcmk__set_scheduler_flags(scheduler, pcmk_sched_remove_after_stop); #ifndef PCMK__COMPAT_2_0 pcmk__warn_once(pcmk__wo_remove_after, "Support for the " PCMK__OPT_REMOVE_AFTER_STOP " cluster property is deprecated and will be " "removed in a future release"); #endif } else { pcmk__clear_scheduler_flags(scheduler, pcmk_sched_remove_after_stop); } } set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE, pcmk_sched_in_maintenance); crm_trace("Maintenance mode: %s", pcmk__flag_text(scheduler->flags, pcmk_sched_in_maintenance)); set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL, pcmk_sched_start_failure_fatal); if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) { crm_trace("Start failures are always fatal"); } else { crm_trace("Start failures are handled by failcount"); } if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING, pcmk_sched_startup_fencing); } if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pcmk__warn_once(pcmk__wo_blind, "Blind faith: not fencing unseen nodes"); } pe__unpack_node_health_scores(scheduler); scheduler->placement_strategy = pe_pref(scheduler->config_hash, PCMK_OPT_PLACEMENT_STRATEGY); crm_trace("Placement strategy: %s", scheduler->placement_strategy); set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK, pcmk_sched_shutdown_lock); if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { value = pe_pref(scheduler->config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT); pcmk_parse_interval_spec(value, &(scheduler->shutdown_lock)); scheduler->shutdown_lock /= 1000; crm_trace("Resources will be locked to nodes that were cleanly " "shut down (locks expire after %s)", pcmk__readable_interval(scheduler->shutdown_lock)); } else { crm_trace("Resources will not be locked to nodes that were cleanly " "shut down"); } value = pe_pref(scheduler->config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT); pcmk_parse_interval_spec(value, &(scheduler->node_pending_timeout)); scheduler->node_pending_timeout /= 1000; if (scheduler->node_pending_timeout == 0) { crm_trace("Do not fence pending nodes"); } else { crm_trace("Fence pending nodes after %s", pcmk__readable_interval(scheduler->node_pending_timeout * 1000)); } return TRUE; } pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler) { pcmk_node_t *new_node = NULL; if (pe_find_node(scheduler->nodes, uname) != NULL) { pcmk__config_warn("More than one node entry has name '%s'", uname); } new_node = calloc(1, sizeof(pcmk_node_t)); if (new_node == NULL) { pcmk__sched_err("Could not allocate memory for node %s", uname); return NULL; } new_node->weight = char2score(score); new_node->details = calloc(1, sizeof(struct pe_node_shared_s)); if (new_node->details == NULL) { free(new_node); pcmk__sched_err("Could not allocate memory for node %s", uname); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->data_set = scheduler; if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) { new_node->details->type = pcmk_node_variant_cluster; } else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) { new_node->details->type = pcmk_node_variant_remote; pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_remote_nodes); } else { /* @COMPAT 'ping' is the default for backward compatibility, but it * should be changed to 'member' at a compatibility break */ if (!pcmk__str_eq(type, "ping", pcmk__str_casei)) { pcmk__config_warn("Node %s has unrecognized type '%s', " "assuming 'ping'", pcmk__s(uname, "without name"), type); } pcmk__warn_once(pcmk__wo_ping_node, "Support for nodes of type 'ping' (such as %s) is " "deprecated and will be removed in a future release", pcmk__s(uname, "unnamed node")); new_node->details->type = node_ping; } new_node->details->attrs = pcmk__strkey_table(free, free); if (pe__is_guest_or_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("cluster")); } new_node->details->utilization = pcmk__strkey_table(free, free); new_node->details->digest_cache = pcmk__strkey_table(free, pe__free_digests); scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node, pe__cmp_node_name); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) { if (!pcmk__str_eq((const char *) attr_set->name, PCMK_XE_META_ATTRIBUTES, pcmk__str_casei)) { continue; } for (attr = pcmk__xe_first_child(attr_set); attr != NULL; attr = pcmk__xe_next(attr)) { const char *value = crm_element_value(attr, PCMK_XA_VALUE); const char *name = crm_element_value(attr, PCMK_XA_NAME); if (name == NULL) { // Sanity continue; } if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) { remote_name = value; } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) { remote_server = value; } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) { remote_port = value; } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) { connect_timeout = value; } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) { remote_allow_migrate = value; } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (pe_find_resource(data->resources, remote_name) != NULL) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node) { if ((new_node->details->type == pcmk_node_variant_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; pcmk_node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = pcmk__xe_first_child(xml_nodes); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (pcmk__str_eq((const char *) xml_obj->name, PCMK_XE_NODE, pcmk__str_none)) { new_node = NULL; id = crm_element_value(xml_obj, PCMK_XA_ID); uname = crm_element_value(xml_obj, PCMK_XA_UNAME); type = crm_element_value(xml_obj, PCMK_XA_TYPE); score = crm_element_value(xml_obj, PCMK_XA_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { pcmk__config_err("Ignoring <" PCMK_XE_NODE "> entry in configuration without id"); continue; } new_node = pe_create_node(id, uname, type, score, scheduler); if (new_node == NULL) { return FALSE; } handle_startup_fencing(scheduler, new_node); add_node_attrs(xml_obj, new_node, FALSE, scheduler); crm_trace("Done with node %s", crm_element_value(xml_obj, PCMK_XA_UNAME)); } } if (scheduler->localhost && (pe_find_node(scheduler->nodes, scheduler->localhost) == NULL)) { crm_info("Creating a fake local node"); pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0, scheduler); } return TRUE; } static void setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) { const char *container_id = NULL; if (rsc->children) { g_list_foreach(rsc->children, (GFunc) setup_container, scheduler); return; } container_id = g_hash_table_lookup(rsc->meta, PCMK__META_CONTAINER); if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) { pcmk_resource_t *container = pe_find_resource(scheduler->resources, container_id); if (container) { rsc->container = container; pcmk__set_rsc_flags(container, pcmk_rsc_has_filler); container->fillers = g_list_append(container->fillers, rsc); pcmk__rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pcmk__config_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) { crm_trace("Found remote node %s defined by resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, scheduler); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). */ if (pcmk__str_eq((const char *) xml_obj->name, PCMK_XE_PRIMITIVE, pcmk__str_none)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, scheduler); if (new_node_id && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, scheduler); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. */ if (pcmk__str_eq((const char *) xml_obj->name, PCMK_XE_GROUP, pcmk__str_none)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, scheduler); if (new_node_id && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, scheduler); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the scheduler calculations. */ static void link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc) { pcmk_node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pe_find_node(scheduler->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return); pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s", new_rsc->id, pcmk__node_name(remote_node)); remote_node->details->remote_rsc = new_rsc; if (new_rsc->container == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(scheduler, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. */ g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); } } static void destroy_tag(gpointer data) { pcmk_tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } /*! * \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] scheduler Scheduler data * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when pe__unpack_resource() calls resource_location() */ gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; GList *gIter = NULL; scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag); for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { pcmk_resource_t *new_rsc = NULL; const char *id = ID(xml_obj); if (pcmk__str_empty(id)) { pcmk__config_err("Ignoring <%s> resource without ID", xml_obj->name); continue; } if (pcmk__str_eq((const char *) xml_obj->name, PCMK_XE_TEMPLATE, pcmk__str_none)) { if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(scheduler->template_rsc_sets, strdup(id), NULL); } continue; } crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id); if (pe__unpack_resource(xml_obj, &new_rsc, NULL, scheduler) == pcmk_rc_ok) { scheduler->resources = g_list_append(scheduler->resources, new_rsc); pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { pcmk__config_err("Ignoring <%s> resource '%s' " "because configuration is invalid", xml_obj->name, id); } } for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data; setup_container(rsc, scheduler); link_rsc2remotenode(scheduler, rsc); } scheduler->resources = g_list_sort(scheduler->resources, pe__cmp_rsc_priority); if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) { /* Ignore */ } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled) && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) { pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined"); pcmk__config_err("Either configure some or disable STONITH with the " PCMK_OPT_STONITH_ENABLED " option"); pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler) { xmlNode *xml_tag = NULL; scheduler->tags = pcmk__strkey_table(free, destroy_tag); for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) { continue; } if (tag_id == NULL) { pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID, (const char *) xml_tag->name); continue; } for (xml_obj_ref = pcmk__xe_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) { continue; } if (obj_ref == NULL) { pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID, xml_obj_ref->name, tag_id); continue; } if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; pcmk_ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (pcmk__str_empty(ticket_id)) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(scheduler->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, scheduler); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = pcmk__xml_attr_value(xIter); if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { long long last_granted_ll; pcmk__scan_ll(last_granted, &last_granted_ll, 0LL); ticket->last_granted = (time_t) last_granted_ll; } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; for (xml_obj = pcmk__xe_first_child(xml_tickets); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) { continue; } unpack_ticket_state(xml_obj, scheduler); } return TRUE; } static void unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *resource_discovery_enabled = NULL; const xmlNode *attrs = NULL; pcmk_resource_t *rsc = NULL; if (!pcmk__str_eq((const char *) state->name, PCMK__XE_NODE_STATE, pcmk__str_none)) { return; } if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) { return; } crm_trace("Processing Pacemaker Remote node %s", pcmk__node_name(this_node)); pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_MAINTENANCE), &(this_node->details->remote_maintenance), 0); rsc = this_node->details->remote_rsc; if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, scheduler); if (pe__shutdown_requested(this_node)) { crm_info("%s is shutting down", pcmk__node_name(this_node)); this_node->details->shutdown = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("%s is in standby mode", pcmk__node_name(this_node)); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) { crm_info("%s is in maintenance mode", pcmk__node_name(this_node)); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (pe__is_remote_node(this_node) && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { pcmk__config_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY " attribute on Pacemaker Remote node %s" " because fencing is disabled", pcmk__node_name(this_node)); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("%s has resource discovery disabled", pcmk__node_name(this_node)); this_node->details->rsc_discovery_enabled = FALSE; } } } /*! * \internal * \brief Unpack a cluster node's transient attributes * * \param[in] state CIB node state XML * \param[in,out] node Cluster node whose attributes are being unpacked * \param[in,out] scheduler Scheduler data */ static void unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node, pcmk_scheduler_t *scheduler) { const char *discovery = NULL; const xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, node, TRUE, scheduler); if (crm_is_true(pe_node_attribute_raw(node, "standby"))) { crm_info("%s is in standby mode", pcmk__node_name(node)); node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(node, "maintenance"))) { crm_info("%s is in maintenance mode", pcmk__node_name(node)); node->details->maintenance = TRUE; } discovery = pe_node_attribute_raw(node, XML_NODE_ATTR_RSC_DISCOVERY); if ((discovery != NULL) && !crm_is_true(discovery)) { pcmk__config_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY " attribute " "for %s because disabling resource discovery is " "not allowed for cluster nodes", pcmk__node_name(node)); } } /*! * \internal * \brief Unpack a node state entry (first pass) * * Unpack one node state entry from status. This unpacks information from the * \C PCMK__XE_NODE_STATE element itself and node attributes inside it, but not * the resource history inside it. Multiple passes through the status are needed * to fully unpack everything. * * \param[in] state CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *id = NULL; const char *uname = NULL; pcmk_node_t *this_node = NULL; id = crm_element_value(state, PCMK_XA_ID); if (id == NULL) { pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without " PCMK_XA_ID); crm_log_xml_info(state, "missing-id"); return; } uname = crm_element_value(state, PCMK_XA_UNAME); if (uname == NULL) { /* If a joining peer makes the cluster acquire the quorum from corosync * meanwhile it has not joined CPG membership of pacemaker-controld yet, * it's possible that the created PCMK__XE_NODE_STATE entry doesn't have * a PCMK_XA_UNAME yet. We should recognize the node as `pending` and * wait for it to join CPG. */ crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" " "without " PCMK_XA_UNAME, id); } this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { pcmk__config_warn("Ignoring recorded node state for id=\"%s\" (%s) " "because it is no longer in the configuration", id, pcmk__s(uname, "uname unknown")); return; } if (pe__is_guest_or_remote_node(this_node)) { /* We can't determine the online status of Pacemaker Remote nodes until * after all resource history has been unpacked. In this first pass, we * do need to mark whether the node has been fenced, as this plays a * role during unpacking cluster node resource state. */ pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_FENCED), &(this_node->details->remote_was_fenced), 0); return; } unpack_transient_attributes(state, this_node, scheduler); /* Provisionally mark this cluster node as clean. We have at least seen it * in the current cluster's lifetime. */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; crm_trace("Determining online status of cluster node %s (id %s)", pcmk__node_name(this_node), id); determine_online_status(state, this_node, scheduler); if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate) && this_node->details->online && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) { /* Everything else should flow from this automatically * (at least until the scheduler becomes able to migrate off * healthy resources) */ pe_fence_node(scheduler, this_node, "cluster does not have quorum", FALSE); } } /*! * \internal * \brief Unpack nodes' resource history as much as possible * * Unpack as many nodes' resource history as possible in one pass through the * status. We need to process Pacemaker Remote nodes' connections/containers * before unpacking their history; the connection/container history will be * in another node's history, so it might take multiple passes to unpack * everything. * * \param[in] status CIB XML status section * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen * \param[in,out] scheduler Scheduler data * * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done, * or EAGAIN if more unpacking remains to be done) */ static int unpack_node_history(const xmlNode *status, bool fence, pcmk_scheduler_t *scheduler) { int rc = pcmk_rc_ok; // Loop through all PCMK__XE_NODE_STATE entries in CIB status for (const xmlNode *state = first_named_child(status, PCMK__XE_NODE_STATE); state != NULL; state = crm_next_same_xml(state)) { const char *id = ID(state); const char *uname = crm_element_value(state, PCMK_XA_UNAME); pcmk_node_t *this_node = NULL; if ((id == NULL) || (uname == NULL)) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history from malformed " PCMK__XE_NODE_STATE " without id and/or uname"); continue; } this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history for node %s because " "no longer in configuration", id); continue; } if (this_node->details->unpacked) { crm_trace("Not unpacking resource history for node %s because " "already unpacked", id); continue; } if (fence) { // We're processing all remaining nodes } else if (pe__is_guest_node(this_node)) { /* We can unpack a guest node's history only after we've unpacked * other resource history to the point that we know that the node's * connection and containing resource are both up. */ pcmk_resource_t *rsc = this_node->details->remote_rsc; if ((rsc == NULL) || (rsc->role != pcmk_role_started) || (rsc->container->role != pcmk_role_started)) { crm_trace("Not unpacking resource history for guest node %s " "because container and connection are not known to " "be up", id); continue; } } else if (pe__is_remote_node(this_node)) { /* We can unpack a remote node's history only after we've unpacked * other resource history to the point that we know that the node's * connection is up, with the exception of when shutdown locks are * in use. */ pcmk_resource_t *rsc = this_node->details->remote_rsc; if ((rsc == NULL) || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock) && (rsc->role != pcmk_role_started))) { crm_trace("Not unpacking resource history for remote node %s " "because connection is not known to be up", id); continue; } /* If fencing and shutdown locks are disabled and we're not processing * unseen nodes, then we don't want to unpack offline nodes until online * nodes have been unpacked. This allows us to number active clone * instances first. */ } else if (!pcmk_any_flags_set(scheduler->flags, pcmk_sched_fencing_enabled |pcmk_sched_shutdown_lock) && !this_node->details->online) { crm_trace("Not unpacking resource history for offline " "cluster node %s", id); continue; } if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(scheduler, this_node); unpack_handle_remote_attrs(this_node, state, scheduler); } crm_trace("Unpacking resource history for %snode %s", (fence? "unseen " : ""), id); this_node->details->unpacked = TRUE; unpack_node_lrm(this_node, state, scheduler); rc = EAGAIN; // Other node histories might depend on this one } return rc; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler) { xmlNode *state = NULL; crm_trace("Beginning unpack"); if (scheduler->tickets == NULL) { scheduler->tickets = pcmk__strkey_table(free, destroy_ticket); } for (state = pcmk__xe_first_child(status); state != NULL; state = pcmk__xe_next(state)) { if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) { unpack_tickets_state((xmlNode *) state, scheduler); } else if (pcmk__str_eq((const char *) state->name, PCMK__XE_NODE_STATE, pcmk__str_none)) { unpack_node_state(state, scheduler); } } while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) { crm_trace("Another pass through node resource histories is needed"); } // Now catch any nodes we didn't see unpack_node_history(status, pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled), scheduler); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (scheduler->stop_needed != NULL) { for (GList *item = scheduler->stop_needed; item; item = item->next) { pcmk_resource_t *container = item->data; pcmk_node_t *node = pcmk__current_node(container); if (node) { stop_action(container, node, FALSE); } } g_list_free(scheduler->stop_needed); scheduler->stop_needed = NULL; } /* Now that we know status of all Pacemaker Remote connections and nodes, * we can stop connections for node shutdowns, and check the online status * of remote/guest nodes that didn't have any node history to unpack. */ for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *this_node = gIter->data; if (!pe__is_guest_or_remote_node(this_node)) { continue; } if (this_node->details->shutdown && (this_node->details->remote_rsc != NULL)) { pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped, "remote shutdown"); } if (!this_node->details->unpacked) { determine_remote_online_status(scheduler, this_node); } } return TRUE; } /*! * \internal * \brief Unpack node's time when it became a member at the cluster layer * * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry * \param[in,out] scheduler Scheduler data * * \return Epoch time when node became a cluster member * (or scheduler effective time for legacy entries) if a member, * 0 if not a member, or -1 if no valid information available */ static long long unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler) { const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM); int member = 0; if (member_time == NULL) { return -1LL; } else if (crm_str_to_boolean(member_time, &member) == 1) { /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was * recorded as a boolean for a DC < 2.1.7, or the node is pending * shutdown and has left the CPG, in which case it was set to 1 to avoid * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT. * * We return the effective time for in_ccm=1 because what's important to * avoid fencing is that effective time minus this value is less than * the pending node timeout. */ return member? (long long) get_effective_time(scheduler) : 0LL; } else { long long when_member = 0LL; if ((pcmk__scan_ll(member_time, &when_member, 0LL) != pcmk_rc_ok) || (when_member < 0LL)) { crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM " in " PCMK__XE_NODE_STATE " entry", member_time); return -1LL; } return when_member; } } /*! * \internal * \brief Unpack node's time when it became online in process group * * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry * * \return Epoch time when node became online in process group (or 0 if not * online, or 1 for legacy online entries) */ static long long unpack_node_online(const xmlNode *node_state) { const char *peer_time = crm_element_value(node_state, PCMK__XA_CRMD); // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline" if (pcmk__str_eq(peer_time, OFFLINESTATUS, pcmk__str_casei|pcmk__str_null_matches)) { return 0LL; } else if (pcmk__str_eq(peer_time, ONLINESTATUS, pcmk__str_casei)) { return 1LL; } else { long long when_online = 0LL; if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok) || (when_online < 0)) { crm_warn("Unrecognized value '%s' for " PCMK__XA_CRMD " in " PCMK__XE_NODE_STATE " entry, assuming offline", peer_time); return 0LL; } return when_online; } } /*! * \internal * \brief Unpack node attribute for user-requested fencing * * \param[in] node Node to check * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry in CIB status * * \return \c true if fencing has been requested for \p node, otherwise \c false */ static bool unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state) { long long value = 0LL; int value_i = 0; const char *value_s = pe_node_attribute_raw(node, PCMK_NODE_ATTR_TERMINATE); // Value may be boolean or an epoch time if (crm_str_to_boolean(value_s, &value_i) == 1) { return (value_i != 0); } if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) { return (value > 0); } crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE "node attribute for %s", value_s, pcmk__node_name(node)); return false; } static gboolean determine_online_status_no_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, pcmk_node_t *this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, PCMK__XA_JOIN); const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED); long long when_member = unpack_node_member(node_state, scheduler); long long when_online = unpack_node_online(node_state); if (when_member <= 0) { crm_trace("Node %s is %sdown", pcmk__node_name(this_node), ((when_member < 0)? "presumed " : "")); } else if (when_online > 0) { if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { online = TRUE; } else { crm_debug("Node %s is not ready to run resources: %s", pcmk__node_name(this_node), join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("Node %s controller is down: " "member@%lld online@%lld join=%s expected=%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } else { /* mark it unclean */ pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE); crm_info("Node %s member@%lld online@%lld join=%s expected=%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } return online; } /*! * \internal * \brief Check whether a node has taken too long to join controller group * * \param[in,out] scheduler Scheduler data * \param[in] node Node to check * \param[in] when_member Epoch time when node became a cluster member * \param[in] when_online Epoch time when node joined controller group * * \return true if node has been pending (on the way up) longer than * \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false * \note This will also update the cluster's recheck time if appropriate. */ static inline bool pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, long long when_member, long long when_online) { if ((scheduler->node_pending_timeout > 0) && (when_member > 0) && (when_online <= 0)) { // There is a timeout on pending nodes, and node is pending time_t timeout = when_member + scheduler->node_pending_timeout; if (get_effective_time(node->details->data_set) >= timeout) { return true; // Node has timed out } // Node is pending, but still has time pe__update_recheck_time(timeout, scheduler, "pending node timeout"); } return false; } static bool determine_online_status_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, pcmk_node_t *this_node) { bool termination_requested = unpack_node_terminate(this_node, node_state); const char *join = crm_element_value(node_state, PCMK__XA_JOIN); const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED); long long when_member = unpack_node_member(node_state, scheduler); long long when_online = unpack_node_online(node_state); /* - PCMK__XA_JOIN ::= member|down|pending|banned - PCMK__XA_EXPECTED ::= member|down @COMPAT with entries recorded for DCs < 2.1.7 - PCMK__XA_IN_CCM ::= true|false - PCMK__XA_CRMD ::= online|offline Since crm_feature_set 3.18.0 (pacemaker-2.1.7): - PCMK__XA_IN_CCM ::= |0 Since when node has been a cluster member. A value 0 of means the node is not a cluster member. - PCMK__XA_CRMD ::= |0 Since when peer has been online in CPG. A value 0 means the peer is offline in CPG. */ crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, ""), (termination_requested? " (termination requested)" : "")); if (this_node->details->shutdown) { crm_debug("%s is shutting down", pcmk__node_name(this_node)); /* Slightly different criteria since we can't shut down a dead peer */ return (when_online > 0); } if (when_member < 0) { pe_fence_node(scheduler, this_node, "peer has not been seen by the cluster", FALSE); return false; } if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) { pe_fence_node(scheduler, this_node, "peer failed Pacemaker membership criteria", FALSE); } else if (termination_requested) { if ((when_member <= 0) && (when_online <= 0) && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) { crm_info("%s was fenced as requested", pcmk__node_name(this_node)); return false; } pe_fence_node(scheduler, this_node, "fencing was requested", false); } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_null_matches)) { if (pending_too_long(scheduler, this_node, when_member, when_online)) { pe_fence_node(scheduler, this_node, "peer pending timed out on joining the process group", FALSE); } else if ((when_member > 0) || (when_online > 0)) { crm_info("- %s is not ready to run resources", pcmk__node_name(this_node)); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", pcmk__node_name(this_node)); } } else if (when_member <= 0) { // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes pe_fence_node(scheduler, this_node, "peer is no longer part of the cluster", TRUE); } else if (when_online <= 0) { pe_fence_node(scheduler, this_node, "peer process is no longer available", FALSE); /* Everything is running at this point, now check join state */ } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) { crm_info("%s is active", pcmk__node_name(this_node)); } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) { crm_info("%s is not ready to run resources", pcmk__node_name(this_node)); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(scheduler, this_node, "peer was in an unknown state", FALSE); } return (when_member > 0); } static void determine_remote_online_status(pcmk_scheduler_t *scheduler, pcmk_node_t *this_node) { pcmk_resource_t *rsc = this_node->details->remote_rsc; pcmk_resource_t *container = NULL; pcmk_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && pcmk__list_of_1(rsc->running_on)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == pcmk_role_started) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if ((rsc->role == pcmk_role_started) && (rsc->next_role == pcmk_role_stopped)) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if ((rsc->role == pcmk_role_stopped) || ((container != NULL) && (container->role == pcmk_role_stopped))) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); } static void determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, pcmk_scheduler_t *scheduler) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED); CRM_CHECK(this_node != NULL, return); this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; if (pe__shutdown_requested(this_node)) { this_node->details->shutdown = TRUE; } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { online = determine_online_status_no_fencing(scheduler, node_state, this_node); } else { online = determine_online_status_fencing(scheduler, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; // @COMPAT deprecated and unused this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; // @COMPAT deprecated and unused this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node)); } else if (this_node->details->unclean) { pcmk__sched_warn("%s is unclean", pcmk__node_name(this_node)); } else if (this_node->details->online) { crm_info("%s is %s", pcmk__node_name(this_node), this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("%s is offline", pcmk__node_name(this_node)); } } /*! * \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!pcmk__str_empty(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = calloc(base_name_len + 3, sizeof(char)); CRM_ASSERT(zero); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static pcmk_resource_t * create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, PCMK_XE_PRIMITIVE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { pcmk_node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pe_find_node(scheduler->nodes, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, "remote", NULL, scheduler); } link_rsc2remotenode(scheduler, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); pcmk__set_rsc_flags(rsc, pcmk_rsc_removed_filler); } pcmk__set_rsc_flags(rsc, pcmk_rsc_removed); scheduler->resources = g_list_append(scheduler->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history * * \param[in,out] parent Clone resource that orphan will be added to * \param[in] rsc_id Orphan's resource ID * \param[in] node Where orphan is active (for logging only) * \param[in,out] scheduler Scheduler data * * \return Newly added orphaned instance of \p parent */ static pcmk_resource_t * create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pcmk_resource_t *top = pe__create_clone_child(parent, scheduler); // find_rsc() because we might be a cloned group pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pcmk_rsc_match_clone_only); pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, pcmk__node_name(node)); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX * instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX * instances are already active). * * \param[in,out] scheduler Scheduler data * \param[in] node Node on which to check for instance * \param[in,out] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (no instance) */ static pcmk_resource_t * find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, pcmk_resource_t *parent, const char *rsc_id) { GList *rIter = NULL; pcmk_resource_t *rsc = NULL; pcmk_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(!pcmk_is_set(parent->flags, pcmk_rsc_unique)); // Check for active (or partially active, for cloned groups) instance pcmk__rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, pcmk__node_name(node), parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GList *locations = NULL; pcmk_resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and * (3) when we re-run calculations on the same scheduler data as part of * a simulation. */ child->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. */ CRM_LOG_ASSERT(locations->next == NULL); if (pcmk__same_node((pcmk_node_t *) locations->data, node)) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pcmk_rsc_match_clone_only); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true * to false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->running_on) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, pcmk__node_name(node)); skip_inactive = TRUE; rsc = NULL; } else { pcmk__rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, pcmk_rsc_match_clone_only); /* ... but don't use it if it was already associated with a * pending action on another node */ if ((inactive_instance != NULL) && (inactive_instance->pending_node != NULL) && !pcmk__same_node(inactive_instance->pending_node, node)) { inactive_instance = NULL; } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pcmk__rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has "requires" set to "quorum" or "nothing", and we don't * have a clone instance for every node, we don't want to consume a valid * instance number for unclean nodes. Such instances may appear to be active * according to the history, but should be considered inactive, so we can * start an instance elsewhere. Treat such instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pe__is_guest_node(node) && !pe__is_universal_clone(parent, scheduler)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler); pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static pcmk_resource_t * unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, const char *rsc_id) { pcmk_resource_t *rsc = NULL; pcmk_resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(scheduler->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0, * we create a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources, clone0_id); if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->variant > pcmk_rsc_variant_primitive) { crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pe_rsc_is_anon_clone(parent)) { if (pe_rsc_is_bundled(parent)) { rsc = pe__find_bundle_replica(parent->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(scheduler, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei) && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) { pcmk__str_update(&rsc->clone_name, rsc_id); pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, pcmk__node_name(node), rsc->id, pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : ""); } return rsc; } static pcmk_resource_t * process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, pcmk__node_name(node)); rsc = create_fake_resource(rsc_id, rsc_entry, scheduler); if (rsc == NULL) { return NULL; } if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) { pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id); resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", scheduler); } return rsc; } static void process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node, enum action_fail_response on_fail) { pcmk_node_t *tmpnode = NULL; char *reason = NULL; enum action_fail_response save_on_fail = pcmk_on_fail_ignore; CRM_ASSERT(rsc); pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, pcmk_role_text(rsc->role), pcmk__node_name(node), fail2text(on_fail)); /* process current state */ if (rsc->role != pcmk_role_unknown) { pcmk_resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { pcmk_node_t *n = pe__copy_node(node); pcmk__rsc_trace(rsc, "%s%s%s known on %s", rsc->id, ((rsc->clone_name == NULL)? "" : " also known as "), ((rsc->clone_name == NULL)? "" : rsc->clone_name), pcmk__node_name(n)); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if ((rsc->role > pcmk_role_stopped) && node->details->online == FALSE && node->details->maintenance == FALSE && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pe__is_guest_node(node)) { pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); should_fence = TRUE; } else if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) { if (pe__is_remote_node(node) && node->details->remote_rsc && !pcmk_is_set(node->details->remote_rsc->flags, pcmk_rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(rsc->cluster, node, reason, FALSE); } free(reason); } /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */ save_on_fail = on_fail; if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = pcmk_on_fail_ignore; } switch (on_fail) { case pcmk_on_fail_ignore: /* nothing to do */ break; case pcmk_on_fail_demote: pcmk__set_rsc_flags(rsc, pcmk_rsc_failed); demote_action(rsc, node, FALSE); break; case pcmk_on_fail_fence_node: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(rsc->cluster, node, reason, FALSE); free(reason); break; case pcmk_on_fail_standby_node: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case pcmk_on_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed); pcmk__set_rsc_flags(rsc, pcmk_rsc_blocked); break; case pcmk_on_fail_ban: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", rsc->cluster); break; case pcmk_on_fail_stop: pe__set_next_role(rsc, pcmk_role_stopped, "on-fail=stop"); break; case pcmk_on_fail_restart: if ((rsc->role != pcmk_role_stopped) && (rsc->role != pcmk_role_unknown)) { pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); stop_action(rsc, node, FALSE); } break; case pcmk_on_fail_restart_container: pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); if (rsc->container && pe_rsc_is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. */ rsc->cluster->stop_needed = g_list_prepend(rsc->cluster->stop_needed, rsc->container); } else if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if ((rsc->role != pcmk_role_stopped) && (rsc->role != pcmk_role_unknown)) { stop_action(rsc, node, FALSE); } break; case pcmk_on_fail_reset_remote: pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); } if (tmpnode && pe__is_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(rsc->cluster, tmpnode, "remote connection is unrecoverable", FALSE); } } /* require the stop action regardless if fencing is occurring or not. */ if (rsc->role > pcmk_role_stopped) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_ms) { pe__set_next_role(rsc, pcmk_role_stopped, "remote reset"); } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if ((rsc->role != pcmk_role_stopped) && (rsc->role != pcmk_role_unknown)) { if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) { if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { pcmk__config_warn("Detected active orphan %s running on %s", rsc->id, pcmk__node_name(node)); } else { pcmk__config_warn("Resource '%s' must be stopped manually on " "%s because cluster is configured not to " "stop active orphans", rsc->id, pcmk__node_name(node)); } } native_add_running(rsc, node, rsc->cluster, (save_on_fail != pcmk_on_fail_ignore)); switch (on_fail) { case pcmk_on_fail_ignore: break; case pcmk_on_fail_demote: case pcmk_on_fail_block: pcmk__set_rsc_flags(rsc, pcmk_rsc_failed); break; default: pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); break; } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, PCMK_ACTION_STOP, FALSE); GList *gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { pcmk_action_t *stop = (pcmk_action_t *) gIter->data; pcmk__set_action_flags(stop, pcmk_action_optional); } g_list_free(possible_matches); } /* A successful stop after migrate_to on the migration source doesn't make * the partially migrated resource stopped on the migration target. */ if ((rsc->role == pcmk_role_stopped) && rsc->partial_migration_source && rsc->partial_migration_source->details == node->details && rsc->partial_migration_target && rsc->running_on) { rsc->role = pcmk_role_started; } } /* create active recurring operations as optional */ static void process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc, int start_index, int stop_index, GList *sorted_op_list, pcmk_scheduler_t *scheduler) { int counter = -1; const char *task = NULL; const char *status = NULL; GList *gIter = sorted_op_list; CRM_ASSERT(rsc); pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = ID(rsc_op); counter++; if (node->details->online == FALSE) { pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline", rsc->id, pcmk__node_name(node)); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active", id, pcmk__node_name(node)); continue; } else if (counter < start_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d", id, pcmk__node_name(node), counter); continue; } crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms); if (interval_ms == 0) { pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring", id, pcmk__node_name(node)); continue; } status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS); if (pcmk__str_eq(status, "-1", pcmk__str_casei)) { pcmk__rsc_trace(rsc, "Skipping %s on %s: status", id, pcmk__node_name(node)); continue; } task = crm_element_value(rsc_op, PCMK_XA_OPERATION); /* create the action */ key = pcmk__op_key(rsc->id, task, interval_ms); pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node)); custom_action(rsc, key, task, node, TRUE, scheduler); } } void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { const xmlNode *rsc_op = (const xmlNode *) iter->data; counter++; task = crm_element_value(rsc_op, PCMK_XA_OPERATION); status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS); if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei) && pcmk__str_eq(status, "0", pcmk__str_casei)) { *stop_index = counter; } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MIGRATE_FROM, NULL)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE); if (pcmk__strcase_any_of(rc, "0", "8", NULL)) { implied_monitor_start = counter; } } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, NULL)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } // If resource history entry has shutdown lock, remember lock node and time static void unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { time_t lock_time = 0; // When lock started (i.e. node shutdown time) if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK, &lock_time) == pcmk_ok) && (lock_time != 0)) { if ((scheduler->shutdown_lock > 0) && (get_effective_time(scheduler) > (lock_time + scheduler->shutdown_lock))) { pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired", rsc->id, pcmk__node_name(node)); pe__clear_resource_history(rsc, node); } else { /* @COMPAT I don't like breaking const signatures, but * rsc->lock_node should really be const -- we just can't change it * until the next API compatibility break. */ rsc->lock_node = (pcmk_node_t *) node; rsc->lock_time = lock_time; } } } /*! * \internal * \brief Unpack one lrm_resource entry from a node's CIB status * * \param[in,out] node Node whose status is being unpacked * \param[in] rsc_entry lrm_resource XML being unpacked * \param[in,out] scheduler Scheduler data * * \return Resource corresponding to the entry, or NULL if no operation history */ static pcmk_resource_t * unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource, pcmk_scheduler_t *scheduler) { GList *gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = pcmk_role_unknown; const char *rsc_id = ID(lrm_resource); pcmk_resource_t *rsc = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = pcmk_on_fail_ignore; enum rsc_role_e saved_role = pcmk_role_unknown; if (rsc_id == NULL) { pcmk__config_err("Ignoring invalid " XML_LRM_TAG_RESOURCE " entry: No " PCMK_XA_ID); crm_log_xml_info(lrm_resource, "missing-id"); return NULL; } crm_trace("Unpacking " XML_LRM_TAG_RESOURCE " for %s on %s", rsc_id, pcmk__node_name(node)); // Build a list of individual lrm_rsc_op entries, so we can sort them for (rsc_op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP); rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) { op_list = g_list_prepend(op_list, rsc_op); } if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } } /* find the resource */ rsc = unpack_find_resource(scheduler, node, rsc_id); if (rsc == NULL) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } else { rsc = process_orphan_resource(lrm_resource, node, scheduler); } } CRM_ASSERT(rsc != NULL); // Check whether the resource is "shutdown-locked" to this node if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { unpack_shutdown_lock(lrm_resource, rsc, node, scheduler); } /* process operations */ saved_role = rsc->role; rsc->role = pcmk_role_unknown; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, scheduler); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail); if (get_target_role(rsc, &req_role)) { if ((rsc->next_role == pcmk_role_unknown) || (req_role < rsc->next_role)) { pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE); } else if (req_role > rsc->next_role) { pcmk__rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, pcmk_role_text(rsc->next_role), pcmk_role_text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list, pcmk_scheduler_t *scheduler) { for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { pcmk_resource_t *rsc; pcmk_resource_t *container; const char *rsc_id; const char *container_id; if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) { continue; } container_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER); rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(scheduler->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(scheduler->resources, rsc_id); if ((rsc == NULL) || (rsc->container != NULL) || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) { continue; } pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } /*! * \internal * \brief Unpack one node's lrm status section * * \param[in,out] node Node whose status is being unpacked * \param[in] xml CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler) { bool found_orphaned_container_filler = false; // Drill down to lrm_resources section - xml = find_xml_node(xml, XML_CIB_TAG_LRM, FALSE); + xml = find_xml_node(xml, PCMK__XE_LRM, FALSE); if (xml == NULL) { return; } xml = find_xml_node(xml, XML_LRM_TAG_RESOURCES, FALSE); if (xml == NULL) { return; } // Unpack each lrm_resource entry for (const xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE); rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler); if ((rsc != NULL) && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) { found_orphaned_container_filler = true; } } /* Now that all resource state has been unpacked for this node, map any * orphaned container fillers to their container resource. */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(xml, scheduler); } } static void set_active(pcmk_resource_t *rsc) { const pcmk_resource_t *top = pe__const_top_resource(rsc, false); if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) { rsc->role = pcmk_role_unpromoted; } else { rsc->role = pcmk_role_started; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { pcmk_node_t *node = value; int *score = user_data; node->weight = *score; } #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE -#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \ - "/" XML_LRM_TAG_RESOURCES \ +#define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \ + "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE #define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, int target_rc, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']" SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'", NULL); /* Need to check against transition_magic too? */ if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) { pcmk__g_strcat(xpath, " and @" PCMK__META_MIGRATE_TARGET "='", source, "']", NULL); } else if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) { pcmk__g_strcat(xpath, " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']", NULL); } else { g_string_append_c(xpath, ']'); } xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); if (xml && target_rc >= 0) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_EXEC_ERROR; crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc); crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status); if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) { return NULL; } } return xml; } static xmlNode * find_lrm_resource(const char *rsc_id, const char *node_name, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']", NULL); xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); return xml; } /*! * \internal * \brief Check whether a resource has no completed action history on a node * * \param[in,out] rsc Resource to check * \param[in] node_name Node to check * * \return true if \p rsc_id is unknown on \p node_name, otherwise false */ static bool unknown_on_node(pcmk_resource_t *rsc, const char *node_name) { bool result = false; xmlXPathObjectPtr search; GString *xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc->id, "']" SUB_XPATH_LRM_RSC_OP "[@" PCMK__XA_RC_CODE "!='193']", NULL); search = xpath_search(rsc->cluster->input, (const char *) xpath->str); result = (numXpathResults(search) == 0); freeXpathObject(search); g_string_free(xpath, TRUE); return result; } /*! * \brief Check whether a probe/monitor indicating the resource was not running * on a node happened after some event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that monitor is being compared to * \param[in] same_node Whether the operations are on the same node * \param[in,out] scheduler Scheduler data * * \return true if such a monitor happened after event, false otherwise */ static bool monitor_not_running_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, bool same_node, pcmk_scheduler_t *scheduler) { /* Any probe/monitor operation on the node indicating it was not running * there */ xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name, NULL, PCMK_OCF_NOT_RUNNING, scheduler); return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0); } /*! * \brief Check whether any non-monitor operation on a node happened after some * event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that non-monitor is being compared to * \param[in] same_node Whether the operations are on the same node * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool non_monitor_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, bool same_node, pcmk_scheduler_t *scheduler) { xmlNode *lrm_resource = NULL; lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler); if (lrm_resource == NULL) { return false; } for (xmlNode *op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP); op != NULL; op = crm_next_same_xml(op)) { const char * task = NULL; if (op == xml_op) { continue; } task = crm_element_value(op, PCMK_XA_OPERATION); if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL) && pe__is_newer_op(op, xml_op, same_node) > 0) { return true; } } return false; } /*! * \brief Check whether the resource has newer state on a node after a migration * attempt * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] migrate_to Any migrate_to event that is being compared to * \param[in] migrate_from Any migrate_from event that is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool newer_state_after_migrate(const char *rsc_id, const char *node_name, const xmlNode *migrate_to, const xmlNode *migrate_from, pcmk_scheduler_t *scheduler) { const xmlNode *xml_op = migrate_to; const char *source = NULL; const char *target = NULL; bool same_node = false; if (migrate_from) { xml_op = migrate_from; } source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE); target = crm_element_value(xml_op, PCMK__META_MIGRATE_TARGET); /* It's preferred to compare to the migrate event on the same node if * existing, since call ids are more reliable. */ if (pcmk__str_eq(node_name, target, pcmk__str_casei)) { if (migrate_from) { xml_op = migrate_from; same_node = true; } else { xml_op = migrate_to; } } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) { if (migrate_to) { xml_op = migrate_to; same_node = true; } else { xml_op = migrate_from; } } /* If there's any newer non-monitor operation on the node, or any newer * probe/monitor operation on the node indicating it was not running there, * the migration events potentially no longer matter for the node. */ return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler) || monitor_not_running_after(rsc_id, node_name, xml_op, same_node, scheduler); } /*! * \internal * \brief Parse migration source and target node names from history entry * * \param[in] entry Resource history entry for a migration action * \param[in] source_node If not NULL, source must match this node * \param[in] target_node If not NULL, target must match this node * \param[out] source_name Where to store migration source node name * \param[out] target_name Where to store migration target node name * * \return Standard Pacemaker return code */ static int get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node, const pcmk_node_t *target_node, const char **source_name, const char **target_name) { *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE); *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET); if ((*source_name == NULL) || (*target_name == NULL)) { pcmk__config_err("Ignoring resource history entry %s without " PCMK__META_MIGRATE_SOURCE " and " PCMK__META_MIGRATE_TARGET, ID(entry)); return pcmk_rc_unpack_error; } if ((source_node != NULL) && !pcmk__str_eq(*source_name, source_node->details->uname, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource history entry %s because " PCMK__META_MIGRATE_SOURCE "='%s' does not match %s", ID(entry), *source_name, pcmk__node_name(source_node)); return pcmk_rc_unpack_error; } if ((target_node != NULL) && !pcmk__str_eq(*target_name, target_node->details->uname, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource history entry %s because " PCMK__META_MIGRATE_TARGET "='%s' does not match %s", ID(entry), *target_name, pcmk__node_name(target_node)); return pcmk_rc_unpack_error; } return pcmk_rc_ok; } /* * \internal * \brief Add a migration source to a resource's list of dangling migrations * * If the migrate_to and migrate_from actions in a live migration both * succeeded, but there is no stop on the source, the migration is considered * "dangling." Add the source to the resource's dangling migration list, which * will be used to schedule a stop on the source without affecting the target. * * \param[in,out] rsc Resource involved in migration * \param[in] node Migration source */ static void add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node) { pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s", rsc->id, pcmk__node_name(node)); rsc->role = pcmk_role_stopped; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, (gpointer) node); } /*! * \internal * \brief Update resource role etc. after a successful migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_success(struct action_history *history) { /* A complete migration sequence is: * 1. migrate_to on source node (which succeeded if we get to this function) * 2. migrate_from on target node * 3. stop on source node * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from succeeded but no stop has happened, the * migration is considered to be "dangling". * * If a successful migrate_to and stop have happened on the source node, we * still need to check for a partial migration, due to scenarios (easier to * produce with batch-limit=1) like: * * - A resource is migrating from node1 to node2, and a migrate_to is * initiated for it on node1. * * - node2 goes into standby mode while the migrate_to is pending, which * aborts the transition. * * - Upon completion of the migrate_to, a new transition schedules a stop * on both nodes and a start on node1. * * - If the new transition is aborted for any reason while the resource is * stopping on node1, the transition after that stop completes will see * the migrate_to and stop on the source, but it's still a partial * migration, and the resource must be stopped on node2 because it is * potentially active there due to the migrate_to. * * We also need to take into account that either node's history may be * cleared at any point in the migration process. */ int from_rc = PCMK_OCF_OK; int from_status = PCMK_EXEC_PENDING; pcmk_node_t *target_node = NULL; xmlNode *migrate_from = NULL; const char *source = NULL; const char *target = NULL; bool source_newer_op = false; bool target_newer_state = false; bool active_on_target = false; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } // Check for newer state on the source source_newer_op = non_monitor_after(history->rsc->id, source, history->xml, true, history->rsc->cluster); // Check for a migrate_from action from this source on the target migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, -1, history->rsc->cluster); if (migrate_from != NULL) { if (source_newer_op) { /* There's a newer non-monitor operation on the source and a * migrate_from on the target, so this migrate_to is irrelevant to * the resource's state. */ return; } crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc); crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status); } /* If the resource has newer state on both the source and target after the * migration events, this migrate_to is irrelevant to the resource's state. */ target_newer_state = newer_state_after_migrate(history->rsc->id, target, history->xml, migrate_from, history->rsc->cluster); if (source_newer_op && target_newer_state) { return; } /* Check for dangling migration (migrate_from succeeded but stop not done). * We know there's no stop because we already returned if the target has a * migrate_from and the source has any newer non-monitor operation. */ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { add_dangling_migration(history->rsc, history->node); return; } /* Without newer state, this migrate_to implies the resource is active. * (Clones are not allowed to migrate, so role can't be promoted.) */ history->rsc->role = pcmk_role_started; target_node = pe_find_node(history->rsc->cluster->nodes, target); active_on_target = !target_newer_state && (target_node != NULL) && target_node->details->online; if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target if (active_on_target) { native_add_running(history->rsc, target_node, history->rsc->cluster, TRUE); } else { // Mark resource as failed, require recovery, and prevent migration pcmk__set_rsc_flags(history->rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable); } return; } // The migrate_from is pending, complete but erased, or to be scheduled /* If there is no history at all for the resource on an online target, then * it was likely cleaned. Just return, and we'll schedule a probe. Once we * have the probe result, it will be reflected in target_newer_state. */ if ((target_node != NULL) && target_node->details->online && unknown_on_node(history->rsc, target)) { return; } if (active_on_target) { pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, source); native_add_running(history->rsc, target_node, history->rsc->cluster, FALSE); if ((source_node != NULL) && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ history->rsc->partial_migration_target = target_node; history->rsc->partial_migration_source = source_node; } } else if (!source_newer_op) { // Mark resource as failed, require recovery, and prevent migration pcmk__set_rsc_flags(history->rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_failure(struct action_history *history) { xmlNode *target_migrate_from = NULL; const char *source = NULL; const char *target = NULL; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->role = pcmk_role_started; // Check for migrate_from on the target target_migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, PCMK_OCF_OK, history->rsc->cluster); if (/* If the resource state is unknown on the target, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, target) /* If the resource has newer state on the target after the migration * events, this migrate_to no longer matters for the target. */ && !newer_state_after_migrate(history->rsc->id, target, history->xml, target_migrate_from, history->rsc->cluster)) { /* The resource has no newer state on the target, so assume it's still * active there. * (if it is up). */ pcmk_node_t *target_node = pe_find_node(history->rsc->cluster->nodes, target); if (target_node && target_node->details->online) { native_add_running(history->rsc, target_node, history->rsc->cluster, FALSE); } } else if (!non_monitor_after(history->rsc->id, source, history->xml, true, history->rsc->cluster)) { /* We know the resource has newer state on the target, but this * migrate_to still matters for the source as long as there's no newer * non-monitor operation there. */ // Mark node as having dangling migration so we can force a stop later history->rsc->dangling_migrations = g_list_prepend(history->rsc->dangling_migrations, (gpointer) history->node); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_from action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_from_failure(struct action_history *history) { xmlNode *source_migrate_to = NULL; const char *source = NULL; const char *target = NULL; // Get source and target node names from XML if (get_migration_node_names(history->xml, NULL, history->node, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->role = pcmk_role_started; // Check for a migrate_to on the source source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO, source, target, PCMK_OCF_OK, history->rsc->cluster); if (/* If the resource state is unknown on the source, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, source) /* If the resource has newer state on the source after the migration * events, this migrate_from no longer matters for the source. */ && !newer_state_after_migrate(history->rsc->id, source, source_migrate_to, history->xml, history->rsc->cluster)) { /* The resource has no newer state on the source, so assume it's still * active there (if it is up). */ pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, source); if (source_node && source_node->details->online) { native_add_running(history->rsc, source_node, history->rsc->cluster, TRUE); } } } /*! * \internal * \brief Add an action to cluster's list of failed actions * * \param[in,out] history Parsed action result history */ static void record_failed_op(struct action_history *history) { if (!(history->node->details->online)) { return; } for (const xmlNode *xIter = history->rsc->cluster->failed->children; xIter != NULL; xIter = xIter->next) { const char *key = pcmk__xe_history_key(xIter); const char *uname = crm_element_value(xIter, PCMK_XA_UNAME); if (pcmk__str_eq(history->key, key, pcmk__str_none) && pcmk__str_eq(uname, history->node->details->uname, pcmk__str_casei)) { crm_trace("Skipping duplicate entry %s on %s", history->key, pcmk__node_name(history->node)); return; } } crm_trace("Adding entry for %s on %s to failed action list", history->key, pcmk__node_name(history->node)); crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname); crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id); add_node_copy(history->rsc->cluster->failed, history->xml); } static char * last_change_str(const xmlNode *xml_op) { time_t when; char *result = NULL; if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &when) == pcmk_ok) { char *when_s = pcmk__epoch2str(&when, 0); const char *p = strchr(when_s, ' '); // Skip day of week to make message shorter if ((p != NULL) && (*(++p) != '\0')) { result = strdup(p); CRM_ASSERT(result != NULL); } free(when_s); } if (result == NULL) { result = strdup("unknown time"); CRM_ASSERT(result != NULL); } return result; } /*! * \internal * \brief Compare two on-fail values * * \param[in] first One on-fail value to compare * \param[in] second The other on-fail value to compare * * \return A negative number if second is more severe than first, zero if they * are equal, or a positive number if first is more severe than second. * \note This is only needed until the action_fail_response values can be * renumbered at the next API compatibility break. */ static int cmp_on_fail(enum action_fail_response first, enum action_fail_response second) { switch (first) { case pcmk_on_fail_demote: switch (second) { case pcmk_on_fail_ignore: return 1; case pcmk_on_fail_demote: return 0; default: return -1; } break; case pcmk_on_fail_reset_remote: switch (second) { case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: return 1; case pcmk_on_fail_reset_remote: return 0; default: return -1; } break; case pcmk_on_fail_restart_container: switch (second) { case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: case pcmk_on_fail_reset_remote: return 1; case pcmk_on_fail_restart_container: return 0; default: return -1; } break; default: break; } switch (second) { case pcmk_on_fail_demote: return (first == pcmk_on_fail_ignore)? -1 : 1; case pcmk_on_fail_reset_remote: switch (first) { case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: return -1; default: return 1; } break; case pcmk_on_fail_restart_container: switch (first) { case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: case pcmk_on_fail_reset_remote: return -1; default: return 1; } break; default: break; } return first - second; } /*! * \internal * \brief Ban a resource (or its clone if an anonymous instance) from all nodes * * \param[in,out] rsc Resource to ban */ static void ban_from_all_nodes(pcmk_resource_t *rsc) { int score = -INFINITY; pcmk_resource_t *fail_rsc = rsc; if (fail_rsc->parent != NULL) { pcmk_resource_t *parent = uber_parent(fail_rsc); if (pe_rsc_is_anon_clone(parent)) { /* For anonymous clones, if an operation with on-fail=stop fails for * any instance, the entire clone must stop. */ fail_rsc = parent; } } // Ban the resource from all nodes crm_notice("%s will not be started under current conditions", fail_rsc->id); if (fail_rsc->allowed_nodes != NULL) { g_hash_table_destroy(fail_rsc->allowed_nodes); } fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } /*! * \internal * \brief Get configured failure handling and role after failure for an action * * \param[in,out] history Unpacked action history entry * \param[out] on_fail Where to set configured failure handling * \param[out] fail_role Where to set to role after failure */ static void unpack_failure_handling(struct action_history *history, enum action_fail_response *on_fail, enum rsc_role_e *fail_role) { xmlNode *config = pcmk__find_action_config(history->rsc, history->task, history->interval_ms, true); GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node, history->task, history->interval_ms, config); const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL); *on_fail = pcmk__parse_on_fail(history->rsc, history->task, history->interval_ms, on_fail_str); *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail, meta); g_hash_table_destroy(meta); } /*! * \internal * \brief Update resource role, failure handling, etc., after a failed action * * \param[in,out] history Parsed action result history * \param[in] config_on_fail Action failure handling from configuration * \param[in] fail_role Resource's role after failure of this action * \param[out] last_failure This will be set to the history XML * \param[in,out] on_fail Actual handling of action result */ static void unpack_rsc_op_failure(struct action_history *history, enum action_fail_response config_on_fail, enum rsc_role_e fail_role, xmlNode **last_failure, enum action_fail_response *on_fail) { bool is_probe = false; char *last_change_s = NULL; *last_failure = history->xml; is_probe = pcmk_xe_is_probe(history->xml); last_change_s = last_change_str(history->xml); if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster) && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? "probe" : history->task), history->rsc->id, pcmk__node_name(history->node), last_change_s, history->exit_status, history->id); } else { pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of " "%s on %s at %s " CRM_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? "probe" : history->task), history->rsc->id, pcmk__node_name(history->node), last_change_s, history->exit_status, history->id); if (is_probe && (history->exit_status != PCMK_OCF_OK) && (history->exit_status != PCMK_OCF_NOT_RUNNING) && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the " PCMK_XA_RESOURCE_DISCOVERY " option for location " "constraints", history->rsc->id, pcmk__node_name(history->node)); } record_failed_op(history); } free(last_change_s); if (cmp_on_fail(*on_fail, config_on_fail) < 0) { pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s", fail2text(*on_fail), fail2text(config_on_fail), history->key); *on_fail = config_on_fail; } if (strcmp(history->task, PCMK_ACTION_STOP) == 0) { resource_location(history->rsc, history->node, -INFINITY, "__stop_fail__", history->rsc->cluster); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) { unpack_migrate_to_failure(history); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) { unpack_migrate_from_failure(history); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->role = pcmk_role_promoted; } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) { if (config_on_fail == pcmk_on_fail_block) { history->rsc->role = pcmk_role_promoted; pe__set_next_role(history->rsc, pcmk_role_stopped, "demote with on-fail=block"); } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) { history->rsc->role = pcmk_role_stopped; } else { /* Staying in the promoted role would put the scheduler and * controller into a loop. Setting the role to unpromoted is not * dangerous because the resource will be stopped as part of * recovery, and any promotion will be ordered after that stop. */ history->rsc->role = pcmk_role_unpromoted; } } if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { /* leave stopped */ pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id); history->rsc->role = pcmk_role_stopped; } else if (history->rsc->role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id); set_active(history->rsc); } pcmk__rsc_trace(history->rsc, "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s", history->rsc->id, pcmk_role_text(history->rsc->role), pcmk__btoa(history->node->details->unclean), fail2text(config_on_fail), pcmk_role_text(fail_role)); if ((fail_role != pcmk_role_started) && (history->rsc->next_role < fail_role)) { pe__set_next_role(history->rsc, fail_role, "failure"); } if (fail_role == pcmk_role_stopped) { ban_from_all_nodes(history->rsc); } } /*! * \internal * \brief Block a resource with a failed action if it cannot be recovered * * If resource action is a failed stop and fencing is not possible, mark the * resource as unmanaged and blocked, since recovery cannot be done. * * \param[in,out] history Parsed action history entry */ static void block_if_unrecoverable(struct action_history *history) { char *last_change_s = NULL; if (strcmp(history->task, PCMK_ACTION_STOP) != 0) { return; // All actions besides stop are always recoverable } if (pe_can_fence(history->node->details->data_set, history->node)) { return; // Failed stops are recoverable via fencing } last_change_s = last_change_str(history->xml); pcmk__sched_err("No further recovery can be attempted for %s " "because %s on %s failed (%s%s%s) at %s " CRM_XS " rc=%d id=%s", history->rsc->id, history->task, pcmk__node_name(history->node), services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), last_change_s, history->exit_status, history->id); free(last_change_s); pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_managed); pcmk__set_rsc_flags(history->rsc, pcmk_rsc_blocked); } /*! * \internal * \brief Update action history's execution status and why * * \param[in,out] history Parsed action history entry * \param[out] why Where to store reason for update * \param[in] value New value * \param[in] reason Description of why value was changed */ static inline void remap_because(struct action_history *history, const char **why, int value, const char *reason) { if (history->execution_status != value) { history->execution_status = value; *why = reason; } } /*! * \internal * \brief Remap informational monitor results and operation status * * For the monitor results, certain OCF codes are for providing extended information * to the user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by Pacemaker. * * For operation status, the action result can be used to determine an appropriate * status for the purposes of responding to the action. The status provided by the * executor is not directly usable since the executor does not know what was expected. * * \param[in,out] history Parsed action history entry * \param[in,out] on_fail What should be done about the result * \param[in] expired Whether result is expired * * \note If the result is remapped and the node is not shutting down or failed, * the operation will be recorded in the scheduler data's list of failed * operations to highlight it for the user. * * \note This may update the resource's current and next role. */ static void remap_operation(struct action_history *history, enum action_fail_response *on_fail, bool expired) { bool is_probe = false; int orig_exit_status = history->exit_status; int orig_exec_status = history->execution_status; const char *why = NULL; const char *task = history->task; // Remap degraded results to their successful counterparts history->exit_status = pcmk__effective_rc(history->exit_status); if (history->exit_status != orig_exit_status) { why = "degraded result"; if (!expired && (!history->node->details->shutdown || history->node->details->online)) { record_failed_op(history); } } if (!pe_rsc_is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml) && ((history->execution_status != PCMK_EXEC_DONE) || (history->exit_status != PCMK_OCF_NOT_RUNNING))) { history->execution_status = PCMK_EXEC_DONE; history->exit_status = PCMK_OCF_NOT_RUNNING; why = "equivalent probe result"; } /* If the executor reported an execution status of anything but done or * error, consider that final. But for done or error, we know better whether * it should be treated as a failure or not, because we know the expected * result. */ switch (history->execution_status) { case PCMK_EXEC_DONE: case PCMK_EXEC_ERROR: break; // These should be treated as node-fatal case PCMK_EXEC_NO_FENCE_DEVICE: case PCMK_EXEC_NO_SECRETS: remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "node-fatal error"); goto remap_done; default: goto remap_done; } is_probe = pcmk_xe_is_probe(history->xml); if (is_probe) { task = "probe"; } if (history->expected_exit_status < 0) { /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the * expected exit status in the transition key, which (along with the * similar case of a corrupted transition key in the CIB) will be * reported to this function as -1. Pacemaker 2.0+ does not support * rolling upgrades from those versions or processing of saved CIB files * from those versions, so we do not need to care much about this case. */ remap_because(history, &why, PCMK_EXEC_ERROR, "obsolete history format"); pcmk__config_warn("Expected result not found for %s on %s " "(corrupt or obsolete CIB?)", history->key, pcmk__node_name(history->node)); } else if (history->exit_status == history->expected_exit_status) { remap_because(history, &why, PCMK_EXEC_DONE, "expected result"); } else { remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result"); pcmk__rsc_debug(history->rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", history->key, pcmk__node_name(history->node), history->expected_exit_status, services_ocf_exitcode_str(history->expected_exit_status), history->exit_status, services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, "")); } switch (history->exit_status) { case PCMK_OCF_OK: if (is_probe && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) { char *last_change_s = last_change_str(history->xml); remap_because(history, &why, PCMK_EXEC_DONE, "probe"); pcmk__rsc_info(history->rsc, "Probe found %s active on %s at %s", history->rsc->id, pcmk__node_name(history->node), last_change_s); free(last_change_s); } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || (history->expected_exit_status == history->exit_status) || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) { /* For probes, recurring monitors for the Stopped role, and * unmanaged resources, "not running" is not considered a * failure. */ remap_because(history, &why, PCMK_EXEC_DONE, "exit status"); history->rsc->role = pcmk_role_stopped; *on_fail = pcmk_on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "not running"); } break; case PCMK_OCF_RUNNING_PROMOTED: if (is_probe && (history->exit_status != history->expected_exit_status)) { char *last_change_s = last_change_str(history->xml); remap_because(history, &why, PCMK_EXEC_DONE, "probe"); pcmk__rsc_info(history->rsc, "Probe found %s active and promoted on %s at %s", history->rsc->id, pcmk__node_name(history->node), last_change_s); free(last_change_s); } if (!expired || (history->exit_status == history->expected_exit_status)) { history->rsc->role = pcmk_role_promoted; } break; case PCMK_OCF_FAILED_PROMOTED: if (!expired) { history->rsc->role = pcmk_role_promoted; } remap_because(history, &why, PCMK_EXEC_ERROR, "exit status"); break; case PCMK_OCF_NOT_CONFIGURED: remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); break; case PCMK_OCF_UNIMPLEMENT_FEATURE: { guint interval_ms = 0; crm_element_value_ms(history->xml, PCMK_META_INTERVAL, &interval_ms); if (interval_ms == 0) { if (!expired) { block_if_unrecoverable(history); } remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); } else { remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED, "exit status"); } } break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: if (!expired) { block_if_unrecoverable(history); } remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); break; default: if (history->execution_status == PCMK_EXEC_DONE) { char *last_change_s = last_change_str(history->xml); crm_info("Treating unknown exit status %d from %s of %s " "on %s at %s as failure", history->exit_status, task, history->rsc->id, pcmk__node_name(history->node), last_change_s); remap_because(history, &why, PCMK_EXEC_ERROR, "unknown exit status"); free(last_change_s); } break; } remap_done: if (why != NULL) { pcmk__rsc_trace(history->rsc, "Remapped %s result from [%s: %s] to [%s: %s] " "because of %s", history->key, pcmk_exec_status_str(orig_exec_status), crm_exit_str(orig_exit_status), pcmk_exec_status_str(history->execution_status), crm_exit_str(history->exit_status), why); } } // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(const xmlNode *xml_op, const char *task, pcmk_resource_t *rsc, pcmk_node_t *node) { if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. */ pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure, rsc->cluster); } else { pcmk__op_digest_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster); switch (digest_data->rc) { case pcmk__digest_unknown: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", rsc->id, pcmk__xe_history_key(xml_op), node->details->id); break; case pcmk__digest_match: break; default: return TRUE; } } } return FALSE; } // Order action after fencing of remote node, given connection rsc static void order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn, pcmk_scheduler_t *scheduler) { pcmk_node_t *remote_node = pe_find_node(scheduler->nodes, remote_conn->id); if (remote_node) { pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, FALSE, scheduler); order_actions(fence, action, pcmk__ar_first_implies_then); } } static bool should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task, guint interval_ms, bool is_last_failure) { /* Clearing failures of recurring monitors has special concerns. The * executor reports only changes in the monitor result, so if the * monitor is still active and still getting the same failure result, * that will go undetected after the failure is cleared. * * Also, the operation history will have the time when the recurring * monitor result changed to the given code, not the time when the * result last happened. * * @TODO We probably should clear such failures only when the failure * timeout has passed since the last occurrence of the failed result. * However we don't record that information. We could maybe approximate * that by clearing only if there is a more recent successful monitor or * stop result, but we don't even have that information at this point * since we are still unpacking the resource's operation history. * * This is especially important for remote connection resources with a * reconnect interval, so in that case, we skip clearing failures * if the remote node hasn't been fenced. */ if (rsc->remote_reconnect_ms && pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled) && (interval_ms != 0) && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { pcmk_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id); if (remote_node && !remote_node->details->remote_was_fenced) { if (is_last_failure) { crm_info("Waiting to clear monitor failure for remote node %s" " until fencing has occurred", rsc->id); } return TRUE; } } return FALSE; } /*! * \internal * \brief Check operation age and schedule failure clearing when appropriate * * This function has two distinct purposes. The first is to check whether an * operation history entry is expired (i.e. the resource has a failure timeout, * the entry is older than the timeout, and the resource either has no fail * count or its fail count is entirely older than the timeout). The second is to * schedule fail count clearing when appropriate (i.e. the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). * * \param[in,out] history Parsed action result history * * \return true if operation history entry is expired, otherwise false */ static bool check_operation_expiry(struct action_history *history) { bool expired = false; bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0"); time_t last_run = 0; int unexpired_fail_count = 0; const char *clear_reason = NULL; if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) { pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not expired: " "Not Installed does not expire", history->id, pcmk__node_name(history->node)); return false; // "Not installed" must always be cleared manually } if ((history->rsc->failure_timeout > 0) && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE, &last_run) == 0)) { /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a * timestamp */ time_t now = get_effective_time(history->rsc->cluster); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? if ((now >= (last_run + history->rsc->failure_timeout)) && !should_ignore_failure_timeout(history->rsc, history->task, history->interval_ms, is_last_failure)) { expired = true; } // Does the resource as a whole have an unexpired fail count? unexpired_fail_count = pe_get_failcount(history->node, history->rsc, &last_failure, pcmk__fc_effective, history->xml); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds" " last-failure@%lld", history->id, (long long) last_run, (expired? "" : "not "), (long long) now, unexpired_fail_count, history->rsc->failure_timeout, (long long) last_failure); last_failure += history->rsc->failure_timeout + 1; if (unexpired_fail_count && (now < last_failure)) { pe__update_recheck_time(last_failure, history->rsc->cluster, "fail count expiration"); } } if (expired) { if (pe_get_failcount(history->node, history->rsc, NULL, pcmk__fc_default, history->xml)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. */ pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Unexpired fail count", history->id, pcmk__node_name(history->node)); expired = false; } } else if (is_last_failure && (history->rsc->remote_reconnect_ms != 0)) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. */ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure && should_clear_for_param_change(history->xml, history->task, history->rsc, history->node)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { pcmk_action_t *clear_op = NULL; // Schedule clearing of the fail count clear_op = pe__clear_failcount(history->rsc, history->node, clear_reason, history->rsc->cluster); if (pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_fencing_enabled) && (history->rsc->remote_reconnect_ms != 0)) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_history() is done). */ crm_info("Clearing %s failure will wait until any scheduled " "fencing of %s completes", history->task, history->rsc->id); order_after_remote_fencing(clear_op, history->rsc, history->rsc->cluster); } } if (expired && (history->interval_ms == 0) && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { switch (history->exit_status) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Don't expire probes that return these values pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Probe result", history->id, pcmk__node_name(history->node)); expired = false; break; } } return expired; } int pe__target_rc_from_xml(const xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } /*! * \internal * \brief Update a resource's state for an action result * * \param[in,out] history Parsed action history entry * \param[in] exit_status Exit status to base new state on * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void update_resource_state(struct action_history *history, int exit_status, const xmlNode *last_failure, enum action_fail_response *on_fail) { bool clear_past_failure = false; if ((exit_status == PCMK_OCF_NOT_INSTALLED) || (!pe_rsc_is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml))) { history->rsc->role = pcmk_role_stopped; } else if (exit_status == PCMK_OCF_NOT_RUNNING) { clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { if ((last_failure != NULL) && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure), pcmk__str_none)) { clear_past_failure = true; } if (history->rsc->role < pcmk_role_started) { set_active(history->rsc); } } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) { history->rsc->role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) { history->rsc->role = pcmk_role_stopped; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE, pcmk__str_none)) { history->rsc->role = pcmk_role_promoted; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE, pcmk__str_none)) { if (*on_fail == pcmk_on_fail_demote) { // Demote clears an error only if on-fail=demote clear_past_failure = true; } history->rsc->role = pcmk_role_unpromoted; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { history->rsc->role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { unpack_migrate_to_success(history); } else if (history->rsc->role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "%s active on %s", history->rsc->id, pcmk__node_name(history->node)); set_active(history->rsc); } if (!clear_past_failure) { return; } switch (*on_fail) { case pcmk_on_fail_stop: case pcmk_on_fail_ban: case pcmk_on_fail_standby_node: case pcmk_on_fail_fence_node: pcmk__rsc_trace(history->rsc, "%s (%s) is not cleared by a completed %s", history->rsc->id, fail2text(*on_fail), history->task); break; case pcmk_on_fail_block: case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: case pcmk_on_fail_restart_container: *on_fail = pcmk_on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures"); break; case pcmk_on_fail_reset_remote: if (history->rsc->remote_reconnect_ms == 0) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = pcmk_on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures and reset remote"); } break; } } /*! * \internal * \brief Check whether a given history entry matters for resource state * * \param[in] history Parsed action history entry * * \return true if action can affect resource state, otherwise false */ static inline bool can_affect_state(struct action_history *history) { #if 0 /* @COMPAT It might be better to parse only actions we know we're interested * in, rather than exclude a couple we don't. However that would be a * behavioral change that should be done at a major or minor series release. * Currently, unknown operations can affect whether a resource is considered * active and/or failed. */ return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, "asyncmon", NULL); #else return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY, PCMK_ACTION_META_DATA, NULL); #endif } /*! * \internal * \brief Unpack execution/exit status and exit reason from a history entry * * \param[in,out] history Action history entry to unpack * * \return Standard Pacemaker return code */ static int unpack_action_result(struct action_history *history) { if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS, &(history->execution_status)) < 0) || (history->execution_status < PCMK_EXEC_PENDING) || (history->execution_status > PCMK_EXEC_MAX) || (history->execution_status == PCMK_EXEC_CANCELLED)) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " PCMK__XA_OP_STATUS " '%s'", history->id, history->rsc->id, pcmk__node_name(history->node), pcmk__s(crm_element_value(history->xml, PCMK__XA_OP_STATUS), "")); return pcmk_rc_unpack_error; } if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE, &(history->exit_status)) < 0) || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) { #if 0 /* @COMPAT We should ignore malformed entries, but since that would * change behavior, it should be done at a major or minor series * release. */ pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " PCMK__XA_RC_CODE " '%s'", history->id, history->rsc->id, pcmk__node_name(history->node), pcmk__s(crm_element_value(history->xml, PCMK__XA_RC_CODE), "")); return pcmk_rc_unpack_error; #else history->exit_status = CRM_EX_ERROR; #endif } history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON); return pcmk_rc_ok; } /*! * \internal * \brief Process an action history entry whose result expired * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the * entry needs no further processing) */ static int process_expired_result(struct action_history *history, int orig_exit_status) { if (!pe_rsc_is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml) && (orig_exit_status != history->expected_exit_status)) { if (history->rsc->role <= pcmk_role_stopped) { history->rsc->role = pcmk_role_unknown; } crm_trace("Ignoring resource history entry %s for probe of %s on %s: " "Masked failure expired", history->id, history->rsc->id, pcmk__node_name(history->node)); return pcmk_rc_ok; } if (history->exit_status == history->expected_exit_status) { return pcmk_rc_undetermined; // Only failures expire } if (history->interval_ms == 0) { crm_notice("Ignoring resource history entry %s for %s of %s on %s: " "Expired failure", history->id, history->task, history->rsc->id, pcmk__node_name(history->node)); return pcmk_rc_ok; } if (history->node->details->online && !history->node->details->unclean) { /* Reschedule the recurring action. schedule_cancel() won't work at * this stage, so as a hacky workaround, forcibly change the restart * digest so pcmk__check_action_config() does what we want later. * * @TODO We should skip this if there is a newer successful monitor. * Also, this causes rescheduling only if the history entry * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure * scheduler regression test doesn't, but that may not be a * realistic scenario in production). */ crm_notice("Rescheduling %s-interval %s of %s on %s " "after failure expired", pcmk__readable_interval(history->interval_ms), history->task, history->rsc->id, pcmk__node_name(history->node)); crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST, "calculated-failure-timeout"); return pcmk_rc_ok; } return pcmk_rc_undetermined; } /*! * \internal * \brief Process a masked probe failure * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void mask_probe_failure(struct action_history *history, int orig_exit_status, const xmlNode *last_failure, enum action_fail_response *on_fail) { pcmk_resource_t *ban_rsc = history->rsc; if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) { ban_rsc = uber_parent(history->rsc); } crm_notice("Treating probe result '%s' for %s on %s as 'not running'", services_ocf_exitcode_str(orig_exit_status), history->rsc->id, pcmk__node_name(history->node)); update_resource_state(history, history->expected_exit_status, last_failure, on_fail); crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname); record_failed_op(history); resource_location(ban_rsc, history->node, -INFINITY, "masked-probe-failure", history->rsc->cluster); } /*! * \internal Check whether a given failure is for a given pending action * * \param[in] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known * * \return true if \p last_failure is failure of pending action in \p history, * otherwise false * \note Both \p history and \p last_failure must come from the same * lrm_resource block, as node and resource are assumed to be the same. */ static bool failure_is_newer(const struct action_history *history, const xmlNode *last_failure) { guint failure_interval_ms = 0U; long long failure_change = 0LL; long long this_change = 0LL; if (last_failure == NULL) { return false; // Resource has no last_failure entry } if (!pcmk__str_eq(history->task, crm_element_value(last_failure, PCMK_XA_OPERATION), pcmk__str_none)) { return false; // last_failure is for different action } if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL, &failure_interval_ms) != pcmk_ok) || (history->interval_ms != failure_interval_ms)) { return false; // last_failure is for action with different interval } if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE), &this_change, 0LL) != pcmk_rc_ok) || (pcmk__scan_ll(crm_element_value(last_failure, PCMK_XA_LAST_RC_CHANGE), &failure_change, 0LL) != pcmk_rc_ok) || (failure_change < this_change)) { return false; // Failure is not known to be newer } return true; } /*! * \internal * \brief Update a resource's role etc. for a pending action * * \param[in,out] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known */ static void process_pending_action(struct action_history *history, const xmlNode *last_failure) { /* For recurring monitors, a failure is recorded only in RSC_last_failure_0, * and there might be a RSC_monitor_INTERVAL entry with the last successful * or pending result. * * If last_failure contains the failure of the pending recurring monitor * we're processing here, and is newer, the action is no longer pending. * (Pending results have call ID -1, which sorts last, so the last failure * if any should be known.) */ if (failure_is_newer(history, last_failure)) { return; } if (strcmp(history->task, PCMK_ACTION_START) == 0) { pcmk__set_rsc_flags(history->rsc, pcmk_rsc_start_pending); set_active(history->rsc); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->role = pcmk_role_promoted; } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) && history->node->details->unclean) { /* A migrate_to action is pending on a unclean source, so force a stop * on the target. */ const char *migrate_target = NULL; pcmk_node_t *target = NULL; migrate_target = crm_element_value(history->xml, PCMK__META_MIGRATE_TARGET); target = pe_find_node(history->rsc->cluster->nodes, migrate_target); if (target != NULL) { stop_action(history->rsc, target, FALSE); } } if (history->rsc->pending_task != NULL) { /* There should never be multiple pending actions, but as a failsafe, * just remember the first one processed for display purposes. */ return; } if (pcmk_is_probe(history->task, history->interval_ms)) { /* Pending probes are currently never displayed, even if pending * operations are requested. If we ever want to change that, * enable the below and the corresponding part of * native.c:native_pending_task(). */ #if 0 history->rsc->pending_task = strdup("probe"); history->rsc->pending_node = history->node; #endif } else { history->rsc->pending_task = strdup(history->task); history->rsc->pending_node = history->node; } } static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *on_fail) { int old_rc = 0; bool expired = false; pcmk_resource_t *parent = rsc; enum rsc_role_e fail_role = pcmk_role_unknown; enum action_fail_response failure_strategy = pcmk_on_fail_restart; struct action_history history = { .rsc = rsc, .node = node, .xml = xml_op, .execution_status = PCMK_EXEC_UNKNOWN, }; CRM_CHECK(rsc && node && xml_op, return); history.id = ID(xml_op); if (history.id == NULL) { pcmk__config_err("Ignoring resource history entry for %s on %s " "without ID", rsc->id, pcmk__node_name(node)); return; } // Task and interval history.task = crm_element_value(xml_op, PCMK_XA_OPERATION); if (history.task == NULL) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "without " PCMK_XA_OPERATION, history.id, rsc->id, pcmk__node_name(node)); return; } crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms)); if (!can_affect_state(&history)) { pcmk__rsc_trace(rsc, "Ignoring resource history entry %s for %s on %s " "with irrelevant action '%s'", history.id, rsc->id, pcmk__node_name(node), history.task); return; } if (unpack_action_result(&history) != pcmk_rc_ok) { return; // Error already logged } history.expected_exit_status = pe__target_rc_from_xml(xml_op); history.key = pcmk__xe_history_key(xml_op); crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id)); pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", history.id, history.task, history.call_id, pcmk__node_name(node), pcmk_exec_status_str(history.execution_status), crm_exit_str(history.exit_status)); if (node->details->unclean) { pcmk__rsc_trace(rsc, "%s is running on %s, which is unclean (further action " "depends on value of stop's on-fail attribute)", rsc->id, pcmk__node_name(node)); } expired = check_operation_expiry(&history); old_rc = history.exit_status; remap_operation(&history, on_fail, expired); if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) { goto done; } if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { mask_probe_failure(&history, old_rc, *last_failure, on_fail); goto done; } if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) { parent = uber_parent(rsc); } switch (history.execution_status) { case PCMK_EXEC_PENDING: process_pending_action(&history, *last_failure); goto done; case PCMK_EXEC_DONE: update_resource_state(&history, history.exit_status, *last_failure, on_fail); goto done; case PCMK_EXEC_NOT_INSTALLED: unpack_failure_handling(&history, &failure_strategy, &fail_role); if (failure_strategy == pcmk_on_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " CRM_XS " status=%d rc=%d id=%s", history.task, rsc->id, pcmk__node_name(node), history.execution_status, history.exit_status, history.id); /* Also for printing it as "FAILED" by marking it as * pcmk_rsc_failed later */ *on_fail = pcmk_on_fail_ban; } resource_location(parent, node, -INFINITY, "hard-error", rsc->cluster); unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); goto done; case PCMK_EXEC_NOT_CONNECTED: if (pe__is_guest_or_remote_node(node) && pcmk_is_set(node->details->remote_rsc->flags, pcmk_rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. */ pcmk__set_rsc_flags(node->details->remote_rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); } break; // Not done, do error handling case PCMK_EXEC_ERROR: case PCMK_EXEC_ERROR_HARD: case PCMK_EXEC_ERROR_FATAL: case PCMK_EXEC_TIMEOUT: case PCMK_EXEC_NOT_SUPPORTED: case PCMK_EXEC_INVALID: break; // Not done, do error handling default: // No other value should be possible at this point break; } unpack_failure_handling(&history, &failure_strategy, &fail_role); if ((failure_strategy == pcmk_on_fail_ignore) || ((failure_strategy == pcmk_on_fail_restart_container) && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) { char *last_change_s = last_change_str(xml_op); crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded " CRM_XS " %s", history.task, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), rsc->id, pcmk__node_name(node), last_change_s, history.id); free(last_change_s); update_resource_state(&history, history.expected_exit_status, *last_failure, on_fail); crm_xml_add(xml_op, PCMK_XA_UNAME, node->details->uname); pcmk__set_rsc_flags(rsc, pcmk_rsc_ignore_failure); record_failed_op(&history); if ((failure_strategy == pcmk_on_fail_restart_container) && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); if (history.execution_status == PCMK_EXEC_ERROR_HARD) { uint8_t log_level = LOG_ERR; if (history.exit_status == PCMK_OCF_NOT_INSTALLED) { log_level = LOG_NOTICE; } do_crm_log(log_level, "Preventing %s from restarting on %s because " "of hard failure (%s%s%s) " CRM_XS " %s", parent->id, pcmk__node_name(node), services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, node, -INFINITY, "hard-error", rsc->cluster); } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) { pcmk__sched_err("Preventing %s from restarting anywhere because " "of fatal failure (%s%s%s) " CRM_XS " %s", parent->id, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, NULL, -INFINITY, "fatal-error", rsc->cluster); } } done: pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", rsc->id, pcmk__node_name(node), history.id, pcmk_role_text(rsc->role), pcmk_role_text(rsc->next_role)); } static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler) { const char *cluster_name = NULL; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = pcmk_role_unknown, .now = scheduler->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_UNAME), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID), strdup(node->details->id)); if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) { scheduler->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(PCMK_VALUE_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(PCMK_VALUE_FALSE)); } cluster_name = g_hash_table_lookup(scheduler->config_hash, PCMK_OPT_CLUSTER_NAME); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME), strdup(cluster_name)); } pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data, node->details->attrs, NULL, overwrite, scheduler); pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data, node->details->utilization, NULL, FALSE, scheduler); if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { const char *site_name = pe_node_attribute_raw(node, "site-name"); if (site_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(cluster_name)); } } } static GList * extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GList *gIter = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, PCMK_XA_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler) { GList *output = NULL; GList *intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(scheduler->input, PCMK_XE_STATUS, TRUE); pcmk_node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = pcmk__xe_first_child(status); node_state != NULL; node_state = pcmk__xe_next(node_state)) { if (pcmk__str_eq((const char *) node_state->name, PCMK__XE_NODE_STATE, pcmk__str_none)) { const char *uname = crm_element_value(node_state, PCMK_XA_UNAME); if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) { continue; } this_node = pe_find_node(scheduler->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(scheduler, this_node); } else { determine_online_status(node_state, this_node, scheduler); } if (this_node->details->online || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; - tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); + tmp = find_xml_node(node_state, PCMK__XE_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = pcmk__xe_first_child(tmp); lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) { if (pcmk__str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { const char *rsc_id = crm_element_value(lrm_rsc, PCMK_XA_ID); if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 2fbd29d2d5..acd8993444 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2288 +1,2288 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // pcmk__ends_with_ext() #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "crm_mon.h" #define SUMMARY "Provides a summary of cluster's current state.\n\n" \ "Outputs varying levels of detail in a number of different formats." /* * Definitions indicating which items to print */ static uint32_t show; static uint32_t show_opts = pcmk_show_pending; /* * Definitions indicating how to output */ static mon_output_format_t output_format = mon_output_unset; /* other globals */ static GIOChannel *io_channel = NULL; static GMainLoop *mainloop = NULL; static guint reconnect_timer = 0; static mainloop_timer_t *refresh_timer = NULL; static enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid; static cib_t *cib = NULL; static stonith_t *st = NULL; static xmlNode *current_cib = NULL; static GError *error = NULL; static pcmk__common_args_t *args = NULL; static pcmk__output_t *out = NULL; static GOptionContext *context = NULL; static gchar **processed_args = NULL; static time_t last_refresh = 0; volatile crm_trigger_t *refresh_trigger = NULL; static enum pcmk__fence_history fence_history = pcmk__fence_history_none; int interactive_fence_level = 0; static pcmk__supported_format_t formats[] = { #if CURSES_ENABLED CRM_MON_SUPPORTED_FORMAT_CURSES, #endif PCMK__SUPPORTED_FORMAT_HTML, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_default(pcmk__output_t *out, va_list args) { return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_html(pcmk__output_t *out, va_list args) { const char *desc = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); if (out->dest != stdout) { out->reset(out); } pcmk__output_create_xml_text_node(out, "span", "Not connected to CIB"); if (desc != NULL) { pcmk__output_create_xml_text_node(out, "span", ": "); pcmk__output_create_xml_text_node(out, "span", desc); } if (state != pcmk_pacemakerd_state_invalid) { const char *state_s = pcmk__pcmkd_state_enum2friendly(state); pcmk__output_create_xml_text_node(out, "span", " ("); pcmk__output_create_xml_text_node(out, "span", state_s); pcmk__output_create_xml_text_node(out, "span", ")"); } out->finish(out, CRM_EX_DISCONNECT, true, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_text(pcmk__output_t *out, va_list args) { const char *desc = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); int rc = pcmk_rc_ok; if (out->dest != stdout) { out->reset(out); } if (state != pcmk_pacemakerd_state_invalid) { rc = out->info(out, "Not connected to CIB%s%s (%s)", (desc != NULL)? ": " : "", pcmk__s(desc, ""), pcmk__pcmkd_state_enum2friendly(state)); } else { rc = out->info(out, "Not connected to CIB%s%s", (desc != NULL)? ": " : "", pcmk__s(desc, "")); } out->finish(out, CRM_EX_DISCONNECT, true, NULL); return rc; } PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_xml(pcmk__output_t *out, va_list args) { const char *desc = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = NULL; if (out->dest != stdout) { out->reset(out); } if (state != pcmk_pacemakerd_state_invalid) { state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); } pcmk__output_create_xml_node(out, "crm-mon-disconnected", PCMK_XA_DESCRIPTION, desc, "pacemakerd-state", state_s, NULL); out->finish(out, CRM_EX_DISCONNECT, true, NULL); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "crm-mon-disconnected", "default", crm_mon_disconnected_default }, { "crm-mon-disconnected", "html", crm_mon_disconnected_html }, { "crm-mon-disconnected", "text", crm_mon_disconnected_text }, { "crm-mon-disconnected", "xml", crm_mon_disconnected_xml }, { NULL, NULL, NULL }, }; /* Define exit codes for monitoring-compatible output * For nagios plugins, the possibilities are * OK=0, WARN=1, CRIT=2, and UNKNOWN=3 */ #define MON_STATUS_WARN CRM_EX_ERROR #define MON_STATUS_CRIT CRM_EX_INVALID_PARAM #define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE #define RECONNECT_MSECS 5000 struct { guint reconnect_ms; enum mon_exec_mode exec_mode; gboolean fence_connect; gboolean print_pending; gboolean show_bans; gboolean watch_fencing; char *pid_file; char *external_agent; char *external_recipient; char *neg_location_prefix; char *only_node; char *only_rsc; GSList *user_includes_excludes; GSList *includes_excludes; } options = { .reconnect_ms = RECONNECT_MSECS, .exec_mode = mon_exec_unset, .fence_connect = TRUE, }; static crm_exit_t clean_up(crm_exit_t exit_code); static void crm_diff_update(const char *event, xmlNode * msg); static void clean_up_on_connection_failure(int rc); static int mon_refresh_display(gpointer user_data); static int setup_cib_connection(void); static int setup_fencer_connection(void); static int setup_api_connections(void); static void mon_st_callback_event(stonith_t * st, stonith_event_t * e); static void mon_st_callback_display(stonith_t * st, stonith_event_t * e); static void refresh_after_event(gboolean data_updated, gboolean enforce); static uint32_t all_includes(mon_output_format_t fmt) { if (fmt == mon_output_monitor || fmt == mon_output_plain || fmt == mon_output_console) { return ~pcmk_section_options; } else { return pcmk_section_all; } } static uint32_t default_includes(mon_output_format_t fmt) { switch (fmt) { case mon_output_monitor: case mon_output_plain: case mon_output_console: case mon_output_html: case mon_output_cgi: return pcmk_section_summary |pcmk_section_nodes |pcmk_section_resources |pcmk_section_failures; case mon_output_xml: return all_includes(fmt); default: return 0; } } struct { const char *name; uint32_t bit; } sections[] = { { "attributes", pcmk_section_attributes }, { "bans", pcmk_section_bans }, { "counts", pcmk_section_counts }, { "dc", pcmk_section_dc }, { "failcounts", pcmk_section_failcounts }, { "failures", pcmk_section_failures }, { PCMK__VALUE_FENCING, pcmk_section_fencing_all }, { "fencing-failed", pcmk_section_fence_failed }, { "fencing-pending", pcmk_section_fence_pending }, { "fencing-succeeded", pcmk_section_fence_worked }, { "maint-mode", pcmk_section_maint_mode }, { "nodes", pcmk_section_nodes }, { "operations", pcmk_section_operations }, { "options", pcmk_section_options }, { "resources", pcmk_section_resources }, { "stack", pcmk_section_stack }, { "summary", pcmk_section_summary }, { "tickets", pcmk_section_tickets }, { "times", pcmk_section_times }, { NULL } }; static uint32_t find_section_bit(const char *name) { for (int i = 0; sections[i].name != NULL; i++) { if (pcmk__str_eq(sections[i].name, name, pcmk__str_casei)) { return sections[i].bit; } } return 0; } static gboolean apply_exclude(const gchar *excludes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(excludes, ",", 0); for (char **s = parts; *s != NULL; s++) { uint32_t bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = 0; } else if (pcmk__str_eq(*s, PCMK__VALUE_NONE, pcmk__str_none)) { show = all_includes(output_format); } else if (bit != 0) { show &= ~bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--exclude options: all, attributes, bans, counts, dc, " "failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, maint-mode, nodes, " PCMK__VALUE_NONE ", operations, options, resources, " "stack, summary, tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include(const gchar *includes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(includes, ",", 0); for (char **s = parts; *s != NULL; s++) { uint32_t bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = all_includes(output_format); } else if (pcmk__starts_with(*s, "bans")) { show |= pcmk_section_bans; if (options.neg_location_prefix != NULL) { free(options.neg_location_prefix); options.neg_location_prefix = NULL; } if (strlen(*s) > 4 && (*s)[4] == ':') { options.neg_location_prefix = strdup(*s+5); } } else if (pcmk__str_any_of(*s, "default", "defaults", NULL)) { show |= default_includes(output_format); } else if (pcmk__str_eq(*s, PCMK__VALUE_NONE, pcmk__str_none)) { show = 0; } else if (bit != 0) { show |= bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--include options: all, attributes, bans[:PREFIX], counts, dc, " "default, failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, maint-mode, nodes, " PCMK__VALUE_NONE ", operations, options, resources, " "stack, summary, tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include_exclude(GSList *lst, GError **error) { gboolean rc = TRUE; GSList *node = lst; while (node != NULL) { char *s = node->data; if (pcmk__starts_with(s, "--include=")) { rc = apply_include(s+10, error); } else if (pcmk__starts_with(s, "-I=")) { rc = apply_include(s+3, error); } else if (pcmk__starts_with(s, "--exclude=")) { rc = apply_exclude(s+10, error); } else if (pcmk__starts_with(s, "-U=")) { rc = apply_exclude(s+3, error); } if (rc != TRUE) { break; } node = node->next; } return rc; } static gboolean user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s); return TRUE; } static gboolean include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.includes_excludes = g_slist_append(options.includes_excludes, s); return TRUE; } static gboolean as_cgi_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&args->output_ty, "html"); output_format = mon_output_cgi; options.exec_mode = mon_exec_one_shot; return TRUE; } static gboolean as_html_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&args->output_dest, optarg); pcmk__str_update(&args->output_ty, "html"); output_format = mon_output_html; umask(S_IWGRP | S_IWOTH); // World-readable HTML return TRUE; } static gboolean as_simple_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_monitor; options.exec_mode = mon_exec_one_shot; return TRUE; } static gboolean as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&args->output_ty, "xml"); output_format = mon_output_legacy_xml; return TRUE; } static gboolean fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg == NULL) { interactive_fence_level = 2; } else { pcmk__scan_min_int(optarg, &interactive_fence_level, 0); } switch (interactive_fence_level) { case 3: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; return include_exclude_cb("--include", PCMK__VALUE_FENCING, data, err); case 2: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; return include_exclude_cb("--include", PCMK__VALUE_FENCING, data, err); case 1: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err); case 0: options.fence_connect = FALSE; fence_history = pcmk__fence_history_none; return include_exclude_cb("--exclude", PCMK__VALUE_FENCING, data, err); default: g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3"); return FALSE; } } static gboolean group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_rscs_by_node; return TRUE; } static gboolean hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--exclude", "summary", data, err); } static gboolean inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_inactive_rscs; return TRUE; } static gboolean no_curses_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_plain; return TRUE; } static gboolean print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_brief; return TRUE; } static gboolean print_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_details; return TRUE; } static gboolean print_description_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_description; return TRUE; } static gboolean print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_timing; return user_include_exclude_cb("--include", "operations", data, err); } static gboolean reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_get_msec(optarg); if (rc == -1) { g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg); return FALSE; } else { pcmk_parse_interval_spec(optarg, &options.reconnect_ms); if (options.exec_mode != mon_exec_daemonized) { // Reconnect interval applies to daemonized too, so don't override options.exec_mode = mon_exec_update; } } return TRUE; } /*! * \internal * \brief Enable one-shot mode * * \param[in] option_name Name of option being parsed (ignored) * \param[in] optarg Value to be parsed (ignored) * \param[in] data User data (ignored) * \param[out] err Where to store error (ignored) */ static gboolean one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.exec_mode = mon_exec_one_shot; return TRUE; } /*! * \internal * \brief Enable daemonized mode * * \param[in] option_name Name of option being parsed (ignored) * \param[in] optarg Value to be parsed (ignored) * \param[in] data User data (ignored) * \param[out] err Where to store error (ignored) */ static gboolean daemonize_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.exec_mode = mon_exec_daemonized; return TRUE; } static gboolean show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "attributes", data, err); } static gboolean show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg != NULL) { char *s = crm_strdup_printf("bans:%s", optarg); gboolean rc = user_include_exclude_cb("--include", s, data, err); free(s); return rc; } else { return user_include_exclude_cb("--include", "bans", data, err); } } static gboolean show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "failcounts", data, err); } static gboolean show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "failcounts,operations", data, err); } static gboolean show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "tickets", data, err); } static gboolean use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { setenv("CIB_file", optarg, 1); options.exec_mode = mon_exec_one_shot; return TRUE; } #define INDENT " " /* *INDENT-OFF* */ static GOptionEntry addl_entries[] = { { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb, "Update frequency (default is 5 seconds)", "TIMESPEC" }, { "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, one_shot_cb, "Display the cluster status once and exit", NULL }, { "daemonize", 'd', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, daemonize_cb, "Run in the background as a daemon.\n" INDENT "Requires at least one of --output-to and --external-agent.", NULL }, { "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file, "(Advanced) Daemon pid file location", "FILE" }, { "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent, "A program to run when resource operations take place", "FILE" }, { "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient, "A recipient for your program (assuming you want the program to send something to someone).", "RCPT" }, { "watch-fencing", 'W', 0, G_OPTION_ARG_NONE, &options.watch_fencing, "Listen for fencing events. For use with --external-agent.", NULL }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb, NULL, NULL }, { NULL } }; static GOptionEntry display_entries[] = { { "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to include in the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to exclude from the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node, "When displaying information about nodes, show only what's related to the given\n" INDENT "node, or to all nodes tagged with the given tag", "NODE" }, { "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc, "When displaying information about resources, show only what's related to the given\n" INDENT "resource, or to all resources tagged with the given tag", "RSC" }, { "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb, "Group resources by node", NULL }, { "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb, "Display inactive resources", NULL }, { "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb, "Display resource fail counts", NULL }, { "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb, "Display resource operation history", NULL }, { "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb, "Display resource operation history with timing details", NULL }, { "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb, "Display cluster tickets", NULL }, { "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb, "Show fence history:\n" INDENT "0=off, 1=failures and pending (default without option),\n" INDENT "2=add successes (default without value for option),\n" INDENT "3=show full history without reduction to most recent of each flavor", "LEVEL" }, { "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb, "Display negative location constraints [optionally filtered by id prefix]", NULL }, { "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb, "Display node attributes", NULL }, { "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb, "Hide all headers", NULL }, { "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_detail_cb, "Show more details (node IDs, individual clone instances)", NULL }, { "show-description", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_description_cb, "Show resource descriptions", NULL }, { "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb, "Brief output", NULL }, { "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending, "Display pending state if '" PCMK_META_RECORD_PENDING "' is enabled", NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { { "as-html", 'h', G_OPTION_FLAG_FILENAME, G_OPTION_ARG_CALLBACK, as_html_cb, "Write cluster status to the named HTML file.\n" INDENT "Use --output-as=html --output-to=FILE instead.", "FILE" }, { "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb, "Write cluster status as XML to stdout. This will enable one-shot mode.\n" INDENT "Use --output-as=xml instead.", NULL }, { "simple-status", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_simple_cb, "Display the cluster status once as a simple one line output\n" INDENT "(suitable for nagios)", NULL }, { "disable-ncurses", 'N', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, no_curses_cb, "Disable the use of ncurses.\n" INDENT "Use --output-as=text instead.", NULL }, { "web-cgi", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_cgi_cb, "Web mode with output suitable for CGI (preselected when run as *.cgi).\n" INDENT "Use --output-as=html --html-cgi instead.", NULL }, { NULL } }; /* *INDENT-ON* */ /* Reconnect to the CIB and fencing agent after reconnect_ms has passed. This sounds * like it would be more broadly useful, but only ever happens after a disconnect via * mon_cib_connection_destroy. */ static gboolean reconnect_after_timeout(gpointer data) { #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif out->transient(out, "Reconnecting..."); if (setup_api_connections() == pcmk_rc_ok) { // Trigger redrawing the screen (needs reconnect_timer == 0) reconnect_timer = 0; refresh_after_event(FALSE, TRUE); return G_SOURCE_REMOVE; } out->message(out, "crm-mon-disconnected", "Latest connection attempt failed", pcmkd_state); reconnect_timer = g_timeout_add(options.reconnect_ms, reconnect_after_timeout, NULL); return G_SOURCE_REMOVE; } /* Called from various places when we are disconnected from the CIB or from the * fencing agent. If the CIB connection is still valid, this function will also * attempt to sign off and reconnect. */ static void mon_cib_connection_destroy(gpointer user_data) { const char *msg = "Connection to the cluster lost"; pcmkd_state = pcmk_pacemakerd_state_invalid; /* No crm-mon-disconnected message for console; a working implementation * is not currently worth the effort */ out->transient(out, "%s", msg); out->message(out, "crm-mon-disconnected", msg, pcmkd_state); if (refresh_timer != NULL) { /* we'll trigger a refresh after reconnect */ mainloop_timer_stop(refresh_timer); } if (reconnect_timer) { /* we'll trigger a new reconnect-timeout at the end */ g_source_remove(reconnect_timer); reconnect_timer = 0; } /* the client API won't properly reconnect notifications if they are still * in the table - so remove them */ stonith_api_delete(st); st = NULL; if (cib) { cib->cmds->signoff(cib); reconnect_timer = g_timeout_add(options.reconnect_ms, reconnect_after_timeout, NULL); } } /* Signal handler installed into the mainloop for normal program shutdown */ static void mon_shutdown(int nsig) { clean_up(CRM_EX_OK); } #if CURSES_ENABLED static volatile sighandler_t ncurses_winch_handler; /* Signal handler installed the regular way (not into the main loop) for when * the screen is resized. Commonly, this happens when running in an xterm and * the user changes its size. */ static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); /* Alert the mainloop code we'd like the refresh_trigger to run next * time the mainloop gets around to checking. */ mainloop_set_trigger((crm_trigger_t *) refresh_trigger); } not_done--; } #endif static int setup_fencer_connection(void) { int rc = pcmk_ok; if (options.fence_connect && st == NULL) { st = stonith_api_new(); } if (!options.fence_connect || st == NULL || st->state != stonith_disconnected) { return rc; } rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); if (options.watch_fencing) { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_event); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event); } else { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_display); st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display); } } else { stonith_api_delete(st); st = NULL; } return rc; } static int setup_cib_connection(void) { int rc = pcmk_rc_ok; CRM_CHECK(cib != NULL, return EINVAL); if (cib->state != cib_disconnected) { // Already connected with notifications registered for CIB updates return rc; } rc = cib__signon_query(out, &cib, ¤t_cib); if (rc == pcmk_rc_ok) { rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy)); if (rc == EPROTONOSUPPORT) { out->err(out, "CIB client does not support connection loss " "notifications; crm_mon will be unable to reconnect after " "connection loss"); rc = pcmk_rc_ok; } if (rc == pcmk_rc_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = pcmk_legacy2rc(cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update)); } if (rc != pcmk_rc_ok) { if (rc == EPROTONOSUPPORT) { out->err(out, "CIB client does not support CIB diff " "notifications"); } else { out->err(out, "CIB diff notification setup failed"); } out->err(out, "Cannot monitor CIB changes; exiting"); cib__clean_up_connection(&cib); stonith_api_delete(st); st = NULL; } } return rc; } /* This is used to set up the fencing options after the interactive UI has been stared. * fence_history_cb can't be used because it builds up a list of includes/excludes that * then have to be processed with apply_include_exclude and that could affect other * things. */ static void set_fencing_options(int level) { switch (level) { case 3: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fencing_all; break; case 2: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fencing_all; break; case 1: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fence_failed | pcmk_section_fence_pending; break; default: interactive_fence_level = 0; options.fence_connect = FALSE; fence_history = pcmk__fence_history_none; show &= ~pcmk_section_fencing_all; break; } } static int setup_api_connections(void) { int rc = pcmk_rc_ok; CRM_CHECK(cib != NULL, return EINVAL); if (cib->state != cib_disconnected) { return rc; } if (cib->variant == cib_native) { rc = pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2, false, &pcmkd_state); if (rc != pcmk_rc_ok) { return rc; } switch (pcmkd_state) { case pcmk_pacemakerd_state_running: case pcmk_pacemakerd_state_remote: case pcmk_pacemakerd_state_shutting_down: /* Fencer and CIB may still be available while shutting down or * running on a Pacemaker Remote node */ break; default: // Fencer and CIB are definitely unavailable return ENOTCONN; } setup_fencer_connection(); } rc = setup_cib_connection(); return rc; } #if CURSES_ENABLED static const char * get_option_desc(char c) { const char *desc = "No help available"; for (GOptionEntry *entry = display_entries; entry != NULL; entry++) { if (entry->short_name == c) { desc = entry->description; break; } } return desc; } #define print_option_help(out, option, condition) \ curses_formatted_printf(out, "%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option)); /* This function is called from the main loop when there is something to be read * on stdin, like an interactive user's keystroke. All it does is read the keystroke, * set flags (or show the page showing which keystrokes are valid), and redraw the * screen. It does not do anything with connections to the CIB or fencing agent * agent what would happen in mon_refresh_display. */ static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data) { int c; gboolean config_mode = FALSE; while (1) { /* Get user input */ c = getchar(); switch (c) { case 'm': interactive_fence_level++; if (interactive_fence_level > 3) { interactive_fence_level = 0; } set_fencing_options(interactive_fence_level); break; case 'c': show ^= pcmk_section_tickets; break; case 'f': show ^= pcmk_section_failcounts; break; case 'n': show_opts ^= pcmk_show_rscs_by_node; break; case 'o': show ^= pcmk_section_operations; if (!pcmk_is_set(show, pcmk_section_operations)) { show_opts &= ~pcmk_show_timing; } break; case 'r': show_opts ^= pcmk_show_inactive_rscs; break; case 'R': show_opts ^= pcmk_show_details; #ifdef PCMK__COMPAT_2_0 // Keep failed action output the same as 2.0.x show_opts |= pcmk_show_failed_detail; #endif break; case 't': show_opts ^= pcmk_show_timing; if (pcmk_is_set(show_opts, pcmk_show_timing)) { show |= pcmk_section_operations; } break; case 'A': show ^= pcmk_section_attributes; break; case 'L': show ^= pcmk_section_bans; break; case 'D': /* If any header is shown, clear them all, otherwise set them all */ if (pcmk_any_flags_set(show, pcmk_section_summary)) { show &= ~pcmk_section_summary; } else { show |= pcmk_section_summary; } /* Regardless, we don't show options in console mode. */ show &= ~pcmk_section_options; break; case 'b': show_opts ^= pcmk_show_brief; break; case 'j': show_opts ^= pcmk_show_pending; break; case '?': config_mode = TRUE; break; default: /* All other keys just redraw the screen. */ goto refresh; } if (!config_mode) goto refresh; clear(); refresh(); curses_formatted_printf(out, "%s", "Display option change mode\n"); print_option_help(out, 'c', pcmk_is_set(show, pcmk_section_tickets)); print_option_help(out, 'f', pcmk_is_set(show, pcmk_section_failcounts)); print_option_help(out, 'n', pcmk_is_set(show_opts, pcmk_show_rscs_by_node)); print_option_help(out, 'o', pcmk_is_set(show, pcmk_section_operations)); print_option_help(out, 'r', pcmk_is_set(show_opts, pcmk_show_inactive_rscs)); print_option_help(out, 't', pcmk_is_set(show_opts, pcmk_show_timing)); print_option_help(out, 'A', pcmk_is_set(show, pcmk_section_attributes)); print_option_help(out, 'L', pcmk_is_set(show, pcmk_section_bans)); print_option_help(out, 'D', !pcmk_is_set(show, pcmk_section_summary)); #ifdef PCMK__COMPAT_2_0 print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details & ~pcmk_show_failed_detail)); #else print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details)); #endif print_option_help(out, 'b', pcmk_is_set(show_opts, pcmk_show_brief)); print_option_help(out, 'j', pcmk_is_set(show_opts, pcmk_show_pending)); curses_formatted_printf(out, "%d m: \t%s\n", interactive_fence_level, get_option_desc('m')); curses_formatted_printf(out, "%s", "\nToggle fields via field letter, type any other key to return\n"); } refresh: refresh_after_event(FALSE, TRUE); return TRUE; } #endif // CURSES_ENABLED // Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag static void avoid_zombies(void) { struct sigaction sa; memset(&sa, 0, sizeof(struct sigaction)); if (sigemptyset(&sa.sa_mask) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno)); return; } sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART|SA_NOCLDWAIT; if (sigaction(SIGCHLD, &sa, NULL) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno)); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; #if CURSES_ENABLED const char *fmts = "console (default), html, text, xml, none"; #else const char *fmts = "text (default), html, xml, none"; #endif // CURSES_ENABLED const char *desc = NULL; desc = "Notes:\n\n" "If this program is called as crm_mon.cgi, --output-as=html and\n" "--html-cgi are automatically added to the command line\n" "arguments.\n\n" "Time Specification:\n\n" "The TIMESPEC in any command line option can be specified in many\n" "different formats. It can be an integer number of seconds, a\n" "number plus units (us/usec/ms/msec/s/sec/m/min/h/hr), or an ISO\n" "8601 period specification.\n\n" "Output Control:\n\n" "By default, a particular set of sections are written to the\n" "output destination. The default varies based on the output\n" "format: XML includes all sections by default, while other output\n" "formats include less. This set can be modified with the --include\n" "and --exclude command line options. Each option may be passed\n" "multiple times, and each can specify a comma-separated list of\n" "sections. The options are applied to the default set, in order\n" "from left to right as they are passed on the command line. For a\n" "list of valid sections, pass --include=list or --exclude=list.\n\n" "Interactive Use:\n\n" #if CURSES_ENABLED "When run interactively, crm_mon can be told to hide and show\n" "various sections of output. To see a help screen explaining the\n" "options, press '?'. Any key stroke aside from those listed will\n" "cause the screen to refresh.\n\n" #else "The local installation of Pacemaker was built without support for\n" "interactive (console) mode. A curses library must be available at\n" "build time to support interactive mode.\n\n" #endif // CURSES_ENABLED "Examples:\n\n" #if CURSES_ENABLED "Display the cluster status on the console with updates as they\n" "occur:\n\n" "\tcrm_mon\n\n" #endif // CURSES_ENABLED "Display the cluster status once and exit:\n\n" "\tcrm_mon -1\n\n" "Display the cluster status, group resources by node, and include\n" "inactive resources in the list:\n\n" "\tcrm_mon --group-by-node --inactive\n\n" "Start crm_mon as a background daemon and have it write the\n" "cluster status to an HTML file:\n\n" "\tcrm_mon --daemonize --output-as html " "--output-to /path/to/docroot/filename.html\n\n" "Display the cluster status as XML:\n\n" "\tcrm_mon --output-as xml\n\n"; context = pcmk__build_arg_context(args, fmts, group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, desc); pcmk__add_arg_group(context, "display", "Display Options:", "Show display options", display_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } /* If certain format options were specified, we want to set some extra * options. We can just process these like they were given on the * command line. */ static void add_output_args(void) { GError *err = NULL; if (output_format == mon_output_plain) { if (!pcmk__force_args(context, &err, "%s --text-fancy", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_cgi) { if (!pcmk__force_args(context, &err, "%s --html-cgi", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_xml) { if (!pcmk__force_args(context, &err, "%s --xml-simple-list --xml-substitute", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_legacy_xml) { output_format = mon_output_xml; if (!pcmk__force_args(context, &err, "%s --xml-legacy --xml-substitute", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } } /*! * \internal * \brief Set output format based on \p --output-as arguments and mode arguments * * When the deprecated output format arguments (\p --as-cgi, \p --as-html, * \p --simple-status, \p --as-xml) are parsed, callback functions set * \p output_format (and the umask if appropriate). If none of the deprecated * arguments were specified, this function does the same based on the current * \p --output-as arguments and the \p --one-shot and \p --daemonize arguments. * * \param[in,out] args Command line arguments */ static void reconcile_output_format(pcmk__common_args_t *args) { if (output_format != mon_output_unset) { /* One of the deprecated arguments was used, and we're finished. Note * that this means the deprecated arguments take precedence. */ return; } if (pcmk__str_eq(args->output_ty, "none", pcmk__str_none)) { output_format = mon_output_none; } else if (pcmk__str_eq(args->output_ty, "html", pcmk__str_none)) { output_format = mon_output_html; umask(S_IWGRP | S_IWOTH); // World-readable HTML } else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) { output_format = mon_output_xml; #if CURSES_ENABLED } else if (pcmk__str_eq(args->output_ty, "console", pcmk__str_null_matches)) { /* Console is the default format if no conflicting options are given. * * Use text output instead if one of the following conditions is met: * * We've requested daemonized or one-shot mode (console output is * incompatible with modes other than mon_exec_update) * * We requested the version, which is effectively one-shot * * We specified a non-stdout output destination (console mode is * compatible only with stdout) */ if ((options.exec_mode == mon_exec_daemonized) || (options.exec_mode == mon_exec_one_shot) || args->version || !pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) { pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_plain; } else { pcmk__str_update(&args->output_ty, "console"); output_format = mon_output_console; crm_enable_stderr(FALSE); } #endif // CURSES_ENABLED } else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches)) { /* Text output was explicitly requested, or it's the default because * curses is not enabled */ pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_plain; } // Otherwise, invalid format. Let pcmk__output_new() throw an error. } /*! * \internal * \brief Set execution mode to the output format's default if appropriate * * \param[in,out] args Command line arguments */ static void set_default_exec_mode(const pcmk__common_args_t *args) { if (output_format == mon_output_console) { /* Update is the only valid mode for console, but set here instead of * reconcile_output_format() for isolation and consistency */ options.exec_mode = mon_exec_update; } else if (options.exec_mode == mon_exec_unset) { // Default to one-shot mode for all other formats options.exec_mode = mon_exec_one_shot; } else if ((options.exec_mode == mon_exec_update) && pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) { // If not using console format, update mode cannot be used with stdout options.exec_mode = mon_exec_one_shot; } } static void clean_up_on_connection_failure(int rc) { if (output_format == mon_output_monitor) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s", pcmk_rc_str(rc)); clean_up(MON_STATUS_CRIT); } else if (rc == ENOTCONN) { if (pcmkd_state == pcmk_pacemakerd_state_remote) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster"); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); } } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc)); } clean_up(pcmk_rc2exitc(rc)); } static void one_shot(void) { int rc = pcmk__status(out, cib, fence_history, show, show_opts, options.only_node, options.only_rsc, options.neg_location_prefix, output_format == mon_output_monitor, 0); if (rc == pcmk_rc_ok) { clean_up(pcmk_rc2exitc(rc)); } else { clean_up_on_connection_failure(rc); } } static void exit_on_invalid_cib(void) { if (cib != NULL) { return; } // Shouldn't really be possible g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source"); clean_up(CRM_EX_ERROR); } int main(int argc, char **argv) { int rc = pcmk_rc_ok; GOptionGroup *output_group = NULL; args = pcmk__new_common_args(SUMMARY); context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); options.pid_file = strdup("/tmp/ClusterMon.pid"); pcmk__cli_init_logging("crm_mon", 0); // Avoid needing to wait for subprocesses forked for -E/--external-agent avoid_zombies(); if (pcmk__ends_with_ext(argv[0], ".cgi")) { output_format = mon_output_cgi; options.exec_mode = mon_exec_one_shot; } processed_args = pcmk__cmdline_preproc(argv, "ehimpxEILU"); fence_history_cb("--fence-history", "1", NULL, NULL); /* Set an HTML title regardless of what format we will eventually use. This can't * be done in add_output_args. That function is called after command line * arguments are processed in the next block, which means it'll override whatever * title the user provides. Doing this here means the user can give their own * title on the command line. */ if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"", g_get_prgname())) { return clean_up(CRM_EX_USAGE); } if (!g_option_context_parse_strv(context, &processed_args, &error)) { return clean_up(CRM_EX_USAGE); } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!args->version) { if (args->quiet) { include_exclude_cb("--exclude", "times", NULL, NULL); } if (options.watch_fencing) { fence_history_cb("--fence-history", "0", NULL, NULL); options.fence_connect = TRUE; } /* create the cib-object early to be able to do further * decisions based on the cib-source */ cib = cib_new(); exit_on_invalid_cib(); switch (cib->variant) { case cib_native: // Everything (fencer, CIB, pcmkd status) should be available break; case cib_file: // Live fence history is not meaningful fence_history_cb("--fence-history", "0", NULL, NULL); /* Notifications are unsupported; nothing to monitor * @COMPAT: Let setup_cib_connection() handle this by exiting? */ options.exec_mode = mon_exec_one_shot; break; case cib_remote: // We won't receive any fencing updates fence_history_cb("--fence-history", "0", NULL, NULL); break; default: /* something is odd */ exit_on_invalid_cib(); break; } if ((options.exec_mode == mon_exec_daemonized) && !options.external_agent && pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires at least one of --output-to " "(with value not set to '-') and --external-agent"); return clean_up(CRM_EX_USAGE); } } reconcile_output_format(args); set_default_exec_mode(args); add_output_args(); /* output_format MUST NOT BE CHANGED AFTER THIS POINT. */ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); return clean_up(CRM_EX_ERROR); } /* If we had a valid format for pcmk__output_new(), output_format should be * set by now. */ CRM_ASSERT(output_format != mon_output_unset); if (options.exec_mode == mon_exec_daemonized) { if (!options.external_agent && (output_format == mon_output_none)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires --external-agent if used with " "--output-as=none"); return clean_up(CRM_EX_USAGE); } crm_enable_stderr(FALSE); cib_delete(cib); cib = NULL; pcmk__daemonize(crm_system_name, options.pid_file); cib = cib_new(); exit_on_invalid_cib(); } show = default_includes(output_format); /* Apply --include/--exclude flags we used internally. There's no error reporting * here because this would be a programming error. */ apply_include_exclude(options.includes_excludes, &error); /* And now apply any --include/--exclude flags the user gave on the command line. * These are done in a separate pass from the internal ones because we want to * make sure whatever the user specifies overrides whatever we do. */ if (!apply_include_exclude(options.user_includes_excludes, &error)) { return clean_up(CRM_EX_USAGE); } /* Sync up the initial value of interactive_fence_level with whatever was set with * --include/--exclude= options. */ if (pcmk_all_flags_set(show, pcmk_section_fencing_all)) { interactive_fence_level = 3; } else if (pcmk_is_set(show, pcmk_section_fence_worked)) { interactive_fence_level = 2; } else if (pcmk_any_flags_set(show, pcmk_section_fence_failed | pcmk_section_fence_pending)) { interactive_fence_level = 1; } else { interactive_fence_level = 0; } pcmk__register_lib_messages(out); crm_mon_register_messages(out); pe__register_messages(out); stonith__register_messages(out); // Messages internal to this file, nothing curses-specific pcmk__register_messages(out, fmt_functions); if (args->version) { out->version(out, false); return clean_up(CRM_EX_OK); } /* Extra sanity checks when in CGI mode */ if (output_format == mon_output_cgi) { if (cib->variant == cib_file) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode used with CIB file"); return clean_up(CRM_EX_USAGE); } else if (options.external_agent != NULL) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with --external-agent"); return clean_up(CRM_EX_USAGE); } else if (options.exec_mode == mon_exec_daemonized) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with -d"); return clean_up(CRM_EX_USAGE); } } if (output_format == mon_output_xml) { show_opts |= pcmk_show_inactive_rscs | pcmk_show_timing; } if ((output_format == mon_output_html || output_format == mon_output_cgi) && out->dest != stdout) { pcmk__html_add_header("meta", "http-equiv", "refresh", "content", pcmk__itoa(options.reconnect_ms / 1000), NULL); } #ifdef PCMK__COMPAT_2_0 // Keep failed action output the same as 2.0.x show_opts |= pcmk_show_failed_detail; #endif crm_info("Starting %s", crm_system_name); cib__set_output(cib, out); if (options.exec_mode == mon_exec_one_shot) { one_shot(); } out->message(out, "crm-mon-disconnected", "Waiting for initial connection", pcmkd_state); do { out->transient(out, "Connecting to cluster..."); rc = setup_api_connections(); if (rc != pcmk_rc_ok) { if ((rc == ENOTCONN) || (rc == ECONNREFUSED)) { out->transient(out, "Connection failed. Retrying in %ums...", options.reconnect_ms); } // Give some time to view all output even if we won't retry pcmk__sleep_ms(options.reconnect_ms); #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif } } while ((rc == ENOTCONN) || (rc == ECONNREFUSED)); if (rc != pcmk_rc_ok) { clean_up_on_connection_failure(rc); } set_fencing_options(interactive_fence_level); mon_refresh_display(NULL); mainloop = g_main_loop_new(NULL, FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (output_format == mon_output_console) { ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; io_channel = g_io_channel_unix_new(STDIN_FILENO); g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL); } #endif /* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In * this file, that is anywhere mainloop_set_trigger is called. */ refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); if (io_channel != NULL) { g_io_channel_shutdown(io_channel, TRUE, NULL); } crm_info("Exiting %s", crm_system_name); return clean_up(CRM_EX_OK); } static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = pcmk__itoa(rc); char *status_s = pcmk__itoa(status); char *target_rc_s = pcmk__itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent); if(rsc) { setenv("CRM_notify_rsc", rsc, 1); } if (options.external_recipient) { setenv("CRM_notify_recipient", options.external_recipient, 1); } setenv("CRM_notify_node", node, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { out->err(out, "notification fork() failed: %s", strerror(errno)); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(options.external_agent, options.external_agent, NULL); exit(CRM_EX_ERROR); } crm_trace("Finished running custom notification program '%s'.", options.external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static int handle_rsc_op(xmlNode *xml, void *userdata) { const char *node_id = (const char *) userdata; int rc = -1; int status = -1; int target_rc = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *magic = NULL; const char *id = NULL; const char *node = NULL; xmlNode *n = xml; xmlNode * rsc_op = xml; if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) { pcmk__xe_foreach_child(xml, NULL, handle_rsc_op, (void *) node_id); return pcmk_rc_ok; } id = pcmk__xe_history_key(rsc_op); magic = crm_element_value(rsc_op, PCMK__XA_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return pcmk_rc_ok; } if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return pcmk_rc_ok; } if (parse_op_key(id, &rsc, &task, NULL) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } node = crm_element_value(rsc_op, PCMK__META_ON_NODE); while ((n != NULL) && !pcmk__xe_is(n, PCMK__XE_NODE_STATE)) { n = n->parent; } if(node == NULL && n) { node = crm_element_value(n, PCMK_XA_UNAME); } if (node == NULL && n) { node = ID(n); } if (node == NULL) { node = node_id; } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? */ desc = pcmk_rc_str(pcmk_rc_ok); if ((status == PCMK_EXEC_DONE) && (target_rc == rc)) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_EXEC_DONE) { desc = services_ocf_exitcode_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = pcmk_exec_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && options.external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(rsc); free(task); return pcmk_rc_ok; } /* This function is just a wrapper around mainloop_set_trigger so that it can be * called from a mainloop directly. It's simply another way of ensuring the screen * gets redrawn. */ static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); return FALSE; } static int handle_op_for_node(xmlNode *xml, void *userdata) { const char *node = crm_element_value(xml, PCMK_XA_UNAME); if (node == NULL) { node = ID(xml); } handle_rsc_op(xml, (void *) node); return pcmk_rc_ok; } static void crm_diff_update_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = pcmk__xml_first_child(diff); change != NULL; change = pcmk__xml_next(change)) { const char *name = NULL; const char *op = crm_element_value(change, PCMK_XA_OPERATION); const char *xpath = crm_element_value(change, PCMK_XA_PATH); xmlNode *match = NULL; const char *node = NULL; if(op == NULL) { continue; } else if(strcmp(op, "create") == 0) { match = change->children; } else if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "delete") == 0) { continue; } else if(strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0); } else if (strcmp(name, PCMK_XE_CIB) == 0) { pcmk__xe_foreach_child(first_named_child(match, PCMK_XE_STATUS), NULL, handle_op_for_node, NULL); } else if (strcmp(name, PCMK_XE_STATUS) == 0) { pcmk__xe_foreach_child(match, NULL, handle_op_for_node, NULL); } else if (strcmp(name, PCMK__XE_NODE_STATE) == 0) { node = crm_element_value(match, PCMK_XA_UNAME); if (node == NULL) { node = ID(match); } handle_rsc_op(match, (void *) node); - } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) { + } else if (strcmp(name, PCMK__XE_LRM) == 0) { node = ID(match); handle_rsc_op(match, (void *) node); } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { - char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { - char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { - char *local_node = pcmk__xpath_node_id(xpath, "lrm"); + char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); handle_rsc_op(match, local_node); free(local_node); } else { crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } } } static void crm_diff_update_v1(const char *event, xmlNode * msg) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" PCMK__XE_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op, NULL); } freeXpathObject(xpathObj); } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; static bool stale = FALSE; gboolean cib_updated = FALSE; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); out->progress(out, false); if (current_cib != NULL) { rc = xml_apply_patchset(current_cib, diff, TRUE); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; break; case pcmk_ok: cib_updated = TRUE; break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; } } if (current_cib == NULL) { crm_trace("Re-requesting the full cib"); cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); } if (options.external_agent) { int format = 0; crm_element_value_int(diff, PCMK_XA_FORMAT, &format); switch(format) { case 1: crm_diff_update_v1(event, msg); break; case 2: crm_diff_update_v2(event, msg); break; default: crm_err("Unknown patch format: %d", format); } } if (current_cib == NULL) { if(!stale) { out->info(out, "--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; refresh_after_event(cib_updated, FALSE); } static int mon_refresh_display(gpointer user_data) { int rc = pcmk_rc_ok; last_refresh = time(NULL); if (output_format == mon_output_none) { return G_SOURCE_REMOVE; } if (fence_history == pcmk__fence_history_full && !pcmk_all_flags_set(show, pcmk_section_fencing_all) && output_format != mon_output_xml) { fence_history = pcmk__fence_history_reduced; } // Get an up-to-date pacemakerd status for the cluster summary if (cib->variant == cib_native) { pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2, false, &pcmkd_state); } if (out->dest != stdout) { out->reset(out); } rc = pcmk__output_cluster_status(out, st, cib, current_cib, pcmkd_state, fence_history, show, show_opts, options.only_node,options.only_rsc, options.neg_location_prefix, output_format == mon_output_monitor); if (output_format == mon_output_monitor && rc != pcmk_rc_ok) { clean_up(MON_STATUS_WARN); return G_SOURCE_REMOVE; } else if (rc == pcmk_rc_schema_validation) { clean_up(CRM_EX_CONFIG); return G_SOURCE_REMOVE; } if (out->dest != stdout) { out->finish(out, CRM_EX_OK, true, NULL); } return G_SOURCE_CONTINUE; } /* This function is called for fencing events (see setup_fencer_connection() for * which ones) when --watch-fencing is used on the command line */ static void mon_st_callback_event(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else if (options.external_agent) { char *desc = stonith__event_description(e); send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); free(desc); } } /* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met: * * - If the last update occurred more than reconnect_ms ago (defaults to 5s, but * can be changed via the -i command line option), or * - After every 10 CIB updates, or * - If it's been 2s since the last update * * This function sounds like it would be more broadly useful, but it is only called when a * fencing event is received or a CIB diff occurrs. */ static void refresh_after_event(gboolean data_updated, gboolean enforce) { static int updates = 0; time_t now = time(NULL); if (data_updated) { updates++; } if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); } if (reconnect_timer > 0) { /* we will receive a refresh request after successful reconnect */ mainloop_timer_stop(refresh_timer); return; } /* as we're not handling initial failure of fencer-connection as * fatal give it a retry here * not getting here if cib-reconnection is already on the way */ setup_fencer_connection(); if (enforce || ((now - last_refresh) > (options.reconnect_ms / 1000)) || updates >= 10) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else { mainloop_timer_start(refresh_timer); } } /* This function is called for fencing events (see setup_fencer_connection() for * which ones) when --watch-fencing is NOT used on the command line */ static void mon_st_callback_display(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else { out->progress(out, false); refresh_after_event(TRUE, FALSE); } } /* * De-init ncurses, disconnect from the CIB manager, disconnect fencing, * deallocate memory and show usage-message if requested. * * We don't actually return, but nominally returning crm_exit_t allows a usage * like "return clean_up(exit_code);" which helps static analysis understand the * code flow. */ static crm_exit_t clean_up(crm_exit_t exit_code) { /* Quitting crm_mon is much more complicated than it ought to be. */ /* (1) Close connections, free things, etc. */ cib__clean_up_connection(&cib); stonith_api_delete(st); free(options.neg_location_prefix); free(options.only_node); free(options.only_rsc); free(options.pid_file); g_slist_free_full(options.includes_excludes, free); g_strfreev(processed_args); /* (2) If this is abnormal termination and we're in curses mode, shut down * curses first. Any messages displayed to the screen before curses is shut * down will be lost because doing the shut down will also restore the * screen to whatever it looked like before crm_mon was started. */ if (((error != NULL) || (exit_code == CRM_EX_USAGE)) && (output_format == mon_output_console) && (out != NULL)) { out->finish(out, exit_code, false, NULL); pcmk__output_free(out); out = NULL; } /* (3) If this is a command line usage related failure, print the usage * message. */ if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); } pcmk__free_arg_context(context); /* (4) If this is any kind of error, print the error out and exit. Make * sure to handle situations both before and after formatted output is * set up. We want errors to appear formatted if at all possible. */ if (error != NULL) { if (out != NULL) { out->err(out, "%s: %s", g_get_prgname(), error->message); out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } else { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); } g_clear_error(&error); crm_exit(exit_code); } /* (5) Print formatted output to the screen if we made it far enough in * crm_mon to be able to do so. */ if (out != NULL) { if (options.exec_mode != mon_exec_daemonized) { out->finish(out, exit_code, true, NULL); } pcmk__output_free(out); pcmk__unregister_formats(); } crm_exit(exit_code); } diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 594c006bf5..4507584398 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1,2260 +1,2260 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include static GList * build_node_info_list(const pcmk_resource_t *rsc) { GList *retval = NULL; for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) { const pcmk_resource_t *child = (const pcmk_resource_t *) iter->data; for (const GList *iter2 = child->running_on; iter2 != NULL; iter2 = iter2->next) { const pcmk_node_t *node = (const pcmk_node_t *) iter2->data; node_info_t *ni = calloc(1, sizeof(node_info_t)); ni->node_name = node->details->uname; ni->promoted = pcmk_is_set(rsc->flags, pcmk_rsc_promotable) && child->fns->state(child, TRUE) == pcmk_role_promoted; retval = g_list_prepend(retval, ni); } } return retval; } GList * cli_resource_search(pcmk_resource_t *rsc, const char *requested_name, pcmk_scheduler_t *scheduler) { GList *retval = NULL; const pcmk_resource_t *parent = pe__const_top_resource(rsc, false); if (pe_rsc_is_clone(rsc)) { retval = build_node_info_list(rsc); /* The anonymous clone children's common ID is supplied */ } else if (pe_rsc_is_clone(parent) && !pcmk_is_set(rsc->flags, pcmk_rsc_unique) && rsc->clone_name && pcmk__str_eq(requested_name, rsc->clone_name, pcmk__str_casei) && !pcmk__str_eq(requested_name, rsc->id, pcmk__str_casei)) { retval = build_node_info_list(parent); } else if (rsc->running_on != NULL) { for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; node_info_t *ni = calloc(1, sizeof(node_info_t)); ni->node_name = node->details->uname; ni->promoted = (rsc->fns->state(rsc, TRUE) == pcmk_role_promoted); retval = g_list_prepend(retval, ni); } } return retval; } // \return Standard Pacemaker return code static int find_resource_attr(pcmk__output_t *out, cib_t * the_cib, const char *attr, const char *rsc, const char *attr_set_type, const char *set_name, const char *attr_id, const char *attr_name, char **value) { int rc = pcmk_rc_ok; xmlNode *xml_search = NULL; GString *xpath = NULL; const char *xpath_base = NULL; if(value) { *value = NULL; } if(the_cib == NULL) { return ENOTCONN; } xpath_base = pcmk_cib_xpath_for(PCMK_XE_RESOURCES); if (xpath_base == NULL) { crm_err(PCMK_XE_RESOURCES " CIB element not known (bug?)"); return ENOMSG; } xpath = g_string_sized_new(1024); pcmk__g_strcat(xpath, xpath_base, "//*[@" PCMK_XA_ID "=\"", rsc, "\"]", NULL); if (attr_set_type != NULL) { pcmk__g_strcat(xpath, "/", attr_set_type, NULL); if (set_name != NULL) { pcmk__g_strcat(xpath, "[@" PCMK_XA_ID "=\"", set_name, "\"]", NULL); } } g_string_append(xpath, "//" PCMK_XE_NVPAIR "["); if (attr_id != NULL) { pcmk__g_strcat(xpath, "@" PCMK_XA_ID "=\"", attr_id, "\"", NULL); } if (attr_name != NULL) { if (attr_id != NULL) { g_string_append(xpath, " and "); } pcmk__g_strcat(xpath, "@" PCMK_XA_NAME "=\"", attr_name, "\"", NULL); } g_string_append_c(xpath, ']'); rc = the_cib->cmds->query(the_cib, (const char *) xpath->str, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { goto done; } crm_log_xml_debug(xml_search, "Match"); if (xml_search->children != NULL) { xmlNode *child = NULL; rc = ENOTUNIQ; out->info(out, "Multiple attributes match name=%s", attr_name); for (child = pcmk__xml_first_child(xml_search); child != NULL; child = pcmk__xml_next(child)) { out->info(out, " Value: %s \t(id=%s)", crm_element_value(child, PCMK_XA_VALUE), ID(child)); } out->spacer(out); } else if(value) { pcmk__str_update(value, crm_element_value(xml_search, attr)); } done: g_string_free(xpath, TRUE); free_xml(xml_search); return rc; } /* PRIVATE. Use the find_matching_attr_resources instead. */ static void find_matching_attr_resources_recursive(pcmk__output_t *out, GList /* */ **result, pcmk_resource_t *rsc, const char *rsc_id, const char * attr_set, const char * attr_set_type, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd, int depth) { int rc = pcmk_rc_ok; char *lookup_id = clone_strip(rsc->id); char *local_attr_id = NULL; /* visit the children */ for(GList *gIter = rsc->children; gIter; gIter = gIter->next) { find_matching_attr_resources_recursive(out, result, (pcmk_resource_t *) gIter->data, rsc_id, attr_set, attr_set_type, attr_id, attr_name, cib, cmd, depth+1); /* do it only once for clones */ if (rsc->variant == pcmk_rsc_variant_clone) { break; } } rc = find_resource_attr(out, cib, PCMK_XA_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); /* Post-order traversal. * The root is always on the list and it is the last item. */ if((0 == depth) || (pcmk_rc_ok == rc)) { /* push the head */ *result = g_list_append(*result, rsc); } free(local_attr_id); free(lookup_id); } /* The result is a linearized pre-ordered tree of resources. */ static GList/**/ * find_matching_attr_resources(pcmk__output_t *out, pcmk_resource_t *rsc, const char * rsc_id, const char * attr_set, const char * attr_set_type, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd, gboolean force) { int rc = pcmk_rc_ok; char *lookup_id = NULL; char *local_attr_id = NULL; GList * result = NULL; /* If --force is used, update only the requested resource (clone or primitive). * Otherwise, if the primitive has the attribute, use that. * Otherwise use the clone. */ if(force == TRUE) { return g_list_append(result, rsc); } if ((rsc->parent != NULL) && (rsc->parent->variant == pcmk_rsc_variant_clone)) { int rc = pcmk_rc_ok; char *local_attr_id = NULL; rc = find_resource_attr(out, cib, PCMK_XA_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); free(local_attr_id); if(rc != pcmk_rc_ok) { rsc = rsc->parent; out->info(out, "Performing %s of '%s' on '%s', the parent of '%s'", cmd, attr_name, rsc->id, rsc_id); } return g_list_append(result, rsc); } else if ((rsc->parent == NULL) && (rsc->children != NULL) && (rsc->variant == pcmk_rsc_variant_clone)) { pcmk_resource_t *child = rsc->children->data; if (child->variant == pcmk_rsc_variant_primitive) { lookup_id = clone_strip(child->id); /* Could be a cloned group! */ rc = find_resource_attr(out, cib, PCMK_XA_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if(rc == pcmk_rc_ok) { rsc = child; out->info(out, "A value for '%s' already exists in child '%s', performing %s on that instead of '%s'", attr_name, lookup_id, cmd, rsc_id); } free(local_attr_id); free(lookup_id); } return g_list_append(result, rsc); } /* If the resource is a group ==> children inherit the attribute if defined. */ find_matching_attr_resources_recursive(out, &result, rsc, rsc_id, attr_set, attr_set_type, attr_id, attr_name, cib, cmd, 0); return result; } // \return Standard Pacemaker return code int cli_resource_update_attribute(pcmk_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_set_type, const char *attr_id, const char *attr_name, const char *attr_value, gboolean recursive, cib_t *cib, int cib_options, gboolean force) { pcmk__output_t *out = rsc->cluster->priv; int rc = pcmk_rc_ok; char *found_attr_id = NULL; GList/**/ *resources = NULL; const char *top_id = pe__const_top_resource(rsc, false)->id; if ((attr_id == NULL) && !force) { find_resource_attr(out, cib, PCMK_XA_ID, top_id, NULL, NULL, NULL, attr_name, NULL); } if (pcmk__str_eq(attr_set_type, PCMK_XE_INSTANCE_ATTRIBUTES, pcmk__str_casei)) { if (!force) { rc = find_resource_attr(out, cib, PCMK_XA_ID, top_id, PCMK_XE_META_ATTRIBUTES, attr_set, attr_id, attr_name, &found_attr_id); if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { out->err(out, "WARNING: There is already a meta attribute " "for '%s' called '%s' (id=%s)", top_id, attr_name, found_attr_id); out->err(out, " Delete '%s' first or use the force option " "to override", found_attr_id); } free(found_attr_id); if (rc == pcmk_rc_ok) { return ENOTUNIQ; } } resources = g_list_append(resources, rsc); } else if (pcmk__str_eq(attr_set_type, ATTR_SET_ELEMENT, pcmk__str_none)) { crm_xml_add(rsc->xml, attr_name, attr_value); CRM_ASSERT(cib != NULL); rc = cib->cmds->replace(cib, PCMK_XE_RESOURCES, rsc->xml, cib_options); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { out->info(out, "Set attribute: name=%s value=%s", attr_name, attr_value); } return rc; } else { resources = find_matching_attr_resources(out, rsc, requested_name, attr_set, attr_set_type, attr_id, attr_name, cib, "update", force); } /* If the user specified attr_set or attr_id, the intent is to modify a * single resource, which will be the last item in the list. */ if ((attr_set != NULL) || (attr_id != NULL)) { GList *last = g_list_last(resources); resources = g_list_remove_link(resources, last); g_list_free(resources); resources = last; } for (GList *iter = resources; iter != NULL; iter = iter->next) { char *lookup_id = NULL; char *local_attr_set = NULL; const char *rsc_attr_id = attr_id; const char *rsc_attr_set = attr_set; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; found_attr_id = NULL; rsc = (pcmk_resource_t *) iter->data; lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */ rc = find_resource_attr(out, cib, PCMK_XA_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &found_attr_id); switch (rc) { case pcmk_rc_ok: crm_debug("Found a match for name=%s: id=%s", attr_name, found_attr_id); rsc_attr_id = found_attr_id; break; case ENXIO: if (rsc_attr_set == NULL) { local_attr_set = crm_strdup_printf("%s-%s", lookup_id, attr_set_type); rsc_attr_set = local_attr_set; } if (rsc_attr_id == NULL) { found_attr_id = crm_strdup_printf("%s-%s", rsc_attr_set, attr_name); rsc_attr_id = found_attr_id; } xml_top = create_xml_node(NULL, (const char *) rsc->xml->name); crm_xml_add(xml_top, PCMK_XA_ID, lookup_id); xml_obj = create_xml_node(xml_top, attr_set_type); crm_xml_add(xml_obj, PCMK_XA_ID, rsc_attr_set); break; default: free(lookup_id); free(found_attr_id); g_list_free(resources); return rc; } xml_obj = crm_create_nvpair_xml(xml_obj, rsc_attr_id, attr_name, attr_value); if (xml_top == NULL) { xml_top = xml_obj; } crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, PCMK_XE_RESOURCES, xml_top, cib_options); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { out->info(out, "Set '%s' option: id=%s%s%s%s%s value=%s", lookup_id, found_attr_id, ((rsc_attr_set == NULL)? "" : " set="), pcmk__s(rsc_attr_set, ""), ((attr_name == NULL)? "" : " name="), pcmk__s(attr_name, ""), attr_value); } free_xml(xml_top); free(lookup_id); free(found_attr_id); free(local_attr_set); if (recursive && pcmk__str_eq(attr_set_type, PCMK_XE_META_ATTRIBUTES, pcmk__str_casei)) { GList *lpc = NULL; static bool need_init = true; if (need_init) { need_init = false; pcmk__unpack_constraints(rsc->cluster); pe__clear_resource_flags_on_all(rsc->cluster, pcmk_rsc_detect_loop); } /* We want to set the attribute only on resources explicitly * colocated with this one, so we use rsc->rsc_cons_lhs directly * rather than the with_this_colocations() method. */ pcmk__set_rsc_flags(rsc, pcmk_rsc_detect_loop); for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; crm_debug("Checking %s %d", cons->id, cons->score); if (!pcmk_is_set(cons->dependent->flags, pcmk_rsc_detect_loop) && (cons->score > 0)) { crm_debug("Setting %s=%s for dependent resource %s", attr_name, attr_value, cons->dependent->id); cli_resource_update_attribute(cons->dependent, cons->dependent->id, NULL, attr_set_type, NULL, attr_name, attr_value, recursive, cib, cib_options, force); } } } } g_list_free(resources); return rc; } // \return Standard Pacemaker return code int cli_resource_delete_attribute(pcmk_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_set_type, const char *attr_id, const char *attr_name, cib_t *cib, int cib_options, gboolean force) { pcmk__output_t *out = rsc->cluster->priv; int rc = pcmk_rc_ok; GList/**/ *resources = NULL; if ((attr_id == NULL) && !force) { find_resource_attr(out, cib, PCMK_XA_ID, pe__const_top_resource(rsc, false)->id, NULL, NULL, NULL, attr_name, NULL); } if (pcmk__str_eq(attr_set_type, PCMK_XE_META_ATTRIBUTES, pcmk__str_casei)) { resources = find_matching_attr_resources(out, rsc, requested_name, attr_set, attr_set_type, attr_id, attr_name, cib, "delete", force); } else if (pcmk__str_eq(attr_set_type, ATTR_SET_ELEMENT, pcmk__str_none)) { xml_remove_prop(rsc->xml, attr_name); CRM_ASSERT(cib != NULL); rc = cib->cmds->replace(cib, PCMK_XE_RESOURCES, rsc->xml, cib_options); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { out->info(out, "Deleted attribute: %s", attr_name); } return rc; } else { resources = g_list_append(resources, rsc); } for (GList *iter = resources; iter != NULL; iter = iter->next) { char *lookup_id = NULL; xmlNode *xml_obj = NULL; char *found_attr_id = NULL; const char *rsc_attr_id = attr_id; rsc = (pcmk_resource_t *) iter->data; lookup_id = clone_strip(rsc->id); rc = find_resource_attr(out, cib, PCMK_XA_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &found_attr_id); switch (rc) { case pcmk_rc_ok: break; case ENXIO: free(lookup_id); rc = pcmk_rc_ok; continue; default: free(lookup_id); g_list_free(resources); return rc; } if (rsc_attr_id == NULL) { rsc_attr_id = found_attr_id; } xml_obj = crm_create_nvpair_xml(NULL, rsc_attr_id, attr_name, NULL); crm_log_xml_debug(xml_obj, "Delete"); CRM_ASSERT(cib); rc = cib->cmds->remove(cib, PCMK_XE_RESOURCES, xml_obj, cib_options); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { out->info(out, "Deleted '%s' option: id=%s%s%s%s%s", lookup_id, found_attr_id, ((attr_set == NULL)? "" : " set="), pcmk__s(attr_set, ""), ((attr_name == NULL)? "" : " name="), pcmk__s(attr_name, "")); } free(lookup_id); free_xml(xml_obj); free(found_attr_id); } g_list_free(resources); return rc; } // \return Standard Pacemaker return code static int send_lrm_rsc_op(pcmk_ipc_api_t *controld_api, bool do_fail_resource, const char *host_uname, const char *rsc_id, pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv; const char *router_node = host_uname; const char *rsc_api_id = NULL; const char *rsc_long_id = NULL; const char *rsc_class = NULL; const char *rsc_provider = NULL; const char *rsc_type = NULL; bool cib_only = false; pcmk_resource_t *rsc = pe_find_resource(scheduler->resources, rsc_id); if (rsc == NULL) { out->err(out, "Resource %s not found", rsc_id); return ENXIO; } else if (rsc->variant != pcmk_rsc_variant_primitive) { out->err(out, "We can only process primitive resources, not %s", rsc_id); return EINVAL; } rsc_class = crm_element_value(rsc->xml, PCMK_XA_CLASS); rsc_provider = crm_element_value(rsc->xml, PCMK_XA_PROVIDER), rsc_type = crm_element_value(rsc->xml, PCMK_XA_TYPE); if ((rsc_class == NULL) || (rsc_type == NULL)) { out->err(out, "Resource %s does not have a class and type", rsc_id); return EINVAL; } { pcmk_node_t *node = pe_find_node(scheduler->nodes, host_uname); if (node == NULL) { out->err(out, "Node %s not found", host_uname); return pcmk_rc_node_unknown; } if (!(node->details->online)) { if (do_fail_resource) { out->err(out, "Node %s is not online", host_uname); return ENOTCONN; } else { cib_only = true; } } if (!cib_only && pe__is_guest_or_remote_node(node)) { node = pcmk__current_node(node->details->remote_rsc); if (node == NULL) { out->err(out, "No cluster connection to Pacemaker Remote node %s detected", host_uname); return ENOTCONN; } router_node = node->details->uname; } } if (rsc->clone_name) { rsc_api_id = rsc->clone_name; rsc_long_id = rsc->id; } else { rsc_api_id = rsc->id; } if (do_fail_resource) { return pcmk_controld_api_fail(controld_api, host_uname, router_node, rsc_api_id, rsc_long_id, rsc_class, rsc_provider, rsc_type); } else { return pcmk_controld_api_refresh(controld_api, host_uname, router_node, rsc_api_id, rsc_long_id, rsc_class, rsc_provider, rsc_type, cib_only); } } /*! * \internal * \brief Get resource name as used in failure-related node attributes * * \param[in] rsc Resource to check * * \return Newly allocated string containing resource's fail name * \note The caller is responsible for freeing the result. */ static inline char * rsc_fail_name(const pcmk_resource_t *rsc) { const char *name = (rsc->clone_name? rsc->clone_name : rsc->id); if (pcmk_is_set(rsc->flags, pcmk_rsc_unique)) { return strdup(name); } return clone_strip(name); } // \return Standard Pacemaker return code static int clear_rsc_history(pcmk_ipc_api_t *controld_api, const char *host_uname, const char *rsc_id, pcmk_scheduler_t *scheduler) { int rc = pcmk_rc_ok; /* Erase the resource's entire LRM history in the CIB, even if we're only * clearing a single operation's fail count. If we erased only entries for a * single operation, we might wind up with a wrong idea of the current * resource state, and we might not re-probe the resource. */ rc = send_lrm_rsc_op(controld_api, false, host_uname, rsc_id, scheduler); if (rc != pcmk_rc_ok) { return rc; } crm_trace("Processing %d mainloop inputs", pcmk_controld_api_replies_expected(controld_api)); while (g_main_context_iteration(NULL, FALSE)) { crm_trace("Processed mainloop input, %d still remaining", pcmk_controld_api_replies_expected(controld_api)); } return rc; } // \return Standard Pacemaker return code static int clear_rsc_failures(pcmk__output_t *out, pcmk_ipc_api_t *controld_api, const char *node_name, const char *rsc_id, const char *operation, const char *interval_spec, pcmk_scheduler_t *scheduler) { int rc = pcmk_rc_ok; const char *failed_value = NULL; const char *failed_id = NULL; char *interval_ms_s = NULL; GHashTable *rscs = NULL; GHashTableIter iter; /* Create a hash table to use as a set of resources to clean. This lets us * clean each resource only once (per node) regardless of how many failed * operations it has. */ rscs = pcmk__strkey_table(NULL, NULL); // Normalize interval to milliseconds for comparison to history entry if (operation) { guint interval_ms = 0U; pcmk_parse_interval_spec(interval_spec, &interval_ms); interval_ms_s = crm_strdup_printf("%u", interval_ms); } for (xmlNode *xml_op = pcmk__xml_first_child(scheduler->failed); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { failed_id = crm_element_value(xml_op, PCMK__XA_RSC_ID); if (failed_id == NULL) { // Malformed history entry, should never happen continue; } // No resource specified means all resources match if (rsc_id) { pcmk_resource_t *fail_rsc = NULL; fail_rsc = pe_find_resource_with_flags(scheduler->resources, failed_id, pcmk_rsc_match_history |pcmk_rsc_match_anon_basename); if (!fail_rsc || !pcmk__str_eq(rsc_id, fail_rsc->id, pcmk__str_casei)) { continue; } } // Host name should always have been provided by this point failed_value = crm_element_value(xml_op, PCMK_XA_UNAME); if (!pcmk__str_eq(node_name, failed_value, pcmk__str_casei)) { continue; } // No operation specified means all operations match if (operation) { failed_value = crm_element_value(xml_op, PCMK_XA_OPERATION); if (!pcmk__str_eq(operation, failed_value, pcmk__str_casei)) { continue; } // Interval (if operation was specified) defaults to 0 (not all) failed_value = crm_element_value(xml_op, PCMK_META_INTERVAL); if (!pcmk__str_eq(interval_ms_s, failed_value, pcmk__str_casei)) { continue; } } g_hash_table_add(rscs, (gpointer) failed_id); } free(interval_ms_s); g_hash_table_iter_init(&iter, rscs); while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) { crm_debug("Erasing failures of %s on %s", failed_id, node_name); rc = clear_rsc_history(controld_api, node_name, failed_id, scheduler); if (rc != pcmk_rc_ok) { return rc; } } g_hash_table_destroy(rscs); return rc; } // \return Standard Pacemaker return code static int clear_rsc_fail_attrs(const pcmk_resource_t *rsc, const char *operation, const char *interval_spec, const pcmk_node_t *node) { int rc = pcmk_rc_ok; int attr_options = pcmk__node_attr_none; char *rsc_name = rsc_fail_name(rsc); if (pe__is_guest_or_remote_node(node)) { attr_options |= pcmk__node_attr_remote; } rc = pcmk__attrd_api_clear_failures(NULL, node->details->uname, rsc_name, operation, interval_spec, NULL, attr_options); free(rsc_name); return rc; } // \return Standard Pacemaker return code int cli_resource_delete(pcmk_ipc_api_t *controld_api, const char *host_uname, const pcmk_resource_t *rsc, const char *operation, const char *interval_spec, bool just_failures, pcmk_scheduler_t *scheduler, gboolean force) { pcmk__output_t *out = scheduler->priv; int rc = pcmk_rc_ok; pcmk_node_t *node = NULL; if (rsc == NULL) { return ENXIO; } else if (rsc->children) { for (const GList *lpc = rsc->children; lpc != NULL; lpc = lpc->next) { const pcmk_resource_t *child = (const pcmk_resource_t *) lpc->data; rc = cli_resource_delete(controld_api, host_uname, child, operation, interval_spec, just_failures, scheduler, force); if (rc != pcmk_rc_ok) { return rc; } } return pcmk_rc_ok; } else if (host_uname == NULL) { GList *lpc = NULL; GList *nodes = g_hash_table_get_values(rsc->known_on); if(nodes == NULL && force) { nodes = pcmk__copy_node_list(scheduler->nodes, false); } else if(nodes == NULL && rsc->exclusive_discover) { GHashTableIter iter; pcmk_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void**)&node)) { if(node->weight >= 0) { nodes = g_list_prepend(nodes, node); } } } else if(nodes == NULL) { nodes = g_hash_table_get_values(rsc->allowed_nodes); } for (lpc = nodes; lpc != NULL; lpc = lpc->next) { node = (pcmk_node_t *) lpc->data; if (node->details->online) { rc = cli_resource_delete(controld_api, node->details->uname, rsc, operation, interval_spec, just_failures, scheduler, force); } if (rc != pcmk_rc_ok) { g_list_free(nodes); return rc; } } g_list_free(nodes); return pcmk_rc_ok; } node = pe_find_node(scheduler->nodes, host_uname); if (node == NULL) { out->err(out, "Unable to clean up %s because node %s not found", rsc->id, host_uname); return ENODEV; } if (!node->details->rsc_discovery_enabled) { out->err(out, "Unable to clean up %s because resource discovery disabled on %s", rsc->id, host_uname); return EOPNOTSUPP; } if (controld_api == NULL) { out->err(out, "Dry run: skipping clean-up of %s on %s due to CIB_file", rsc->id, host_uname); return pcmk_rc_ok; } rc = clear_rsc_fail_attrs(rsc, operation, interval_spec, node); if (rc != pcmk_rc_ok) { out->err(out, "Unable to clean up %s failures on %s: %s", rsc->id, host_uname, pcmk_rc_str(rc)); return rc; } if (just_failures) { rc = clear_rsc_failures(out, controld_api, host_uname, rsc->id, operation, interval_spec, scheduler); } else { rc = clear_rsc_history(controld_api, host_uname, rsc->id, scheduler); } if (rc != pcmk_rc_ok) { out->err(out, "Cleaned %s failures on %s, but unable to clean history: %s", rsc->id, host_uname, pcmk_rc_str(rc)); } else { out->info(out, "Cleaned up %s on %s", rsc->id, host_uname); } return rc; } // \return Standard Pacemaker return code int cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, const char *operation, const char *interval_spec, pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv; int rc = pcmk_rc_ok; int attr_options = pcmk__node_attr_none; const char *display_name = node_name? node_name : "all nodes"; if (controld_api == NULL) { out->info(out, "Dry run: skipping clean-up of %s due to CIB_file", display_name); return rc; } if (node_name) { pcmk_node_t *node = pe_find_node(scheduler->nodes, node_name); if (node == NULL) { out->err(out, "Unknown node: %s", node_name); return ENXIO; } if (pe__is_guest_or_remote_node(node)) { attr_options |= pcmk__node_attr_remote; } } rc = pcmk__attrd_api_clear_failures(NULL, node_name, NULL, operation, interval_spec, NULL, attr_options); if (rc != pcmk_rc_ok) { out->err(out, "Unable to clean up all failures on %s: %s", display_name, pcmk_rc_str(rc)); return rc; } if (node_name) { rc = clear_rsc_failures(out, controld_api, node_name, NULL, operation, interval_spec, scheduler); if (rc != pcmk_rc_ok) { out->err(out, "Cleaned all resource failures on %s, but unable to clean history: %s", node_name, pcmk_rc_str(rc)); return rc; } } else { for (GList *iter = scheduler->nodes; iter; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; rc = clear_rsc_failures(out, controld_api, node->details->uname, NULL, operation, interval_spec, scheduler); if (rc != pcmk_rc_ok) { out->err(out, "Cleaned all resource failures on all nodes, but unable to clean history: %s", pcmk_rc_str(rc)); return rc; } } } out->info(out, "Cleaned up all resources on %s", display_name); return rc; } static void check_role(resource_checks_t *checks) { const char *role_s = g_hash_table_lookup(checks->rsc->meta, PCMK_META_TARGET_ROLE); if (role_s == NULL) { return; } switch (pcmk_parse_role(role_s)) { case pcmk_role_stopped: checks->flags |= rsc_remain_stopped; break; case pcmk_role_unpromoted: if (pcmk_is_set(pe__const_top_resource(checks->rsc, false)->flags, pcmk_rsc_promotable)) { checks->flags |= rsc_unpromotable; } break; default: break; } } static void check_managed(resource_checks_t *checks) { const char *managed_s = g_hash_table_lookup(checks->rsc->meta, PCMK_META_IS_MANAGED); if ((managed_s != NULL) && !crm_is_true(managed_s)) { checks->flags |= rsc_unmanaged; } } static void check_locked(resource_checks_t *checks) { if (checks->rsc->lock_node != NULL) { checks->flags |= rsc_locked; checks->lock_node = checks->rsc->lock_node->details->uname; } } static bool node_is_unhealthy(pcmk_node_t *node) { switch (pe__health_strategy(node->details->data_set)) { case pcmk__health_strategy_none: break; case pcmk__health_strategy_no_red: if (pe__node_health(node) < 0) { return true; } break; case pcmk__health_strategy_only_green: if (pe__node_health(node) <= 0) { return true; } break; case pcmk__health_strategy_progressive: case pcmk__health_strategy_custom: /* @TODO These are finite scores, possibly with rules, and possibly * combining with other scores, so attributing these as a cause is * nontrivial. */ break; } return false; } static void check_node_health(resource_checks_t *checks, pcmk_node_t *node) { if (node == NULL) { GHashTableIter iter; bool allowed = false; bool all_nodes_unhealthy = true; g_hash_table_iter_init(&iter, checks->rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { allowed = true; if (!node_is_unhealthy(node)) { all_nodes_unhealthy = false; break; } } if (allowed && all_nodes_unhealthy) { checks->flags |= rsc_node_health; } } else if (node_is_unhealthy(node)) { checks->flags |= rsc_node_health; } } int cli_resource_check(pcmk__output_t *out, pcmk_resource_t *rsc, pcmk_node_t *node) { resource_checks_t checks = { .rsc = rsc }; check_role(&checks); check_managed(&checks); check_locked(&checks); check_node_health(&checks, node); return out->message(out, "resource-check-list", &checks); } // \return Standard Pacemaker return code int cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, const char *rsc_id, pcmk_scheduler_t *scheduler) { crm_notice("Failing %s on %s", rsc_id, host_uname); return send_lrm_rsc_op(controld_api, true, host_uname, rsc_id, scheduler); } static GHashTable * generate_resource_params(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler) { GHashTable *params = NULL; GHashTable *meta = NULL; GHashTable *combined = NULL; GHashTableIter iter; char *key = NULL; char *value = NULL; combined = pcmk__strkey_table(free, free); params = pe_rsc_params(rsc, node, scheduler); if (params != NULL) { g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { g_hash_table_insert(combined, strdup(key), strdup(value)); } } meta = pcmk__strkey_table(free, free); get_meta_attributes(meta, rsc, node, scheduler); if (meta != NULL) { g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); g_hash_table_insert(combined, crm_name, strdup(value)); } g_hash_table_destroy(meta); } return combined; } bool resource_is_running_on(pcmk_resource_t *rsc, const char *host) { bool found = true; GList *hIter = NULL; GList *hosts = NULL; if (rsc == NULL) { return false; } rsc->fns->location(rsc, &hosts, TRUE); for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) { pcmk_node_t *node = (pcmk_node_t *) hIter->data; if (pcmk__strcase_any_of(host, node->details->uname, node->details->id, NULL)) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } } if (host != NULL) { crm_trace("Resource %s is not running on: %s\n", rsc->id, host); found = false; } else if(host == NULL && hosts == NULL) { crm_trace("Resource %s is not running\n", rsc->id); found = false; } done: g_list_free(hosts); return found; } /*! * \internal * \brief Create a list of all resources active on host from a given list * * \param[in] host Name of host to check whether resources are active * \param[in] rsc_list List of resources to check * * \return New list of resources from list that are active on host */ static GList * get_active_resources(const char *host, GList *rsc_list) { GList *rIter = NULL; GList *active = NULL; for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) rIter->data; /* Expand groups to their members, because if we're restarting a member * other than the first, we can't otherwise tell which resources are * stopping and starting. */ if (rsc->variant == pcmk_rsc_variant_group) { active = g_list_concat(active, get_active_resources(host, rsc->children)); } else if (resource_is_running_on(rsc, host)) { active = g_list_append(active, strdup(rsc->id)); } } return active; } static void dump_list(GList *items, const char *tag) { int lpc = 0; GList *item = NULL; for (item = items; item != NULL; item = item->next) { crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data); lpc++; } } static void display_list(pcmk__output_t *out, GList *items, const char *tag) { GList *item = NULL; for (item = items; item != NULL; item = item->next) { out->info(out, "%s%s", tag, (const char *)item->data); } } /*! * \internal * \brief Upgrade XML to latest schema version and use it as scheduler input * * This also updates the scheduler timestamp to the current time. * * \param[in,out] scheduler Scheduler data to update * \param[in,out] xml XML to use as input * * \return Standard Pacemaker return code * \note On success, caller is responsible for freeing memory allocated for * scheduler->now. * \todo This follows the example of other callers of cli_config_update() * and returns ENOKEY ("Required key not available") if that fails, * but perhaps pcmk_rc_schema_validation would be better in that case. */ int update_scheduler_input(pcmk_scheduler_t *scheduler, xmlNode **xml) { if (cli_config_update(xml, NULL, FALSE) == FALSE) { return ENOKEY; } scheduler->input = *xml; scheduler->now = crm_time_new(NULL); return pcmk_rc_ok; } /*! * \internal * \brief Update scheduler XML input based on a CIB query * * \param[in] scheduler Scheduler data to initialize * \param[in] cib Connection to the CIB manager * * \return Standard Pacemaker return code * \note On success, caller is responsible for freeing memory allocated for * scheduler->input and scheduler->now. */ static int update_scheduler_input_to_cib(pcmk__output_t *out, pcmk_scheduler_t *scheduler, cib_t *cib) { xmlNode *cib_xml_copy = NULL; int rc = pcmk_rc_ok; rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { out->err(out, "Could not obtain the current CIB: %s (%d)", pcmk_rc_str(rc), rc); return rc; } rc = update_scheduler_input(scheduler, &cib_xml_copy); if (rc != pcmk_rc_ok) { out->err(out, "Could not upgrade the current CIB XML"); free_xml(cib_xml_copy); return rc; } return rc; } // \return Standard Pacemaker return code static int update_dataset(cib_t *cib, pcmk_scheduler_t *scheduler, bool simulate) { char *pid = NULL; char *shadow_file = NULL; cib_t *shadow_cib = NULL; int rc = pcmk_rc_ok; pcmk__output_t *out = scheduler->priv; pe_reset_working_set(scheduler); pcmk__set_scheduler_flags(scheduler, pcmk_sched_no_counts|pcmk_sched_no_compat); rc = update_scheduler_input_to_cib(out, scheduler, cib); if (rc != pcmk_rc_ok) { return rc; } if(simulate) { bool prev_quiet = false; pid = pcmk__getpid_s(); shadow_cib = cib_shadow_new(pid); shadow_file = get_shadow_file(pid); if (shadow_cib == NULL) { out->err(out, "Could not create shadow cib: '%s'", pid); rc = ENXIO; goto done; } rc = write_xml_file(scheduler->input, shadow_file, FALSE); if (rc < 0) { out->err(out, "Could not populate shadow cib: %s (%d)", pcmk_strerror(rc), rc); goto done; } rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { out->err(out, "Could not connect to shadow cib: %s (%d)", pcmk_rc_str(rc), rc); goto done; } pcmk__schedule_actions(scheduler->input, pcmk_sched_no_counts|pcmk_sched_no_compat, scheduler); prev_quiet = out->is_quiet(out); out->quiet = true; pcmk__simulate_transition(scheduler, shadow_cib, NULL); out->quiet = prev_quiet; rc = update_dataset(shadow_cib, scheduler, false); } else { cluster_status(scheduler); } done: // Do not free scheduler->input here, we need rsc->xml to be valid later on cib_delete(shadow_cib); free(pid); if(shadow_file) { unlink(shadow_file); free(shadow_file); } return rc; } /*! * \internal * \brief Find the maximum stop timeout of a resource and its children (if any) * * \param[in,out] rsc Resource to get timeout for * * \return Maximum stop timeout for \p rsc (in milliseconds) */ static int max_rsc_stop_timeout(pcmk_resource_t *rsc) { long long result_ll; int max_delay = 0; xmlNode *config = NULL; GHashTable *meta = NULL; if (rsc == NULL) { return 0; } // If resource is collective, use maximum of its children's stop timeouts if (rsc->children != NULL) { for (GList *iter = rsc->children; iter; iter = iter->next) { pcmk_resource_t *child = iter->data; int delay = max_rsc_stop_timeout(child); if (delay > max_delay) { pcmk__rsc_trace(rsc, "Maximum stop timeout for %s is now %s " "due to %s", rsc->id, pcmk__readable_interval(delay), child->id); max_delay = delay; } } return max_delay; } // Get resource's stop action configuration from CIB config = pcmk__find_action_config(rsc, PCMK_ACTION_STOP, 0, true); /* Get configured timeout for stop action (fully evaluated for rules, * defaults, etc.). * * @TODO This currently ignores node (which might matter for rules) */ meta = pcmk__unpack_action_meta(rsc, NULL, PCMK_ACTION_STOP, 0, config); if ((pcmk__scan_ll(g_hash_table_lookup(meta, PCMK_META_TIMEOUT), &result_ll, -1LL) == pcmk_rc_ok) && (result_ll >= 0) && (result_ll <= INT_MAX)) { max_delay = (int) result_ll; } g_hash_table_destroy(meta); return max_delay; } /*! * \internal * \brief Find a reasonable waiting time for stopping any one resource in a list * * \param[in,out] scheduler Scheduler data * \param[in] resources List of names of resources that will be stopped * * \return Rough estimate of a reasonable time to wait (in seconds) to stop any * one resource in \p resources * \note This estimate is very rough, simply the maximum stop timeout of all * given resources and their children, plus a small fudge factor. It does * not account for children that must be stopped in sequence, action * throttling, or any demotions needed. It checks the stop timeout, even * if the resources in question are actually being started. */ static int wait_time_estimate(pcmk_scheduler_t *scheduler, const GList *resources) { int max_delay = 0; // Find maximum stop timeout in milliseconds for (const GList *item = resources; item != NULL; item = item->next) { pcmk_resource_t *rsc = pe_find_resource(scheduler->resources, (const char *) item->data); int delay = max_rsc_stop_timeout(rsc); if (delay > max_delay) { pcmk__rsc_trace(rsc, "Wait time is now %s due to %s", pcmk__readable_interval(delay), rsc->id); max_delay = delay; } } return (max_delay / 1000) + 5; } #define waiting_for_starts(d, r, h) ((d != NULL) || \ (!resource_is_running_on((r), (h)))) /*! * \internal * \brief Restart a resource (on a particular host if requested). * * \param[in,out] out Output object * \param[in,out] rsc The resource to restart * \param[in] node Node to restart resource on (NULL for all) * \param[in] move_lifetime If not NULL, how long constraint should * remain in effect (as ISO 8601 string) * \param[in] timeout_ms Consider failed if actions do not complete * in this time (specified in milliseconds, * but a two-second granularity is actually * used; if 0, it will be calculated based on * the resource timeout) * \param[in,out] cib Connection to the CIB manager * \param[in] cib_options Group of enum cib_call_options flags to * use with CIB calls * \param[in] promoted_role_only If true, limit to promoted instances * \param[in] force If true, apply only to requested instance * if part of a collective resource * * \return Standard Pacemaker return code (exits on certain failures) */ int cli_resource_restart(pcmk__output_t *out, pcmk_resource_t *rsc, const pcmk_node_t *node, const char *move_lifetime, int timeout_ms, cib_t *cib, int cib_options, gboolean promoted_role_only, gboolean force) { int rc = pcmk_rc_ok; int lpc = 0; int before = 0; int step_timeout_s = 0; int sleep_interval = 2; int timeout = timeout_ms / 1000; bool stop_via_ban = false; char *rsc_id = NULL; char *lookup_id = NULL; char *orig_target_role = NULL; GList *list_delta = NULL; GList *target_active = NULL; GList *current_active = NULL; GList *restart_target_active = NULL; pcmk_scheduler_t *scheduler = NULL; pcmk_resource_t *parent = uber_parent(rsc); bool running = false; const char *id = rsc->clone_name ? rsc->clone_name : rsc->id; const char *host = node ? node->details->uname : NULL; /* If the implicit resource or primitive resource of a bundle is given, operate on the * bundle itself instead. */ if (pe_rsc_is_bundled(rsc)) { rsc = parent->parent; } running = resource_is_running_on(rsc, host); if (pe_rsc_is_clone(parent) && !running) { if (pe_rsc_is_unique_clone(parent)) { lookup_id = strdup(rsc->id); } else { lookup_id = clone_strip(rsc->id); } rsc = parent->fns->find_rsc(parent, lookup_id, node, pcmk_rsc_match_basename |pcmk_rsc_match_current_node); free(lookup_id); running = resource_is_running_on(rsc, host); } if (!running) { if (host) { out->err(out, "%s is not running on %s and so cannot be restarted", id, host); } else { out->err(out, "%s is not running anywhere and so cannot be restarted", id); } return ENXIO; } if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { out->err(out, "Unmanaged resources cannot be restarted."); return EAGAIN; } rsc_id = strdup(rsc->id); if (pe_rsc_is_unique_clone(parent)) { lookup_id = strdup(rsc->id); } else { lookup_id = clone_strip(rsc->id); } if (host) { if (pe_rsc_is_clone(rsc) || pe_bundle_replicas(rsc)) { stop_via_ban = true; } else if (pe_rsc_is_clone(parent)) { stop_via_ban = true; free(lookup_id); lookup_id = strdup(parent->id); } } /* grab full cib determine originally active resources disable or ban poll cib and watch for affected resources to get stopped without --timeout, calculate the stop timeout for each step and wait for that if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down if everything stopped, re-enable or un-ban poll cib and watch for affected resources to get started without --timeout, calculate the start timeout for each step and wait for that if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up report success Optimizations: - use constraints to determine ordered list of affected resources - Allow a --no-deps option (aka. --force-restart) */ scheduler = pe_new_working_set(); if (scheduler == NULL) { rc = errno; out->err(out, "Could not allocate scheduler data: %s", pcmk_rc_str(rc)); goto done; } scheduler->priv = out; rc = update_dataset(cib, scheduler, false); if(rc != pcmk_rc_ok) { out->err(out, "Could not get new resource list: %s (%d)", pcmk_rc_str(rc), rc); goto done; } restart_target_active = get_active_resources(host, scheduler->resources); current_active = get_active_resources(host, scheduler->resources); dump_list(current_active, "Origin"); if (stop_via_ban) { /* Stop the clone or bundle instance by banning it from the host */ out->quiet = true; rc = cli_resource_ban(out, lookup_id, host, move_lifetime, cib, cib_options, promoted_role_only, PCMK__ROLE_PROMOTED); } else { /* Stop the resource by setting PCMK_META_TARGET_ROLE to Stopped. * Remember any existing PCMK_META_TARGET_ROLE so we can restore it * later (though it only makes any difference if it's Unpromoted). */ find_resource_attr(out, cib, PCMK_XA_VALUE, lookup_id, NULL, NULL, NULL, PCMK_META_TARGET_ROLE, &orig_target_role); rc = cli_resource_update_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, PCMK_ACTION_STOPPED, FALSE, cib, cib_options, force); } if(rc != pcmk_rc_ok) { out->err(out, "Could not set " PCMK_META_TARGET_ROLE " for %s: %s (%d)", rsc_id, pcmk_rc_str(rc), rc); if (current_active != NULL) { g_list_free_full(current_active, free); current_active = NULL; } if (restart_target_active != NULL) { g_list_free_full(restart_target_active, free); restart_target_active = NULL; } goto done; } rc = update_dataset(cib, scheduler, true); if(rc != pcmk_rc_ok) { out->err(out, "Could not determine which resources would be stopped"); goto failure; } target_active = get_active_resources(host, scheduler->resources); dump_list(target_active, "Target"); list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp); out->info(out, "Waiting for %d resources to stop:", g_list_length(list_delta)); display_list(out, list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (list_delta != NULL) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = wait_time_estimate(scheduler, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for(lpc = 0; (lpc < step_timeout_s) && (list_delta != NULL); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, scheduler, FALSE); if(rc != pcmk_rc_ok) { out->err(out, "Could not determine which resources were stopped"); goto failure; } if (current_active != NULL) { g_list_free_full(current_active, free); } current_active = get_active_resources(host, scheduler->resources); g_list_free(list_delta); list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before); if(before == g_list_length(list_delta)) { /* aborted during stop phase, print the contents of list_delta */ out->err(out, "Could not complete shutdown of %s, %d resources remaining", rsc_id, g_list_length(list_delta)); display_list(out, list_delta, " * "); rc = ETIME; goto failure; } } if (stop_via_ban) { rc = cli_resource_clear(lookup_id, host, NULL, cib, cib_options, true, force); } else if (orig_target_role) { rc = cli_resource_update_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, orig_target_role, FALSE, cib, cib_options, force); free(orig_target_role); orig_target_role = NULL; } else { rc = cli_resource_delete_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, cib, cib_options, force); } if(rc != pcmk_rc_ok) { out->err(out, "Could not unset " PCMK_META_TARGET_ROLE " for %s: %s (%d)", rsc_id, pcmk_rc_str(rc), rc); goto done; } if (target_active != NULL) { g_list_free_full(target_active, free); } target_active = restart_target_active; list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp); out->info(out, "Waiting for %d resources to start again:", g_list_length(list_delta)); display_list(out, list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (waiting_for_starts(list_delta, rsc, host)) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = wait_time_estimate(scheduler, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, scheduler, false); if(rc != pcmk_rc_ok) { out->err(out, "Could not determine which resources were started"); goto failure; } /* It's OK if dependent resources moved to a different node, * so we check active resources on all nodes. */ if (current_active != NULL) { g_list_free_full(current_active, free); } current_active = get_active_resources(NULL, scheduler->resources); g_list_free(list_delta); list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } if(before == g_list_length(list_delta)) { /* aborted during start phase, print the contents of list_delta */ out->err(out, "Could not complete restart of %s, %d resources remaining", rsc_id, g_list_length(list_delta)); display_list(out, list_delta, " * "); rc = ETIME; goto failure; } } rc = pcmk_rc_ok; goto done; failure: if (stop_via_ban) { cli_resource_clear(lookup_id, host, NULL, cib, cib_options, true, force); } else if (orig_target_role) { cli_resource_update_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, orig_target_role, FALSE, cib, cib_options, force); free(orig_target_role); } else { cli_resource_delete_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, cib, cib_options, force); } done: if (list_delta != NULL) { g_list_free(list_delta); } if (current_active != NULL) { g_list_free_full(current_active, free); } if (target_active != NULL && (target_active != restart_target_active)) { g_list_free_full(target_active, free); } if (restart_target_active != NULL) { g_list_free_full(restart_target_active, free); } free(rsc_id); free(lookup_id); pe_free_working_set(scheduler); return rc; } static inline bool action_is_pending(const pcmk_action_t *action) { if (pcmk_any_flags_set(action->flags, pcmk_action_optional|pcmk_action_pseudo) || !pcmk_is_set(action->flags, pcmk_action_runnable) || pcmk__str_eq(PCMK_ACTION_NOTIFY, action->task, pcmk__str_casei)) { return false; } return true; } /*! * \internal * \brief Check whether any actions in a list are pending * * \param[in] actions List of actions to check * * \return true if any actions in the list are pending, otherwise false */ static bool actions_are_pending(const GList *actions) { for (const GList *action = actions; action != NULL; action = action->next) { const pcmk_action_t *a = (const pcmk_action_t *) action->data; if (action_is_pending(a)) { crm_notice("Waiting for %s (flags=%#.8x)", a->uuid, a->flags); return true; } } return false; } static void print_pending_actions(pcmk__output_t *out, GList *actions) { GList *action; out->info(out, "Pending actions:"); for (action = actions; action != NULL; action = action->next) { pcmk_action_t *a = (pcmk_action_t *) action->data; if (!action_is_pending(a)) { continue; } if (a->node) { out->info(out, "\tAction %d: %s\ton %s", a->id, a->uuid, pcmk__node_name(a->node)); } else { out->info(out, "\tAction %d: %s", a->id, a->uuid); } } } /* For --wait, timeout (in seconds) to use if caller doesn't specify one */ #define WAIT_DEFAULT_TIMEOUT_S (60 * 60) /* For --wait, how long to sleep between cluster state checks */ #define WAIT_SLEEP_S (2) /*! * \internal * \brief Wait until all pending cluster actions are complete * * This waits until either the CIB's transition graph is idle or a timeout is * reached. * * \param[in,out] out Output object * \param[in] timeout_ms Consider failed if actions do not complete in * this time (specified in milliseconds, but * one-second granularity is actually used; if 0, a * default will be used) * \param[in,out] cib Connection to the CIB manager * * \return Standard Pacemaker return code */ int wait_till_stable(pcmk__output_t *out, int timeout_ms, cib_t * cib) { pcmk_scheduler_t *scheduler = NULL; xmlXPathObjectPtr search; int rc = pcmk_rc_ok; bool pending_unknown_state_resources; int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S; time_t expire_time = time(NULL) + timeout_s; time_t time_diff; bool printed_version_warning = out->is_quiet(out); // i.e. don't print if quiet scheduler = pe_new_working_set(); if (scheduler == NULL) { return ENOMEM; } do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); if (time_diff > 0) { crm_info("Waiting up to %lld seconds for cluster actions to complete", (long long) time_diff); } else { print_pending_actions(out, scheduler->actions); pe_free_working_set(scheduler); return ETIME; } if (rc == pcmk_rc_ok) { /* this avoids sleep on first loop iteration */ sleep(WAIT_SLEEP_S); } /* Get latest transition graph */ pe_reset_working_set(scheduler); rc = update_scheduler_input_to_cib(out, scheduler, cib); if (rc != pcmk_rc_ok) { pe_free_working_set(scheduler); return rc; } pcmk__schedule_actions(scheduler->input, pcmk_sched_no_counts|pcmk_sched_no_compat, scheduler); if (!printed_version_warning) { /* If the DC has a different version than the local node, the two * could come to different conclusions about what actions need to be * done. Warn the user in this case. * * @TODO A possible long-term solution would be to reimplement the * wait as a new controller operation that would be forwarded to the * DC. However, that would have potential problems of its own. */ const char *dc_version = g_hash_table_lookup(scheduler->config_hash, PCMK_OPT_DC_VERSION); if (!pcmk__str_eq(dc_version, PACEMAKER_VERSION "-" BUILD_VERSION, pcmk__str_casei)) { out->info(out, "warning: wait option may not work properly in " "mixed-version cluster"); printed_version_warning = true; } } search = xpath_search(scheduler->input, "/" PCMK_XE_CIB "/" PCMK_XE_STATUS - "/" PCMK__XE_NODE_STATE "/" XML_CIB_TAG_LRM + "/" PCMK__XE_NODE_STATE "/" PCMK__XE_LRM "/" XML_LRM_TAG_RESOURCES "/" XML_LRM_TAG_RESOURCE "/" XML_LRM_TAG_RSC_OP "[@" PCMK__XA_RC_CODE "='193']"); pending_unknown_state_resources = (numXpathResults(search) > 0); freeXpathObject(search); } while (actions_are_pending(scheduler->actions) || pending_unknown_state_resources); pe_free_working_set(scheduler); return rc; } static const char * get_action(const char *rsc_action) { const char *action = NULL; if (pcmk__str_eq(rsc_action, "validate", pcmk__str_casei)) { action = PCMK_ACTION_VALIDATE_ALL; } else if (pcmk__str_eq(rsc_action, "force-check", pcmk__str_casei)) { action = PCMK_ACTION_MONITOR; } else if (pcmk__strcase_any_of(rsc_action, "force-start", "force-stop", "force-demote", "force-promote", NULL)) { action = rsc_action+6; } else { action = rsc_action; } return action; } /*! * \brief Set up environment variables as expected by resource agents * * When the cluster executes resource agents, it adds certain environment * variables (directly or via resource meta-attributes) expected by some * resource agents. Add the essential ones that many resource agents expect, so * the behavior is the same for command-line execution. * * \param[in,out] params Resource parameters that will be passed to agent * \param[in] timeout_ms Action timeout (in milliseconds) * \param[in] check_level OCF check level * \param[in] verbosity Verbosity level */ static void set_agent_environment(GHashTable *params, int timeout_ms, int check_level, int verbosity) { g_hash_table_insert(params, crm_meta_name(PCMK_META_TIMEOUT), crm_strdup_printf("%d", timeout_ms)); g_hash_table_insert(params, strdup(PCMK_XA_CRM_FEATURE_SET), strdup(CRM_FEATURE_SET)); if (check_level >= 0) { char *level = crm_strdup_printf("%d", check_level); setenv("OCF_CHECK_LEVEL", level, 1); free(level); } pcmk__set_env_option(PCMK__ENV_DEBUG, ((verbosity > 0)? "1" : "0"), true); if (verbosity > 1) { setenv("OCF_TRACE_RA", "1", 1); } /* A resource agent using the standard ocf-shellfuncs library will not print * messages to stderr if it doesn't have a controlling terminal (e.g. if * crm_resource is called via script or ssh). This forces it to do so. */ setenv("OCF_TRACE_FILE", "/dev/stderr", 0); } /*! * \internal * \brief Apply command-line overrides to resource parameters * * \param[in,out] params Parameters to be passed to agent * \param[in] overrides Parameters to override (or NULL if none) */ static void apply_overrides(GHashTable *params, GHashTable *overrides) { if (overrides != NULL) { GHashTableIter iter; char *name = NULL; char *value = NULL; g_hash_table_iter_init(&iter, overrides); while (g_hash_table_iter_next(&iter, (gpointer *) &name, (gpointer *) &value)) { g_hash_table_replace(params, strdup(name), strdup(value)); } } } crm_exit_t cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, const char *rsc_class, const char *rsc_prov, const char *rsc_type, const char *rsc_action, GHashTable *params, GHashTable *override_hash, int timeout_ms, int resource_verbose, gboolean force, int check_level) { const char *class = rsc_class; const char *action = get_action(rsc_action); crm_exit_t exit_code = CRM_EX_OK; svc_action_t *op = NULL; // If no timeout was provided, use the same default as the cluster if (timeout_ms == 0) { timeout_ms = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } set_agent_environment(params, timeout_ms, check_level, resource_verbose); apply_overrides(params, override_hash); op = services__create_resource_action(rsc_name? rsc_name : "test", rsc_class, rsc_prov, rsc_type, action, 0, timeout_ms, params, 0); if (op == NULL) { out->err(out, "Could not execute %s using %s%s%s:%s: %s", action, rsc_class, (rsc_prov? ":" : ""), (rsc_prov? rsc_prov : ""), rsc_type, strerror(ENOMEM)); g_hash_table_destroy(params); return CRM_EX_OSERR; } if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { class = resources_find_service_class(rsc_type); } if (!pcmk__strcase_any_of(class, PCMK_RESOURCE_CLASS_OCF, PCMK_RESOURCE_CLASS_LSB, NULL)) { services__format_result(op, CRM_EX_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR, "Manual execution of the %s standard is " "unsupported", pcmk__s(class, "unspecified")); } if (op->rc != PCMK_OCF_UNKNOWN) { exit_code = op->rc; goto done; } services_action_sync(op); // Map results to OCF codes for consistent reporting to user { enum ocf_exitcode ocf_code = services_result2ocf(class, action, op->rc); // Cast variable instead of function return to keep compilers happy exit_code = (crm_exit_t) ocf_code; } done: out->message(out, "resource-agent-action", resource_verbose, rsc_class, rsc_prov, rsc_type, rsc_name, rsc_action, override_hash, exit_code, op->status, services__exit_reason(op), op->stdout_data, op->stderr_data); services_action_free(op); return exit_code; } crm_exit_t cli_resource_execute(pcmk_resource_t *rsc, const char *requested_name, const char *rsc_action, GHashTable *override_hash, int timeout_ms, cib_t *cib, pcmk_scheduler_t *scheduler, int resource_verbose, gboolean force, int check_level) { pcmk__output_t *out = scheduler->priv; crm_exit_t exit_code = CRM_EX_OK; const char *rid = NULL; const char *rtype = NULL; const char *rprov = NULL; const char *rclass = NULL; GHashTable *params = NULL; if (pcmk__strcase_any_of(rsc_action, "force-start", "force-demote", "force-promote", NULL)) { if(pe_rsc_is_clone(rsc)) { GList *nodes = cli_resource_search(rsc, requested_name, scheduler); if(nodes != NULL && force == FALSE) { out->err(out, "It is not safe to %s %s here: the cluster claims it is already active", rsc_action, rsc->id); out->err(out, "Try setting " PCMK_META_TARGET_ROLE "=" PCMK__ROLE_STOPPED " first or specifying the force option"); return CRM_EX_UNSAFE; } g_list_free_full(nodes, free); } } if(pe_rsc_is_clone(rsc)) { /* Grab the first child resource in the hope it's not a group */ rsc = rsc->children->data; } if (rsc->variant == pcmk_rsc_variant_group) { out->err(out, "Sorry, the %s option doesn't support group resources", rsc_action); return CRM_EX_UNIMPLEMENT_FEATURE; } else if (pe_rsc_is_bundled(rsc)) { out->err(out, "Sorry, the %s option doesn't support bundled resources", rsc_action); return CRM_EX_UNIMPLEMENT_FEATURE; } rclass = crm_element_value(rsc->xml, PCMK_XA_CLASS); rprov = crm_element_value(rsc->xml, PCMK_XA_PROVIDER); rtype = crm_element_value(rsc->xml, PCMK_XA_TYPE); params = generate_resource_params(rsc, NULL /* @TODO use local node */, scheduler); if (timeout_ms == 0) { timeout_ms = pe_get_configured_timeout(rsc, get_action(rsc_action), scheduler); } rid = pe_rsc_is_anon_clone(rsc->parent)? requested_name : rsc->id; exit_code = cli_resource_execute_from_params(out, rid, rclass, rprov, rtype, rsc_action, params, override_hash, timeout_ms, resource_verbose, force, check_level); return exit_code; } // \return Standard Pacemaker return code int cli_resource_move(const pcmk_resource_t *rsc, const char *rsc_id, const char *host_name, const char *move_lifetime, cib_t *cib, int cib_options, pcmk_scheduler_t *scheduler, gboolean promoted_role_only, gboolean force) { pcmk__output_t *out = scheduler->priv; int rc = pcmk_rc_ok; unsigned int count = 0; pcmk_node_t *current = NULL; pcmk_node_t *dest = pe_find_node(scheduler->nodes, host_name); bool cur_is_dest = false; if (dest == NULL) { return pcmk_rc_node_unknown; } if (promoted_role_only && !pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) { const pcmk_resource_t *p = pe__const_top_resource(rsc, false); if (pcmk_is_set(p->flags, pcmk_rsc_promotable)) { out->info(out, "Using parent '%s' for move instead of '%s'.", rsc->id, rsc_id); rsc_id = p->id; rsc = p; } else { out->info(out, "Ignoring --promoted option: %s is not promotable", rsc_id); promoted_role_only = FALSE; } } current = pe__find_active_requires(rsc, &count); if (pcmk_is_set(rsc->flags, pcmk_rsc_promotable)) { unsigned int promoted_count = 0; pcmk_node_t *promoted_node = NULL; for (const GList *iter = rsc->children; iter; iter = iter->next) { const pcmk_resource_t *child = (const pcmk_resource_t *) iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if (child_role == pcmk_role_promoted) { rsc = child; promoted_node = pcmk__current_node(child); promoted_count++; } } if (promoted_role_only || (promoted_count != 0)) { count = promoted_count; current = promoted_node; } } if (count > 1) { if (pe_rsc_is_clone(rsc)) { current = NULL; } else { return pcmk_rc_multiple; } } if (pcmk__same_node(current, dest)) { cur_is_dest = true; if (force) { crm_info("%s is already %s on %s, reinforcing placement with location constraint.", rsc_id, promoted_role_only?"promoted":"active", pcmk__node_name(dest)); } else { return pcmk_rc_already; } } /* Clear any previous prefer constraints across all nodes. */ cli_resource_clear(rsc_id, NULL, scheduler->nodes, cib, cib_options, false, force); /* Clear any previous ban constraints on 'dest'. */ cli_resource_clear(rsc_id, dest->details->uname, scheduler->nodes, cib, cib_options, TRUE, force); /* Record an explicit preference for 'dest' */ rc = cli_resource_prefer(out, rsc_id, dest->details->uname, move_lifetime, cib, cib_options, promoted_role_only, PCMK__ROLE_PROMOTED); crm_trace("%s%s now prefers %s%s", rsc->id, (promoted_role_only? " (promoted)" : ""), pcmk__node_name(dest), force?"(forced)":""); /* only ban the previous location if current location != destination location. * it is possible to use -M to enforce a location without regard of where the * resource is currently located */ if (force && !cur_is_dest) { /* Ban the original location if possible */ if(current) { (void)cli_resource_ban(out, rsc_id, current->details->uname, move_lifetime, cib, cib_options, promoted_role_only, PCMK__ROLE_PROMOTED); } else if(count > 1) { out->info(out, "Resource '%s' is currently %s in %d locations. " "One may now move to %s", rsc_id, (promoted_role_only? "promoted" : "active"), count, pcmk__node_name(dest)); out->info(out, "To prevent '%s' from being %s at a specific location, " "specify a node.", rsc_id, (promoted_role_only? "promoted" : "active")); } else { crm_trace("Not banning %s from its current location: not active", rsc_id); } } return rc; }